1 /* _lzma - Low-level Python interface to liblzma.
2
3 Initial implementation by Per Øyvind Karlsen.
4 Rewritten by Nadeem Vawda.
5
6 */
7
8 #ifndef Py_BUILD_CORE_BUILTIN
9 # define Py_BUILD_CORE_MODULE 1
10 #endif
11
12 #include "Python.h"
13
14
15 #include <stdlib.h> // free()
16 #include <string.h>
17
18 #include <lzma.h>
19
20 // Blocks output buffer wrappers
21 #include "pycore_blocks_output_buffer.h"
22
23 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
24 #error "The maximum block size accepted by liblzma is SIZE_MAX."
25 #endif
26
27 /* On success, return value >= 0
28 On failure, return -1 */
29 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)30 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
31 uint8_t **next_out, size_t *avail_out)
32 {
33 Py_ssize_t allocated;
34
35 allocated = _BlocksOutputBuffer_InitAndGrow(
36 buffer, max_length, (void**) next_out);
37 *avail_out = (size_t) allocated;
38 return allocated;
39 }
40
41 /* On success, return value >= 0
42 On failure, return -1 */
43 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)44 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
45 uint8_t **next_out, size_t *avail_out)
46 {
47 Py_ssize_t allocated;
48
49 allocated = _BlocksOutputBuffer_Grow(
50 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
51 *avail_out = (size_t) allocated;
52 return allocated;
53 }
54
55 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)56 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
57 {
58 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
59 }
60
61 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)62 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
63 {
64 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
65 }
66
67 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)68 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
69 {
70 _BlocksOutputBuffer_OnError(buffer);
71 }
72
73
/* Take (obj)->lock.  A first attempt is made with a wait flag of 0; only if
   that fails is the acquisition retried with a wait flag of 1, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
81
82 typedef struct {
83 PyTypeObject *lzma_compressor_type;
84 PyTypeObject *lzma_decompressor_type;
85 PyObject *error;
86 PyObject *empty_tuple;
87 } _lzma_state;
88
89 static inline _lzma_state*
get_lzma_state(PyObject * module)90 get_lzma_state(PyObject *module)
91 {
92 void *state = PyModule_GetState(module);
93 assert(state != NULL);
94 return (_lzma_state *)state;
95 }
96
/* Container formats: */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3,
};

/* A value one greater than the largest check ID known to liblzma. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
106
107
108 typedef struct {
109 PyObject_HEAD
110 lzma_allocator alloc;
111 lzma_stream lzs;
112 int flushed;
113 PyThread_type_lock lock;
114 } Compressor;
115
116 typedef struct {
117 PyObject_HEAD
118 lzma_allocator alloc;
119 lzma_stream lzs;
120 int check;
121 char eof;
122 PyObject *unused_data;
123 char needs_input;
124 uint8_t *input_buffer;
125 size_t input_buffer_size;
126 PyThread_type_lock lock;
127 } Decompressor;
128
129 /* Helper functions. */
130
131 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)132 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
133 {
134 switch (lzret) {
135 case LZMA_OK:
136 case LZMA_GET_CHECK:
137 case LZMA_NO_CHECK:
138 case LZMA_STREAM_END:
139 return 0;
140 case LZMA_UNSUPPORTED_CHECK:
141 PyErr_SetString(state->error, "Unsupported integrity check");
142 return 1;
143 case LZMA_MEM_ERROR:
144 PyErr_NoMemory();
145 return 1;
146 case LZMA_MEMLIMIT_ERROR:
147 PyErr_SetString(state->error, "Memory usage limit exceeded");
148 return 1;
149 case LZMA_FORMAT_ERROR:
150 PyErr_SetString(state->error, "Input format not supported by decoder");
151 return 1;
152 case LZMA_OPTIONS_ERROR:
153 PyErr_SetString(state->error, "Invalid or unsupported options");
154 return 1;
155 case LZMA_DATA_ERROR:
156 PyErr_SetString(state->error, "Corrupt input data");
157 return 1;
158 case LZMA_BUF_ERROR:
159 PyErr_SetString(state->error, "Insufficient buffer space");
160 return 1;
161 case LZMA_PROG_ERROR:
162 PyErr_SetString(state->error, "Internal error");
163 return 1;
164 default:
165 PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
166 return 1;
167 }
168 }
169
170 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)171 PyLzma_Malloc(void *opaque, size_t items, size_t size)
172 {
173 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
174 return NULL;
175 }
176 /* PyMem_Malloc() cannot be used:
177 the GIL is not held when lzma_code() is called */
178 return PyMem_RawMalloc(items * size);
179 }
180
/* Deallocation callback for lzma_allocator; pairs with PyLzma_Malloc().
   `opaque` is not used. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    PyMem_RawFree(ptr);
}
186
187
188 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
189 since the predefined conversion specifiers do not suit our needs:
190
191 uint32_t - the "I" (unsigned int) specifier is the right size, but
192 silently ignores overflows on conversion.
193
194 lzma_vli - the "K" (unsigned long long) specifier is the right
195 size, but like "I" it silently ignores overflows on conversion.
196
197 lzma_mode and lzma_match_finder - these are enumeration types, and
198 so the size of each is implementation-defined. Worse, different
199 enum types can be of different sizes within the same program, so
200 to be strictly correct, we need to define two separate converters.
201 */
202
203 #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
204 static int \
205 FUNCNAME(PyObject *obj, void *ptr) \
206 { \
207 unsigned long long val; \
208 \
209 val = PyLong_AsUnsignedLongLong(obj); \
210 if (PyErr_Occurred()) \
211 return 0; \
212 if ((unsigned long long)(TYPE)val != val) { \
213 PyErr_SetString(PyExc_OverflowError, \
214 "Value too large for " #TYPE " type"); \
215 return 0; \
216 } \
217 *(TYPE *)ptr = (TYPE)val; \
218 return 1; \
219 }
220
INT_TYPE_CONVERTER_FUNC(uint32_t,uint32_converter)221 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
222 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
223 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
224 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
225
226 #undef INT_TYPE_CONVERTER_FUNC
227
228
229 /* Filter specifier parsing.
230
231 This code handles converting filter specifiers (Python dicts) into
232 the C lzma_filter structs expected by liblzma. */
233
234 static void *
235 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
236 {
237 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
238 "pb", "mode", "nice_len", "mf", "depth", NULL};
239 PyObject *id;
240 PyObject *preset_obj;
241 uint32_t preset = LZMA_PRESET_DEFAULT;
242 lzma_options_lzma *options;
243
244 /* First, fill in default values for all the options using a preset.
245 Then, override the defaults with any values given by the caller. */
246
247 if (PyMapping_GetOptionalItemString(spec, "preset", &preset_obj) < 0) {
248 return NULL;
249 }
250 if (preset_obj != NULL) {
251 int ok = uint32_converter(preset_obj, &preset);
252 Py_DECREF(preset_obj);
253 if (!ok) {
254 return NULL;
255 }
256 }
257
258 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
259 if (options == NULL) {
260 return PyErr_NoMemory();
261 }
262
263 if (lzma_lzma_preset(options, preset)) {
264 PyMem_Free(options);
265 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
266 return NULL;
267 }
268
269 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
270 "|OOO&O&O&O&O&O&O&O&", optnames,
271 &id, &preset_obj,
272 uint32_converter, &options->dict_size,
273 uint32_converter, &options->lc,
274 uint32_converter, &options->lp,
275 uint32_converter, &options->pb,
276 lzma_mode_converter, &options->mode,
277 uint32_converter, &options->nice_len,
278 lzma_mf_converter, &options->mf,
279 uint32_converter, &options->depth)) {
280 PyErr_SetString(PyExc_ValueError,
281 "Invalid filter specifier for LZMA filter");
282 PyMem_Free(options);
283 return NULL;
284 }
285
286 return options;
287 }
288
289 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)290 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
291 {
292 static char *optnames[] = {"id", "dist", NULL};
293 PyObject *id;
294 uint32_t dist = 1;
295 lzma_options_delta *options;
296
297 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
298 &id, uint32_converter, &dist)) {
299 PyErr_SetString(PyExc_ValueError,
300 "Invalid filter specifier for delta filter");
301 return NULL;
302 }
303
304 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
305 if (options == NULL) {
306 return PyErr_NoMemory();
307 }
308 options->type = LZMA_DELTA_TYPE_BYTE;
309 options->dist = dist;
310 return options;
311 }
312
313 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)314 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
315 {
316 static char *optnames[] = {"id", "start_offset", NULL};
317 PyObject *id;
318 uint32_t start_offset = 0;
319 lzma_options_bcj *options;
320
321 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
322 &id, uint32_converter, &start_offset)) {
323 PyErr_SetString(PyExc_ValueError,
324 "Invalid filter specifier for BCJ filter");
325 return NULL;
326 }
327
328 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
329 if (options == NULL) {
330 return PyErr_NoMemory();
331 }
332 options->start_offset = start_offset;
333 return options;
334 }
335
336 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)337 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
338 {
339 lzma_filter *f = (lzma_filter *)ptr;
340 PyObject *id_obj;
341
342 if (!PyMapping_Check(spec)) {
343 PyErr_SetString(PyExc_TypeError,
344 "Filter specifier must be a dict or dict-like object");
345 return 0;
346 }
347 if (PyMapping_GetOptionalItemString(spec, "id", &id_obj) < 0) {
348 return 0;
349 }
350 if (id_obj == NULL) {
351 PyErr_SetString(PyExc_ValueError,
352 "Filter specifier must have an \"id\" entry");
353 return 0;
354 }
355 f->id = PyLong_AsUnsignedLongLong(id_obj);
356 Py_DECREF(id_obj);
357 if (PyErr_Occurred()) {
358 return 0;
359 }
360
361 switch (f->id) {
362 case LZMA_FILTER_LZMA1:
363 case LZMA_FILTER_LZMA2:
364 f->options = parse_filter_spec_lzma(state, spec);
365 return f->options != NULL;
366 case LZMA_FILTER_DELTA:
367 f->options = parse_filter_spec_delta(state, spec);
368 return f->options != NULL;
369 case LZMA_FILTER_X86:
370 case LZMA_FILTER_POWERPC:
371 case LZMA_FILTER_IA64:
372 case LZMA_FILTER_ARM:
373 case LZMA_FILTER_ARMTHUMB:
374 case LZMA_FILTER_SPARC:
375 f->options = parse_filter_spec_bcj(state, spec);
376 return f->options != NULL;
377 default:
378 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
379 return 0;
380 }
381 }
382
383 static void
free_filter_chain(lzma_filter filters[])384 free_filter_chain(lzma_filter filters[])
385 {
386 for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
387 PyMem_Free(filters[i].options);
388 }
389 }
390
391 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)392 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
393 {
394 Py_ssize_t i, num_filters;
395
396 num_filters = PySequence_Length(filterspecs);
397 if (num_filters == -1) {
398 return -1;
399 }
400 if (num_filters > LZMA_FILTERS_MAX) {
401 PyErr_Format(PyExc_ValueError,
402 "Too many filters - liblzma supports a maximum of %d",
403 LZMA_FILTERS_MAX);
404 return -1;
405 }
406
407 for (i = 0; i < num_filters; i++) {
408 int ok = 1;
409 PyObject *spec = PySequence_GetItem(filterspecs, i);
410 if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
411 ok = 0;
412 }
413 Py_XDECREF(spec);
414 if (!ok) {
415 filters[i].id = LZMA_VLI_UNKNOWN;
416 free_filter_chain(filters);
417 return -1;
418 }
419 }
420 filters[num_filters].id = LZMA_VLI_UNKNOWN;
421 return 0;
422 }
423
424
425 /* Filter specifier construction.
426
427 This code handles converting C lzma_filter structs into
428 Python-level filter specifiers (represented as dicts). */
429
430 static int
spec_add_field(PyObject * spec,const char * key,unsigned long long value)431 spec_add_field(PyObject *spec, const char *key, unsigned long long value)
432 {
433 PyObject *value_object = PyLong_FromUnsignedLongLong(value);
434 if (value_object == NULL) {
435 return -1;
436 }
437 PyObject *key_object = PyUnicode_InternFromString(key);
438 if (key_object == NULL) {
439 Py_DECREF(value_object);
440 return -1;
441 }
442 int status = PyDict_SetItem(spec, key_object, value_object);
443 Py_DECREF(key_object);
444 Py_DECREF(value_object);
445 return status;
446 }
447
448 static PyObject *
build_filter_spec(const lzma_filter * f)449 build_filter_spec(const lzma_filter *f)
450 {
451 PyObject *spec;
452
453 spec = PyDict_New();
454 if (spec == NULL) {
455 return NULL;
456 }
457
458 #define ADD_FIELD(SOURCE, FIELD) \
459 do { \
460 if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
461 goto error;\
462 } while (0)
463
464 ADD_FIELD(f, id);
465
466 switch (f->id) {
467 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
468 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
469 dict_size field is used. */
470 case LZMA_FILTER_LZMA1: {
471 lzma_options_lzma *options = f->options;
472 ADD_FIELD(options, lc);
473 ADD_FIELD(options, lp);
474 ADD_FIELD(options, pb);
475 ADD_FIELD(options, dict_size);
476 break;
477 }
478 case LZMA_FILTER_LZMA2: {
479 lzma_options_lzma *options = f->options;
480 ADD_FIELD(options, dict_size);
481 break;
482 }
483 case LZMA_FILTER_DELTA: {
484 lzma_options_delta *options = f->options;
485 ADD_FIELD(options, dist);
486 break;
487 }
488 case LZMA_FILTER_X86:
489 case LZMA_FILTER_POWERPC:
490 case LZMA_FILTER_IA64:
491 case LZMA_FILTER_ARM:
492 case LZMA_FILTER_ARMTHUMB:
493 case LZMA_FILTER_SPARC: {
494 lzma_options_bcj *options = f->options;
495 if (options) {
496 ADD_FIELD(options, start_offset);
497 }
498 break;
499 }
500 default:
501 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
502 goto error;
503 }
504
505 #undef ADD_FIELD
506
507 return spec;
508
509 error:
510 Py_DECREF(spec);
511 return NULL;
512 }
513
514
515 /*[clinic input]
516 module _lzma
517 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
518 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
519 [clinic start generated code]*/
520 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
521
522 #include "clinic/_lzmamodule.c.h"
523
524 /*[python input]
525
526 class lzma_vli_converter(CConverter):
527 type = 'lzma_vli'
528 converter = 'lzma_vli_converter'
529
530 class lzma_filter_converter(CConverter):
531 type = 'lzma_filter'
532 converter = 'lzma_filter_converter'
533 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
534
535 def cleanup(self):
536 name = ensure_legal_c_identifier(self.name)
537 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
538 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
539
540 [python start generated code]*/
541 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
542
543
544 /* LZMACompressor class. */
545
546 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)547 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
548 {
549 PyObject *result;
550 _BlocksOutputBuffer buffer = {.list = NULL};
551 _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
552 assert(state != NULL);
553
554 if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
555 goto error;
556 }
557 c->lzs.next_in = data;
558 c->lzs.avail_in = len;
559
560 for (;;) {
561 lzma_ret lzret;
562
563 Py_BEGIN_ALLOW_THREADS
564 lzret = lzma_code(&c->lzs, action);
565 Py_END_ALLOW_THREADS
566
567 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
568 lzret = LZMA_OK; /* That wasn't a real error */
569 }
570 if (catch_lzma_error(state, lzret)) {
571 goto error;
572 }
573 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
574 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
575 break;
576 } else if (c->lzs.avail_out == 0) {
577 if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
578 goto error;
579 }
580 }
581 }
582
583 result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
584 if (result != NULL) {
585 return result;
586 }
587
588 error:
589 OutputBuffer_OnError(&buffer);
590 return NULL;
591 }
592
593 /*[clinic input]
594 _lzma.LZMACompressor.compress
595
596 data: Py_buffer
597 /
598
599 Provide data to the compressor object.
600
601 Returns a chunk of compressed data if possible, or b'' otherwise.
602
603 When you have finished providing data to the compressor, call the
604 flush() method to finish the compression process.
605 [clinic start generated code]*/
606
607 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)608 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
609 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
610 {
611 PyObject *result = NULL;
612
613 ACQUIRE_LOCK(self);
614 if (self->flushed) {
615 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
616 }
617 else {
618 result = compress(self, data->buf, data->len, LZMA_RUN);
619 }
620 RELEASE_LOCK(self);
621 return result;
622 }
623
624 /*[clinic input]
625 _lzma.LZMACompressor.flush
626
627 Finish the compression process.
628
629 Returns the compressed data left in internal buffers.
630
631 The compressor object may not be used after this method is called.
632 [clinic start generated code]*/
633
634 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)635 _lzma_LZMACompressor_flush_impl(Compressor *self)
636 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
637 {
638 PyObject *result = NULL;
639
640 ACQUIRE_LOCK(self);
641 if (self->flushed) {
642 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
643 } else {
644 self->flushed = 1;
645 result = compress(self, NULL, 0, LZMA_FINISH);
646 }
647 RELEASE_LOCK(self);
648 return result;
649 }
650
651 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)652 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
653 int check, uint32_t preset, PyObject *filterspecs)
654 {
655 lzma_ret lzret;
656
657 if (filterspecs == Py_None) {
658 lzret = lzma_easy_encoder(lzs, preset, check);
659 } else {
660 lzma_filter filters[LZMA_FILTERS_MAX + 1];
661
662 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
663 return -1;
664 lzret = lzma_stream_encoder(lzs, filters, check);
665 free_filter_chain(filters);
666 }
667 if (catch_lzma_error(state, lzret)) {
668 return -1;
669 }
670 else {
671 return 0;
672 }
673 }
674
675 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)676 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
677 {
678 lzma_ret lzret;
679
680 if (filterspecs == Py_None) {
681 lzma_options_lzma options;
682
683 if (lzma_lzma_preset(&options, preset)) {
684 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
685 return -1;
686 }
687 lzret = lzma_alone_encoder(lzs, &options);
688 } else {
689 lzma_filter filters[LZMA_FILTERS_MAX + 1];
690
691 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
692 return -1;
693 if (filters[0].id == LZMA_FILTER_LZMA1 &&
694 filters[1].id == LZMA_VLI_UNKNOWN) {
695 lzret = lzma_alone_encoder(lzs, filters[0].options);
696 } else {
697 PyErr_SetString(PyExc_ValueError,
698 "Invalid filter chain for FORMAT_ALONE - "
699 "must be a single LZMA1 filter");
700 lzret = LZMA_PROG_ERROR;
701 }
702 free_filter_chain(filters);
703 }
704 if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
705 return -1;
706 }
707 else {
708 return 0;
709 }
710 }
711
712 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)713 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
714 {
715 lzma_filter filters[LZMA_FILTERS_MAX + 1];
716 lzma_ret lzret;
717
718 if (filterspecs == Py_None) {
719 PyErr_SetString(PyExc_ValueError,
720 "Must specify filters for FORMAT_RAW");
721 return -1;
722 }
723 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
724 return -1;
725 }
726 lzret = lzma_raw_encoder(lzs, filters);
727 free_filter_chain(filters);
728 if (catch_lzma_error(state, lzret)) {
729 return -1;
730 }
731 else {
732 return 0;
733 }
734 }
735
736 /*[-clinic input]
737 @classmethod
738 _lzma.LZMACompressor.__new__
739
740 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
741 The container format to use for the output. This can
742 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
743
744 check: int(c_default="-1") = unspecified
745 The integrity check to use. For FORMAT_XZ, the default
746 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
747 checks; for these formats, check must be omitted, or be CHECK_NONE.
748
749 preset: object = None
750 If provided should be an integer in the range 0-9, optionally
751 OR-ed with the constant PRESET_EXTREME.
752
753 filters: object = None
754 If provided should be a sequence of dicts. Each dict should
755 have an entry for "id" indicating the ID of the filter, plus
756 additional entries for options to the filter.
757
758 Create a compressor object for compressing data incrementally.
759
760 The settings used by the compressor can be specified either as a
761 preset compression level (with the 'preset' argument), or in detail
762 as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
763 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
764 level. For FORMAT_RAW, the caller must always specify a filter chain;
765 the raw compressor does not support preset compression levels.
766
767 For one-shot compression, use the compress() function instead.
768 [-clinic start generated code]*/
769 static PyObject *
Compressor_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)770 Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
771 {
772 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
773 int format = FORMAT_XZ;
774 int check = -1;
775 uint32_t preset = LZMA_PRESET_DEFAULT;
776 PyObject *preset_obj = Py_None;
777 PyObject *filterspecs = Py_None;
778 Compressor *self;
779
780 _lzma_state *state = PyType_GetModuleState(type);
781 assert(state != NULL);
782 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
783 "|iiOO:LZMACompressor", arg_names,
784 &format, &check, &preset_obj,
785 &filterspecs)) {
786 return NULL;
787 }
788
789 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
790 PyErr_SetString(PyExc_ValueError,
791 "Integrity checks are only supported by FORMAT_XZ");
792 return NULL;
793 }
794
795 if (preset_obj != Py_None && filterspecs != Py_None) {
796 PyErr_SetString(PyExc_ValueError,
797 "Cannot specify both preset and filter chain");
798 return NULL;
799 }
800
801 if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) {
802 return NULL;
803 }
804
805 assert(type != NULL && type->tp_alloc != NULL);
806 self = (Compressor *)type->tp_alloc(type, 0);
807 if (self == NULL) {
808 return NULL;
809 }
810
811 self->alloc.opaque = NULL;
812 self->alloc.alloc = PyLzma_Malloc;
813 self->alloc.free = PyLzma_Free;
814 self->lzs.allocator = &self->alloc;
815
816 self->lock = PyThread_allocate_lock();
817 if (self->lock == NULL) {
818 Py_DECREF(self);
819 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
820 return NULL;
821 }
822
823 self->flushed = 0;
824 switch (format) {
825 case FORMAT_XZ:
826 if (check == -1) {
827 check = LZMA_CHECK_CRC64;
828 }
829 if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
830 goto error;
831 }
832 break;
833
834 case FORMAT_ALONE:
835 if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
836 goto error;
837 }
838 break;
839
840 case FORMAT_RAW:
841 if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
842 goto error;
843 }
844 break;
845
846 default:
847 PyErr_Format(PyExc_ValueError,
848 "Invalid container format: %d", format);
849 goto error;
850 }
851
852 return (PyObject *)self;
853
854 error:
855 Py_DECREF(self);
856 return NULL;
857 }
858
859 static void
Compressor_dealloc(Compressor * self)860 Compressor_dealloc(Compressor *self)
861 {
862 lzma_end(&self->lzs);
863 if (self->lock != NULL) {
864 PyThread_free_lock(self->lock);
865 }
866 PyTypeObject *tp = Py_TYPE(self);
867 tp->tp_free((PyObject *)self);
868 Py_DECREF(tp);
869 }
870
871 static PyMethodDef Compressor_methods[] = {
872 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
873 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
874 {NULL}
875 };
876
877 static int
Compressor_traverse(Compressor * self,visitproc visit,void * arg)878 Compressor_traverse(Compressor *self, visitproc visit, void *arg)
879 {
880 Py_VISIT(Py_TYPE(self));
881 return 0;
882 }
883
884 PyDoc_STRVAR(Compressor_doc,
885 "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
886 "\n"
887 "Create a compressor object for compressing data incrementally.\n"
888 "\n"
889 "format specifies the container format to use for the output. This can\n"
890 "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
891 "\n"
892 "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
893 "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
894 "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
895 "\n"
896 "The settings used by the compressor can be specified either as a\n"
897 "preset compression level (with the 'preset' argument), or in detail\n"
898 "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
899 "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
900 "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
901 "the raw compressor does not support preset compression levels.\n"
902 "\n"
903 "preset (if provided) should be an integer in the range 0-9, optionally\n"
904 "OR-ed with the constant PRESET_EXTREME.\n"
905 "\n"
906 "filters (if provided) should be a sequence of dicts. Each dict should\n"
907 "have an entry for \"id\" indicating the ID of the filter, plus\n"
908 "additional entries for options to the filter.\n"
909 "\n"
910 "For one-shot compression, use the compress() function instead.\n");
911
912 static PyType_Slot lzma_compressor_type_slots[] = {
913 {Py_tp_dealloc, Compressor_dealloc},
914 {Py_tp_methods, Compressor_methods},
915 {Py_tp_new, Compressor_new},
916 {Py_tp_doc, (char *)Compressor_doc},
917 {Py_tp_traverse, Compressor_traverse},
918 {0, 0}
919 };
920
921 static PyType_Spec lzma_compressor_type_spec = {
922 .name = "_lzma.LZMACompressor",
923 .basicsize = sizeof(Compressor),
924 // Calling PyType_GetModuleState() on a subclass is not safe.
925 // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
926 // which prevents to create a subclass.
927 // So calling PyType_GetModuleState() in this file is always safe.
928 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
929 .slots = lzma_compressor_type_slots,
930 };
931
932 /* LZMADecompressor class. */
933
934 /* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
935 buffer is allocated dynamically and returned. At most max_length bytes are
936 returned, so some of the input may not be consumed. d->lzs.next_in and
937 d->lzs.avail_in are updated to reflect the consumed input. */
938 static PyObject*
decompress_buf(Decompressor * d,Py_ssize_t max_length)939 decompress_buf(Decompressor *d, Py_ssize_t max_length)
940 {
941 PyObject *result;
942 lzma_stream *lzs = &d->lzs;
943 _BlocksOutputBuffer buffer = {.list = NULL};
944 _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
945 assert(state != NULL);
946
947 if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
948 goto error;
949 }
950
951 for (;;) {
952 lzma_ret lzret;
953
954 Py_BEGIN_ALLOW_THREADS
955 lzret = lzma_code(lzs, LZMA_RUN);
956 Py_END_ALLOW_THREADS
957
958 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
959 lzret = LZMA_OK; /* That wasn't a real error */
960 }
961 if (catch_lzma_error(state, lzret)) {
962 goto error;
963 }
964 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
965 d->check = lzma_get_check(&d->lzs);
966 }
967 if (lzret == LZMA_STREAM_END) {
968 d->eof = 1;
969 break;
970 } else if (lzs->avail_out == 0) {
971 /* Need to check lzs->avail_out before lzs->avail_in.
972 Maybe lzs's internal state still have a few bytes
973 can be output, grow the output buffer and continue
974 if max_lengh < 0. */
975 if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
976 break;
977 }
978 if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
979 goto error;
980 }
981 } else if (lzs->avail_in == 0) {
982 break;
983 }
984 }
985
986 result = OutputBuffer_Finish(&buffer, lzs->avail_out);
987 if (result != NULL) {
988 return result;
989 }
990
991 error:
992 OutputBuffer_OnError(&buffer);
993 return NULL;
994 }
995
996 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)997 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
998 {
999 char input_buffer_in_use;
1000 PyObject *result;
1001 lzma_stream *lzs = &d->lzs;
1002
1003 /* Prepend unconsumed input if necessary */
1004 if (lzs->next_in != NULL) {
1005 size_t avail_now, avail_total;
1006
1007 /* Number of bytes we can append to input buffer */
1008 avail_now = (d->input_buffer + d->input_buffer_size)
1009 - (lzs->next_in + lzs->avail_in);
1010
1011 /* Number of bytes we can append if we move existing
1012 contents to beginning of buffer (overwriting
1013 consumed input) */
1014 avail_total = d->input_buffer_size - lzs->avail_in;
1015
1016 if (avail_total < len) {
1017 size_t offset = lzs->next_in - d->input_buffer;
1018 uint8_t *tmp;
1019 size_t new_size = d->input_buffer_size + len - avail_now;
1020
1021 /* Assign to temporary variable first, so we don't
1022 lose address of allocated buffer if realloc fails */
1023 tmp = PyMem_Realloc(d->input_buffer, new_size);
1024 if (tmp == NULL) {
1025 PyErr_SetNone(PyExc_MemoryError);
1026 return NULL;
1027 }
1028 d->input_buffer = tmp;
1029 d->input_buffer_size = new_size;
1030
1031 lzs->next_in = d->input_buffer + offset;
1032 }
1033 else if (avail_now < len) {
1034 memmove(d->input_buffer, lzs->next_in,
1035 lzs->avail_in);
1036 lzs->next_in = d->input_buffer;
1037 }
1038 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1039 lzs->avail_in += len;
1040 input_buffer_in_use = 1;
1041 }
1042 else {
1043 lzs->next_in = data;
1044 lzs->avail_in = len;
1045 input_buffer_in_use = 0;
1046 }
1047
1048 result = decompress_buf(d, max_length);
1049 if (result == NULL) {
1050 lzs->next_in = NULL;
1051 return NULL;
1052 }
1053
1054 if (d->eof) {
1055 d->needs_input = 0;
1056 if (lzs->avail_in > 0) {
1057 Py_XSETREF(d->unused_data,
1058 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1059 if (d->unused_data == NULL) {
1060 goto error;
1061 }
1062 }
1063 }
1064 else if (lzs->avail_in == 0) {
1065 lzs->next_in = NULL;
1066
1067 if (lzs->avail_out == 0) {
1068 /* (avail_in==0 && avail_out==0)
1069 Maybe lzs's internal state still have a few bytes can
1070 be output, try to output them next time. */
1071 d->needs_input = 0;
1072
1073 /* If max_length < 0, lzs->avail_out always > 0 */
1074 assert(max_length >= 0);
1075 } else {
1076 /* Input buffer exhausted, output buffer has space. */
1077 d->needs_input = 1;
1078 }
1079 }
1080 else {
1081 d->needs_input = 0;
1082
1083 /* If we did not use the input buffer, we now have
1084 to copy the tail from the caller's buffer into the
1085 input buffer */
1086 if (!input_buffer_in_use) {
1087
1088 /* Discard buffer if it's too small
1089 (resizing it may needlessly copy the current contents) */
1090 if (d->input_buffer != NULL &&
1091 d->input_buffer_size < lzs->avail_in) {
1092 PyMem_Free(d->input_buffer);
1093 d->input_buffer = NULL;
1094 }
1095
1096 /* Allocate if necessary */
1097 if (d->input_buffer == NULL) {
1098 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1099 if (d->input_buffer == NULL) {
1100 PyErr_SetNone(PyExc_MemoryError);
1101 goto error;
1102 }
1103 d->input_buffer_size = lzs->avail_in;
1104 }
1105
1106 /* Copy tail */
1107 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1108 lzs->next_in = d->input_buffer;
1109 }
1110 }
1111
1112 return result;
1113
1114 error:
1115 Py_XDECREF(result);
1116 return NULL;
1117 }
1118
1119 /*[clinic input]
1120 _lzma.LZMADecompressor.decompress
1121
1122 data: Py_buffer
1123 max_length: Py_ssize_t=-1
1124
1125 Decompress *data*, returning uncompressed data as bytes.
1126
1127 If *max_length* is nonnegative, returns at most *max_length* bytes of
1128 decompressed data. If this limit is reached and further output can be
1129 produced, *self.needs_input* will be set to ``False``. In this case, the next
1130 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1131
1132 If all of the input data was decompressed and returned (either because this
1133 was less than *max_length* bytes, or because *max_length* was negative),
1134 *self.needs_input* will be set to True.
1135
1136 Attempting to decompress data after the end of stream is reached raises an
1137 EOFError. Any data found after the end of the stream is ignored and saved in
1138 the unused_data attribute.
1139 [clinic start generated code]*/
1140
1141 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1142 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1143 Py_ssize_t max_length)
1144 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1145 {
1146 PyObject *result = NULL;
1147
1148 ACQUIRE_LOCK(self);
1149 if (self->eof)
1150 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1151 else
1152 result = decompress(self, data->buf, data->len, max_length);
1153 RELEASE_LOCK(self);
1154 return result;
1155 }
1156
1157 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1158 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1159 {
1160 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1161 lzma_ret lzret;
1162
1163 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1164 return -1;
1165 }
1166 lzret = lzma_raw_decoder(lzs, filters);
1167 free_filter_chain(filters);
1168 if (catch_lzma_error(state, lzret)) {
1169 return -1;
1170 }
1171 else {
1172 return 0;
1173 }
1174 }
1175
1176 /*[clinic input]
1177 @classmethod
1178 _lzma.LZMADecompressor.__new__
1179
1180 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1181 Specifies the container format of the input stream. If this is
1182 FORMAT_AUTO (the default), the decompressor will automatically detect
1183 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1184 FORMAT_RAW cannot be autodetected.
1185
1186 memlimit: object = None
1187 Limit the amount of memory used by the decompressor. This will cause
1188 decompression to fail if the input cannot be decompressed within the
1189 given limit.
1190
1191 filters: object = None
1192 A custom filter chain. This argument is required for FORMAT_RAW, and
1193 not accepted with any other format. When provided, this should be a
1194 sequence of dicts, each indicating the ID and options for a single
1195 filter.
1196
1197 Create a decompressor object for decompressing data incrementally.
1198
1199 For one-shot decompression, use the decompress() function instead.
1200 [clinic start generated code]*/
1201
1202 static PyObject *
_lzma_LZMADecompressor_impl(PyTypeObject * type,int format,PyObject * memlimit,PyObject * filters)1203 _lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
1204 PyObject *memlimit, PyObject *filters)
1205 /*[clinic end generated code: output=2d46d5e70f10bc7f input=ca40cd1cb1202b0d]*/
1206 {
1207 Decompressor *self;
1208 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1209 uint64_t memlimit_ = UINT64_MAX;
1210 lzma_ret lzret;
1211 _lzma_state *state = PyType_GetModuleState(type);
1212 assert(state != NULL);
1213
1214 if (memlimit != Py_None) {
1215 if (format == FORMAT_RAW) {
1216 PyErr_SetString(PyExc_ValueError,
1217 "Cannot specify memory limit with FORMAT_RAW");
1218 return NULL;
1219 }
1220 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1221 if (PyErr_Occurred()) {
1222 return NULL;
1223 }
1224 }
1225
1226 if (format == FORMAT_RAW && filters == Py_None) {
1227 PyErr_SetString(PyExc_ValueError,
1228 "Must specify filters for FORMAT_RAW");
1229 return NULL;
1230 } else if (format != FORMAT_RAW && filters != Py_None) {
1231 PyErr_SetString(PyExc_ValueError,
1232 "Cannot specify filters except with FORMAT_RAW");
1233 return NULL;
1234 }
1235
1236 assert(type != NULL && type->tp_alloc != NULL);
1237 self = (Decompressor *)type->tp_alloc(type, 0);
1238 if (self == NULL) {
1239 return NULL;
1240 }
1241 self->alloc.opaque = NULL;
1242 self->alloc.alloc = PyLzma_Malloc;
1243 self->alloc.free = PyLzma_Free;
1244 self->lzs.allocator = &self->alloc;
1245 self->lzs.next_in = NULL;
1246
1247 self->lock = PyThread_allocate_lock();
1248 if (self->lock == NULL) {
1249 Py_DECREF(self);
1250 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1251 return NULL;
1252 }
1253
1254 self->check = LZMA_CHECK_UNKNOWN;
1255 self->needs_input = 1;
1256 self->input_buffer = NULL;
1257 self->input_buffer_size = 0;
1258 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1259 if (self->unused_data == NULL) {
1260 goto error;
1261 }
1262
1263 switch (format) {
1264 case FORMAT_AUTO:
1265 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1266 if (catch_lzma_error(state, lzret)) {
1267 goto error;
1268 }
1269 break;
1270
1271 case FORMAT_XZ:
1272 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1273 if (catch_lzma_error(state, lzret)) {
1274 goto error;
1275 }
1276 break;
1277
1278 case FORMAT_ALONE:
1279 self->check = LZMA_CHECK_NONE;
1280 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1281 if (catch_lzma_error(state, lzret)) {
1282 goto error;
1283 }
1284 break;
1285
1286 case FORMAT_RAW:
1287 self->check = LZMA_CHECK_NONE;
1288 if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1289 goto error;
1290 }
1291 break;
1292
1293 default:
1294 PyErr_Format(PyExc_ValueError,
1295 "Invalid container format: %d", format);
1296 goto error;
1297 }
1298
1299 return (PyObject *)self;
1300
1301 error:
1302 Py_DECREF(self);
1303 return NULL;
1304 }
1305
1306 static void
Decompressor_dealloc(Decompressor * self)1307 Decompressor_dealloc(Decompressor *self)
1308 {
1309 if(self->input_buffer != NULL)
1310 PyMem_Free(self->input_buffer);
1311
1312 lzma_end(&self->lzs);
1313 Py_CLEAR(self->unused_data);
1314 if (self->lock != NULL) {
1315 PyThread_free_lock(self->lock);
1316 }
1317 PyTypeObject *tp = Py_TYPE(self);
1318 tp->tp_free((PyObject *)self);
1319 Py_DECREF(tp);
1320 }
1321
/* tp_traverse: the only reference reported to the garbage collector is the
   instance's type; unused_data is not visited here. */
static int
Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1328
/* Method table of LZMADecompressor.  The single entry comes from a METHODDEF
   macro that is not defined in this part of the file (presumably generated
   by Argument Clinic -- confirm); {NULL} terminates the table. */
static PyMethodDef Decompressor_methods[] = {
    _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
    {NULL}
};
1333
/* Docstrings for the read-only attributes listed in Decompressor_members. */

PyDoc_STRVAR(Decompressor_check_doc,
"ID of the integrity check used by the input stream.");

PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");

PyDoc_STRVAR(Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");

PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");
1345
1346 static PyMemberDef Decompressor_members[] = {
1347 {"check", Py_T_INT, offsetof(Decompressor, check), Py_READONLY,
1348 Decompressor_check_doc},
1349 {"eof", Py_T_BOOL, offsetof(Decompressor, eof), Py_READONLY,
1350 Decompressor_eof_doc},
1351 {"needs_input", Py_T_BOOL, offsetof(Decompressor, needs_input), Py_READONLY,
1352 Decompressor_needs_input_doc},
1353 {"unused_data", Py_T_OBJECT_EX, offsetof(Decompressor, unused_data), Py_READONLY,
1354 Decompressor_unused_data_doc},
1355 {NULL}
1356 };
1357
1358 static PyType_Slot lzma_decompressor_type_slots[] = {
1359 {Py_tp_dealloc, Decompressor_dealloc},
1360 {Py_tp_methods, Decompressor_methods},
1361 {Py_tp_new, _lzma_LZMADecompressor},
1362 {Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
1363 {Py_tp_traverse, Decompressor_traverse},
1364 {Py_tp_members, Decompressor_members},
1365 {0, 0}
1366 };
1367
/* Spec from which lzma_exec() creates the LZMADecompressor type. */
static PyType_Spec lzma_decompressor_type_spec = {
    .name = "_lzma.LZMADecompressor",
    .basicsize = sizeof(Decompressor),
    // Calling PyType_GetModuleState() on a subclass is not safe.
    // lzma_decompressor_type_spec deliberately omits the Py_TPFLAGS_BASETYPE
    // flag, which prevents subclasses from being created.
    // Calling PyType_GetModuleState() in this file is therefore always safe.
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = lzma_decompressor_type_slots,
};
1378
1379
1380 /* Module-level functions. */
1381
1382 /*[clinic input]
1383 _lzma.is_check_supported
1384 check_id: int
1385 /
1386
1387 Test whether the given integrity check is supported.
1388
1389 Always returns True for CHECK_NONE and CHECK_CRC32.
1390 [clinic start generated code]*/
1391
1392 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1393 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1394 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1395 {
1396 return PyBool_FromLong(lzma_check_is_supported(check_id));
1397 }
1398
/* Docstring, method-table entry and implementation prototype for
   _encode_filter_properties().
   NOTE(review): these look like a hand-written counterpart of what Argument
   Clinic generates for the other functions in this file -- confirm. */
PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
"_encode_filter_properties($module, filter, /)\n"
"--\n"
"\n"
"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
"\n"
"The result does not include the filter ID itself, only the options.");

/* METH_O entry: the wrapper receives the single positional argument as-is. */
#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
    {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},

/* Forward declaration: the wrapper that calls this is defined first. */
static PyObject *
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1412
1413 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1414 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1415 {
1416 PyObject *return_value = NULL;
1417 lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1418 _lzma_state *state = get_lzma_state(module);
1419 assert(state != NULL);
1420 if (!lzma_filter_converter(state, arg, &filter)) {
1421 goto exit;
1422 }
1423 return_value = _lzma__encode_filter_properties_impl(module, filter);
1424
1425 exit:
1426 /* Cleanup for filter */
1427 if (filter.id != LZMA_VLI_UNKNOWN) {
1428 PyMem_Free(filter.options);
1429 }
1430
1431 return return_value;
1432 }
1433
1434 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1435 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1436 {
1437 lzma_ret lzret;
1438 uint32_t encoded_size;
1439 PyObject *result = NULL;
1440 _lzma_state *state = get_lzma_state(module);
1441 assert(state != NULL);
1442
1443 lzret = lzma_properties_size(&encoded_size, &filter);
1444 if (catch_lzma_error(state, lzret))
1445 goto error;
1446
1447 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1448 if (result == NULL)
1449 goto error;
1450
1451 lzret = lzma_properties_encode(
1452 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1453 if (catch_lzma_error(state, lzret)) {
1454 goto error;
1455 }
1456
1457 return result;
1458
1459 error:
1460 Py_XDECREF(result);
1461 return NULL;
1462 }
1463
1464
1465 /*[clinic input]
1466 _lzma._decode_filter_properties
1467 filter_id: lzma_vli
1468 encoded_props: Py_buffer
1469 /
1470
1471 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1472
1473 The result does not include the filter ID itself, only the options.
1474 [clinic start generated code]*/
1475
1476 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1477 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1478 Py_buffer *encoded_props)
1479 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1480 {
1481 lzma_filter filter;
1482 lzma_ret lzret;
1483 PyObject *result = NULL;
1484 filter.id = filter_id;
1485 _lzma_state *state = get_lzma_state(module);
1486 assert(state != NULL);
1487
1488 lzret = lzma_properties_decode(
1489 &filter, NULL, encoded_props->buf, encoded_props->len);
1490 if (catch_lzma_error(state, lzret)) {
1491 return NULL;
1492 }
1493
1494 result = build_filter_spec(&filter);
1495
1496 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1497 allocated by lzma_properties_decode() using the default allocator. */
1498 free(filter.options);
1499 return result;
1500 }
1501
1502 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1503 would not work correctly on platforms with 32-bit longs. */
1504 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1505 module_add_int_constant(PyObject *m, const char *name, long long value)
1506 {
1507 return PyModule_Add(m, name, PyLong_FromLongLong(value));
1508 }
1509
/* Py_mod_exec slot: populate a freshly created module with its integer
   constants, the LZMAError exception and the compressor/decompressor types,
   and record the created objects in the module state.
   Returns 0 on success, -1 (leaving the function at the first failure)
   otherwise. */
static int
lzma_exec(PyObject *module)
{
/* Add the constant LZMA_<macro> to the module under the name <macro>,
   going through module_add_int_constant() so that wide values survive. */
#define ADD_INT_PREFIX_MACRO(module, macro)                                 \
    do {                                                                    \
        if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) {  \
            return -1;                                                      \
        }                                                                   \
    } while(0)

/* Add the constant <macro> to the module under its own name. */
#define ADD_INT_MACRO(module, macro)                                        \
    do {                                                                    \
        if (PyModule_AddIntMacro(module, macro) < 0) {                      \
            return -1;                                                      \
        }                                                                   \
    } while (0)


    _lzma_state *state = get_lzma_state(module);

    state->empty_tuple = PyTuple_New(0);
    if (state->empty_tuple == NULL) {
        return -1;
    }

    /* Container formats (names carry no LZMA_ prefix). */
    ADD_INT_MACRO(module, FORMAT_AUTO);
    ADD_INT_MACRO(module, FORMAT_XZ);
    ADD_INT_MACRO(module, FORMAT_ALONE);
    ADD_INT_MACRO(module, FORMAT_RAW);
    /* liblzma constants, exported without their LZMA_ prefix. */
    ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
    ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
    ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
    ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
    ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
    ADD_INT_PREFIX_MACRO(module, FILTER_X86);
    ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
    ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
    ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
    ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
    ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
    ADD_INT_PREFIX_MACRO(module, MF_HC3);
    ADD_INT_PREFIX_MACRO(module, MF_HC4);
    ADD_INT_PREFIX_MACRO(module, MF_BT2);
    ADD_INT_PREFIX_MACRO(module, MF_BT3);
    ADD_INT_PREFIX_MACRO(module, MF_BT4);
    ADD_INT_PREFIX_MACRO(module, MODE_FAST);
    ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
    ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
    ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);

    /* Exception type; kept in the module state and added to the module. */
    state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
    if (state->error == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
        return -1;
    }


    /* Compressor type, created from its spec and bound to this module. */
    state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
                                                            &lzma_compressor_type_spec, NULL);
    if (state->lzma_compressor_type == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
        return -1;
    }

    /* Decompressor type, created the same way. */
    state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
                                                         &lzma_decompressor_type_spec, NULL);
    if (state->lzma_decompressor_type == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
        return -1;
    }

    return 0;
}
1596
/* Module-level function table.  Each entry is supplied by a METHODDEF macro;
   {NULL} terminates the table. */
static PyMethodDef lzma_methods[] = {
    _LZMA_IS_CHECK_SUPPORTED_METHODDEF
    _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
    _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
    {NULL}
};
1603
1604 static PyModuleDef_Slot lzma_slots[] = {
1605 {Py_mod_exec, lzma_exec},
1606 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1607 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1608 {0, NULL}
1609 };
1610
1611 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1612 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1613 {
1614 _lzma_state *state = get_lzma_state(module);
1615 Py_VISIT(state->lzma_compressor_type);
1616 Py_VISIT(state->lzma_decompressor_type);
1617 Py_VISIT(state->error);
1618 Py_VISIT(state->empty_tuple);
1619 return 0;
1620 }
1621
1622 static int
lzma_clear(PyObject * module)1623 lzma_clear(PyObject *module)
1624 {
1625 _lzma_state *state = get_lzma_state(module);
1626 Py_CLEAR(state->lzma_compressor_type);
1627 Py_CLEAR(state->lzma_decompressor_type);
1628 Py_CLEAR(state->error);
1629 Py_CLEAR(state->empty_tuple);
1630 return 0;
1631 }
1632
1633 static void
lzma_free(void * module)1634 lzma_free(void *module)
1635 {
1636 lzma_clear((PyObject *)module);
1637 }
1638
/* Module definition for _lzma: per-module state of type _lzma_state, the
   module-level function table, the slot table, and the traverse/clear/free
   hooks that manage the objects stored in the state. */
static PyModuleDef _lzmamodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_lzma",
    .m_size = sizeof(_lzma_state),
    .m_methods = lzma_methods,
    .m_slots = lzma_slots,
    .m_traverse = lzma_traverse,
    .m_clear = lzma_clear,
    .m_free = lzma_free,
};
1649
1650 PyMODINIT_FUNC
PyInit__lzma(void)1651 PyInit__lzma(void)
1652 {
1653 return PyModuleDef_Init(&_lzmamodule);
1654 }
1655