1 /* _lzma - Low-level Python interface to liblzma.
2
3 Initial implementation by Per Øyvind Karlsen.
4 Rewritten by Nadeem Vawda.
5
6 */
7
8 #define PY_SSIZE_T_CLEAN
9
10 #include "Python.h"
11 #include "structmember.h" // PyMemberDef
12
13 #include <stdarg.h>
14 #include <string.h>
15
16 #include <lzma.h>
17
18 // Blocks output buffer wrappers
19 #include "pycore_blocks_output_buffer.h"
20
21 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
22 #error "The maximum block size accepted by liblzma is SIZE_MAX."
23 #endif
24
25 /* On success, return value >= 0
26 On failure, return -1 */
27 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)28 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
29 uint8_t **next_out, size_t *avail_out)
30 {
31 Py_ssize_t allocated;
32
33 allocated = _BlocksOutputBuffer_InitAndGrow(
34 buffer, max_length, (void**) next_out);
35 *avail_out = (size_t) allocated;
36 return allocated;
37 }
38
39 /* On success, return value >= 0
40 On failure, return -1 */
41 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)42 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
43 uint8_t **next_out, size_t *avail_out)
44 {
45 Py_ssize_t allocated;
46
47 allocated = _BlocksOutputBuffer_Grow(
48 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
49 *avail_out = (size_t) allocated;
50 return allocated;
51 }
52
53 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)54 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
55 {
56 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
57 }
58
59 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)60 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
61 {
62 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
63 }
64
65 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)66 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
67 {
68 _BlocksOutputBuffer_OnError(buffer);
69 }
70
71
/* Acquire (obj)->lock.  A non-waiting attempt (second argument 0) is made
   first; only if that fails is the waiting acquire performed, and that
   call is bracketed by Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
79
/* Per-module state, retrieved through get_lzma_state() or
   PyType_GetModuleState(). */
typedef struct {
    /* NOTE(review): presumably the LZMACompressor / LZMADecompressor type
       objects; they are not assigned in the visible part of the file. */
    PyTypeObject *lzma_compressor_type;
    PyTypeObject *lzma_decompressor_type;
    /* Exception type used by catch_lzma_error() and the preset checks. */
    PyObject *error;
    /* Passed as the positional-arguments tuple when a filter specifier
       mapping is parsed with PyArg_ParseTupleAndKeywords(). */
    PyObject *empty_tuple;
} _lzma_state;
86
87 static inline _lzma_state*
get_lzma_state(PyObject * module)88 get_lzma_state(PyObject *module)
89 {
90 void *state = PyModule_GetState(module);
91 assert(state != NULL);
92 return (_lzma_state *)state;
93 }
94
/* Container formats.  Compressor_init() accepts FORMAT_XZ, FORMAT_ALONE
   and FORMAT_RAW and rejects every other value.
   NOTE(review): FORMAT_AUTO is not used in the visible part of the file;
   presumably it is a decompressor-only setting -- confirm. */
enum {
    FORMAT_AUTO,
    FORMAT_XZ,
    FORMAT_ALONE,
    FORMAT_RAW,
};
102
/* One greater than liblzma's LZMA_CHECK_ID_MAX, i.e. a value outside the
   range of real check IDs.
   NOTE(review): presumably a "check type not known yet" sentinel; its use
   is not visible in this part of the file. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
104
105
/* Instance layout of _lzma.LZMACompressor. */
typedef struct {
    PyObject_HEAD
    /* Allocator callbacks (PyLzma_Malloc / PyLzma_Free), filled in by
       Compressor_init() and referenced from lzs.allocator. */
    lzma_allocator alloc;
    /* liblzma stream state driven by compress(). */
    lzma_stream lzs;
    /* Set to 1 by flush(); compress() and flush() raise ValueError once
       it is set. */
    int flushed;
    /* Held (via ACQUIRE_LOCK / RELEASE_LOCK) around compress() and
       flush(); freed in Compressor_dealloc(). */
    PyThread_type_lock lock;
} Compressor;
113
/* Instance layout of _lzma.LZMADecompressor. */
typedef struct {
    PyObject_HEAD
    /* NOTE(review): presumably set up like Compressor.alloc; the
       decompressor initialiser is not visible here. */
    lzma_allocator alloc;
    /* liblzma stream state driven by decompress_buf(). */
    lzma_stream lzs;
    /* Updated from lzma_get_check() when lzma_code() reports
       LZMA_GET_CHECK or LZMA_NO_CHECK. */
    int check;
    /* Set to 1 once lzma_code() returns LZMA_STREAM_END. */
    char eof;
    /* Bytes object holding input left over after the end of the stream. */
    PyObject *unused_data;
    /* Maintained by decompress(): 1 when the input was exhausted while
       output space remained, 0 otherwise. */
    char needs_input;
    /* Private buffer holding input that was not consumed by a previous
       decompress() call, and its allocated size. */
    uint8_t *input_buffer;
    size_t input_buffer_size;
    /* NOTE(review): presumably guards decompress() like Compressor.lock;
       the locking code is not visible here. */
    PyThread_type_lock lock;
} Decompressor;
126
127 /* Helper functions. */
128
129 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)130 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
131 {
132 switch (lzret) {
133 case LZMA_OK:
134 case LZMA_GET_CHECK:
135 case LZMA_NO_CHECK:
136 case LZMA_STREAM_END:
137 return 0;
138 case LZMA_UNSUPPORTED_CHECK:
139 PyErr_SetString(state->error, "Unsupported integrity check");
140 return 1;
141 case LZMA_MEM_ERROR:
142 PyErr_NoMemory();
143 return 1;
144 case LZMA_MEMLIMIT_ERROR:
145 PyErr_SetString(state->error, "Memory usage limit exceeded");
146 return 1;
147 case LZMA_FORMAT_ERROR:
148 PyErr_SetString(state->error, "Input format not supported by decoder");
149 return 1;
150 case LZMA_OPTIONS_ERROR:
151 PyErr_SetString(state->error, "Invalid or unsupported options");
152 return 1;
153 case LZMA_DATA_ERROR:
154 PyErr_SetString(state->error, "Corrupt input data");
155 return 1;
156 case LZMA_BUF_ERROR:
157 PyErr_SetString(state->error, "Insufficient buffer space");
158 return 1;
159 case LZMA_PROG_ERROR:
160 PyErr_SetString(state->error, "Internal error");
161 return 1;
162 default:
163 PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
164 return 1;
165 }
166 }
167
168 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)169 PyLzma_Malloc(void *opaque, size_t items, size_t size)
170 {
171 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
172 return NULL;
173 }
174 /* PyMem_Malloc() cannot be used:
175 the GIL is not held when lzma_code() is called */
176 return PyMem_RawMalloc(items * size);
177 }
178
/* lzma_allocator.free callback: release memory obtained from
   PyLzma_Malloc().  `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
184
185
186 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
187 since the predefined conversion specifiers do not suit our needs:
188
189 uint32_t - the "I" (unsigned int) specifier is the right size, but
190 silently ignores overflows on conversion.
191
192 lzma_vli - the "K" (unsigned long long) specifier is the right
193 size, but like "I" it silently ignores overflows on conversion.
194
195 lzma_mode and lzma_match_finder - these are enumeration types, and
196 so the size of each is implementation-defined. Worse, different
197 enum types can be of different sizes within the same program, so
198 to be strictly correct, we need to define two separate converters.
199 */
200
/* Define FUNCNAME(obj, ptr), usable as an "O&" converter:
   obj is converted with PyLong_AsUnsignedLongLong(); a value that does
   not survive a round trip through TYPE raises OverflowError; otherwise
   the value is stored in *(TYPE *)ptr.  The generated function returns 1
   on success and 0 (with an exception set) on failure. */
#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
    static int \
    FUNCNAME(PyObject *obj, void *ptr) \
    { \
        unsigned long long val; \
        \
        val = PyLong_AsUnsignedLongLong(obj); \
        if (PyErr_Occurred()) \
            return 0; \
        if ((unsigned long long)(TYPE)val != val) { \
            PyErr_SetString(PyExc_OverflowError, \
                            "Value too large for " #TYPE " type"); \
            return 0; \
        } \
        *(TYPE *)ptr = (TYPE)val; \
        return 1; \
    }

/* One converter per destination type used by this module. */
INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)

#undef INT_TYPE_CONVERTER_FUNC
225
226
227 /* Filter specifier parsing.
228
229 This code handles converting filter specifiers (Python dicts) into
230 the C lzma_filter structs expected by liblzma. */
231
232 static void *
233 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
234 {
235 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
236 "pb", "mode", "nice_len", "mf", "depth", NULL};
237 PyObject *id;
238 PyObject *preset_obj;
239 uint32_t preset = LZMA_PRESET_DEFAULT;
240 lzma_options_lzma *options;
241
242 /* First, fill in default values for all the options using a preset.
243 Then, override the defaults with any values given by the caller. */
244
245 preset_obj = PyMapping_GetItemString(spec, "preset");
246 if (preset_obj == NULL) {
247 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
248 PyErr_Clear();
249 }
250 else {
251 return NULL;
252 }
253 } else {
254 int ok = uint32_converter(preset_obj, &preset);
255 Py_DECREF(preset_obj);
256 if (!ok) {
257 return NULL;
258 }
259 }
260
261 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
262 if (options == NULL) {
263 return PyErr_NoMemory();
264 }
265
266 if (lzma_lzma_preset(options, preset)) {
267 PyMem_Free(options);
268 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
269 return NULL;
270 }
271
272 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
273 "|OOO&O&O&O&O&O&O&O&", optnames,
274 &id, &preset_obj,
275 uint32_converter, &options->dict_size,
276 uint32_converter, &options->lc,
277 uint32_converter, &options->lp,
278 uint32_converter, &options->pb,
279 lzma_mode_converter, &options->mode,
280 uint32_converter, &options->nice_len,
281 lzma_mf_converter, &options->mf,
282 uint32_converter, &options->depth)) {
283 PyErr_SetString(PyExc_ValueError,
284 "Invalid filter specifier for LZMA filter");
285 PyMem_Free(options);
286 return NULL;
287 }
288
289 return options;
290 }
291
292 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)293 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
294 {
295 static char *optnames[] = {"id", "dist", NULL};
296 PyObject *id;
297 uint32_t dist = 1;
298 lzma_options_delta *options;
299
300 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
301 &id, uint32_converter, &dist)) {
302 PyErr_SetString(PyExc_ValueError,
303 "Invalid filter specifier for delta filter");
304 return NULL;
305 }
306
307 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
308 if (options == NULL) {
309 return PyErr_NoMemory();
310 }
311 options->type = LZMA_DELTA_TYPE_BYTE;
312 options->dist = dist;
313 return options;
314 }
315
316 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)317 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
318 {
319 static char *optnames[] = {"id", "start_offset", NULL};
320 PyObject *id;
321 uint32_t start_offset = 0;
322 lzma_options_bcj *options;
323
324 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
325 &id, uint32_converter, &start_offset)) {
326 PyErr_SetString(PyExc_ValueError,
327 "Invalid filter specifier for BCJ filter");
328 return NULL;
329 }
330
331 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
332 if (options == NULL) {
333 return PyErr_NoMemory();
334 }
335 options->start_offset = start_offset;
336 return options;
337 }
338
339 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)340 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
341 {
342 lzma_filter *f = (lzma_filter *)ptr;
343 PyObject *id_obj;
344
345 if (!PyMapping_Check(spec)) {
346 PyErr_SetString(PyExc_TypeError,
347 "Filter specifier must be a dict or dict-like object");
348 return 0;
349 }
350 id_obj = PyMapping_GetItemString(spec, "id");
351 if (id_obj == NULL) {
352 if (PyErr_ExceptionMatches(PyExc_KeyError))
353 PyErr_SetString(PyExc_ValueError,
354 "Filter specifier must have an \"id\" entry");
355 return 0;
356 }
357 f->id = PyLong_AsUnsignedLongLong(id_obj);
358 Py_DECREF(id_obj);
359 if (PyErr_Occurred()) {
360 return 0;
361 }
362
363 switch (f->id) {
364 case LZMA_FILTER_LZMA1:
365 case LZMA_FILTER_LZMA2:
366 f->options = parse_filter_spec_lzma(state, spec);
367 return f->options != NULL;
368 case LZMA_FILTER_DELTA:
369 f->options = parse_filter_spec_delta(state, spec);
370 return f->options != NULL;
371 case LZMA_FILTER_X86:
372 case LZMA_FILTER_POWERPC:
373 case LZMA_FILTER_IA64:
374 case LZMA_FILTER_ARM:
375 case LZMA_FILTER_ARMTHUMB:
376 case LZMA_FILTER_SPARC:
377 f->options = parse_filter_spec_bcj(state, spec);
378 return f->options != NULL;
379 default:
380 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
381 return 0;
382 }
383 }
384
385 static void
free_filter_chain(lzma_filter filters[])386 free_filter_chain(lzma_filter filters[])
387 {
388 for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
389 PyMem_Free(filters[i].options);
390 }
391 }
392
393 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)394 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
395 {
396 Py_ssize_t i, num_filters;
397
398 num_filters = PySequence_Length(filterspecs);
399 if (num_filters == -1) {
400 return -1;
401 }
402 if (num_filters > LZMA_FILTERS_MAX) {
403 PyErr_Format(PyExc_ValueError,
404 "Too many filters - liblzma supports a maximum of %d",
405 LZMA_FILTERS_MAX);
406 return -1;
407 }
408
409 for (i = 0; i < num_filters; i++) {
410 int ok = 1;
411 PyObject *spec = PySequence_GetItem(filterspecs, i);
412 if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
413 ok = 0;
414 }
415 Py_XDECREF(spec);
416 if (!ok) {
417 filters[i].id = LZMA_VLI_UNKNOWN;
418 free_filter_chain(filters);
419 return -1;
420 }
421 }
422 filters[num_filters].id = LZMA_VLI_UNKNOWN;
423 return 0;
424 }
425
426
427 /* Filter specifier construction.
428
429 This code handles converting C lzma_filter structs into
430 Python-level filter specifiers (represented as dicts). */
431
432 static int
spec_add_field(PyObject * spec,_Py_Identifier * key,unsigned long long value)433 spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
434 {
435 int status;
436 PyObject *value_object;
437
438 value_object = PyLong_FromUnsignedLongLong(value);
439 if (value_object == NULL) {
440 return -1;
441 }
442
443 status = _PyDict_SetItemId(spec, key, value_object);
444 Py_DECREF(value_object);
445 return status;
446 }
447
448 static PyObject *
build_filter_spec(const lzma_filter * f)449 build_filter_spec(const lzma_filter *f)
450 {
451 PyObject *spec;
452
453 spec = PyDict_New();
454 if (spec == NULL) {
455 return NULL;
456 }
457
458 #define ADD_FIELD(SOURCE, FIELD) \
459 do { \
460 _Py_IDENTIFIER(FIELD); \
461 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
462 goto error;\
463 } while (0)
464
465 ADD_FIELD(f, id);
466
467 switch (f->id) {
468 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
469 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
470 dict_size field is used. */
471 case LZMA_FILTER_LZMA1: {
472 lzma_options_lzma *options = f->options;
473 ADD_FIELD(options, lc);
474 ADD_FIELD(options, lp);
475 ADD_FIELD(options, pb);
476 ADD_FIELD(options, dict_size);
477 break;
478 }
479 case LZMA_FILTER_LZMA2: {
480 lzma_options_lzma *options = f->options;
481 ADD_FIELD(options, dict_size);
482 break;
483 }
484 case LZMA_FILTER_DELTA: {
485 lzma_options_delta *options = f->options;
486 ADD_FIELD(options, dist);
487 break;
488 }
489 case LZMA_FILTER_X86:
490 case LZMA_FILTER_POWERPC:
491 case LZMA_FILTER_IA64:
492 case LZMA_FILTER_ARM:
493 case LZMA_FILTER_ARMTHUMB:
494 case LZMA_FILTER_SPARC: {
495 lzma_options_bcj *options = f->options;
496 ADD_FIELD(options, start_offset);
497 break;
498 }
499 default:
500 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
501 goto error;
502 }
503
504 #undef ADD_FIELD
505
506 return spec;
507
508 error:
509 Py_DECREF(spec);
510 return NULL;
511 }
512
513
514 /*[clinic input]
515 module _lzma
516 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
517 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
518 [clinic start generated code]*/
519 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
520
521 #include "clinic/_lzmamodule.c.h"
522
523 /*[python input]
524
525 class lzma_vli_converter(CConverter):
526 type = 'lzma_vli'
527 converter = 'lzma_vli_converter'
528
529 class lzma_filter_converter(CConverter):
530 type = 'lzma_filter'
531 converter = 'lzma_filter_converter'
532 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
533
534 def cleanup(self):
535 name = ensure_legal_c_identifier(self.name)
536 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
537 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
538
539 [python start generated code]*/
540 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
541
542
543 /* LZMACompressor class. */
544
545 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)546 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
547 {
548 PyObject *result;
549 _BlocksOutputBuffer buffer = {.list = NULL};
550 _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
551 assert(state != NULL);
552
553 if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
554 goto error;
555 }
556 c->lzs.next_in = data;
557 c->lzs.avail_in = len;
558
559 for (;;) {
560 lzma_ret lzret;
561
562 Py_BEGIN_ALLOW_THREADS
563 lzret = lzma_code(&c->lzs, action);
564 Py_END_ALLOW_THREADS
565
566 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
567 lzret = LZMA_OK; /* That wasn't a real error */
568 }
569 if (catch_lzma_error(state, lzret)) {
570 goto error;
571 }
572 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
573 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
574 break;
575 } else if (c->lzs.avail_out == 0) {
576 if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
577 goto error;
578 }
579 }
580 }
581
582 result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
583 if (result != NULL) {
584 return result;
585 }
586
587 error:
588 OutputBuffer_OnError(&buffer);
589 return NULL;
590 }
591
592 /*[clinic input]
593 _lzma.LZMACompressor.compress
594
595 data: Py_buffer
596 /
597
598 Provide data to the compressor object.
599
600 Returns a chunk of compressed data if possible, or b'' otherwise.
601
602 When you have finished providing data to the compressor, call the
603 flush() method to finish the compression process.
604 [clinic start generated code]*/
605
606 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)607 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
608 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
609 {
610 PyObject *result = NULL;
611
612 ACQUIRE_LOCK(self);
613 if (self->flushed) {
614 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
615 }
616 else {
617 result = compress(self, data->buf, data->len, LZMA_RUN);
618 }
619 RELEASE_LOCK(self);
620 return result;
621 }
622
623 /*[clinic input]
624 _lzma.LZMACompressor.flush
625
626 Finish the compression process.
627
628 Returns the compressed data left in internal buffers.
629
630 The compressor object may not be used after this method is called.
631 [clinic start generated code]*/
632
633 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)634 _lzma_LZMACompressor_flush_impl(Compressor *self)
635 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
636 {
637 PyObject *result = NULL;
638
639 ACQUIRE_LOCK(self);
640 if (self->flushed) {
641 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
642 } else {
643 self->flushed = 1;
644 result = compress(self, NULL, 0, LZMA_FINISH);
645 }
646 RELEASE_LOCK(self);
647 return result;
648 }
649
650 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)651 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
652 int check, uint32_t preset, PyObject *filterspecs)
653 {
654 lzma_ret lzret;
655
656 if (filterspecs == Py_None) {
657 lzret = lzma_easy_encoder(lzs, preset, check);
658 } else {
659 lzma_filter filters[LZMA_FILTERS_MAX + 1];
660
661 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
662 return -1;
663 lzret = lzma_stream_encoder(lzs, filters, check);
664 free_filter_chain(filters);
665 }
666 if (catch_lzma_error(state, lzret)) {
667 return -1;
668 }
669 else {
670 return 0;
671 }
672 }
673
674 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)675 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
676 {
677 lzma_ret lzret;
678
679 if (filterspecs == Py_None) {
680 lzma_options_lzma options;
681
682 if (lzma_lzma_preset(&options, preset)) {
683 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
684 return -1;
685 }
686 lzret = lzma_alone_encoder(lzs, &options);
687 } else {
688 lzma_filter filters[LZMA_FILTERS_MAX + 1];
689
690 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
691 return -1;
692 if (filters[0].id == LZMA_FILTER_LZMA1 &&
693 filters[1].id == LZMA_VLI_UNKNOWN) {
694 lzret = lzma_alone_encoder(lzs, filters[0].options);
695 } else {
696 PyErr_SetString(PyExc_ValueError,
697 "Invalid filter chain for FORMAT_ALONE - "
698 "must be a single LZMA1 filter");
699 lzret = LZMA_PROG_ERROR;
700 }
701 free_filter_chain(filters);
702 }
703 if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
704 return -1;
705 }
706 else {
707 return 0;
708 }
709 }
710
711 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)712 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
713 {
714 lzma_filter filters[LZMA_FILTERS_MAX + 1];
715 lzma_ret lzret;
716
717 if (filterspecs == Py_None) {
718 PyErr_SetString(PyExc_ValueError,
719 "Must specify filters for FORMAT_RAW");
720 return -1;
721 }
722 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
723 return -1;
724 }
725 lzret = lzma_raw_encoder(lzs, filters);
726 free_filter_chain(filters);
727 if (catch_lzma_error(state, lzret)) {
728 return -1;
729 }
730 else {
731 return 0;
732 }
733 }
734
735 /*[-clinic input]
736 _lzma.LZMACompressor.__init__
737
738 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
739 The container format to use for the output. This can
740 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
741
742 check: int(c_default="-1") = unspecified
743 The integrity check to use. For FORMAT_XZ, the default
744 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
745 checks; for these formats, check must be omitted, or be CHECK_NONE.
746
747 preset: object = None
748 If provided should be an integer in the range 0-9, optionally
749 OR-ed with the constant PRESET_EXTREME.
750
751 filters: object = None
752 If provided should be a sequence of dicts. Each dict should
753 have an entry for "id" indicating the ID of the filter, plus
754 additional entries for options to the filter.
755
756 Create a compressor object for compressing data incrementally.
757
758 The settings used by the compressor can be specified either as a
759 preset compression level (with the 'preset' argument), or in detail
760 as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
761 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
762 level. For FORMAT_RAW, the caller must always specify a filter chain;
763 the raw compressor does not support preset compression levels.
764
765 For one-shot compression, use the compress() function instead.
766 [-clinic start generated code]*/
767 static int
Compressor_init(Compressor * self,PyObject * args,PyObject * kwargs)768 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
769 {
770 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
771 int format = FORMAT_XZ;
772 int check = -1;
773 uint32_t preset = LZMA_PRESET_DEFAULT;
774 PyObject *preset_obj = Py_None;
775 PyObject *filterspecs = Py_None;
776 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
777 assert(state != NULL);
778 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
779 "|iiOO:LZMACompressor", arg_names,
780 &format, &check, &preset_obj,
781 &filterspecs)) {
782 return -1;
783 }
784
785 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
786 PyErr_SetString(PyExc_ValueError,
787 "Integrity checks are only supported by FORMAT_XZ");
788 return -1;
789 }
790
791 if (preset_obj != Py_None && filterspecs != Py_None) {
792 PyErr_SetString(PyExc_ValueError,
793 "Cannot specify both preset and filter chain");
794 return -1;
795 }
796
797 if (preset_obj != Py_None) {
798 if (!uint32_converter(preset_obj, &preset)) {
799 return -1;
800 }
801 }
802
803 self->alloc.opaque = NULL;
804 self->alloc.alloc = PyLzma_Malloc;
805 self->alloc.free = PyLzma_Free;
806 self->lzs.allocator = &self->alloc;
807
808 self->lock = PyThread_allocate_lock();
809 if (self->lock == NULL) {
810 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
811 return -1;
812 }
813
814 self->flushed = 0;
815 switch (format) {
816 case FORMAT_XZ:
817 if (check == -1) {
818 check = LZMA_CHECK_CRC64;
819 }
820 if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
821 break;
822 }
823 return 0;
824
825 case FORMAT_ALONE:
826 if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
827 break;
828 }
829 return 0;
830
831 case FORMAT_RAW:
832 if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
833 break;
834 }
835 return 0;
836
837 default:
838 PyErr_Format(PyExc_ValueError,
839 "Invalid container format: %d", format);
840 break;
841 }
842
843 PyThread_free_lock(self->lock);
844 self->lock = NULL;
845 return -1;
846 }
847
848 static void
Compressor_dealloc(Compressor * self)849 Compressor_dealloc(Compressor *self)
850 {
851 lzma_end(&self->lzs);
852 if (self->lock != NULL) {
853 PyThread_free_lock(self->lock);
854 }
855 PyTypeObject *tp = Py_TYPE(self);
856 tp->tp_free((PyObject *)self);
857 Py_DECREF(tp);
858 }
859
/* Method table for LZMACompressor.  The two *_METHODDEF macros are
   expected to come from the included clinic header; the {NULL} entry
   terminates the table. */
static PyMethodDef Compressor_methods[] = {
    _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
    _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
    {NULL}
};
865
/* tp_traverse for LZMACompressor: the only object visited is the
   instance's type, whose reference Compressor_dealloc() releases. */
static int
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    return 0;
}
872
/* Docstring of the LZMACompressor type (installed via Py_tp_doc). */
PyDoc_STRVAR(Compressor_doc,
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
"format specifies the container format to use for the output. This can\n"
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
"\n"
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
"\n"
"The settings used by the compressor can be specified either as a\n"
"preset compression level (with the 'preset' argument), or in detail\n"
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
"the raw compressor does not support preset compression levels.\n"
"\n"
"preset (if provided) should be an integer in the range 0-9, optionally\n"
"OR-ed with the constant PRESET_EXTREME.\n"
"\n"
"filters (if provided) should be a sequence of dicts. Each dict should\n"
"have an entry for \"id\" indicating the ID of the filter, plus\n"
"additional entries for options to the filter.\n"
"\n"
"For one-shot compression, use the compress() function instead.\n");
900
/* Slot table for the LZMACompressor heap type; {0, 0} terminates it. */
static PyType_Slot lzma_compressor_type_slots[] = {
    {Py_tp_dealloc, Compressor_dealloc},
    {Py_tp_methods, Compressor_methods},
    {Py_tp_init, Compressor_init},
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_doc, (char *)Compressor_doc},
    {Py_tp_traverse, Compressor_traverse},
    {0, 0}
};
910
/* Type specification for _lzma.LZMACompressor. */
static PyType_Spec lzma_compressor_type_spec = {
    .name = "_lzma.LZMACompressor",
    .basicsize = sizeof(Compressor),
    // Calling PyType_GetModuleState() on a subclass is not safe.
    // This spec deliberately omits the Py_TPFLAGS_BASETYPE flag, so the
    // type cannot be subclassed, and therefore every
    // PyType_GetModuleState() call in this file is safe.
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = lzma_compressor_type_slots,
};
921
922 /* LZMADecompressor class. */
923
924 /* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
925 buffer is allocated dynamically and returned. At most max_length bytes are
926 returned, so some of the input may not be consumed. d->lzs.next_in and
927 d->lzs.avail_in are updated to reflect the consumed input. */
928 static PyObject*
decompress_buf(Decompressor * d,Py_ssize_t max_length)929 decompress_buf(Decompressor *d, Py_ssize_t max_length)
930 {
931 PyObject *result;
932 lzma_stream *lzs = &d->lzs;
933 _BlocksOutputBuffer buffer = {.list = NULL};
934 _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
935 assert(state != NULL);
936
937 if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
938 goto error;
939 }
940
941 for (;;) {
942 lzma_ret lzret;
943
944 Py_BEGIN_ALLOW_THREADS
945 lzret = lzma_code(lzs, LZMA_RUN);
946 Py_END_ALLOW_THREADS
947
948 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
949 lzret = LZMA_OK; /* That wasn't a real error */
950 }
951 if (catch_lzma_error(state, lzret)) {
952 goto error;
953 }
954 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
955 d->check = lzma_get_check(&d->lzs);
956 }
957 if (lzret == LZMA_STREAM_END) {
958 d->eof = 1;
959 break;
960 } else if (lzs->avail_out == 0) {
961 /* Need to check lzs->avail_out before lzs->avail_in.
962 Maybe lzs's internal state still have a few bytes
963 can be output, grow the output buffer and continue
964 if max_lengh < 0. */
965 if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
966 break;
967 }
968 if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
969 goto error;
970 }
971 } else if (lzs->avail_in == 0) {
972 break;
973 }
974 }
975
976 result = OutputBuffer_Finish(&buffer, lzs->avail_out);
977 if (result != NULL) {
978 return result;
979 }
980
981 error:
982 OutputBuffer_OnError(&buffer);
983 return NULL;
984 }
985
986 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)987 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
988 {
989 char input_buffer_in_use;
990 PyObject *result;
991 lzma_stream *lzs = &d->lzs;
992
993 /* Prepend unconsumed input if necessary */
994 if (lzs->next_in != NULL) {
995 size_t avail_now, avail_total;
996
997 /* Number of bytes we can append to input buffer */
998 avail_now = (d->input_buffer + d->input_buffer_size)
999 - (lzs->next_in + lzs->avail_in);
1000
1001 /* Number of bytes we can append if we move existing
1002 contents to beginning of buffer (overwriting
1003 consumed input) */
1004 avail_total = d->input_buffer_size - lzs->avail_in;
1005
1006 if (avail_total < len) {
1007 size_t offset = lzs->next_in - d->input_buffer;
1008 uint8_t *tmp;
1009 size_t new_size = d->input_buffer_size + len - avail_now;
1010
1011 /* Assign to temporary variable first, so we don't
1012 lose address of allocated buffer if realloc fails */
1013 tmp = PyMem_Realloc(d->input_buffer, new_size);
1014 if (tmp == NULL) {
1015 PyErr_SetNone(PyExc_MemoryError);
1016 return NULL;
1017 }
1018 d->input_buffer = tmp;
1019 d->input_buffer_size = new_size;
1020
1021 lzs->next_in = d->input_buffer + offset;
1022 }
1023 else if (avail_now < len) {
1024 memmove(d->input_buffer, lzs->next_in,
1025 lzs->avail_in);
1026 lzs->next_in = d->input_buffer;
1027 }
1028 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1029 lzs->avail_in += len;
1030 input_buffer_in_use = 1;
1031 }
1032 else {
1033 lzs->next_in = data;
1034 lzs->avail_in = len;
1035 input_buffer_in_use = 0;
1036 }
1037
1038 result = decompress_buf(d, max_length);
1039 if (result == NULL) {
1040 lzs->next_in = NULL;
1041 return NULL;
1042 }
1043
1044 if (d->eof) {
1045 d->needs_input = 0;
1046 if (lzs->avail_in > 0) {
1047 Py_XSETREF(d->unused_data,
1048 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1049 if (d->unused_data == NULL) {
1050 goto error;
1051 }
1052 }
1053 }
1054 else if (lzs->avail_in == 0) {
1055 lzs->next_in = NULL;
1056
1057 if (lzs->avail_out == 0) {
1058 /* (avail_in==0 && avail_out==0)
1059 Maybe lzs's internal state still have a few bytes can
1060 be output, try to output them next time. */
1061 d->needs_input = 0;
1062
1063 /* If max_length < 0, lzs->avail_out always > 0 */
1064 assert(max_length >= 0);
1065 } else {
1066 /* Input buffer exhausted, output buffer has space. */
1067 d->needs_input = 1;
1068 }
1069 }
1070 else {
1071 d->needs_input = 0;
1072
1073 /* If we did not use the input buffer, we now have
1074 to copy the tail from the caller's buffer into the
1075 input buffer */
1076 if (!input_buffer_in_use) {
1077
1078 /* Discard buffer if it's too small
1079 (resizing it may needlessly copy the current contents) */
1080 if (d->input_buffer != NULL &&
1081 d->input_buffer_size < lzs->avail_in) {
1082 PyMem_Free(d->input_buffer);
1083 d->input_buffer = NULL;
1084 }
1085
1086 /* Allocate if necessary */
1087 if (d->input_buffer == NULL) {
1088 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1089 if (d->input_buffer == NULL) {
1090 PyErr_SetNone(PyExc_MemoryError);
1091 goto error;
1092 }
1093 d->input_buffer_size = lzs->avail_in;
1094 }
1095
1096 /* Copy tail */
1097 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1098 lzs->next_in = d->input_buffer;
1099 }
1100 }
1101
1102 return result;
1103
1104 error:
1105 Py_XDECREF(result);
1106 return NULL;
1107 }
1108
1109 /*[clinic input]
1110 _lzma.LZMADecompressor.decompress
1111
1112 data: Py_buffer
1113 max_length: Py_ssize_t=-1
1114
1115 Decompress *data*, returning uncompressed data as bytes.
1116
1117 If *max_length* is nonnegative, returns at most *max_length* bytes of
1118 decompressed data. If this limit is reached and further output can be
1119 produced, *self.needs_input* will be set to ``False``. In this case, the next
1120 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1121
1122 If all of the input data was decompressed and returned (either because this
1123 was less than *max_length* bytes, or because *max_length* was negative),
1124 *self.needs_input* will be set to True.
1125
1126 Attempting to decompress data after the end of stream is reached raises an
1127 EOFError. Any data found after the end of the stream is ignored and saved in
1128 the unused_data attribute.
1129 [clinic start generated code]*/
1130
1131 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1132 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1133 Py_ssize_t max_length)
1134 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1135 {
1136 PyObject *result = NULL;
1137
1138 ACQUIRE_LOCK(self);
1139 if (self->eof)
1140 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1141 else
1142 result = decompress(self, data->buf, data->len, max_length);
1143 RELEASE_LOCK(self);
1144 return result;
1145 }
1146
1147 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1148 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1149 {
1150 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1151 lzma_ret lzret;
1152
1153 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1154 return -1;
1155 }
1156 lzret = lzma_raw_decoder(lzs, filters);
1157 free_filter_chain(filters);
1158 if (catch_lzma_error(state, lzret)) {
1159 return -1;
1160 }
1161 else {
1162 return 0;
1163 }
1164 }
1165
1166 /*[clinic input]
1167 _lzma.LZMADecompressor.__init__
1168
1169 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1170 Specifies the container format of the input stream. If this is
1171 FORMAT_AUTO (the default), the decompressor will automatically detect
1172 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1173 FORMAT_RAW cannot be autodetected.
1174
1175 memlimit: object = None
1176 Limit the amount of memory used by the decompressor. This will cause
1177 decompression to fail if the input cannot be decompressed within the
1178 given limit.
1179
1180 filters: object = None
1181 A custom filter chain. This argument is required for FORMAT_RAW, and
1182 not accepted with any other format. When provided, this should be a
1183 sequence of dicts, each indicating the ID and options for a single
1184 filter.
1185
1186 Create a decompressor object for decompressing data incrementally.
1187
1188 For one-shot decompression, use the decompress() function instead.
1189 [clinic start generated code]*/
1190
1191 static int
_lzma_LZMADecompressor___init___impl(Decompressor * self,int format,PyObject * memlimit,PyObject * filters)1192 _lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1193 PyObject *memlimit, PyObject *filters)
1194 /*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
1195 {
1196 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1197 uint64_t memlimit_ = UINT64_MAX;
1198 lzma_ret lzret;
1199 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1200 assert(state != NULL);
1201
1202 if (memlimit != Py_None) {
1203 if (format == FORMAT_RAW) {
1204 PyErr_SetString(PyExc_ValueError,
1205 "Cannot specify memory limit with FORMAT_RAW");
1206 return -1;
1207 }
1208 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1209 if (PyErr_Occurred()) {
1210 return -1;
1211 }
1212 }
1213
1214 if (format == FORMAT_RAW && filters == Py_None) {
1215 PyErr_SetString(PyExc_ValueError,
1216 "Must specify filters for FORMAT_RAW");
1217 return -1;
1218 } else if (format != FORMAT_RAW && filters != Py_None) {
1219 PyErr_SetString(PyExc_ValueError,
1220 "Cannot specify filters except with FORMAT_RAW");
1221 return -1;
1222 }
1223
1224 self->alloc.opaque = NULL;
1225 self->alloc.alloc = PyLzma_Malloc;
1226 self->alloc.free = PyLzma_Free;
1227 self->lzs.allocator = &self->alloc;
1228 self->lzs.next_in = NULL;
1229
1230 PyThread_type_lock lock = PyThread_allocate_lock();
1231 if (lock == NULL) {
1232 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1233 return -1;
1234 }
1235 if (self->lock != NULL) {
1236 PyThread_free_lock(self->lock);
1237 }
1238 self->lock = lock;
1239
1240 self->check = LZMA_CHECK_UNKNOWN;
1241 self->needs_input = 1;
1242 self->input_buffer = NULL;
1243 self->input_buffer_size = 0;
1244 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1245 if (self->unused_data == NULL) {
1246 goto error;
1247 }
1248
1249 switch (format) {
1250 case FORMAT_AUTO:
1251 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1252 if (catch_lzma_error(state, lzret)) {
1253 break;
1254 }
1255 return 0;
1256
1257 case FORMAT_XZ:
1258 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1259 if (catch_lzma_error(state, lzret)) {
1260 break;
1261 }
1262 return 0;
1263
1264 case FORMAT_ALONE:
1265 self->check = LZMA_CHECK_NONE;
1266 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1267 if (catch_lzma_error(state, lzret)) {
1268 break;
1269 }
1270 return 0;
1271
1272 case FORMAT_RAW:
1273 self->check = LZMA_CHECK_NONE;
1274 if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1275 break;
1276 }
1277 return 0;
1278
1279 default:
1280 PyErr_Format(PyExc_ValueError,
1281 "Invalid container format: %d", format);
1282 break;
1283 }
1284
1285 error:
1286 Py_CLEAR(self->unused_data);
1287 PyThread_free_lock(self->lock);
1288 self->lock = NULL;
1289 return -1;
1290 }
1291
1292 static void
Decompressor_dealloc(Decompressor * self)1293 Decompressor_dealloc(Decompressor *self)
1294 {
1295 if(self->input_buffer != NULL)
1296 PyMem_Free(self->input_buffer);
1297
1298 lzma_end(&self->lzs);
1299 Py_CLEAR(self->unused_data);
1300 if (self->lock != NULL) {
1301 PyThread_free_lock(self->lock);
1302 }
1303 PyTypeObject *tp = Py_TYPE(self);
1304 tp->tp_free((PyObject *)self);
1305 Py_DECREF(tp);
1306 }
1307
1308 static int
Decompressor_traverse(Decompressor * self,visitproc visit,void * arg)1309 Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1310 {
1311 Py_VISIT(Py_TYPE(self));
1312 return 0;
1313 }
1314
1315 static PyMethodDef Decompressor_methods[] = {
1316 _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
1317 {NULL}
1318 };
1319
1320 PyDoc_STRVAR(Decompressor_check_doc,
1321 "ID of the integrity check used by the input stream.");
1322
1323 PyDoc_STRVAR(Decompressor_eof_doc,
1324 "True if the end-of-stream marker has been reached.");
1325
1326 PyDoc_STRVAR(Decompressor_needs_input_doc,
1327 "True if more input is needed before more decompressed data can be produced.");
1328
1329 PyDoc_STRVAR(Decompressor_unused_data_doc,
1330 "Data found after the end of the compressed stream.");
1331
1332 static PyMemberDef Decompressor_members[] = {
1333 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1334 Decompressor_check_doc},
1335 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1336 Decompressor_eof_doc},
1337 {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1338 Decompressor_needs_input_doc},
1339 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1340 Decompressor_unused_data_doc},
1341 {NULL}
1342 };
1343
1344 static PyType_Slot lzma_decompressor_type_slots[] = {
1345 {Py_tp_dealloc, Decompressor_dealloc},
1346 {Py_tp_methods, Decompressor_methods},
1347 {Py_tp_init, _lzma_LZMADecompressor___init__},
1348 {Py_tp_new, PyType_GenericNew},
1349 {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
1350 {Py_tp_traverse, Decompressor_traverse},
1351 {Py_tp_members, Decompressor_members},
1352 {0, 0}
1353 };
1354
1355 static PyType_Spec lzma_decompressor_type_spec = {
1356 .name = "_lzma.LZMADecompressor",
1357 .basicsize = sizeof(Decompressor),
1358 // Calling PyType_GetModuleState() on a subclass is not safe.
1359 // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1360 // which prevents to create a subclass.
1361 // So calling PyType_GetModuleState() in this file is always safe.
1362 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
1363 .slots = lzma_decompressor_type_slots,
1364 };
1365
1366
1367 /* Module-level functions. */
1368
1369 /*[clinic input]
1370 _lzma.is_check_supported
1371 check_id: int
1372 /
1373
1374 Test whether the given integrity check is supported.
1375
1376 Always returns True for CHECK_NONE and CHECK_CRC32.
1377 [clinic start generated code]*/
1378
1379 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1380 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1381 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1382 {
1383 return PyBool_FromLong(lzma_check_is_supported(check_id));
1384 }
1385
1386 PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1387 "_encode_filter_properties($module, filter, /)\n"
1388 "--\n"
1389 "\n"
1390 "Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1391 "\n"
1392 "The result does not include the filter ID itself, only the options.");
1393
1394 #define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF \
1395 {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
1396
1397 static PyObject *
1398 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1399
1400 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1401 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1402 {
1403 PyObject *return_value = NULL;
1404 lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1405 _lzma_state *state = get_lzma_state(module);
1406 assert(state != NULL);
1407 if (!lzma_filter_converter(state, arg, &filter)) {
1408 goto exit;
1409 }
1410 return_value = _lzma__encode_filter_properties_impl(module, filter);
1411
1412 exit:
1413 /* Cleanup for filter */
1414 if (filter.id != LZMA_VLI_UNKNOWN) {
1415 PyMem_Free(filter.options);
1416 }
1417
1418 return return_value;
1419 }
1420
1421 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1422 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1423 {
1424 lzma_ret lzret;
1425 uint32_t encoded_size;
1426 PyObject *result = NULL;
1427 _lzma_state *state = get_lzma_state(module);
1428 assert(state != NULL);
1429
1430 lzret = lzma_properties_size(&encoded_size, &filter);
1431 if (catch_lzma_error(state, lzret))
1432 goto error;
1433
1434 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1435 if (result == NULL)
1436 goto error;
1437
1438 lzret = lzma_properties_encode(
1439 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1440 if (catch_lzma_error(state, lzret)) {
1441 goto error;
1442 }
1443
1444 return result;
1445
1446 error:
1447 Py_XDECREF(result);
1448 return NULL;
1449 }
1450
1451
1452 /*[clinic input]
1453 _lzma._decode_filter_properties
1454 filter_id: lzma_vli
1455 encoded_props: Py_buffer
1456 /
1457
1458 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1459
1460 The result does not include the filter ID itself, only the options.
1461 [clinic start generated code]*/
1462
1463 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1464 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1465 Py_buffer *encoded_props)
1466 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1467 {
1468 lzma_filter filter;
1469 lzma_ret lzret;
1470 PyObject *result = NULL;
1471 filter.id = filter_id;
1472 _lzma_state *state = get_lzma_state(module);
1473 assert(state != NULL);
1474
1475 lzret = lzma_properties_decode(
1476 &filter, NULL, encoded_props->buf, encoded_props->len);
1477 if (catch_lzma_error(state, lzret)) {
1478 return NULL;
1479 }
1480
1481 result = build_filter_spec(&filter);
1482
1483 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1484 allocated by lzma_properties_decode() using the default allocator. */
1485 free(filter.options);
1486 return result;
1487 }
1488
1489 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1490 would not work correctly on platforms with 32-bit longs. */
1491 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1492 module_add_int_constant(PyObject *m, const char *name, long long value)
1493 {
1494 PyObject *o = PyLong_FromLongLong(value);
1495 if (o == NULL) {
1496 return -1;
1497 }
1498 if (PyModule_AddObject(m, name, o) == 0) {
1499 return 0;
1500 }
1501 Py_DECREF(o);
1502 return -1;
1503 }
1504
1505 static int
lzma_exec(PyObject * module)1506 lzma_exec(PyObject *module)
1507 {
1508 #define ADD_INT_PREFIX_MACRO(module, macro) \
1509 do { \
1510 if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
1511 return -1; \
1512 } \
1513 } while(0)
1514
1515 #define ADD_INT_MACRO(module, macro) \
1516 do { \
1517 if (PyModule_AddIntMacro(module, macro) < 0) { \
1518 return -1; \
1519 } \
1520 } while (0)
1521
1522
1523 _lzma_state *state = get_lzma_state(module);
1524
1525 state->empty_tuple = PyTuple_New(0);
1526 if (state->empty_tuple == NULL) {
1527 return -1;
1528 }
1529
1530 ADD_INT_MACRO(module, FORMAT_AUTO);
1531 ADD_INT_MACRO(module, FORMAT_XZ);
1532 ADD_INT_MACRO(module, FORMAT_ALONE);
1533 ADD_INT_MACRO(module, FORMAT_RAW);
1534 ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1535 ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1536 ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1537 ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1538 ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1539 ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1540 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1541 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1542 ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1543 ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1544 ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1545 ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1546 ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1547 ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1548 ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1549 ADD_INT_PREFIX_MACRO(module, MF_HC3);
1550 ADD_INT_PREFIX_MACRO(module, MF_HC4);
1551 ADD_INT_PREFIX_MACRO(module, MF_BT2);
1552 ADD_INT_PREFIX_MACRO(module, MF_BT3);
1553 ADD_INT_PREFIX_MACRO(module, MF_BT4);
1554 ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1555 ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1556 ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1557 ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1558
1559 state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1560 if (state->error == NULL) {
1561 return -1;
1562 }
1563
1564 if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1565 return -1;
1566 }
1567
1568
1569 state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1570 &lzma_compressor_type_spec, NULL);
1571 if (state->lzma_compressor_type == NULL) {
1572 return -1;
1573 }
1574
1575 if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1576 return -1;
1577 }
1578
1579 state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1580 &lzma_decompressor_type_spec, NULL);
1581 if (state->lzma_decompressor_type == NULL) {
1582 return -1;
1583 }
1584
1585 if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1586 return -1;
1587 }
1588
1589 return 0;
1590 }
1591
1592 static PyMethodDef lzma_methods[] = {
1593 _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1594 _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1595 _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1596 {NULL}
1597 };
1598
1599 static PyModuleDef_Slot lzma_slots[] = {
1600 {Py_mod_exec, lzma_exec},
1601 {0, NULL}
1602 };
1603
1604 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1605 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1606 {
1607 _lzma_state *state = get_lzma_state(module);
1608 Py_VISIT(state->lzma_compressor_type);
1609 Py_VISIT(state->lzma_decompressor_type);
1610 Py_VISIT(state->error);
1611 Py_VISIT(state->empty_tuple);
1612 return 0;
1613 }
1614
1615 static int
lzma_clear(PyObject * module)1616 lzma_clear(PyObject *module)
1617 {
1618 _lzma_state *state = get_lzma_state(module);
1619 Py_CLEAR(state->lzma_compressor_type);
1620 Py_CLEAR(state->lzma_decompressor_type);
1621 Py_CLEAR(state->error);
1622 Py_CLEAR(state->empty_tuple);
1623 return 0;
1624 }
1625
1626 static void
lzma_free(void * module)1627 lzma_free(void *module)
1628 {
1629 lzma_clear((PyObject *)module);
1630 }
1631
1632 static PyModuleDef _lzmamodule = {
1633 PyModuleDef_HEAD_INIT,
1634 .m_name = "_lzma",
1635 .m_size = sizeof(_lzma_state),
1636 .m_methods = lzma_methods,
1637 .m_slots = lzma_slots,
1638 .m_traverse = lzma_traverse,
1639 .m_clear = lzma_clear,
1640 .m_free = lzma_free,
1641 };
1642
1643 PyMODINIT_FUNC
PyInit__lzma(void)1644 PyInit__lzma(void)
1645 {
1646 return PyModuleDef_Init(&_lzmamodule);
1647 }
1648