• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Xz.h - Xz interface
2 2023-04-13 : Igor Pavlov : Public domain */
3 
4 #ifndef ZIP7_INC_XZ_H
5 #define ZIP7_INC_XZ_H
6 
7 #include "Sha256.h"
8 #include "Delta.h"
9 
10 EXTERN_C_BEGIN
11 
12 #define XZ_ID_Subblock 1
13 #define XZ_ID_Delta 3
14 #define XZ_ID_X86   4
15 #define XZ_ID_PPC   5
16 #define XZ_ID_IA64  6
17 #define XZ_ID_ARM   7
18 #define XZ_ID_ARMT  8
19 #define XZ_ID_SPARC 9
20 #define XZ_ID_ARM64 0xa
21 #define XZ_ID_LZMA2 0x21
22 
23 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
24 unsigned Xz_WriteVarInt(Byte *buf, UInt64 v);
25 
26 /* ---------- xz block ---------- */
27 
28 #define XZ_BLOCK_HEADER_SIZE_MAX 1024
29 
30 #define XZ_NUM_FILTERS_MAX 4
31 #define XZ_BF_NUM_FILTERS_MASK 3
32 #define XZ_BF_PACK_SIZE (1 << 6)
33 #define XZ_BF_UNPACK_SIZE (1 << 7)
34 
35 #define XZ_FILTER_PROPS_SIZE_MAX 20
36 
37 typedef struct /* one filter entry of an xz block; CXzBlock holds an array of these */
38 {
39   UInt64 id;        // filter id; the XZ_ID_* constants in this header name the known values
40   UInt32 propsSize; // NOTE(review): presumably the number of valid bytes in props[] - confirm in the parser
41   Byte props[XZ_FILTER_PROPS_SIZE_MAX]; // raw filter properties; capacity is XZ_FILTER_PROPS_SIZE_MAX bytes
42 } CXzFilter;
43 
44 typedef struct /* xz block header fields; presumably filled by XzBlock_Parse() / XzBlock_ReadHeader() - confirm */
45 {
46   UInt64 packSize;   // NOTE(review): presumably meaningful only when XzBlock_HasPackSize(p) is true - confirm
47   UInt64 unpackSize; // NOTE(review): presumably meaningful only when XzBlock_HasUnpackSize(p) is true - confirm
48   Byte flags;        // bit field read by the XzBlock_* macros: (flags & XZ_BF_NUM_FILTERS_MASK) + 1 = number of filters, plus XZ_BF_PACK_SIZE and XZ_BF_UNPACK_SIZE bits
49   CXzFilter filters[XZ_NUM_FILTERS_MAX]; // filter slots; XzBlock_GetNumFilters(p) yields a count in 1..XZ_NUM_FILTERS_MAX
50 } CXzBlock;
51 
52 #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
53 #define XzBlock_HasPackSize(p)   (((p)->flags & XZ_BF_PACK_SIZE) != 0)
54 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)
55 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
56 
57 SRes XzBlock_Parse(CXzBlock *p, const Byte *header);
58 SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
59 
60 /* ---------- xz stream ---------- */
61 
62 #define XZ_SIG_SIZE 6
63 #define XZ_FOOTER_SIG_SIZE 2
64 
65 extern const Byte XZ_SIG[XZ_SIG_SIZE];
66 
67 /*
68 extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE];
69 */
70 
71 #define XZ_FOOTER_SIG_0 'Y'
72 #define XZ_FOOTER_SIG_1 'Z'
73 
74 #define XZ_STREAM_FLAGS_SIZE 2
75 #define XZ_STREAM_CRC_SIZE 4
76 
77 #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE)
78 #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4)
79 
80 #define XZ_CHECK_MASK 0xF
81 #define XZ_CHECK_NO 0
82 #define XZ_CHECK_CRC32 1
83 #define XZ_CHECK_CRC64 4
84 #define XZ_CHECK_SHA256 10
85 
86 typedef struct /* integrity-check state operated on by XzCheck_Init() / XzCheck_Update() / XzCheck_Final() */
87 {
88   unsigned mode; // NOTE(review): presumably the XZ_CHECK_* id given to XzCheck_Init(p, mode) - confirm
89   UInt32 crc;    // NOTE(review): presumably the running CRC32 value (XZ_CHECK_CRC32) - confirm
90   UInt64 crc64;  // NOTE(review): presumably the running CRC64 value (XZ_CHECK_CRC64) - confirm
91   CSha256 sha;   // SHA-256 context type from Sha256.h; presumably used for XZ_CHECK_SHA256 - confirm
92 } CXzCheck;
93 
94 void XzCheck_Init(CXzCheck *p, unsigned mode);
95 void XzCheck_Update(CXzCheck *p, const void *data, size_t size);
96 int XzCheck_Final(CXzCheck *p, Byte *digest);
97 
98 typedef UInt16 CXzStreamFlags;
99 
100 #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK)
101 #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK)
102 #define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32)
103 unsigned XzFlags_GetCheckSize(CXzStreamFlags f);
104 
105 SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf);
106 SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream);
107 
108 typedef struct /* size pair for one block; CXzStream::blocks is a pointer to this type */
109 {
110   UInt64 unpackSize; // NOTE(review): presumably the uncompressed size of the block - confirm
111   UInt64 totalSize;  // NOTE(review): presumably the stored size of the block including header and check (compare XzUnpacker_GetPackSizeForIndex) - confirm
112 } CXzBlockSizes;
113 
114 typedef struct /* record for one xz stream; CXzs::streams is a pointer to this type */
115 {
116   CXzStreamFlags flags;  // stream flags; XzFlags_GetCheckType(flags) extracts the check type id
117   // Byte _pad[6];
118   size_t numBlocks;      // NOTE(review): presumably the number of entries behind (blocks) - confirm
119   CXzBlockSizes *blocks; // NOTE(review): presumably released by Xz_Free(p, alloc) - confirm ownership
120   UInt64 startOffset;    // NOTE(review): the origin of this offset is not visible in this header - confirm
121 } CXzStream;
122 
123 void Xz_Construct(CXzStream *p);
124 void Xz_Free(CXzStream *p, ISzAllocPtr alloc);
125 
126 #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1)
127 
128 UInt64 Xz_GetUnpackSize(const CXzStream *p);
129 UInt64 Xz_GetPackSize(const CXzStream *p);
130 
131 typedef struct /* list of CXzStream records handled by the Xzs_* functions */
132 {
133   size_t num;          // NOTE(review): presumably the number of used entries behind (streams) - confirm
134   size_t numAllocated; // NOTE(review): presumably the allocated capacity behind (streams) - confirm
135   CXzStream *streams;  // NOTE(review): presumably released by Xzs_Free(p, alloc) - confirm ownership
136 } CXzs;
137 
138 void Xzs_Construct(CXzs *p);
139 void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
140 SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);
141 
142 UInt64 Xzs_GetNumBlocks(const CXzs *p);
143 UInt64 Xzs_GetUnpackSize(const CXzs *p);
144 
145 
146 // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder
147 
148 typedef enum
149 {
150   CODER_STATUS_NOT_SPECIFIED,               /* use main error code instead */
151   CODER_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
152   CODER_STATUS_NOT_FINISHED,                /* stream was not finished */
153   CODER_STATUS_NEEDS_MORE_INPUT             /* you must provide more input bytes */
154 } ECoderStatus;
155 
156 
157 // ECoderFinishMode values are identical to ELzmaFinishMode
158 
159 typedef enum
160 {
161   CODER_FINISH_ANY,   /* finish at any point */
162   CODER_FINISH_END    /* block must be finished at the end */
163 } ECoderFinishMode;
164 
165 
166 typedef struct /* callback table plus opaque state for one coder; CMixCoder keeps an array of these */
167 {
168   void *p; // state object; presumably handed back as the first (void *p) argument of each callback - confirm
169   void (*Free)(void *p, ISzAllocPtr alloc);
170   SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);
171   void (*Init)(void *p);
172   SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, // destLen / srcLen are pointers: presumably in/out sizes as in XzUnpacker_Code() - confirm
173       int srcWasFinished, ECoderFinishMode finishMode,
174       // int *wasFinished,
175       ECoderStatus *status);
176   SizeT (*Filter)(void *p, Byte *data, SizeT size); // NOTE(review): presumably returns the number of bytes processed in (data) - confirm
177 } IStateCoder;
178 
179 
180 typedef struct
181 {
182   UInt32 methodId;
183   UInt32 delta;
184   UInt32 ip;
185   UInt32 X86_State;
186   Byte delta_State[DELTA_STATE_SIZE];
187 } CXzBcFilterStateBase;
188 
189 typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size);
190 
191 SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
192     Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc);
193 
194 
195 #define MIXCODER_NUM_FILTERS_MAX 4
196 
197 typedef struct /* state for a chain of up to MIXCODER_NUM_FILTERS_MAX coders */
198 {
199   ISzAllocPtr alloc;
200   Byte *buf;
201   unsigned numCoders; // NOTE(review): presumably the number of used entries in coders[] - confirm
202 
203   Byte *outBuf;
204   size_t outBufSize;
205   size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode)
206   BoolInt wasFinished;
207   SRes res;
208   ECoderStatus status;
209   // BoolInt SingleBufMode;
210 
211   int finished[MIXCODER_NUM_FILTERS_MAX - 1]; // one entry fewer than coders[]: presumably per-link state between adjacent coders - confirm
212   size_t pos[MIXCODER_NUM_FILTERS_MAX - 1];
213   size_t size[MIXCODER_NUM_FILTERS_MAX - 1];
214   UInt64 ids[MIXCODER_NUM_FILTERS_MAX];       // one entry per coder slot
215   SRes results[MIXCODER_NUM_FILTERS_MAX];     // one entry per coder slot
216   IStateCoder coders[MIXCODER_NUM_FILTERS_MAX];
217 } CMixCoder;
218 
219 
220 typedef enum
221 {
222   XZ_STATE_STREAM_HEADER,
223   XZ_STATE_STREAM_INDEX,
224   XZ_STATE_STREAM_INDEX_CRC,
225   XZ_STATE_STREAM_FOOTER,
226   XZ_STATE_STREAM_PADDING,
227   XZ_STATE_BLOCK_HEADER,
228   XZ_STATE_BLOCK,
229   XZ_STATE_BLOCK_FOOTER
230 } EXzState;
231 
232 
233 typedef struct /* xz decoder state used by the XzUnpacker_* functions declared below */
234 {
235   EXzState state; // one of the XZ_STATE_* values
236   UInt32 pos;
237   unsigned alignPos;
238   unsigned indexPreSize;
239 
240   CXzStreamFlags streamFlags; // for random block decoding the caller sets this before XzUnpacker_PrepareToRandomBlockDecoding()
241 
242   UInt32 blockHeaderSize; // read by XzUnpacker_GetPackSizeForIndex()
243   UInt64 packSize;        // read by XzUnpacker_GetPackSizeForIndex()
244   UInt64 unpackSize;
245 
246   UInt64 numBlocks; // number of finished blocks in current stream
247   UInt64 indexSize;
248   UInt64 indexPos;
249   UInt64 padSize;
250 
251   UInt64 numStartedStreams;
252   UInt64 numFinishedStreams;
253   UInt64 numTotalBlocks;
254 
255   UInt32 crc;
256   CMixCoder decoder;
257   CXzBlock block;
258   CXzCheck check;
259   CSha256 sha;
260 
261   BoolInt parseMode;
262   BoolInt headerParsedOk;
263   BoolInt decodeToStreamSignature;
264   unsigned decodeOnlyOneBlock;
265 
266   Byte *outBuf;      // NOTE(review): presumably the buffer given to XzUnpacker_SetOutBuf() - confirm
267   size_t outBufSize;
268   size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked
269 
270   Byte shaDigest[SHA256_DIGEST_SIZE];
271   Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; // capacity equals the maximum block header size
272 } CXzUnpacker;
273 
274 /* alloc : aligned for cache line allocation is better */
275 void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc);
276 void XzUnpacker_Init(CXzUnpacker *p);
277 void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize);
278 void XzUnpacker_Free(CXzUnpacker *p);
279 
280 /*
281   XzUnpacker
282   The sequence for decoding functions:
283   {
284     XzUnpacker_Construct()
285     [Decoding_Calls]
286     XzUnpacker_Free()
287   }
288 
289   [Decoding_Calls]
290 
291   There are 3 types of interfaces for [Decoding_Calls] calls:
292 
293   Interface-1 : Partial output buffers:
294     {
295       XzUnpacker_Init()
296       for()
297       {
298         XzUnpacker_Code();
299       }
300       XzUnpacker_IsStreamWasFinished()
301     }
302 
303   Interface-2 : Direct output buffer:
304     Use it, if you know exact size of decoded data, and you need
305     whole xz unpacked data in one output buffer.
306     xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode.
307     {
308       XzUnpacker_Init()
309       XzUnpacker_SetOutBuf(); // to set output buffer and size
310       for()
311       {
312         XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
313       }
314       XzUnpacker_IsStreamWasFinished()
315     }
316 
317   Interface-3 : Direct output buffer : One call full decoding
318     It unpacks whole input buffer to output buffer in one call.
319     It uses Interface-2 internally.
320     {
321       XzUnpacker_CodeFull()
322       XzUnpacker_IsStreamWasFinished()
323     }
324 */
325 
326 /*
327 finishMode:
328   It has meaning only if the decoding reaches output limit (*destLen).
329   CODER_FINISH_ANY - use smallest number of input bytes
330   CODER_FINISH_END - read EndOfStream marker after decoding
331 
332 Returns:
333   SZ_OK
334     status:
335       CODER_STATUS_NOT_FINISHED,
336       CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
337          1) it needs more input data to finish current xz stream
338          2) xz stream was finished successfully. But the decoder supports multiple
339             concatenated xz streams. So it expects more input data for new xz streams.
340          Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
341 
342   SZ_ERROR_MEM  - Memory allocation error
343   SZ_ERROR_DATA - Data error
344   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
345   SZ_ERROR_CRC  - CRC error
346   // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
347 
348   SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons:
349      - xz Stream Signature failure
350      - CRC32 of xz Stream Header is failed
351      - The size of Stream padding is not multiple of four bytes.
352     It's possible to get that error, if xz stream was finished and the stream
353     contains some other data. In that case you can call XzUnpacker_GetExtraSize()
354     function to get real size of xz stream.
355 */
356 
357 
358 SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
359     const Byte *src, SizeT *srcLen, int srcFinished,
360     ECoderFinishMode finishMode, ECoderStatus *status);
361 
362 SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
363     const Byte *src, SizeT *srcLen,
364     ECoderFinishMode finishMode, ECoderStatus *status);
365 
366 /*
367 If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
368 after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
369 */
370 
371 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
372 
373 /*
374 XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
375  if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
376 These bytes can be some data after xz archive, or
377 it can be start of new xz stream.
378 
379 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
380 xz stream in two cases, if XzUnpacker_Code() returns:
381   res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT
382   res == SZ_ERROR_NO_ARCHIVE
383 */
384 
385 UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p);
386 
387 
388 /*
389   for random block decoding:
390     XzUnpacker_Init();
391     set CXzUnpacker::streamFlags
392     XzUnpacker_PrepareToRandomBlockDecoding()
393     loop
394     {
395       XzUnpacker_Code()
396       XzUnpacker_IsBlockFinished()
397     }
398 */
399 
400 void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p);
401 BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
402 
403 #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags))
404 
405 
406 
407 
408 
409 
410 /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
411 
412 /*
413   if (CXzDecMtProps::numThreads > 1), the decoder can try to use
414   Multi-Threading. The decoder analyses xz block header, and if
415   there are pack size and unpack size values stored in xz block header,
416   the decoder reads compressed data of block to internal buffers,
417   and then it can start parallel decoding, if there are another blocks.
418   The decoder can switch back to Single-Thread decoding after some conditions.
419 
420   The sequence of calls for xz decoding with in/out Streams:
421   {
422     XzDecMt_Create()
423     XzDecMtProps_Init(XzDecMtProps) to set default values of properties
424     // then you can change some XzDecMtProps parameters with required values
425     // here you can set the number of threads and (memUseMax) - the maximum
426     Memory usage for multithreading decoding.
427     for()
428     {
429       XzDecMt_Decode() // one call per one file
430     }
431     XzDecMt_Destroy()
432   }
433 */
434 
435 
436 typedef struct /* decoder properties; XzDecMtProps_Init() sets the default values */
437 {
438   size_t inBufSize_ST;    // size of input buffer for Single-Thread decoding
439   size_t outStep_ST;      // size of output buffer for Single-Thread decoding
440   BoolInt ignoreErrors;   // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
441 
442   #ifndef Z7_ST
443   unsigned numThreads;    // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
444   size_t inBufSize_MT;    // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
445   size_t memUseMax;       // the limit of total memory usage for Multi-Thread decoding.
446                           // it's recommended to set (memUseMax) manually to a value that is smaller than the total size of RAM in the computer.
447   #endif
448 } CXzDecMtProps;
449 
450 void XzDecMtProps_Init(CXzDecMtProps *p);
451 
452 typedef struct CXzDecMt CXzDecMt;
453 typedef CXzDecMt * CXzDecMtHandle;
454 // Z7_DECLARE_HANDLE(CXzDecMtHandle)
455 
456 /*
457   alloc    : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
458   allocMid : for big allocations, aligned allocation is better
459 */
460 
461 CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
462 void XzDecMt_Destroy(CXzDecMtHandle p);
463 
464 
465 typedef struct /* out: decoding results and statistics written by XzDecMt_Decode() */
466 {
467   Byte UnpackSize_Defined;
468   Byte NumStreams_Defined;
469   Byte NumBlocks_Defined;
470 
471   Byte DataAfterEnd;      // there is some additional data after good xz streams, and that data is not a new xz stream.
472   Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
473 
474   UInt64 InSize;          // pack size processed. That value doesn't include the data after
475                           // end of xz stream, if that data was not correct
476   UInt64 OutSize;
477 
478   UInt64 NumStreams;
479   UInt64 NumBlocks;
480 
481   SRes DecodeRes;         // the error code of xz streams data decoding
482   SRes ReadRes;           // error code from ISeqInStream:Read()
483   SRes ProgressRes;       // error code from ICompressProgress:Progress()
484 
485   SRes CombinedRes;       // Combined result error code that shows main result
486                           // = SZ_OK, if there is no error.
487                           // but check also (DataAfterEnd) that can show additional minor errors.
488 
489   SRes CombinedRes_Type;  // = SZ_ERROR_READ,     if error from ISeqInStream
490                           // = SZ_ERROR_PROGRESS, if error from ICompressProgress
491                           // = SZ_ERROR_WRITE,    if error from ISeqOutStream
492                           // = SZ_ERROR_* codes for decoding
493 } CXzStatInfo;
494 
495 void XzStatInfo_Clear(CXzStatInfo *p);
496 
497 /*
498 
499 XzDecMt_Decode()
500 SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
501 
502   SZ_OK               - no error
503                         check also output value in (stat->DataAfterEnd)
504                         that can show additional possible error
505 
506   SZ_ERROR_MEM        - Memory allocation error
507   SZ_ERROR_NO_ARCHIVE - is not xz archive
508   SZ_ERROR_ARCHIVE    - Headers error
509   SZ_ERROR_DATA       - Data Error
510   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
511   SZ_ERROR_CRC        - CRC Error
512   SZ_ERROR_INPUT_EOF  - it needs more input data
513   SZ_ERROR_WRITE      - ISeqOutStream error
514   (SZ_ERROR_READ)     - ISeqInStream errors
515   (SZ_ERROR_PROGRESS) - ICompressProgress errors
516   // SZ_ERROR_THREAD     - error in multi-threading functions
517   MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function
518 */
519 
520 SRes XzDecMt_Decode(CXzDecMtHandle p,
521     const CXzDecMtProps *props,
522     const UInt64 *outDataSize, // NULL means undefined
523     int finishMode,            // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished
524     ISeqOutStreamPtr outStream,
525     // Byte *outBuf, size_t *outBufSize,
526     ISeqInStreamPtr inStream,
527     // const Byte *inData, size_t inDataSize,
528     CXzStatInfo *stat,         // out: decoding results and statistics
529     int *isMT,                 // out: 0 means that ST (Single-Thread) version was used
530                                //      1 means that MT (Multi-Thread) version was used
531     ICompressProgressPtr progress);
532 
533 EXTERN_C_END
534 
535 #endif
536