• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Xz.h - Xz interface
2 2021-04-01 : Igor Pavlov : Public domain */
3 
4 #ifndef __XZ_H
5 #define __XZ_H
6 
7 #include "Sha256.h"
8 
9 EXTERN_C_BEGIN
10 
11 #define XZ_ID_Subblock 1 /* XZ_ID_* : numeric identifiers. NOTE(review): presumably the filter IDs stored in CXzFilter::id - confirm */
12 #define XZ_ID_Delta 3
13 #define XZ_ID_X86 4
14 #define XZ_ID_PPC 5
15 #define XZ_ID_IA64 6
16 #define XZ_ID_ARM 7
17 #define XZ_ID_ARMT 8
18 #define XZ_ID_SPARC 9
19 #define XZ_ID_LZMA2 0x21
20 
21 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); /* NOTE(review): presumably decodes from (p), using at most (maxSize) bytes, into (*value); meaning of the result is not visible here - confirm */
22 unsigned Xz_WriteVarInt(Byte *buf, UInt64 v); /* NOTE(review): presumably encodes (v) into (buf); required capacity of (buf) and meaning of the result are not visible here - confirm */
23 
24 /* ---------- xz block ---------- */
25 
26 #define XZ_BLOCK_HEADER_SIZE_MAX 1024 /* also used as the size of CXzUnpacker::buf */
27 
28 #define XZ_NUM_FILTERS_MAX 4 /* capacity of CXzBlock::filters[] */
29 #define XZ_BF_NUM_FILTERS_MASK 3 /* CXzBlock::flags bits 0-1 : (number of filters - 1), see XzBlock_GetNumFilters() */
30 #define XZ_BF_PACK_SIZE (1 << 6) /* CXzBlock::flags bit 6, tested by XzBlock_HasPackSize() */
31 #define XZ_BF_UNPACK_SIZE (1 << 7) /* CXzBlock::flags bit 7, tested by XzBlock_HasUnpackSize() */
32 
33 #define XZ_FILTER_PROPS_SIZE_MAX 20 /* capacity of CXzFilter::props[] */
34 
35 typedef struct /* one filter entry; CXzBlock::filters[] has this element type */
36 {
37   UInt64 id; /* NOTE(review): presumably one of the XZ_ID_* values - confirm */
38   UInt32 propsSize; /* NOTE(review): presumably the number of used bytes in props[] - confirm */
39   Byte props[XZ_FILTER_PROPS_SIZE_MAX]; /* fixed-capacity storage (XZ_FILTER_PROPS_SIZE_MAX bytes) */
40 } CXzFilter;
41 
42 typedef struct /* block descriptor : two sizes, a flag byte and a fixed-capacity filter array */
43 {
44   UInt64 packSize; /* NOTE(review): presumably meaningful only if XzBlock_HasPackSize() - confirm */
45   UInt64 unpackSize; /* NOTE(review): presumably meaningful only if XzBlock_HasUnpackSize() - confirm */
46   Byte flags; /* XZ_BF_* bits; read them with the XzBlock_* macros */
47   CXzFilter filters[XZ_NUM_FILTERS_MAX]; /* XzBlock_GetNumFilters() yields a count in range 1..4 */
48 } CXzBlock;
49 
50 #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) /* low 2 bits of flags, plus 1 : result is 1..4 */
51 #define XzBlock_HasPackSize(p)   (((p)->flags & XZ_BF_PACK_SIZE) != 0) /* 1 if bit 6 of flags is set, else 0 */
52 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) /* 1 if bit 7 of flags is set, else 0 */
53 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) /* 1 if any bit outside the three known fields is set (bits 2-5 for a Byte flags field) */
54 
55 SRes XzBlock_Parse(CXzBlock *p, const Byte *header); /* NOTE(review): presumably fills (*p) from an in-memory block header; no length is passed for (header) - confirm how its size is determined */
56 SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes); /* NOTE(review): (isIndex) and (headerSizeRes) look like out-parameters - confirm in the implementation */
57 
58 /* ---------- xz stream ---------- */
59 
60 #define XZ_SIG_SIZE 6 /* number of bytes in XZ_SIG[] */
61 #define XZ_FOOTER_SIG_SIZE 2 /* the footer signature bytes are given by XZ_FOOTER_SIG_0 and XZ_FOOTER_SIG_1 */
62 
63 extern const Byte XZ_SIG[XZ_SIG_SIZE]; /* declaration only; the array is not defined in this header */
64 
65 /*
66 extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE];
67 */
68 
69 #define XZ_FOOTER_SIG_0 'Y' /* first of the XZ_FOOTER_SIG_SIZE footer signature bytes */
70 #define XZ_FOOTER_SIG_1 'Z' /* second footer signature byte */
71 
72 #define XZ_STREAM_FLAGS_SIZE 2
73 #define XZ_STREAM_CRC_SIZE 4
74 
75 #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE) /* 6 + 2 + 4 = 12 */
76 #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4) /* 2 + 2 + 4 + 4 = 12. NOTE(review): the literal 4 is presumably the footer's backward-size field - confirm against the xz format spec */
77 
78 #define XZ_CHECK_MASK 0xF /* low 4 bits of CXzStreamFlags, extracted by XzFlags_GetCheckType() */
79 #define XZ_CHECK_NO 0 /* XZ_CHECK_* : values compared against XzFlags_GetCheckType() */
80 #define XZ_CHECK_CRC32 1
81 #define XZ_CHECK_CRC64 4
82 #define XZ_CHECK_SHA256 10
83 
84 typedef struct /* check state used by XzCheck_Init() / XzCheck_Update() / XzCheck_Final() */
85 {
86   unsigned mode; /* NOTE(review): presumably one of XZ_CHECK_*, as passed to XzCheck_Init() - confirm */
87   UInt32 crc;
88   UInt64 crc64;
89   CSha256 sha; /* type is declared in "Sha256.h" */
90 } CXzCheck;
91 
92 void XzCheck_Init(CXzCheck *p, unsigned mode); /* NOTE(review): (mode) is presumably an XZ_CHECK_* value - confirm */
93 void XzCheck_Update(CXzCheck *p, const void *data, size_t size); /* feeds (size) bytes at (data) into the check state */
94 int XzCheck_Final(CXzCheck *p, Byte *digest); /* NOTE(review): required capacity of (digest) and meaning of the int result are not visible in this header - see the implementation */
95 
96 typedef UInt16 CXzStreamFlags; /* stream flags value; the check type lives in the low 4 bits (XZ_CHECK_MASK) */
97 
98 #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK) /* nonzero if no bit above the check-type field is set */
99 #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK) /* value in range 0..15; compare with XZ_CHECK_* */
100 #define XzFlags_HasDataCrc32(f) (XzFlags_GetCheckType(f) == XZ_CHECK_CRC32) /* nonzero if the check type is XZ_CHECK_CRC32 */
101 unsigned XzFlags_GetCheckSize(CXzStreamFlags f); /* size used by XzUnpacker_GetPackSizeForIndex(). NOTE(review): presumably the byte size of the check field for (f) - confirm */
102 
103 SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); /* NOTE(review): no length is passed; presumably (buf) must hold XZ_STREAM_HEADER_SIZE bytes - confirm */
104 SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream); /* NOTE(review): presumably the stream-reading counterpart of Xz_ParseHeader() - confirm */
105 
106 typedef struct /* pair of sizes for one block; CXzStream::blocks points to this type */
107 {
108   UInt64 unpackSize;
109   UInt64 totalSize; /* NOTE(review): exact contents of "total" (header / padding / check) are not visible here - confirm */
110 } CXzBlockSizes;
111 
112 typedef struct /* description of one xz stream : flags, block size table, start offset */
113 {
114   CXzStreamFlags flags;
115   size_t numBlocks; /* NOTE(review): presumably the number of elements at (blocks) - confirm */
116   CXzBlockSizes *blocks; /* NOTE(review): presumably released by Xz_Free() with the given allocator - confirm */
117   UInt64 startOffset;
118 } CXzStream;
119 
120 void Xz_Construct(CXzStream *p);
121 void Xz_Free(CXzStream *p, ISzAllocPtr alloc); /* NOTE(review): (alloc) presumably must be the allocator that was used for p->blocks - confirm */
122 
123 #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1) /* -1 converted to UInt64 (all bits set for an unsigned 64-bit type). NOTE(review): presumably returned by the size getters on overflow - confirm */
124 
125 UInt64 Xz_GetUnpackSize(const CXzStream *p);
126 UInt64 Xz_GetPackSize(const CXzStream *p);
127 
128 typedef struct /* set of CXzStream records, used by the Xzs_* functions */
129 {
130   size_t num; /* NOTE(review): presumably the number of used entries in (streams) - confirm */
131   size_t numAllocated; /* NOTE(review): presumably the allocated capacity of (streams) - confirm */
132   CXzStream *streams;
133 } CXzs;
134 
135 void Xzs_Construct(CXzs *p);
136 void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
137 SRes Xzs_ReadBackward(CXzs *p, ILookInStream *inStream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc); /* NOTE(review): semantics of (*startOffset) on input / output and whether (progress) may be NULL are not visible here - confirm */
138 
139 UInt64 Xzs_GetNumBlocks(const CXzs *p);
140 UInt64 Xzs_GetUnpackSize(const CXzs *p);
141 
142 
143 // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder
144 
145 typedef enum /* reported through the (status) out-parameter of XzUnpacker_Code() and IStateCoder::Code2 */
146 {
147   CODER_STATUS_NOT_SPECIFIED,               /* use main error code instead (implicit value 0) */
148   CODER_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
149   CODER_STATUS_NOT_FINISHED,                /* stream was not finished */
150   CODER_STATUS_NEEDS_MORE_INPUT             /* you must provide more input bytes */
151 } ECoderStatus;
152 
153 
154 // ECoderFinishMode values are identical to ELzmaFinishMode
155 
156 typedef enum /* (finishMode) argument of XzUnpacker_Code() / XzUnpacker_CodeFull() / IStateCoder::Code2 */
157 {
158   CODER_FINISH_ANY,   /* finish at any point (implicit value 0) */
159   CODER_FINISH_END    /* block must be finished at the end (implicit value 1) */
160 } ECoderFinishMode;
161 
162 
163 typedef struct _IStateCoder /* callback table for one coder; CMixCoder::coders[] has this element type */
164 {
165   void *p; /* NOTE(review): presumably the coder state that is passed as first argument to the callbacks below - confirm */
166   void (*Free)(void *p, ISzAllocPtr alloc);
167   SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);
168   void (*Init)(void *p);
169   SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
170       int srcWasFinished, ECoderFinishMode finishMode,
171       // int *wasFinished,
172       ECoderStatus *status); /* same parameter layout as XzUnpacker_Code(), with (void *p) as state */
173   SizeT (*Filter)(void *p, Byte *data, SizeT size); /* NOTE(review): presumably transforms (data) in place and returns the processed size - confirm */
174 } IStateCoder;
175 
176 
177 
178 #define MIXCODER_NUM_FILTERS_MAX 4 /* same value as XZ_NUM_FILTERS_MAX */
179 
180 typedef struct /* state of a chain of up to MIXCODER_NUM_FILTERS_MAX coders; embedded in CXzUnpacker as (decoder) */
181 {
182   ISzAllocPtr alloc;
183   Byte *buf;
184   unsigned numCoders; /* NOTE(review): presumably the number of used entries in coders[] - confirm */
185 
186   Byte *outBuf;
187   size_t outBufSize;
188   size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode)
189   BoolInt wasFinished;
190   SRes res;
191   ECoderStatus status;
192   // BoolInt SingleBufMode;
193 
194   int finished[MIXCODER_NUM_FILTERS_MAX - 1]; /* the three arrays of size (MAX - 1) : NOTE(review): presumably state of the buffers between adjacent coders - confirm */
195   size_t pos[MIXCODER_NUM_FILTERS_MAX - 1];
196   size_t size[MIXCODER_NUM_FILTERS_MAX - 1];
197   UInt64 ids[MIXCODER_NUM_FILTERS_MAX]; /* the three arrays of size MAX : one entry per coder slot */
198   SRes results[MIXCODER_NUM_FILTERS_MAX];
199   IStateCoder coders[MIXCODER_NUM_FILTERS_MAX];
200 } CMixCoder;
201 
202 
203 typedef enum /* type of CXzUnpacker::state */
204 {
205   XZ_STATE_STREAM_HEADER, /* implicit value 0; XzUnpacker_GetExtraSize() is documented for this state */
206   XZ_STATE_STREAM_INDEX,
207   XZ_STATE_STREAM_INDEX_CRC,
208   XZ_STATE_STREAM_FOOTER,
209   XZ_STATE_STREAM_PADDING, /* XzUnpacker_GetExtraSize() is documented for this state too */
210   XZ_STATE_BLOCK_HEADER,
211   XZ_STATE_BLOCK,
212   XZ_STATE_BLOCK_FOOTER
213 } EXzState;
214 
215 
216 typedef struct /* xz decoder state used by all XzUnpacker_* functions */
217 {
218   EXzState state;
219   UInt32 pos;
220   unsigned alignPos;
221   unsigned indexPreSize;
222 
223   CXzStreamFlags streamFlags; /* must be set by the caller before XzUnpacker_PrepareToRandomBlockDecoding() (see random block decoding note) */
224 
225   UInt32 blockHeaderSize; /* read by XzUnpacker_GetPackSizeForIndex() */
226   UInt64 packSize; /* read by XzUnpacker_GetPackSizeForIndex() */
227   UInt64 unpackSize;
228 
229   UInt64 numBlocks; // number of finished blocks in current stream
230   UInt64 indexSize;
231   UInt64 indexPos;
232   UInt64 padSize;
233 
234   UInt64 numStartedStreams;
235   UInt64 numFinishedStreams;
236   UInt64 numTotalBlocks;
237 
238   UInt32 crc;
239   CMixCoder decoder;
240   CXzBlock block;
241   CXzCheck check;
242   CSha256 sha;
243 
244   BoolInt parseMode;
245   BoolInt headerParsedOk;
246   BoolInt decodeToStreamSignature;
247   unsigned decodeOnlyOneBlock;
248 
249   Byte *outBuf; /* NOTE(review): presumably the buffer given to XzUnpacker_SetOutBuf() - confirm */
250   size_t outBufSize;
251   size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked
252 
253   Byte shaDigest[SHA256_DIGEST_SIZE];
254   Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; /* sized for the largest block header (XZ_BLOCK_HEADER_SIZE_MAX bytes) */
255 } CXzUnpacker;
256 
257 /* alloc : an allocator that returns cache-line-aligned memory is preferable */
258 void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc); /* first call of the sequence; pair it with XzUnpacker_Free() */
259 void XzUnpacker_Init(CXzUnpacker *p); /* called before each series of decoding calls */
260 void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize); /* sets the output buffer and its size for direct output buffer decoding */
261 void XzUnpacker_Free(CXzUnpacker *p); /* last call of the sequence */
262 
263 /*
264   XzUnpacker
265   The sequence for decoding functions:
266   {
267     XzUnpacker_Construct()
268     [Decoding_Calls]
269     XzUnpacker_Free()
270   }
271 
272   [Decoding_Calls]
273 
274   There are 3 types of interfaces for [Decoding_Calls] calls:
275 
276   Interface-1 : Partial output buffers:
277     {
278       XzUnpacker_Init()
279       for()
280       {
281         XzUnpacker_Code();
282       }
283       XzUnpacker_IsStreamWasFinished()
284     }
285 
286   Interface-2 : Direct output buffer:
287     Use it, if you know exact size of decoded data, and you need
288     whole xz unpacked data in one output buffer.
289     xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode.
290     {
291       XzUnpacker_Init()
292       XzUnpacker_SetOutBuf(); // to set output buffer and size
293       for()
294       {
295         XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
296       }
297       XzUnpacker_IsStreamWasFinished()
298     }
299 
300   Interface-3 : Direct output buffer : One call full decoding
301     It unpacks whole input buffer to output buffer in one call.
302     It uses Interface-2 internally.
303     {
304       XzUnpacker_CodeFull()
305       XzUnpacker_IsStreamWasFinished()
306     }
307 */
308 
309 /*
310 finishMode:
311   It has meaning only if the decoding reaches output limit (*destLen).
312   CODER_FINISH_ANY - use smallest number of input bytes
313   CODER_FINISH_END - read EndOfStream marker after decoding
314 
315 Returns:
316   SZ_OK
317     status:
318       CODER_STATUS_NOT_FINISHED,
319       CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
320          1) it needs more input data to finish current xz stream
321          2) xz stream was finished successfully. But the decoder supports multiple
322             concatenated xz streams. So it expects more input data for new xz streams.
323          Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
324 
325   SZ_ERROR_MEM  - Memory allocation error
326   SZ_ERROR_DATA - Data error
327   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
328   SZ_ERROR_CRC  - CRC error
329   // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
330 
331   SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons:
332      - xz Stream Signature failure
333      - CRC32 check of xz Stream Header failed
334      - The size of Stream padding is not multiple of four bytes.
335     It's possible to get that error, if xz stream was finished and the stream
336     contains some other data. In that case you can call XzUnpacker_GetExtraSize()
337     function to get real size of xz stream.
338 */
339 
340 
341 SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, /* (dest = NULL) in direct output buffer mode (Interface-2) */
342     const Byte *src, SizeT *srcLen, int srcFinished, /* NOTE(review): (*destLen) / (*srcLen) look like in-out byte counts - confirm */
343     ECoderFinishMode finishMode, ECoderStatus *status);
344 
345 SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, /* Interface-3 : unpacks whole input buffer to output buffer in one call */
346     const Byte *src, SizeT *srcLen,
347     ECoderFinishMode finishMode, ECoderStatus *status);
348 
349 /*
350 If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
351 after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
352 */
353 
354 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); /* call it after successful XzUnpacker_CodeFull() or after the last XzUnpacker_Code() */
355 
356 /*
357 XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
358  if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
359 These bytes can be some data after xz archive, or
360 it can be start of new xz stream.
361 
362 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
363 xz stream in two cases, if XzUnpacker_Code() returns:
364   res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT
365   res == SZ_ERROR_NO_ARCHIVE
366 */
367 
368 UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p); /* unconfirmed byte count in (XZ_STATE_STREAM_HEADER) or (XZ_STATE_STREAM_PADDING) state */
369 
370 
371 /*
372   for random block decoding:
373     XzUnpacker_Init();
374     set CXzUnpacker::streamFlags
375     XzUnpacker_PrepareToRandomBlockDecoding()
376     loop
377     {
378       XzUnpacker_Code()
379       XzUnpacker_IsBlockFinished()
380     }
381 */
382 
383 void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p); /* call after XzUnpacker_Init() and after setting p->streamFlags */
384 BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); /* checked after XzUnpacker_Code() in the random block decoding loop */
385 
386 #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags)) /* packSize + blockHeaderSize + check size; (p) is expanded three times, so pass an expression without side effects */
387 
388 
389 
390 
391 
392 
393 /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
394 
395 /*
396   if (CXzDecMtProps::numThreads > 1), the decoder can try to use
397   Multi-Threading. The decoder analyses xz block header, and if
398   there are pack size and unpack size values stored in xz block header,
399   the decoder reads compressed data of block to internal buffers,
400   and then it can start parallel decoding, if there are other blocks.
401   The decoder can switch back to Single-Thread decoding after some conditions.
402 
403   The sequence of calls for xz decoding with in/out Streams:
404   {
405     XzDecMt_Create()
406     XzDecMtProps_Init(XzDecMtProps) to set default values of properties
407     // then you can change some XzDecMtProps parameters with required values
408     // here you can set the number of threads and (memUseMax) - the maximum
409     // memory usage for multithreading decoding.
410     for()
411     {
412       XzDecMt_Decode() // one call per one file
413     }
414     XzDecMt_Destroy()
415   }
416 */
417 
418 
419 typedef struct /* decoding properties for XzDecMt_Decode(); set defaults with XzDecMtProps_Init() */
420 {
421   size_t inBufSize_ST;    // size of input buffer for Single-Thread decoding
422   size_t outStep_ST;      // size of output buffer for Single-Thread decoding
423   BoolInt ignoreErrors;   // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
424 
425   #ifndef _7ZIP_ST        // the fields below exist only if _7ZIP_ST is not defined
426   unsigned numThreads;    // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
427   size_t inBufSize_MT;    // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
428   size_t memUseMax;       // the limit of total memory usage for Multi-Thread decoding.
429                           // it's recommended to set (memUseMax) manually to a value that is smaller than the total size of RAM in computer.
430   #endif
431 } CXzDecMtProps;
432 
433 void XzDecMtProps_Init(CXzDecMtProps *p); // sets default values of properties
434 
435 
436 typedef void * CXzDecMtHandle; /* opaque handle : obtained from XzDecMt_Create(), passed to XzDecMt_Decode() and XzDecMt_Destroy() */
437 
438 /*
439   alloc    : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
440   allocMid : for big allocations, aligned allocation is better
441 */
442 
443 CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); /* NOTE(review): the failure result (presumably NULL) is not visible here - confirm */
444 void XzDecMt_Destroy(CXzDecMtHandle p); /* last call of the XzDecMt sequence */
445 
446 
447 typedef struct /* decoding results and statistics, filled through the (stat) parameter of XzDecMt_Decode() */
448 {
449   Byte UnpackSize_Defined;
450   Byte NumStreams_Defined;
451   Byte NumBlocks_Defined;
452 
453   Byte DataAfterEnd;      // there are some additional data after good xz streams, and that data is not new xz stream.
454   Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
455 
456   UInt64 InSize;          // pack size processed. That value doesn't include the data after
457                           // end of xz stream, if that data was not correct
458   UInt64 OutSize;
459 
460   UInt64 NumStreams;
461   UInt64 NumBlocks;
462 
463   SRes DecodeRes;         // the error code of xz streams data decoding
464   SRes ReadRes;           // error code from ISeqInStream:Read()
465   SRes ProgressRes;       // error code from ICompressProgress:Progress()
466 
467   SRes CombinedRes;       // Combined result error code that shows main result
468                           // = SZ_OK, if there is no error.
469                           // but check also (DataAfterEnd) that can show additional minor errors.
470 
471   SRes CombinedRes_Type;  // = SZ_ERROR_READ,     if error from ISeqInStream
472                           // = SZ_ERROR_PROGRESS, if error from ICompressProgress
473                           // = SZ_ERROR_WRITE,    if error from ISeqOutStream
474                           // = SZ_ERROR_* codes for decoding
475 } CXzStatInfo;
476 
477 void XzStatInfo_Clear(CXzStatInfo *p); // NOTE(review): presumably resets all fields of (*p) - confirm the exact initial values
478 
479 /*
480 
481 XzDecMt_Decode()
482 SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
483 
484   SZ_OK               - no error
485                         check also output value in (stat->DataAfterEnd)
486                         that can show additional possible error
487 
488   SZ_ERROR_MEM        - Memory allocation error
489   SZ_ERROR_NO_ARCHIVE - is not xz archive
490   SZ_ERROR_ARCHIVE    - Headers error
491   SZ_ERROR_DATA       - Data Error
492   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
493   SZ_ERROR_CRC        - CRC Error
494   SZ_ERROR_INPUT_EOF  - it needs more input data
495   SZ_ERROR_WRITE      - ISeqOutStream error
496   (SZ_ERROR_READ)     - ISeqInStream errors
497   (SZ_ERROR_PROGRESS) - ICompressProgress errors
498   // SZ_ERROR_THREAD     - error in multi-threading functions
499   MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function
500 */
501 
502 SRes XzDecMt_Decode(CXzDecMtHandle p, // handle from XzDecMt_Create(); the result is equal to stat->CombinedRes
503     const CXzDecMtProps *props,
504     const UInt64 *outDataSize, // NULL means undefined
505     int finishMode,            // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished
506     ISeqOutStream *outStream,
507     // Byte *outBuf, size_t *outBufSize,
508     ISeqInStream *inStream,
509     // const Byte *inData, size_t inDataSize,
510     CXzStatInfo *stat,         // out: decoding results and statistics
511     int *isMT,                 // out: 0 means that ST (Single-Thread) version was used
512                                //      1 means that MT (Multi-Thread) version was used
513     ICompressProgress *progress); // NOTE(review): whether (progress) may be NULL is not visible here - confirm
514 
515 EXTERN_C_END
516 
517 #endif
518