1 /* Xz.h - Xz interface 2 2023-04-13 : Igor Pavlov : Public domain */ 3 4 #ifndef ZIP7_INC_XZ_H 5 #define ZIP7_INC_XZ_H 6 7 #include "Sha256.h" 8 #include "Delta.h" 9 10 EXTERN_C_BEGIN 11 12 #define XZ_ID_Subblock 1 13 #define XZ_ID_Delta 3 14 #define XZ_ID_X86 4 15 #define XZ_ID_PPC 5 16 #define XZ_ID_IA64 6 17 #define XZ_ID_ARM 7 18 #define XZ_ID_ARMT 8 19 #define XZ_ID_SPARC 9 20 #define XZ_ID_ARM64 0xa 21 #define XZ_ID_LZMA2 0x21 22 23 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); 24 unsigned Xz_WriteVarInt(Byte *buf, UInt64 v); 25 26 /* ---------- xz block ---------- */ 27 28 #define XZ_BLOCK_HEADER_SIZE_MAX 1024 29 30 #define XZ_NUM_FILTERS_MAX 4 31 #define XZ_BF_NUM_FILTERS_MASK 3 32 #define XZ_BF_PACK_SIZE (1 << 6) 33 #define XZ_BF_UNPACK_SIZE (1 << 7) 34 35 #define XZ_FILTER_PROPS_SIZE_MAX 20 36 37 typedef struct 38 { 39 UInt64 id; 40 UInt32 propsSize; 41 Byte props[XZ_FILTER_PROPS_SIZE_MAX]; 42 } CXzFilter; 43 44 typedef struct 45 { 46 UInt64 packSize; 47 UInt64 unpackSize; 48 Byte flags; 49 CXzFilter filters[XZ_NUM_FILTERS_MAX]; 50 } CXzBlock; 51 52 #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) 53 #define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) 54 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) 55 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) 56 57 SRes XzBlock_Parse(CXzBlock *p, const Byte *header); 58 SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes); 59 60 /* ---------- xz stream ---------- */ 61 62 #define XZ_SIG_SIZE 6 63 #define XZ_FOOTER_SIG_SIZE 2 64 65 extern const Byte XZ_SIG[XZ_SIG_SIZE]; 66 67 /* 68 extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE]; 69 */ 70 71 #define XZ_FOOTER_SIG_0 'Y' 72 #define XZ_FOOTER_SIG_1 'Z' 73 74 #define XZ_STREAM_FLAGS_SIZE 2 75 #define XZ_STREAM_CRC_SIZE 4 76 77 #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE) 78 #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4) 79 80 #define XZ_CHECK_MASK 0xF 81 #define XZ_CHECK_NO 0 82 #define XZ_CHECK_CRC32 1 83 #define XZ_CHECK_CRC64 4 84 #define XZ_CHECK_SHA256 10 85 86 typedef struct 87 { 88 unsigned mode; 89 UInt32 crc; 90 UInt64 crc64; 91 CSha256 sha; 92 } CXzCheck; 93 94 void XzCheck_Init(CXzCheck *p, unsigned mode); 95 void XzCheck_Update(CXzCheck *p, const void *data, size_t size); 96 int XzCheck_Final(CXzCheck *p, Byte *digest); 97 98 typedef UInt16 CXzStreamFlags; 99 100 #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK) 101 #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK) 102 #define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32) 103 unsigned XzFlags_GetCheckSize(CXzStreamFlags f); 104 105 SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); 106 SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream); 107 108 typedef struct 109 { 110 UInt64 unpackSize; 111 UInt64 totalSize; 112 } CXzBlockSizes; 113 114 typedef struct 115 { 116 CXzStreamFlags flags; 117 // Byte _pad[6]; 118 size_t numBlocks; 119 CXzBlockSizes *blocks; 120 UInt64 startOffset; 121 } CXzStream; 122 123 void Xz_Construct(CXzStream *p); 124 void Xz_Free(CXzStream *p, ISzAllocPtr alloc); 125 126 #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1) 127 128 UInt64 Xz_GetUnpackSize(const CXzStream *p); 129 UInt64 Xz_GetPackSize(const CXzStream *p); 130 131 typedef struct 132 { 133 size_t num; 134 size_t numAllocated; 135 CXzStream *streams; 136 } CXzs; 137 138 void Xzs_Construct(CXzs *p); 139 void Xzs_Free(CXzs *p, ISzAllocPtr alloc); 140 SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); 141 142 UInt64 Xzs_GetNumBlocks(const CXzs *p); 143 UInt64 Xzs_GetUnpackSize(const CXzs *p); 144 145 146 // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder 147 148 typedef enum 149 { 150 CODER_STATUS_NOT_SPECIFIED, /* use main error code instead */ 151 CODER_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ 152 CODER_STATUS_NOT_FINISHED, /* stream was not finished */ 153 CODER_STATUS_NEEDS_MORE_INPUT /* you must provide more input bytes */ 154 } ECoderStatus; 155 156 157 // ECoderFinishMode values are identical to ELzmaFinishMode 158 159 typedef enum 160 { 161 CODER_FINISH_ANY, /* finish at any point */ 162 CODER_FINISH_END /* block must be finished at the end */ 163 } ECoderFinishMode; 164 165 166 typedef struct 167 { 168 void *p; // state object; 169 void (*Free)(void *p, ISzAllocPtr alloc); 170 SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); 171 void (*Init)(void *p); 172 SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, 173 int srcWasFinished, ECoderFinishMode finishMode, 174 // int *wasFinished, 175 ECoderStatus *status); 176 SizeT (*Filter)(void *p, Byte *data, SizeT size); 177 } IStateCoder; 178 179 180 typedef struct 181 { 182 UInt32 methodId; 183 UInt32 delta; 184 UInt32 ip; 185 UInt32 X86_State; 186 Byte delta_State[DELTA_STATE_SIZE]; 187 } CXzBcFilterStateBase; 188 189 typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size); 190 191 SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, 192 Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc); 193 194 195 #define MIXCODER_NUM_FILTERS_MAX 4 196 197 typedef struct 198 { 199 ISzAllocPtr alloc; 200 Byte *buf; 201 unsigned numCoders; 202 203 Byte *outBuf; 204 size_t outBufSize; 205 size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode) 206 BoolInt wasFinished; 207 SRes res; 208 ECoderStatus status; 209 // BoolInt SingleBufMode; 210 211 int finished[MIXCODER_NUM_FILTERS_MAX - 1]; 212 size_t pos[MIXCODER_NUM_FILTERS_MAX - 1]; 213 size_t size[MIXCODER_NUM_FILTERS_MAX - 1]; 214 UInt64 ids[MIXCODER_NUM_FILTERS_MAX]; 215 SRes results[MIXCODER_NUM_FILTERS_MAX]; 216 IStateCoder coders[MIXCODER_NUM_FILTERS_MAX]; 217 } CMixCoder; 218 219 220 typedef enum 221 { 222 XZ_STATE_STREAM_HEADER, 223 XZ_STATE_STREAM_INDEX, 224 XZ_STATE_STREAM_INDEX_CRC, 225 XZ_STATE_STREAM_FOOTER, 226 XZ_STATE_STREAM_PADDING, 227 XZ_STATE_BLOCK_HEADER, 228 XZ_STATE_BLOCK, 229 XZ_STATE_BLOCK_FOOTER 230 } EXzState; 231 232 233 typedef struct 234 { 235 EXzState state; 236 UInt32 pos; 237 unsigned alignPos; 238 unsigned indexPreSize; 239 240 CXzStreamFlags streamFlags; 241 242 UInt32 blockHeaderSize; 243 UInt64 packSize; 244 UInt64 unpackSize; 245 246 UInt64 numBlocks; // number of finished blocks in current stream 247 UInt64 indexSize; 248 UInt64 indexPos; 249 UInt64 padSize; 250 251 UInt64 numStartedStreams; 252 UInt64 numFinishedStreams; 253 UInt64 numTotalBlocks; 254 255 UInt32 crc; 256 CMixCoder decoder; 257 CXzBlock block; 258 CXzCheck check; 259 CSha256 sha; 260 261 BoolInt parseMode; 262 BoolInt headerParsedOk; 263 BoolInt decodeToStreamSignature; 264 unsigned decodeOnlyOneBlock; 265 266 Byte *outBuf; 267 size_t outBufSize; 268 size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked 269 270 Byte shaDigest[SHA256_DIGEST_SIZE]; 271 Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; 272 } CXzUnpacker; 273 274 /* alloc : aligned for cache line allocation is better */ 275 void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc); 276 void XzUnpacker_Init(CXzUnpacker *p); 277 void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize); 278 void XzUnpacker_Free(CXzUnpacker *p); 279 280 /* 281 XzUnpacker 282 The sequence for decoding functions: 283 { 284 XzUnpacker_Construct() 285 [Decoding_Calls] 286 XzUnpacker_Free() 287 } 288 289 [Decoding_Calls] 290 291 There are 3 types of interfaces for [Decoding_Calls] calls: 292 293 Interface-1 : Partial output buffers: 294 { 295 XzUnpacker_Init() 296 for() 297 { 298 XzUnpacker_Code(); 299 } 300 XzUnpacker_IsStreamWasFinished() 301 } 302 303 Interface-2 : Direct output buffer: 304 Use it, if you know exact size of decoded data, and you need 305 whole xz unpacked data in one output buffer. 306 xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode. 307 { 308 XzUnpacker_Init() 309 XzUnpacker_SetOutBufMode(); // to set output buffer and size 310 for() 311 { 312 XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() 313 } 314 XzUnpacker_IsStreamWasFinished() 315 } 316 317 Interface-3 : Direct output buffer : One call full decoding 318 It unpacks whole input buffer to output buffer in one call. 319 It uses Interface-2 internally. 320 { 321 XzUnpacker_CodeFull() 322 XzUnpacker_IsStreamWasFinished() 323 } 324 */ 325 326 /* 327 finishMode: 328 It has meaning only if the decoding reaches output limit (*destLen). 329 CODER_FINISH_ANY - use smallest number of input bytes 330 CODER_FINISH_END - read EndOfStream marker after decoding 331 332 Returns: 333 SZ_OK 334 status: 335 CODER_STATUS_NOT_FINISHED, 336 CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: 337 1) it needs more input data to finish current xz stream 338 2) xz stream was finished successfully. But the decoder supports multiple 339 concatented xz streams. So it expects more input data for new xz streams. 340 Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. 341 342 SZ_ERROR_MEM - Memory allocation error 343 SZ_ERROR_DATA - Data error 344 SZ_ERROR_UNSUPPORTED - Unsupported method or method properties 345 SZ_ERROR_CRC - CRC error 346 // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 347 348 SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons: 349 - xz Stream Signature failure 350 - CRC32 of xz Stream Header is failed 351 - The size of Stream padding is not multiple of four bytes. 352 It's possible to get that error, if xz stream was finished and the stream 353 contains some another data. In that case you can call XzUnpacker_GetExtraSize() 354 function to get real size of xz stream. 355 */ 356 357 358 SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, 359 const Byte *src, SizeT *srcLen, int srcFinished, 360 ECoderFinishMode finishMode, ECoderStatus *status); 361 362 SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, 363 const Byte *src, SizeT *srcLen, 364 ECoderFinishMode finishMode, ECoderStatus *status); 365 366 /* 367 If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() 368 after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). 369 */ 370 371 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); 372 373 /* 374 XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes, 375 if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. 376 These bytes can be some data after xz archive, or 377 it can be start of new xz stream. 378 379 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of 380 xz stream in two cases, if XzUnpacker_Code() returns: 381 res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT 382 res == SZ_ERROR_NO_ARCHIVE 383 */ 384 385 UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p); 386 387 388 /* 389 for random block decoding: 390 XzUnpacker_Init(); 391 set CXzUnpacker::streamFlags 392 XzUnpacker_PrepareToRandomBlockDecoding() 393 loop 394 { 395 XzUnpacker_Code() 396 XzUnpacker_IsBlockFinished() 397 } 398 */ 399 400 void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p); 401 BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); 402 403 #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags)) 404 405 406 407 408 409 410 /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ 411 412 /* 413 if (CXzDecMtProps::numThreads > 1), the decoder can try to use 414 Multi-Threading. The decoder analyses xz block header, and if 415 there are pack size and unpack size values stored in xz block header, 416 the decoder reads compressed data of block to internal buffers, 417 and then it can start parallel decoding, if there are another blocks. 418 The decoder can switch back to Single-Thread decoding after some conditions. 419 420 The sequence of calls for xz decoding with in/out Streams: 421 { 422 XzDecMt_Create() 423 XzDecMtProps_Init(XzDecMtProps) to set default values of properties 424 // then you can change some XzDecMtProps parameters with required values 425 // here you can set the number of threads and (memUseMax) - the maximum 426 Memory usage for multithreading decoding. 427 for() 428 { 429 XzDecMt_Decode() // one call per one file 430 } 431 XzDecMt_Destroy() 432 } 433 */ 434 435 436 typedef struct 437 { 438 size_t inBufSize_ST; // size of input buffer for Single-Thread decoding 439 size_t outStep_ST; // size of output buffer for Single-Thread decoding 440 BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. 441 442 #ifndef Z7_ST 443 unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding 444 size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created 445 size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. 446 // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer. 447 #endif 448 } CXzDecMtProps; 449 450 void XzDecMtProps_Init(CXzDecMtProps *p); 451 452 typedef struct CXzDecMt CXzDecMt; 453 typedef CXzDecMt * CXzDecMtHandle; 454 // Z7_DECLARE_HANDLE(CXzDecMtHandle) 455 456 /* 457 alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). 458 allocMid : for big allocations, aligned allocation is better 459 */ 460 461 CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); 462 void XzDecMt_Destroy(CXzDecMtHandle p); 463 464 465 typedef struct 466 { 467 Byte UnpackSize_Defined; 468 Byte NumStreams_Defined; 469 Byte NumBlocks_Defined; 470 471 Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. 472 Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data 473 474 UInt64 InSize; // pack size processed. That value doesn't include the data after 475 // end of xz stream, if that data was not correct 476 UInt64 OutSize; 477 478 UInt64 NumStreams; 479 UInt64 NumBlocks; 480 481 SRes DecodeRes; // the error code of xz streams data decoding 482 SRes ReadRes; // error code from ISeqInStream:Read() 483 SRes ProgressRes; // error code from ICompressProgress:Progress() 484 485 SRes CombinedRes; // Combined result error code that shows main rusult 486 // = S_OK, if there is no error. 487 // but check also (DataAfterEnd) that can show additional minor errors. 488 489 SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream 490 // = SZ_ERROR_PROGRESS, if error from ICompressProgress 491 // = SZ_ERROR_WRITE, if error from ISeqOutStream 492 // = SZ_ERROR_* codes for decoding 493 } CXzStatInfo; 494 495 void XzStatInfo_Clear(CXzStatInfo *p); 496 497 /* 498 499 XzDecMt_Decode() 500 SRes: it's combined decoding result. It also is equal to stat->CombinedRes. 501 502 SZ_OK - no error 503 check also output value in (stat->DataAfterEnd) 504 that can show additional possible error 505 506 SZ_ERROR_MEM - Memory allocation error 507 SZ_ERROR_NO_ARCHIVE - is not xz archive 508 SZ_ERROR_ARCHIVE - Headers error 509 SZ_ERROR_DATA - Data Error 510 SZ_ERROR_UNSUPPORTED - Unsupported method or method properties 511 SZ_ERROR_CRC - CRC Error 512 SZ_ERROR_INPUT_EOF - it needs more input data 513 SZ_ERROR_WRITE - ISeqOutStream error 514 (SZ_ERROR_READ) - ISeqInStream errors 515 (SZ_ERROR_PROGRESS) - ICompressProgress errors 516 // SZ_ERROR_THREAD - error in multi-threading functions 517 MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function 518 */ 519 520 SRes XzDecMt_Decode(CXzDecMtHandle p, 521 const CXzDecMtProps *props, 522 const UInt64 *outDataSize, // NULL means undefined 523 int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished 524 ISeqOutStreamPtr outStream, 525 // Byte *outBuf, size_t *outBufSize, 526 ISeqInStreamPtr inStream, 527 // const Byte *inData, size_t inDataSize, 528 CXzStatInfo *stat, // out: decoding results and statistics 529 int *isMT, // out: 0 means that ST (Single-Thread) version was used 530 // 1 means that MT (Multi-Thread) version was used 531 ICompressProgressPtr progress); 532 533 EXTERN_C_END 534 535 #endif 536