1 #ifndef SEEKABLE_H 2 #define SEEKABLE_H 3 4 #if defined (__cplusplus) 5 extern "C" { 6 #endif 7 8 #include <stdio.h> 9 #include "zstd.h" /* ZSTDLIB_API */ 10 11 12 #define ZSTD_seekTableFooterSize 9 13 14 #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 15 16 #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U 17 18 /* Limit the maximum size to avoid any potential issues storing the compressed size */ 19 #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U 20 21 /*-**************************************************************************** 22 * Seekable Format 23 * 24 * The seekable format splits the compressed data into a series of "frames", 25 * each compressed individually so that decompression of a section in the 26 * middle of an archive only requires zstd to decompress at most a frame's 27 * worth of extra data, instead of the entire archive. 28 ******************************************************************************/ 29 30 typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; 31 typedef struct ZSTD_seekable_s ZSTD_seekable; 32 typedef struct ZSTD_seekTable_s ZSTD_seekTable; 33 34 /*-**************************************************************************** 35 * Seekable compression - HowTo 36 * A ZSTD_seekable_CStream object is required to tracking streaming operation. 37 * Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/ 38 * release resources. 39 * 40 * Streaming objects are reusable to avoid allocation and deallocation, 41 * to start a new compression operation call ZSTD_seekable_initCStream() on the 42 * compressor. 43 * 44 * Data streamed to the seekable compressor will automatically be split into 45 * frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()), 46 * or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is 47 * called or when the default maximum frame size (2GB) is reached. 48 * 49 * Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object 50 * for a new compression operation. 51 * `maxFrameSize` indicates the size at which to automatically start a new 52 * seekable frame. `maxFrameSize == 0` implies the default maximum size. 53 * `checksumFlag` indicates whether or not the seek table should include frame 54 * checksums on the uncompressed data for verification. 55 * @return : a size hint for input to provide for compression, or an error code 56 * checkable with ZSTD_isError() 57 * 58 * Use ZSTD_seekable_compressStream() repetitively to consume input stream. 59 * The function will automatically update both `pos` fields. 60 * Note that it may not consume the entire input, in which case `pos < size`, 61 * and it's up to the caller to present again remaining data. 62 * @return : a size hint, preferred nb of bytes to use as input for next 63 * function call or an error code, which can be tested using 64 * ZSTD_isError(). 65 * Note 1 : it's just a hint, to help latency a little, any other 66 * value will work fine. 67 * 68 * At any time, call ZSTD_seekable_endFrame() to end the current frame and 69 * start a new one. 70 * 71 * ZSTD_seekable_endStream() will end the current frame, and then write the seek 72 * table so that decompressors can efficiently find compressed frames. 73 * ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush 74 * all the necessary data to `output`. In this case, it should be called again 75 * until all remaining data is flushed out and 0 is returned. 76 ******************************************************************************/ 77 78 /*===== Seekable compressor management =====*/ 79 ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void); 80 ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs); 81 82 /*===== Seekable compression functions =====*/ 83 ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize); 84 ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); 85 ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); 86 ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); 87 88 /*= Raw seek table API 89 * These functions allow for the seek table to be constructed directly. 90 * This table can then be appended to a file of concatenated frames. 91 * This allows the frames to be compressed independently, even in parallel, 92 * and compiled together afterward into a seekable archive. 93 * 94 * Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking 95 * structure. 96 * 97 * Call ZSTD_seekable_logFrame() once for each frame in the archive. 98 * checksum is optional, and will not be used if checksumFlag was 0 when the 99 * frame log was created. If present, it should be the least significant 32 100 * bits of the XXH64 hash of the uncompressed data. 101 * 102 * Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table. 103 * If the entire table was written, the return value will be 0. Otherwise, 104 * it will be equal to the number of bytes left to write. */ 105 typedef struct ZSTD_frameLog_s ZSTD_frameLog; 106 ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag); 107 ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl); 108 ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum); 109 ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output); 110 111 112 /*-**************************************************************************** 113 * Seekable decompression - HowTo 114 * A ZSTD_seekable object is required to tracking the seekTable. 115 * 116 * Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the 117 * the seek table provided in the input. 118 * There are three modes for ZSTD_seekable_init: 119 * - ZSTD_seekable_initBuff() : An in-memory API. The data contained in 120 * `src` should be the entire seekable file, including the seek table. 121 * `src` should be kept alive and unmodified until the ZSTD_seekable object 122 * is freed or reset. 123 * - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and 124 * fseek will be used to access the required data for building the seek 125 * table and doing decompression operations. `src` should not be closed 126 * or modified until the ZSTD_seekable object is freed or reset. 127 * - ZSTD_seekable_initAdvanced() : A general API allowing the client to 128 * provide its own read and seek callbacks. 129 * + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`. 130 * Premature EOF should be treated as an error. 131 * + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`, 132 * where origin is either SEEK_SET (beginning of 133 * file), or SEEK_END (end of file). 134 * Both functions should return a non-negative value in case of success, and a 135 * negative value in case of failure. If implementing using this API and 136 * stdio, be careful with files larger than 4GB and fseek. All of these 137 * functions return an error code checkable with ZSTD_isError(). 138 * 139 * Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed 140 * offset `offset`. ZSTD_seekable_decompress may have to decompress the entire 141 * prefix of the frame before the desired data if it has not already processed 142 * this section. If ZSTD_seekable_decompress is called multiple times for a 143 * consecutive range of data, it will efficiently retain the decompressor object 144 * and avoid redecompressing frame prefixes. The return value is the number of 145 * bytes decompressed, or an error code checkable with ZSTD_isError(). 146 * 147 * The seek table access functions can be used to obtain the data contained 148 * in the seek table. If frameIndex is larger than the value returned by 149 * ZSTD_seekable_getNumFrames(), they will return error codes checkable with 150 * ZSTD_isError(). Note that since the offset access functions return 151 * unsigned long long instead of size_t, in this case they will instead return 152 * the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE. 153 ******************************************************************************/ 154 155 /*===== Seekable decompressor management =====*/ 156 ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); 157 ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); 158 159 /*===== Seekable decompression functions =====*/ 160 ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); 161 ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); 162 ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset); 163 ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); 164 165 #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) 166 /*===== Seekable seek table access functions =====*/ 167 ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs); 168 ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); 169 ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); 170 ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); 171 ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); 172 ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset); 173 174 175 /*-**************************************************************************** 176 * Direct exploitation of the seekTable 177 * 178 * Memory constrained use cases that manage multiple archives 179 * benefit from retaining multiple archive seek tables 180 * without retaining a ZSTD_seekable instance for each. 181 * 182 * Below API allow the above-mentioned use cases 183 * to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable, 184 * then throw the ZSTD_seekable away to save memory. 185 * 186 * Standard ZSTD operations can then be used 187 * to decompress frames based on seek table offsets. 188 ******************************************************************************/ 189 190 /*===== Independent seek table management =====*/ 191 ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs); 192 ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); 193 194 /*===== Direct seek table access functions =====*/ 195 ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st); 196 ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); 197 ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); 198 ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); 199 ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); 200 ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset); 201 202 203 /*===== Seekable advanced I/O API =====*/ 204 typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); 205 typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); 206 typedef struct { 207 void* opaque; 208 ZSTD_seekable_read* read; 209 ZSTD_seekable_seek* seek; 210 } ZSTD_seekable_customFile; 211 ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src); 212 213 #if defined (__cplusplus) 214 } 215 #endif 216 217 #endif 218