• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef SEEKABLE_H
2 #define SEEKABLE_H
3 
4 #if defined (__cplusplus)
5 extern "C" {
6 #endif
7 
8 #include <stdio.h>
9 #include "zstd.h"   /* ZSTDLIB_API */
10 
11 
12 #define ZSTD_seekTableFooterSize 9   /* size of the seek table footer (presumably in bytes -- confirm against the format spec) */
13 
14 #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1   /* magic number of the seekable format (presumably stored with the seek table -- confirm) */
15 
16 #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U   /* 2^27; presumably the upper bound on the number of frames in one archive -- confirm */
17 
18 /* Limit the maximum size to avoid any potential issues storing the compressed size */
19 #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U   /* 2^31, i.e. the 2GB default maximum frame size mentioned in the compression HowTo */
20 
21 /*-****************************************************************************
22 *  Seekable Format
23 *
24 *  The seekable format splits the compressed data into a series of "frames",
25 *  each compressed individually so that decompression of a section in the
26 *  middle of an archive only requires zstd to decompress at most a frame's
27 *  worth of extra data, instead of the entire archive.
28 ******************************************************************************/
29 
30 typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;   /* incomplete type here; tracks a streaming seekable compression operation */
31 typedef struct ZSTD_seekable_s ZSTD_seekable;   /* incomplete type here; tracks the seek table for seekable decompression */
32 typedef struct ZSTD_seekTable_s ZSTD_seekTable;   /* incomplete type here; seek table that can be extracted from a ZSTD_seekable */
33 
34 /*-****************************************************************************
35 *  Seekable compression - HowTo
36 *  A ZSTD_seekable_CStream object is required to track the streaming operation.
37 *  Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/
38 *  release resources.
39 *
40 *  Streaming objects are reusable to avoid allocation and deallocation;
41 *  to start a new compression operation, call ZSTD_seekable_initCStream() on
42 *  the compressor.
43 *
44 *  Data streamed to the seekable compressor will automatically be split into
45 *  frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
46 *  or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
47 *  called or when the default maximum frame size (2GB) is reached.
48 *
49 *  Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
50 *  for a new compression operation.
51 *  `maxFrameSize` indicates the size at which to automatically start a new
52 *  seekable frame.  `maxFrameSize == 0` implies the default maximum size.
53 *  `checksumFlag` indicates whether or not the seek table should include frame
54 *  checksums on the uncompressed data for verification.
55 *  @return : a size hint for input to provide for compression, or an error code
56 *            checkable with ZSTD_isError()
57 *
58 *  Use ZSTD_seekable_compressStream() repeatedly to consume the input stream.
59 *  The function will automatically update both `pos` fields.
60 *  Note that it may not consume the entire input, in which case `pos < size`,
61 *  and it's up to the caller to present again remaining data.
62 *  @return : a size hint, preferred nb of bytes to use as input for next
63 *            function call or an error code, which can be tested using
64 *            ZSTD_isError().
65 *            Note 1 : it's just a hint, to help latency a little, any other
66 *                     value will work fine.
67 *
68 *  At any time, call ZSTD_seekable_endFrame() to end the current frame and
69 *  start a new one.
70 *
71 *  ZSTD_seekable_endStream() will end the current frame, and then write the seek
72 *  table so that decompressors can efficiently find compressed frames.
73 *  ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
74 *  all the necessary data to `output`.  In this case, it should be called again
75 *  until all remaining data is flushed out and 0 is returned.
76 ******************************************************************************/
77 
78 /*===== Seekable compressor management =====*/
79 ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void);   /* create a seekable compressor object */
80 ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs);   /* release the resources held by `zcs` */
81 
82 /*===== Seekable compression functions =====*/
83 ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize);   /* start a new compression operation; maxFrameSize == 0 selects the default maximum; returns a size hint or an error code (ZSTD_isError()) */
84 ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);   /* consume `input`, updating both `pos` fields; may leave input unconsumed; returns a size hint or an error code (ZSTD_isError()) */
85 ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);   /* end the current frame and start a new one */
86 ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);   /* end the current frame and write the seek table; call again while the return value is > 0 */
87 
88 /*= Raw seek table API
89  *  These functions allow for the seek table to be constructed directly.
90  *  This table can then be appended to a file of concatenated frames.
91  *  This allows the frames to be compressed independently, even in parallel,
92  *  and compiled together afterward into a seekable archive.
93  *
94  *  Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking
95  *  structure.
96  *
97  *  Call ZSTD_seekable_logFrame() once for each frame in the archive.
98  *  checksum is optional, and will not be used if checksumFlag was 0 when the
99  *  frame log was created.  If present, it should be the least significant 32
100  *  bits of the XXH64 hash of the uncompressed data.
101  *
102  *  Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table.
103  *  If the entire table was written, the return value will be 0.  Otherwise,
104  *  it will be equal to the number of bytes left to write. */
105 typedef struct ZSTD_frameLog_s ZSTD_frameLog;   /* tracking structure used to build a seek table directly */
106 ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag);   /* allocate and initialize a frame log; `checksumFlag` decides whether checksums are used */
107 ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl);   /* NOTE(review): presumably releases a log obtained from ZSTD_seekable_createFrameLog() -- confirm */
108 ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);   /* call once per frame; `checksum` is not used if checksumFlag was 0 at creation */
109 ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);   /* serialize the seek table; returns 0 once fully written, otherwise the number of bytes left to write */
110 
111 
112 /*-****************************************************************************
113 *  Seekable decompression - HowTo
114 *  A ZSTD_seekable object is required to track the seek table.
115 *
116 *  Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the
117 *  seek table provided in the input.
118 *  There are three modes for ZSTD_seekable_init:
119 *    - ZSTD_seekable_initBuff() : An in-memory API.  The data contained in
120 *      `src` should be the entire seekable file, including the seek table.
121 *      `src` should be kept alive and unmodified until the ZSTD_seekable object
122 *      is freed or reset.
123 *    - ZSTD_seekable_initFile() : A simplified file API using stdio.  fread and
124 *      fseek will be used to access the required data for building the seek
125 *      table and doing decompression operations.  `src` should not be closed
126 *      or modified until the ZSTD_seekable object is freed or reset.
127 *    - ZSTD_seekable_initAdvanced() : A general API allowing the client to
128 *      provide its own read and seek callbacks.
129 *        + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
130 *                                 Premature EOF should be treated as an error.
131 *        + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
132 *                                 where origin is either SEEK_SET (beginning of
133 *                                 file), or SEEK_END (end of file).
134 *  Both functions should return a non-negative value in case of success, and a
135 *  negative value in case of failure.  If implementing using this API and
136 *  stdio, be careful with files larger than 4GB and fseek.  All of these
137 *  functions return an error code checkable with ZSTD_isError().
138 *
139 *  Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
140 *  offset `offset`.  ZSTD_seekable_decompress may have to decompress the entire
141 *  prefix of the frame before the desired data if it has not already processed
142 *  this section. If ZSTD_seekable_decompress is called multiple times for a
143 *  consecutive range of data, it will efficiently retain the decompressor object
144 *  and avoid redecompressing frame prefixes.  The return value is the number of
145 *  bytes decompressed, or an error code checkable with ZSTD_isError().
146 *
147 *  The seek table access functions can be used to obtain the data contained
148 *  in the seek table.  If frameIndex is not less than the value returned by
149 *  ZSTD_seekable_getNumFrames(), they will return error codes checkable with
150 *  ZSTD_isError().  Note that since the offset access functions return
151 *  unsigned long long instead of size_t, in this case they will instead return
152 *  the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
153 ******************************************************************************/
154 
155 /*===== Seekable decompressor management =====*/
156 ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void);   /* NOTE(review): presumably creates an uninitialized ZSTD_seekable to pass to ZSTD_seekable_init*() -- confirm */
157 ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs);   /* NOTE(review): presumably releases `zs`; sources given to init* must stay valid until then -- confirm */
158 
159 /*===== Seekable decompression functions =====*/
160 ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize);   /* in-memory mode: `src` is the entire seekable file and must stay alive and unmodified until `zs` is freed or reset */
161 ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src);   /* stdio mode (fread/fseek): `src` must not be closed or modified until `zs` is freed or reset */
162 ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset);   /* decompress `dstSize` bytes at decompressed offset `offset`; returns the number of bytes decompressed or an error code (ZSTD_isError()) */
163 ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);   /* NOTE(review): not described in the HowTo; presumably decompresses frame `frameIndex` into `dst` -- confirm */
164 
165 #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)   /* returned by the offset access functions when frameIndex is out of range */
166 /*===== Seekable seek table access functions =====*/
167 ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);   /* presumably the number of frames in the seek table; bounds the valid frameIndex values */
168 ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);   /* returns ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if frameIndex is out of range */
169 ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);   /* returns ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if frameIndex is out of range */
170 ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);   /* returns an error code checkable with ZSTD_isError() if frameIndex is out of range */
171 ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);   /* returns an error code checkable with ZSTD_isError() if frameIndex is out of range */
172 ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset);   /* NOTE(review): presumably maps a decompressed offset to the index of the frame containing it -- confirm */
173 
174 
175 /*-****************************************************************************
176 *  Direct exploitation of the seekTable
177 *
178 *  Memory constrained use cases that manage multiple archives
179 *  benefit from retaining multiple archive seek tables
180 *  without retaining a ZSTD_seekable instance for each.
181 *
182 *  The API below allows the above-mentioned use cases
183 *  to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
184 *  then throw the ZSTD_seekable away to save memory.
185 *
186 *  Standard ZSTD operations can then be used
187 *  to decompress frames based on seek table offsets.
188 ******************************************************************************/
189 
190 /*===== Independent seek table management =====*/
191 ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs);   /* extract the seek table of `zs`, so that the ZSTD_seekable can then be thrown away */
192 ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);   /* NOTE(review): presumably releases a table obtained from ZSTD_seekTable_create_fromSeekable() -- confirm */
193 
194 /*===== Direct seek table access functions =====*/
195 ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st);   /* same shape as ZSTD_seekable_getNumFrames(), taking a ZSTD_seekTable; presumably same semantics */
196 ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);   /* ZSTD_seekTable variant of ZSTD_seekable_getFrameCompressedOffset(); presumably same semantics */
197 ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);   /* ZSTD_seekTable variant of ZSTD_seekable_getFrameDecompressedOffset(); presumably same semantics */
198 ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);   /* ZSTD_seekTable variant of ZSTD_seekable_getFrameCompressedSize(); presumably same semantics */
199 ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);   /* ZSTD_seekTable variant of ZSTD_seekable_getFrameDecompressedSize(); presumably same semantics */
200 ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);   /* ZSTD_seekTable variant of ZSTD_seekable_offsetToFrameIndex(); presumably same semantics */
201 
202 
203 /*===== Seekable advanced I/O API =====*/
204 typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);   /* read exactly `n` bytes into `buffer`; premature EOF is an error; return >= 0 on success, < 0 on failure */
205 typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin);   /* seek the read head to `offset` from `origin` (SEEK_SET or SEEK_END); return >= 0 on success, < 0 on failure */
206 typedef struct {
207     void* opaque;   /* NOTE(review): presumably handed back as the first argument of `read` and `seek` -- confirm */
208     ZSTD_seekable_read* read;   /* client-provided read callback */
209     ZSTD_seekable_seek* seek;   /* client-provided seek callback */
210 } ZSTD_seekable_customFile;
211 ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src);   /* general init mode using the client's read/seek callbacks; returns an error code checkable with ZSTD_isError() */
212 
213 #if defined (__cplusplus)
214 }
215 #endif
216 
217 #endif
218