1 /* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Implementation of Brotli compressor. */
8
9 #include <brotli/encode.h>
10 #include <brotli/shared_dictionary.h>
11 #include <brotli/types.h>
12
13 #include <stdlib.h> /* free, malloc */
14 #include <string.h> /* memcpy, memset */
15
16 #include "../common/constants.h"
17 #include "../common/context.h"
18 #include "../common/platform.h"
19 #include "../common/version.h"
20 #include "backward_references.h"
21 #include "backward_references_hq.h"
22 #include "bit_cost.h"
23 #include "brotli_bit_stream.h"
24 #include "compress_fragment.h"
25 #include "compress_fragment_two_pass.h"
26 #include "dictionary_hash.h"
27 #include "encoder_dict.h"
28 #include "entropy_encode.h"
29 #include "fast_log.h"
30 #include "hash.h"
31 #include "histogram.h"
32 #include "memory.h"
33 #include "metablock.h"
34 #include "prefix.h"
35 #include "state.h"
36 #include "quality.h"
37 #include "ringbuffer.h"
38 #include "utf8_util.h"
39 #include "write_bits.h"
40
41 #if defined(__cplusplus) || defined(c_plusplus)
42 extern "C" {
43 #endif
44
45 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
46
/* Returns the size of one input block under the current parameters. */
static size_t InputBlockSize(BrotliEncoderState* s) {
  const size_t lgblock = (size_t)s->params.lgblock;
  return (size_t)1 << lgblock;
}
50
/* Number of bytes copied into the encoder but not yet processed. */
static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
  const uint64_t received = s->input_pos_;
  const uint64_t processed = s->last_processed_pos_;
  return received - processed;
}
54
/* Remaining capacity of the current input block; 0 when it is full. */
static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
  const size_t block_size = InputBlockSize(s);
  const uint64_t delta = UnprocessedInputSize(s);
  return (delta >= block_size) ? 0 : (block_size - (size_t)delta);
}
61
BrotliEncoderSetParameter(BrotliEncoderState * state,BrotliEncoderParameter p,uint32_t value)62 BROTLI_BOOL BrotliEncoderSetParameter(
63 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
64 /* Changing parameters on the fly is not implemented yet. */
65 if (state->is_initialized_) return BROTLI_FALSE;
66 /* TODO(eustas): Validate/clamp parameters here. */
67 switch (p) {
68 case BROTLI_PARAM_MODE:
69 state->params.mode = (BrotliEncoderMode)value;
70 return BROTLI_TRUE;
71
72 case BROTLI_PARAM_QUALITY:
73 state->params.quality = (int)value;
74 return BROTLI_TRUE;
75
76 case BROTLI_PARAM_LGWIN:
77 state->params.lgwin = (int)value;
78 return BROTLI_TRUE;
79
80 case BROTLI_PARAM_LGBLOCK:
81 state->params.lgblock = (int)value;
82 return BROTLI_TRUE;
83
84 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
85 if ((value != 0) && (value != 1)) return BROTLI_FALSE;
86 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
87 return BROTLI_TRUE;
88
89 case BROTLI_PARAM_SIZE_HINT:
90 state->params.size_hint = value;
91 return BROTLI_TRUE;
92
93 case BROTLI_PARAM_LARGE_WINDOW:
94 state->params.large_window = TO_BROTLI_BOOL(!!value);
95 return BROTLI_TRUE;
96
97 case BROTLI_PARAM_NPOSTFIX:
98 state->params.dist.distance_postfix_bits = value;
99 return BROTLI_TRUE;
100
101 case BROTLI_PARAM_NDIRECT:
102 state->params.dist.num_direct_distance_codes = value;
103 return BROTLI_TRUE;
104
105 case BROTLI_PARAM_STREAM_OFFSET:
106 if (value > (1u << 30)) return BROTLI_FALSE;
107 state->params.stream_offset = value;
108 return BROTLI_TRUE;
109
110 default: return BROTLI_FALSE;
111 }
112 }
113
/* Wraps 64-bit input position to 32-bit ring-buffer position preserving
   "not-a-first-lap" feature. */
static uint32_t WrapPosition(uint64_t position) {
  const uint64_t gb = position >> 30;
  uint32_t result = (uint32_t)position;
  if (gb <= 2) {
    /* The first 3GB are continuous. */
    return result;
  }
  /* Beyond 3GB wrap every 2GiB, keeping the top two bits non-zero so the
     position never looks like a first-lap position again. */
  return (result & ((1u << 30) - 1)) |
         (((uint32_t)((gb - 1) & 1) + 1) << 30);
}
125
/* Returns a scratch buffer of at least |size| bytes, growing the cached one
   if needed. Returns NULL on allocation failure. */
static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
  MemoryManager* m = &s->memory_manager_;
  if (s->storage_size_ >= size) return s->storage_;
  /* Grow-only cache: discard the old buffer and allocate a bigger one. */
  BROTLI_FREE(m, s->storage_);
  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
  s->storage_size_ = size;
  return s->storage_;
}
136
/* Smallest power of two, starting at 256, that is not exceeded by both
   |max_table_size| and |input_size|. */
static size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;
  while (htsize < input_size) {
    if (htsize >= max_table_size) break;
    htsize *= 2;
  }
  return htsize;
}
144
/* Returns a zeroed hash table sized for |quality| and |input_size|, using the
   in-state small table when it fits and a grow-only cached large table
   otherwise. Returns 0 (NULL) on allocation failure. */
static int* GetHashTable(BrotliEncoderState* s, int quality,
                         size_t input_size, size_t* table_size) {
  /* Use smaller hash table when input.size() is smaller, since we
     fill the table, incurring O(hash table size) overhead for
     compression, and if the input is short, we won't need that
     many hash table entries anyway. */
  MemoryManager* m = &s->memory_manager_;
  const size_t max_table_size = MaxHashTableSize(quality);
  size_t htsize = HashTableSize(max_table_size, input_size);
  int* table;
  BROTLI_DCHECK(max_table_size >= 256);
  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    /* Only odd shifts are supported by fast-one-pass. */
    if ((htsize & 0xAAAAA) == 0) {
      htsize <<= 1;
    }
  }

  if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
    table = s->small_table_;
  } else {
    if (htsize > s->large_table_size_) {
      /* Cached large table only ever grows across calls. */
      s->large_table_size_ = htsize;
      BROTLI_FREE(m, s->large_table_);
      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
    }
    table = s->large_table_;
  }

  *table_size = htsize;
  /* Callers expect an empty (all-zero) table. */
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
179
/* Produces the stream-header bits that encode the sliding-window size
   (WBITS), writing the bit pattern and its length through the out-params. */
static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
    uint16_t* last_bytes, uint8_t* last_bytes_bits) {
  uint16_t pattern;
  uint8_t n_bits;
  if (large_window) {
    /* Large-window header: marker 0x11 followed by 6 bits of lgwin. */
    pattern = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
    n_bits = 14;
  } else if (lgwin == 16) {
    pattern = 0;
    n_bits = 1;
  } else if (lgwin == 17) {
    pattern = 1;
    n_bits = 7;
  } else if (lgwin > 17) {
    pattern = (uint16_t)(((lgwin - 17) << 1) | 0x01);
    n_bits = 4;
  } else {
    /* lgwin < 16 */
    pattern = (uint16_t)(((lgwin - 8) << 4) | 0x01);
    n_bits = 7;
  }
  *last_bytes = pattern;
  *last_bytes_bits = n_bits;
}
201
/* TODO(eustas): move to compress_fragment.c? */
/* Initializes the command and distance prefix codes for the first block. */
static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
  /* Default Huffman code lengths for the 128 command/distance symbols. */
  static const uint8_t kDefaultCommandDepths[128] = {
    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  };
  /* Bit patterns matching the depths above. */
  static const uint16_t kDefaultCommandBits[128] = {
    0, 0, 8, 9, 3, 35, 7, 71,
    39, 103, 23, 47, 175, 111, 239, 31,
    0, 0, 0, 4, 12, 2, 10, 6,
    13, 29, 11, 43, 27, 59, 87, 55,
    15, 79, 319, 831, 191, 703, 447, 959,
    0, 14, 1, 25, 5, 21, 19, 51,
    119, 159, 95, 223, 479, 991, 63, 575,
    127, 639, 383, 895, 255, 767, 511, 1023,
    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
  };
  /* Pre-serialized bit stream describing the code above; 448 bits used. */
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
  };
  static const size_t kDefaultCommandCodeNumBits = 448;
  /* COPY_ARRAY copies sizeof(src) bytes into the arena fields. */
  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);

  /* Initialize the pre-compressed form of the command and distance prefix
     codes. */
  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
}
245
/* Decides on the context map based on how well the UTF8 prefix of the
   previous byte predicts the next byte. The prediction quality is
   measured as Shannon entropy. Here we need Shannon entropy instead of
   'BitsEntropy' since the prefix will be encoded with the remaining 6
   bits of the following byte, and BitsEntropy will assume that symbol
   to be stored alone using Huffman coding. */
static void ChooseContextMap(int quality,
    uint32_t* bigram_histo,
    size_t* num_literal_contexts,
    const uint32_t** literal_context_map) {
  /* 64-entry context maps (indexed by the bigram context) selecting 3 or 2
     literal histograms respectively. */
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total;
  size_t i;
  size_t dummy;
  /* entropy[k] (k = 1..3): total entropy when literals are split into k
     contexts; entropy[0] temporarily holds the 1/total normalizer. */
  double entropy[4];
  /* Project the 3x3 bigram histogram down to 1-context and 2-context views. */
  for (i = 0; i < 9; ++i) {
    monogram_histo[i % 3] += bigram_histo[i];
    two_prefix_histo[i % 6] += bigram_histo[i];
  }
  entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
  entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
                ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
  entropy[3] = 0;
  for (i = 0; i < 3; ++i) {
    entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
  }

  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
  BROTLI_DCHECK(total != 0);
  /* Normalize entropies to bits per symbol. */
  entropy[0] = 1.0 / (double)total;
  entropy[1] *= entropy[0];
  entropy[2] *= entropy[0];
  entropy[3] *= entropy[0];

  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
    /* 3 context models is a bit slower, don't use it at lower qualities. */
    entropy[3] = entropy[1] * 10;
  }
  /* If expected savings by symbol are less than 0.2 bits, skip the
     context modeling -- in exchange for faster decoding speed. */
  if (entropy[1] - entropy[2] < 0.2 &&
      entropy[1] - entropy[3] < 0.2) {
    *num_literal_contexts = 1;
  } else if (entropy[2] - entropy[3] < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
  } else {
    *num_literal_contexts = 3;
    *literal_context_map = kStaticContextMapContinuation;
  }
}
312
/* Decide if we want to use a more complex static context map containing 13
   context values, based on the entropy reduction of histograms over the
   first 5 bits of literals. */
static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  static const uint32_t kStaticContextMapComplexUTF8[64] = {
    11, 11, 12, 12, /* 0 special */
    0, 0, 0, 0, /* 4 lf */
    1, 1, 9, 9, /* 8 space */
    2, 2, 2, 2, /* !, first after space/lf and after something else. */
    1, 1, 1, 1, /* " */
    8, 3, 3, 3, /* % */
    1, 1, 1, 1, /* ({[ */
    2, 2, 2, 2, /* }]) */
    8, 4, 4, 4, /* :; */
    8, 7, 4, 4, /* . */
    8, 0, 0, 0, /* > */
    3, 3, 3, 3, /* [0..9] */
    5, 5, 10, 5, /* [A-Z] */
    5, 5, 10, 5,
    6, 6, 6, 6, /* [a-z] */
    6, 6, 6, 6,
  };
  BROTLI_UNUSED(quality);
  /* Try the more complex static context map only for long data. */
  if (size_hint < (1 << 20)) {
    return BROTLI_FALSE;
  } else {
    const size_t end_pos = start_pos + length;
    /* To make entropy calculations faster, we collect histograms
       over the 5 most significant bits of literals. One histogram
       without context and 13 additional histograms for each context value.
       |arena| must therefore hold at least 14 * 32 uint32_t entries. */
    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
    uint32_t total = 0;
    double entropy[3];
    size_t dummy;
    size_t i;
    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
    memset(arena, 0, sizeof(arena[0]) * 32 * 14);
    /* NOTE(review): caller (DecideOverLiteralContextModeling) guarantees
       length >= 64, so at least one stride is sampled and |total| > 0. */
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      const size_t stride_end_pos = start_pos + 64;
      uint8_t prev2 = input[start_pos & mask];
      uint8_t prev1 = input[(start_pos + 1) & mask];
      size_t pos;
      /* To make the analysis of the data faster we only examine 64 byte long
         strides at every 4kB intervals. */
      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
        ++total;
        ++combined_histo[literal >> 3];
        ++context_histo[(context << 5) + (literal >> 3)];
        prev2 = prev1;
        prev1 = literal;
      }
    }
    /* entropy[1]: context-free entropy; entropy[2]: entropy with the 13
       contexts; both normalized to bits per symbol below. */
    entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
    entropy[2] = 0;
    for (i = 0; i < 13; ++i) {
      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
    }
    entropy[0] = 1.0 / (double)total;
    entropy[1] *= entropy[0];
    entropy[2] *= entropy[0];
    /* The triggering heuristics below were tuned by compressing the individual
       files of the silesia corpus. If we skip this kind of context modeling
       for not very well compressible input (i.e. entropy using context modeling
       is 60% of maximal entropy) or if expected savings by symbol are less
       than 0.2 bits, then in every case when it triggers, the final compression
       ratio is improved. Note however that this heuristics might be too strict
       for some cases and could be tuned further. */
    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
      return BROTLI_FALSE;
    } else {
      *num_literal_contexts = 13;
      *literal_context_map = kStaticContextMapComplexUTF8;
      return BROTLI_TRUE;
    }
  }
}
397
DecideOverLiteralContextModeling(const uint8_t * input,size_t start_pos,size_t length,size_t mask,int quality,size_t size_hint,size_t * num_literal_contexts,const uint32_t ** literal_context_map,uint32_t * arena)398 static void DecideOverLiteralContextModeling(const uint8_t* input,
399 size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
400 size_t* num_literal_contexts, const uint32_t** literal_context_map,
401 uint32_t* arena) {
402 if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
403 return;
404 } else if (ShouldUseComplexStaticContextMap(
405 input, start_pos, length, mask, quality, size_hint,
406 num_literal_contexts, literal_context_map, arena)) {
407 /* Context map was already set, nothing else to do. */
408 } else {
409 /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
410 UTF8 data faster we only examine 64 byte long strides at every 4kB
411 intervals. */
412 const size_t end_pos = start_pos + length;
413 uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
414 memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
415 for (; start_pos + 64 <= end_pos; start_pos += 4096) {
416 static const int lut[4] = { 0, 0, 1, 2 };
417 const size_t stride_end_pos = start_pos + 64;
418 int prev = lut[input[start_pos & mask] >> 6] * 3;
419 size_t pos;
420 for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
421 const uint8_t literal = input[pos & mask];
422 ++bigram_prefix_histo[prev + lut[literal >> 6]];
423 prev = lut[literal >> 6] * 3;
424 }
425 }
426 ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
427 literal_context_map);
428 }
429 }
430
ShouldCompress(const uint8_t * data,const size_t mask,const uint64_t last_flush_pos,const size_t bytes,const size_t num_literals,const size_t num_commands)431 static BROTLI_BOOL ShouldCompress(
432 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
433 const size_t bytes, const size_t num_literals, const size_t num_commands) {
434 /* TODO(eustas): find more precise minimal block overhead. */
435 if (bytes <= 2) return BROTLI_FALSE;
436 if (num_commands < (bytes >> 8) + 2) {
437 if ((double)num_literals > 0.99 * (double)bytes) {
438 uint32_t literal_histo[256] = { 0 };
439 static const uint32_t kSampleRate = 13;
440 static const double kMinEntropy = 7.92;
441 const double bit_cost_threshold =
442 (double)bytes * kMinEntropy / kSampleRate;
443 size_t t = (bytes + kSampleRate - 1) / kSampleRate;
444 uint32_t pos = (uint32_t)last_flush_pos;
445 size_t i;
446 for (i = 0; i < t; i++) {
447 ++literal_histo[data[pos & mask]];
448 pos += kSampleRate;
449 }
450 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
451 return BROTLI_FALSE;
452 }
453 }
454 }
455 return BROTLI_TRUE;
456 }
457
458 /* Chooses the literal context mode for a metablock */
ChooseContextMode(const BrotliEncoderParams * params,const uint8_t * data,const size_t pos,const size_t mask,const size_t length)459 static ContextType ChooseContextMode(const BrotliEncoderParams* params,
460 const uint8_t* data, const size_t pos, const size_t mask,
461 const size_t length) {
462 /* We only do the computation for the option of something else than
463 CONTEXT_UTF8 for the highest qualities */
464 if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
465 !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
466 return CONTEXT_SIGNED;
467 }
468 return CONTEXT_UTF8;
469 }
470
/* Serializes one metablock into |storage| (a bit buffer indexed by
   *storage_ix). Picks one of three compressed encodings by quality, or emits
   the data uncompressed when compression is not expected to pay off; also
   falls back to uncompressed if the compressed form turns out larger. */
static void WriteMetaBlockInternal(MemoryManager* m,
    const uint8_t* data,
    const size_t mask,
    const uint64_t last_flush_pos,
    const size_t bytes,
    const BROTLI_BOOL is_last,
    ContextType literal_context_mode,
    const BrotliEncoderParams* params,
    const uint8_t prev_byte,
    const uint8_t prev_byte2,
    const size_t num_literals,
    const size_t num_commands,
    Command* commands,
    const int* saved_dist_cache,
    int* dist_cache,
    size_t* storage_ix,
    uint8_t* storage) {
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
  uint16_t last_bytes;
  uint8_t last_bytes_bits;
  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
  BrotliEncoderParams block_params = *params;

  if (bytes == 0) {
    /* Write the ISLAST and ISEMPTY bits. */
    BrotliWriteBits(2, 3, storage_ix, storage);
    /* Round the bit position up to the next byte boundary. */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
    /* Restore the distance cache, as its last update by
       CreateBackwardReferences is now unused. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask, bytes,
                                     storage_ix, storage);
    return;
  }

  BROTLI_DCHECK(*storage_ix <= 14);
  /* Snapshot the few header bits already in storage so the "compressed form
     too big" fallback below can rewind and re-emit uncompressed. */
  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
  last_bytes_bits = (uint8_t)(*storage_ix);
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
                             bytes, mask, is_last, params,
                             commands, num_commands,
                             storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
                                bytes, mask, is_last, params,
                                commands, num_commands,
                                storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else {
    MetaBlockSplit mb;
    InitMetaBlockSplit(&mb);
    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      size_t num_literal_contexts = 1;
      const uint32_t* literal_context_map = NULL;
      if (!params->disable_literal_context_modeling) {
        /* TODO(eustas): pull to higher level and reuse. */
        uint32_t* arena = BROTLI_ALLOC(m, uint32_t, 14 * 32);
        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
        DecideOverLiteralContextModeling(
            data, wrapped_last_flush_pos, bytes, mask, params->quality,
            params->size_hint, &num_literal_contexts,
            &literal_context_map, arena);
        BROTLI_FREE(m, arena);
      }
      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
          literal_context_map, commands, num_commands, &mb);
      if (BROTLI_IS_OOM(m)) return;
    } else {
      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) return;
    }
    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
      /* The number of distance symbols effectively used for distance
         histograms. It might be less than distance alphabet size
         for "Large Window Brotli" (32-bit). */
      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
    }
    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
                         prev_byte, prev_byte2,
                         is_last,
                         &block_params,
                         literal_context_mode,
                         commands, num_commands,
                         &mb,
                         storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    DestroyMetaBlockSplit(m, &mb);
  }
  /* If the compressed output ended up bigger than the raw data (plus a small
     header allowance), rewind and store the metablock uncompressed. */
  if (bytes + 4 < (*storage_ix >> 3)) {
    /* Restore the distance cache and last byte. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    storage[0] = (uint8_t)last_bytes;
    storage[1] = (uint8_t)(last_bytes >> 8);
    *storage_ix = last_bytes_bits;
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask,
                                     bytes, storage_ix, storage);
  }
}
583
/* Derives the distance-code parameterization (NPOSTFIX / NDIRECT) from the
   current quality and mode, falling back to (0, 0) when the requested
   combination is invalid. */
static void ChooseDistanceParams(BrotliEncoderParams* params) {
  uint32_t npostfix = 0;
  uint32_t ndirect = 0;

  if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
    uint32_t ndirect_msb;
    if (params->mode == BROTLI_MODE_FONT) {
      /* Fonts compress best with this fixed parameterization. */
      npostfix = 1;
      ndirect = 12;
    } else {
      npostfix = params->dist.distance_postfix_bits;
      ndirect = params->dist.num_direct_distance_codes;
    }
    ndirect_msb = (ndirect >> npostfix) & 0x0F;
    /* NDIRECT must be a multiple of (1 << NPOSTFIX) within range. */
    if (npostfix > BROTLI_MAX_NPOSTFIX ||
        ndirect > BROTLI_MAX_NDIRECT ||
        (ndirect_msb << npostfix) != ndirect) {
      npostfix = 0;
      ndirect = 0;
    }
  }

  BrotliInitDistanceParams(&params->dist, npostfix, ndirect,
                           params->large_window);
}
609
/* One-time lazy initialization performed on first use: sanitizes parameters,
   sets up the ring buffer, stream header bits and per-quality arenas.
   Returns BROTLI_FALSE on prior or new OOM. */
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  if (s->is_initialized_) return BROTLI_TRUE;

  s->last_bytes_bits_ = 0;
  s->last_bytes_ = 0;
  s->flint_ = BROTLI_FLINT_DONE;
  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;

  /* Parameters must be fixed up before anything depends on them. */
  SanitizeParams(&s->params);
  s->params.lgblock = ComputeLgBlock(&s->params);
  ChooseDistanceParams(&s->params);

  if (s->params.stream_offset != 0) {
    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
    s->dist_cache_[0] = -16;
    s->dist_cache_[1] = -16;
    s->dist_cache_[2] = -16;
    s->dist_cache_[3] = -16;
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
  }

  RingBufferSetup(&s->params, &s->ringbuffer_);

  /* Initialize last byte with stream header. */
  {
    int lgwin = s->params.lgwin;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
      /* Fast qualities assume a window of at least 18 bits. */
      lgwin = BROTLI_MAX(int, lgwin, 18);
    }
    if (s->params.stream_offset == 0) {
      EncodeWindowBits(lgwin, s->params.large_window,
                       &s->last_bytes_, &s->last_bytes_bits_);
    } else {
      /* Bigger values have the same effect, but could cause overflows. */
      s->params.stream_offset = BROTLI_MIN(size_t,
          s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
    }
  }

  /* Per-quality scratch arenas for the fast compression paths. */
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    InitCommandPrefixCodes(s->one_pass_arena_);
  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  }

  s->is_initialized_ = BROTLI_TRUE;
  return BROTLI_TRUE;
}
665
/* Fills |params| with the library default configuration. */
static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
  /* General settings. */
  params->mode = BROTLI_DEFAULT_MODE;
  params->quality = BROTLI_DEFAULT_QUALITY;
  params->lgwin = BROTLI_DEFAULT_WINDOW;
  params->lgblock = 0;
  params->large_window = BROTLI_FALSE;
  params->stream_offset = 0;
  params->size_hint = 0;
  params->disable_literal_context_modeling = BROTLI_FALSE;
  BrotliInitSharedEncoderDictionary(&params->dictionary);
  /* Distance parameterization: no postfix bits, no direct codes. */
  params->dist.distance_postfix_bits = 0;
  params->dist.num_direct_distance_codes = 0;
  params->dist.alphabet_size_max =
      BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
  params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
  params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
683
/* Releases resources owned by |params| (the attached shared dictionary). */
static void BrotliEncoderCleanupParams(MemoryManager* m,
    BrotliEncoderParams* params) {
  BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
}
688
/* Puts a freshly allocated encoder state into its pristine, not-yet-
   initialized condition. Performs no allocation itself. */
static void BrotliEncoderInitState(BrotliEncoderState* s) {
  BrotliEncoderInitParams(&s->params);
  /* Position bookkeeping. */
  s->input_pos_ = 0;
  s->last_flush_pos_ = 0;
  s->last_processed_pos_ = 0;
  s->num_commands_ = 0;
  s->num_literals_ = 0;
  s->last_insert_len_ = 0;
  s->prev_byte_ = 0;
  s->prev_byte2_ = 0;
  /* Lazily allocated buffers and arenas. */
  s->storage_ = 0;
  s->storage_size_ = 0;
  s->large_table_ = NULL;
  s->large_table_size_ = 0;
  s->one_pass_arena_ = NULL;
  s->two_pass_arena_ = NULL;
  s->command_buf_ = NULL;
  s->literal_buf_ = NULL;
  HasherInit(&s->hasher_);
  /* Streaming interface state. */
  s->total_in_ = 0;
  s->next_out_ = NULL;
  s->available_out_ = 0;
  s->total_out_ = 0;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->is_last_block_emitted_ = BROTLI_FALSE;
  s->is_initialized_ = BROTLI_FALSE;

  RingBufferInit(&s->ringbuffer_);

  s->commands_ = 0;
  s->cmd_alloc_size_ = 0;

  /* Initialize distance cache. */
  s->dist_cache_[0] = 4;
  s->dist_cache_[1] = 11;
  s->dist_cache_[2] = 15;
  s->dist_cache_[3] = 16;
  /* Save the state of the distance cache in case we need to restore it for
     emitting an uncompressed block. */
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
730
BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,brotli_free_func free_func,void * opaque)731 BrotliEncoderState* BrotliEncoderCreateInstance(
732 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
733 BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
734 sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
735 if (state == NULL) {
736 /* BROTLI_DUMP(); */
737 return 0;
738 }
739 BrotliInitMemoryManager(
740 &state->memory_manager_, alloc_func, free_func, opaque);
741 BrotliEncoderInitState(state);
742 return state;
743 }
744
745 #ifdef BROTLI_REPORTING
746 /* When BROTLI_REPORTING is defined extra reporting module have to be linked. */
747 void BrotliEncoderOnFinish(const BrotliEncoderState* s);
748 #define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s);
749 #else
750 #if !defined(BROTLI_ENCODER_ON_FINISH)
751 #define BROTLI_ENCODER_ON_FINISH(s) (void)(s);
752 #endif
753 #endif
754
/* Frees all heap resources held by the encoder state (but not the state
   object itself; see BrotliEncoderDestroyInstance). */
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;

  BROTLI_ENCODER_ON_FINISH(s);

  if (BROTLI_IS_OOM(m)) {
    /* After OOM, individual pointers may be stale; let the memory manager
       release everything it tracked instead of freeing field by field. */
    BrotliWipeOutMemoryManager(m);
    return;
  }

  BROTLI_FREE(m, s->storage_);
  BROTLI_FREE(m, s->commands_);
  RingBufferFree(m, &s->ringbuffer_);
  DestroyHasher(m, &s->hasher_);
  BROTLI_FREE(m, s->large_table_);
  BROTLI_FREE(m, s->one_pass_arena_);
  BROTLI_FREE(m, s->two_pass_arena_);
  BROTLI_FREE(m, s->command_buf_);
  BROTLI_FREE(m, s->literal_buf_);
  BrotliEncoderCleanupParams(m, &s->params);
}
776
777 /* Deinitializes and frees BrotliEncoderState instance. */
BrotliEncoderDestroyInstance(BrotliEncoderState * state)778 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
779 if (!state) {
780 return;
781 } else {
782 BrotliEncoderCleanupState(state);
783 BrotliBootstrapFree(state, &state->memory_manager_);
784 }
785 }
786
787 /*
788 Copies the given input data to the internal ring buffer of the compressor.
789 No processing of the data occurs at this time and this function can be
790 called multiple times before calling WriteBrotliData() to process the
791 accumulated input. At most input_block_size() bytes of input data can be
792 copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
793 */
static void CopyInputToRingBuffer(BrotliEncoderState* s,
                                  const size_t input_size,
                                  const uint8_t* input_buffer) {
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
  MemoryManager* m = &s->memory_manager_;
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
  /* On OOM the write did not happen; leave input_pos_ untouched. */
  if (BROTLI_IS_OOM(m)) return;
  s->input_pos_ += input_size;

  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
     hashing not depend on uninitialized data. This makes compression
     deterministic and it prevents uninitialized memory warnings in Valgrind.
     Even without erasing, the output would be valid (but nondeterministic).

     Background information: The compressor stores short (at most 8 bytes)
     substrings of the input already read in a hash table, and detects
     repetitions by looking up such substrings in the hash table. If it
     can find a substring, it checks whether the substring is really there
     in the ring buffer (or it's just a hash collision). Should the hash
     table become corrupt, this check makes sure that the output is
     still valid, albeit the compression ratio would be bad.

     The compressor populates the hash table from the ring buffer as it's
     reading new bytes from the input. However, at the last few indexes of
     the ring buffer, there are not enough bytes to build full-length
     substrings from. Since the hash table always contains full-length
     substrings, we erase with dummy zeros here to make sure that those
     substrings will contain zeros at the end instead of uninitialized
     data.

     Please note that erasing is not necessary (because the
     memory region is already initialized since the ring buffer
     has a `tail' that holds a copy of the beginning), so we
     skip erasing if we have already gone around at least once in
     the ring buffer.

     Only clear during the first round of ring-buffer writes. On
     subsequent rounds data in the ring-buffer would be affected. */
  if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
    /* This is the first time when the ring buffer is being written.
       We clear 7 bytes just after the bytes that have been copied from
       the input buffer.

       The ring-buffer has a "tail" that holds a copy of the beginning,
       but only once the ring buffer has been fully written once, i.e.,
       pos <= mask. For the first time, we need to write values
       in this tail (where index may be larger than mask), so that
       we have exactly defined behavior and don't read uninitialized
       memory. Due to performance reasons, hashing reads data using a
       LOAD64, which can go 7 bytes beyond the bytes written in the
       ring-buffer. */
    memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
  }
}
848
849 /* Marks all input as processed.
850 Returns true if position wrapping occurs. */
UpdateLastProcessedPos(BrotliEncoderState * s)851 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
852 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
853 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
854 s->last_processed_pos_ = s->input_pos_;
855 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
856 }
857
/* Tries to grow the copy part of the last emitted command by consuming the
   first bytes of the new input block, when they continue the same match.
   |*bytes| and |*wrapped_last_processed_pos| are advanced accordingly. */
static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
                              uint32_t* wrapped_last_processed_pos) {
  Command* last_command = &s->commands_[s->num_commands_ - 1];
  const uint8_t* data = s->ringbuffer_.buffer_;
  const uint32_t mask = s->ringbuffer_.mask_;
  uint64_t max_backward_distance =
      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
  /* Low 25 bits hold the copy length; the upper bits are a length delta. */
  uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
  uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
  uint64_t max_distance = last_processed_pos < max_backward_distance ?
      last_processed_pos : max_backward_distance;
  uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
  uint32_t distance_code = CommandRestoreDistanceCode(last_command,
                                                      &s->params.dist);
  const CompoundDictionary* dict = &s->params.dictionary.compound;
  size_t compound_dictionary_size = dict->total_size;
  /* Only extend when the command's distance equals the most recent cached
     distance (short code, or explicit code that resolves to dist_cache_[0]). */
  if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
    if (cmd_dist <= max_distance) {
      /* Match source lies inside the ring buffer: extend byte-by-byte while
         the data keeps repeating at the same distance. */
      while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
             data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
        last_command->copy_len_++;
        (*bytes)--;
        (*wrapped_last_processed_pos)++;
      }
    } else {
      /* Distance beyond the window: the match source is the compound
         dictionary (distances past max_distance address it back-to-front). */
      if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
          last_copy_len < cmd_dist - max_distance) {
        size_t address =
            compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
            (size_t)last_copy_len;
        size_t br_index = 0;
        size_t br_offset;
        const uint8_t* chunk;
        size_t chunk_length;
        /* Locate the dictionary chunk containing |address|. */
        while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
        br_offset = address - dict->chunk_offsets[br_index];
        chunk = dict->chunk_source[br_index];
        chunk_length =
            dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
        while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
               chunk[br_offset]) {
          last_command->copy_len_++;
          (*bytes)--;
          (*wrapped_last_processed_pos)++;
          /* Crossing a chunk boundary: move to the next chunk, or stop at
             the end of the dictionary. */
          if (++br_offset == chunk_length) {
            br_index++;
            br_offset = 0;
            if (br_index != dict->num_chunks) {
              chunk = dict->chunk_source[br_index];
              chunk_length = dict->chunk_offsets[br_index + 1] -
                  dict->chunk_offsets[br_index];
            } else {
              break;
            }
          }
        }
      }
    }
    /* The copy length is at most the metablock size, and thus expressible. */
    GetLengthCode(last_command->insert_len_,
                  (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
                           (int)(last_command->copy_len_ >> 25)),
                  TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
                  &last_command->cmd_prefix_);
  }
}
925
926 /*
927 Processes the accumulated input data and sets |*out_size| to the length of
928 the new output meta-block, or to zero if no new output meta-block has been
929 created (in this case the processed input data is buffered internally).
930 If |*out_size| is positive, |*output| points to the start of the output
931 data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
932 always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
933 to 7 bits of the last byte of output. Byte-alignment could be enforced by
934 emitting an empty meta-data block.
935 Returns BROTLI_FALSE if the size of the input data is larger than
936 input_block_size().
937 */
static BROTLI_BOOL EncodeData(
    BrotliEncoderState* s, const BROTLI_BOOL is_last,
    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
  const uint64_t delta = UnprocessedInputSize(s);
  uint32_t bytes = (uint32_t)delta;
  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
  uint8_t* data;
  uint32_t mask;
  MemoryManager* m = &s->memory_manager_;
  ContextType literal_context_mode;
  ContextLut literal_context_lut;
  BROTLI_BOOL fast_compress =
      s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;

  data = s->ringbuffer_.buffer_;
  mask = s->ringbuffer_.mask_;

  if (delta == 0) { /* No new input; still might want to flush or finish. */
    if (!data) { /* No input has been processed so far. */
      if (is_last) { /* Emit complete finalized stream. */
        /* Append the 2-bit ISLAST/ISLASTEMPTY marker to the pending bits
           and emit them from the tiny scratch buffer. */
        BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
        s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
        s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
        s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
        s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
        *output = s->tiny_buf_.u8;
        *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
        return BROTLI_TRUE;
      } else { /* No data, not last -> no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    } else {
      /* Fast compress performs flush every block -> flush is no-op. */
      if (!is_last && (!force_flush || fast_compress)) { /* Another no-op. */
        *out_size = 0;
        return BROTLI_TRUE;
      }
    }
  }
  BROTLI_DCHECK(data);

  if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
  /* Adding more blocks after "last" block is forbidden. */
  if (s->is_last_block_emitted_) return BROTLI_FALSE;
  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;

  /* Caller contract: at most InputBlockSize(s) unprocessed bytes. */
  if (delta > InputBlockSize(s)) {
    return BROTLI_FALSE;
  }
  /* Lazily allocate the two-pass scratch buffers on first use. */
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
      !s->command_buf_) {
    s->command_buf_ =
        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
    s->literal_buf_ =
        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
        BROTLI_IS_NULL(s->literal_buf_)) {
      return BROTLI_FALSE;
    }
  }

  if (fast_compress) {
    /* Fast path (qualities 0/1): compress this fragment immediately,
       bypassing the command/metablock machinery below. */
    uint8_t* storage;
    size_t storage_ix = s->last_bytes_bits_;
    size_t table_size;
    int* table;

    storage = GetBrotliStorage(s, 2 * bytes + 503);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Seed storage with the pending partial byte(s) of the previous block. */
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    table = GetHashTable(s, s->params.quality, bytes, &table_size);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      BrotliCompressFragmentFast(
          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    } else {
      BrotliCompressFragmentTwoPass(
          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          s->command_buf_, s->literal_buf_,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    }
    /* Keep the trailing partial byte for the next call; storage_ix is the
       output position in bits. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    UpdateLastProcessedPos(s);
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }

  {
    /* Theoretical max number of commands is 1 per 2 bytes. */
    size_t newsize = s->num_commands_ + bytes / 2 + 1;
    if (newsize > s->cmd_alloc_size_) {
      Command* new_commands;
      /* Reserve a bit more memory to allow merging with a next block
         without reallocation: that would impact speed. */
      newsize += (bytes / 4) + 16;
      s->cmd_alloc_size_ = newsize;
      new_commands = BROTLI_ALLOC(m, Command, newsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
      if (s->commands_) {
        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
        BROTLI_FREE(m, s->commands_);
      }
      s->commands_ = new_commands;
    }
  }

  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
                              wrapped_last_processed_pos, bytes, is_last);

  literal_context_mode = ChooseContextMode(
      &s->params, data, WrapPosition(s->last_flush_pos_),
      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);

  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

  /* If the previous block ended inside a copy, try to continue that copy
     into the new data before searching for fresh matches. */
  if (s->num_commands_ && s->last_insert_len_ == 0) {
    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
  }

  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else {
    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
  }

  {
    const size_t max_length = MaxMetablockSize(&s->params);
    const size_t max_literals = max_length / 8;
    const size_t max_commands = max_length / 8;
    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
    /* If maximal possible additional block doesn't fit metablock, flush now. */
    /* TODO(eustas): Postpone decision until next block arrives? */
    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
        processed_bytes + InputBlockSize(s) <= max_length);
    /* If block splitting is not used, then flush as soon as there is some
       amount of commands / literals produced. */
    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
    if (!is_last && !force_flush && !should_flush &&
        next_input_fits_metablock &&
        s->num_literals_ < max_literals &&
        s->num_commands_ < max_commands) {
      /* Merge with next input block. Everything will happen later. */
      if (UpdateLastProcessedPos(s)) {
        HasherReset(&s->hasher_);
      }
      *out_size = 0;
      return BROTLI_TRUE;
    }
  }

  /* Create the last insert-only command. */
  if (s->last_insert_len_ > 0) {
    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
    s->num_literals_ += s->last_insert_len_;
    s->last_insert_len_ = 0;
  }

  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
    /* We have no new input data and we don't have to finish the stream, so
       nothing to do. */
    *out_size = 0;
    return BROTLI_TRUE;
  }
  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
  {
    const uint32_t metablock_size =
        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
    size_t storage_ix = s->last_bytes_bits_;
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Seed storage with the pending partial byte(s) of the previous block. */
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    WriteMetaBlockInternal(
        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
        s->dist_cache_, &storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    s->last_flush_pos_ = s->input_pos_;
    if (UpdateLastProcessedPos(s)) {
      HasherReset(&s->hasher_);
    }
    /* Remember the last two flushed bytes: they are the context seed for the
       first literals of the next metablock. */
    if (s->last_flush_pos_ > 0) {
      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
    }
    if (s->last_flush_pos_ > 1) {
      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
    }
    s->num_commands_ = 0;
    s->num_literals_ = 0;
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }
}
1173
1174 /* Dumps remaining output bits and metadata header to |header|.
1175 Returns number of produced bytes.
1176 REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
1177 REQUIRED: |block_size| <= (1 << 24). */
static size_t WriteMetadataHeader(
    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
  size_t storage_ix;
  /* Start writing after the pending partial byte(s), then clear them:
     the metadata block is byte-aligned, so nothing stays pending. */
  storage_ix = s->last_bytes_bits_;
  header[0] = (uint8_t)s->last_bytes_;
  header[1] = (uint8_t)(s->last_bytes_ >> 8);
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;

  BrotliWriteBits(1, 0, &storage_ix, header);  /* is_last = 0 */
  BrotliWriteBits(2, 3, &storage_ix, header);  /* data_nibbles = 11 */
  BrotliWriteBits(1, 0, &storage_ix, header);  /* reserved = 0 */
  if (block_size == 0) {
    BrotliWriteBits(2, 0, &storage_ix, header);  /* meta_nibbles = 00 */
  } else {
    /* Emit the smallest byte count that can hold (block_size - 1). */
    uint32_t nbits = (block_size == 1) ? 1 :
        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
    uint32_t nbytes = (nbits + 7) / 8;
    BrotliWriteBits(2, nbytes, &storage_ix, header);
    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
  }
  /* Round up to whole bytes produced. */
  return (storage_ix + 7u) >> 3;
}
1201
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
  /* Bound for the "store uncompressed" fallback stream:
     [window bits / empty metadata] + N * [uncompressed] + [last empty];
     4 bytes of chunk-header overhead are budgeted per 16KiB of input. */
  size_t header_count = input_size >> 14;
  size_t bound;
  if (input_size == 0) return 2;  /* Minimal complete empty stream. */
  bound = input_size + (2 + 3 + 1) + 4 * header_count;
  /* Unsigned wrap-around means the bound is not representable: signal 0. */
  return (bound < input_size) ? 0 : bound;
}
1210
1211 /* Wraps data to uncompressed brotli stream with minimal window size.
1212 |output| should point at region with at least BrotliEncoderMaxCompressedSize
1213 addressable bytes.
1214 Returns the length of stream. */
static size_t MakeUncompressedStream(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  size_t remaining = input_size;
  size_t in_pos = 0;
  size_t out_pos = 0;
  if (input_size == 0) {
    /* Minimal complete empty stream. */
    output[0] = 6;
    return 1;
  }
  output[out_pos++] = 0x21;  /* window bits = 10, is_last = false */
  output[out_pos++] = 0x03;  /* empty metadata, padding */
  while (remaining > 0) {
    uint32_t chunk;
    uint32_t extra_nibbles;
    uint32_t header;
    /* Each uncompressed meta-block holds at most 1 << 24 bytes. */
    chunk = (remaining > (1u << 24)) ? (1u << 24) : (uint32_t)remaining;
    if (chunk > (1u << 20)) {
      extra_nibbles = 2;
    } else if (chunk > (1u << 16)) {
      extra_nibbles = 1;
    } else {
      extra_nibbles = 0;
    }
    header = (extra_nibbles << 1) | ((chunk - 1) << 3) |
        (1u << (19 + 4 * extra_nibbles));
    output[out_pos++] = (uint8_t)header;
    output[out_pos++] = (uint8_t)(header >> 8);
    output[out_pos++] = (uint8_t)(header >> 16);
    if (extra_nibbles == 2) output[out_pos++] = (uint8_t)(header >> 24);
    memcpy(&output[out_pos], &input[in_pos], chunk);
    out_pos += chunk;
    in_pos += chunk;
    remaining -= chunk;
  }
  output[out_pos++] = 3;  /* Empty last meta-block. */
  return out_pos;
}
1246
/* One-shot compression: compresses |input_buffer| into |encoded_buffer| with
   a temporary encoder instance. Falls back to an uncompressed wrapper stream
   when compression fails or expands the data beyond the computed bound. */
BROTLI_BOOL BrotliEncoderCompress(
    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
    size_t* encoded_size,
    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
  BrotliEncoderState* s;
  size_t out_size = *encoded_size;
  const uint8_t* input_start = input_buffer;
  uint8_t* output_start = encoded_buffer;
  /* 0 means the bound itself overflowed size_t. */
  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
  if (out_size == 0) {
    /* Output buffer needs at least one byte. */
    return BROTLI_FALSE;
  }
  if (input_size == 0) {
    /* Handle the special case of empty input. */
    *encoded_size = 1;
    *encoded_buffer = 6;
    return BROTLI_TRUE;
  }

  s = BrotliEncoderCreateInstance(0, 0, 0);
  if (!s) {
    return BROTLI_FALSE;
  } else {
    size_t available_in = input_size;
    const uint8_t* next_in = input_buffer;
    size_t available_out = *encoded_size;
    uint8_t* next_out = encoded_buffer;
    size_t total_out = 0;
    BROTLI_BOOL result = BROTLI_FALSE;
    /* TODO(eustas): check that parameters are sane. */
    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
    }
    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, &total_out);
    /* An unfinished stream (e.g. output buffer too small) is a failure. */
    if (!BrotliEncoderIsFinished(s)) result = 0;
    *encoded_size = total_out;
    BrotliEncoderDestroyInstance(s);
    if (!result || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }
fallback:
  *encoded_size = 0;
  if (!max_out_size) return BROTLI_FALSE;
  if (out_size >= max_out_size) {
    /* Buffer is large enough for the uncompressed wrapper stream. */
    *encoded_size =
        MakeUncompressedStream(input_start, input_size, output_start);
    return BROTLI_TRUE;
  }
  return BROTLI_FALSE;
}
1306
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
  uint32_t bits = s->last_bytes_;
  size_t bit_count = s->last_bytes_bits_;
  uint8_t* dst;
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
  bits |= 0x6u << bit_count;
  bit_count += 6;
  /* If we have already created storage, then append to it.
     Storage is valid until next block is being compressed. */
  if (s->next_out_) {
    dst = s->next_out_ + s->available_out_;
  } else {
    dst = s->tiny_buf_.u8;
    s->next_out_ = dst;
  }
  dst[0] = (uint8_t)bits;
  if (bit_count > 8) dst[1] = (uint8_t)(bits >> 8);
  if (bit_count > 16) dst[2] = (uint8_t)(bits >> 16);
  s->available_out_ += (bit_count + 7) >> 3;
}
1329
1330 /* Fills the |total_out|, if it is not NULL. */
/* Fills |*total_out| (when not NULL) with the total bytes produced so far. */
static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) {
  if (total_out == NULL) return;
  /* Saturating conversion uint64_t -> size_t. */
  if (s->total_out_ < (size_t)-1) {
    *total_out = (size_t)s->total_out_;
  } else {
    *total_out = (size_t)-1;
  }
}
1341
1342 /* Injects padding bits or pushes compressed data to output.
1343 Returns false if nothing is done. */
InjectFlushOrPushOutput(BrotliEncoderState * s,size_t * available_out,uint8_t ** next_out,size_t * total_out)1344 static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1345 size_t* available_out, uint8_t** next_out, size_t* total_out) {
1346 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1347 s->last_bytes_bits_ != 0) {
1348 InjectBytePaddingBlock(s);
1349 return BROTLI_TRUE;
1350 }
1351
1352 if (s->available_out_ != 0 && *available_out != 0) {
1353 size_t copy_output_size =
1354 BROTLI_MIN(size_t, s->available_out_, *available_out);
1355 memcpy(*next_out, s->next_out_, copy_output_size);
1356 *next_out += copy_output_size;
1357 *available_out -= copy_output_size;
1358 s->next_out_ += copy_output_size;
1359 s->available_out_ -= copy_output_size;
1360 s->total_out_ += copy_output_size;
1361 SetTotalOut(s, total_out);
1362 return BROTLI_TRUE;
1363 }
1364
1365 return BROTLI_FALSE;
1366 }
1367
/* Returns to normal processing once a requested flush has fully drained. */
static void CheckFlushComplete(BrotliEncoderState* s) {
  if (s->stream_state_ != BROTLI_STREAM_FLUSH_REQUESTED) return;
  if (s->available_out_ != 0) return;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->next_out_ = 0;
}
1375
/* Streaming driver for the fast (quality 0/1) paths: compresses directly from
   the client input, without the ring buffer used by higher qualities. */
static BROTLI_BOOL BrotliEncoderCompressStreamFast(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
      BROTLI_MIN(size_t, *available_in, block_size_limit));
  uint32_t* tmp_command_buf = NULL;
  uint32_t* command_buf = NULL;
  uint8_t* tmp_literal_buf = NULL;
  uint8_t* literal_buf = NULL;
  MemoryManager* m = &s->memory_manager_;
  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Full-size scratch buffers are cached on the state; smaller ones are
       allocated per call and freed before returning. */
    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
      s->command_buf_ =
          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
      s->literal_buf_ =
          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
          BROTLI_IS_NULL(s->literal_buf_)) {
        return BROTLI_FALSE;
      }
    }
    if (s->command_buf_) {
      command_buf = s->command_buf_;
      literal_buf = s->literal_buf_;
    } else {
      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
          BROTLI_IS_NULL(tmp_literal_buf)) {
        return BROTLI_FALSE;
      }
      command_buf = tmp_command_buf;
      literal_buf = tmp_literal_buf;
    }
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress block only when internal output buffer is empty, stream is not
       finished, there is no pending flush request, and there is either
       additional input or pending operation. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
      BROTLI_BOOL is_last =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
      BROTLI_BOOL force_flush =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
      size_t max_out_size = 2 * block_size + 503;
      BROTLI_BOOL inplace = BROTLI_TRUE;
      uint8_t* storage = NULL;
      size_t storage_ix = s->last_bytes_bits_;
      size_t table_size;
      int* table;

      if (force_flush && block_size == 0) {
        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        continue;
      }
      /* Write straight into the client buffer when it is big enough for the
         worst case; otherwise stage into internal storage. */
      if (max_out_size <= *available_out) {
        storage = *next_out;
      } else {
        inplace = BROTLI_FALSE;
        storage = GetBrotliStorage(s, max_out_size);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      /* Seed storage with the pending partial byte(s) of the previous block. */
      storage[0] = (uint8_t)s->last_bytes_;
      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
      table = GetHashTable(s, s->params.quality, block_size, &table_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
            is_last, table, table_size, &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      } else {
        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
            is_last, command_buf, literal_buf, table, table_size,
            &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      if (block_size != 0) {
        *next_in += block_size;
        *available_in -= block_size;
        s->total_in_ += block_size;
      }
      if (inplace) {
        size_t out_bytes = storage_ix >> 3;
        BROTLI_DCHECK(out_bytes <= *available_out);
        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
        *next_out += out_bytes;
        *available_out -= out_bytes;
        s->total_out_ += out_bytes;
        SetTotalOut(s, total_out);
      } else {
        size_t out_bytes = storage_ix >> 3;
        s->next_out_ = storage;
        s->available_out_ = out_bytes;
      }
      /* Keep the trailing partial byte for the next block. */
      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
      s->last_bytes_bits_ = storage_ix & 7u;

      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
      continue;
    }
    break;
  }
  BROTLI_FREE(m, tmp_command_buf);
  BROTLI_FREE(m, tmp_literal_buf);
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1499
/* Emits a metadata block: drains pending compressed output, writes the
   metadata header, then copies |*available_in| raw bytes into the output.
   Returns BROTLI_FALSE on protocol misuse (metadata larger than 1 << 24,
   or called in an incompatible stream state). */
static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    if (s->available_out_ != 0) break;

    /* Flush any buffered compressed data before the metadata block. */
    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}
1569
/* Derives a size hint when the client did not set one explicitly:
   buffered-but-unprocessed input plus the input still pending,
   saturated at 1 << 30. */
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
  uint64_t buffered;
  uint64_t pending;
  const uint32_t cap = 1u << 30;
  if (s->params.size_hint != 0) return;  /* Respect an explicit hint. */
  buffered = UnprocessedInputSize(s);
  pending = (uint64_t)available_in;
  if (buffered >= cap || pending >= cap || (buffered + pending) >= cap) {
    s->params.size_hint = cap;
  } else {
    s->params.size_hint = (uint32_t)(buffered + pending);
  }
}
1584
/* Main streaming entry point: consumes bytes from |*next_in| and emits
   compressed bytes to |*next_out|, advancing both pointers and decrementing
   the corresponding |available_*| counters. |op| selects between plain
   processing, flush, finish and metadata emission. Returns BROTLI_FALSE on
   API misuse (wrong operation ordering / metadata contract violations) or
   internal failure; BROTLI_TRUE otherwise. */
BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    /* Client must deliver exactly the announced number of metadata bytes,
       and may not interleave any other operation. */
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  /* While a metadata block is being emitted only EMIT_METADATA is legal. */
  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  /* After a flush/finish was requested no new input may be pushed until the
     encoder returns to the PROCESSING state. */
  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  /* Fast qualities (0 and 1) use a dedicated streaming implementation. */
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);
    /* Shorten input to flint size. */
    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
      remaining_block_size = (size_t)s->flint_;
    }

    /* Step 1: buffer as much input as fits into the current block. */
    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      s->total_in_ += copy_input_size;
      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
      continue;
    }

    /* Step 2: push any pending internal output to the client. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      /* Exit the "emit flint" workflow. */
      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
        CheckFlushComplete(s);
        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
          s->flint_ = BROTLI_FLINT_DONE;
        }
      }
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        /* Force emitting (uncompressed) piece containing flint. */
        if (!is_last && s->flint_ == 0) {
          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
          force_flush = BROTLI_TRUE;
        }
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    /* No progress possible: need more input or more output room. */
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1674
BrotliEncoderIsFinished(BrotliEncoderState * s)1675 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1676 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1677 !BrotliEncoderHasMoreOutput(s));
1678 }
1679
BrotliEncoderHasMoreOutput(BrotliEncoderState * s)1680 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1681 return TO_BROTLI_BOOL(s->available_out_ != 0);
1682 }
1683
/* Hands out a pointer into the encoder's internal output buffer.
   On entry *size is the maximum number of bytes the caller accepts
   (0 means "everything available"); on exit it holds the number of bytes
   actually handed out. Returns NULL (and sets *size to 0) when no output
   is available. The returned memory stays owned by the encoder. */
const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
  uint8_t* chunk = s->next_out_;
  size_t chunk_size = (*size == 0)
      ? s->available_out_
      : BROTLI_MIN(size_t, *size, s->available_out_);
  if (chunk_size == 0) {
    *size = 0;
    return 0;
  }
  s->next_out_ += chunk_size;
  s->available_out_ -= chunk_size;
  s->total_out_ += chunk_size;
  /* Draining output may complete a pending flush request. */
  CheckFlushComplete(s);
  *size = chunk_size;
  return chunk;
}
1702
BrotliEncoderVersion(void)1703 uint32_t BrotliEncoderVersion(void) {
1704 return BROTLI_VERSION;
1705 }
1706
/* Builds a prepared dictionary from |size| bytes at |data|, wrapped in a
   ManagedDictionary that remembers the allocator used to create it.
   |type| must be RAW (or SERIALIZED when compiled with BROTLI_EXPERIMENTAL);
   |quality| is only consulted for serialized dictionaries. Returns NULL on
   unsupported type or allocation failure. The caller releases the result
   with BrotliEncoderDestroyPreparedDictionary. */
BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
    BrotliSharedDictionaryType type, size_t size,
    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed_dictionary = NULL;
  BROTLI_BOOL type_is_known = BROTLI_FALSE;
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_RAW);
#if defined(BROTLI_EXPERIMENTAL)
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_SERIALIZED);
#endif  /* BROTLI_EXPERIMENTAL */
  if (!type_is_known) {
    return NULL;
  }
  /* The wrapper owns the memory manager used for the inner dictionary. */
  managed_dictionary =
      BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
  if (managed_dictionary == NULL) {
    return NULL;
  }
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
        &managed_dictionary->memory_manager_, data, size);
  }
#if defined(BROTLI_EXPERIMENTAL)
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
        &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
    managed_dictionary->dictionary = (uint32_t*)dict;
    if (dict != NULL) {
      BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
          &managed_dictionary->memory_manager_, data, size, quality, dict);
      if (!ok) {
        /* Initialization failed: roll back the inner allocation. */
        BrotliFree(&managed_dictionary->memory_manager_, dict);
        managed_dictionary->dictionary = NULL;
      }
    }
  }
#else  /* BROTLI_EXPERIMENTAL */
  (void)quality;
#endif  /* BROTLI_EXPERIMENTAL */
  /* Inner dictionary creation failed: release the wrapper too. */
  if (managed_dictionary->dictionary == NULL) {
    BrotliDestroyManagedDictionary(managed_dictionary);
    return NULL;
  }
  return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}
1752
/* Releases a dictionary previously created by BrotliEncoderPrepareDictionary.
   NULL is a no-op. Only managed dictionaries (identified by the magic in the
   first field of the struct) are eligible; anything else is left untouched. */
void BrotliEncoderDestroyPreparedDictionary(
    BrotliEncoderPreparedDictionary* dictionary) {
  ManagedDictionary* wrapper = (ManagedDictionary*)dictionary;
  if (wrapper == NULL) return;
  /* First field of dictionary structs; non-managed instances must be
     destroyed by whatever created them, not by this method. */
  if (wrapper->magic != kManagedDictionaryMagic) return;
  if (wrapper->dictionary != NULL) {  /* NULL here should never ever happen. */
    uint32_t inner_magic = *wrapper->dictionary;
    if (inner_magic == kLeanPreparedDictionaryMagic) {
      DestroyPreparedDictionary(
          &wrapper->memory_manager_, (PreparedDictionary*)wrapper->dictionary);
    } else if (inner_magic == kSharedDictionaryMagic) {
      BrotliCleanupSharedEncoderDictionary(&wrapper->memory_manager_,
          (SharedEncoderDictionary*)wrapper->dictionary);
      BrotliFree(&wrapper->memory_manager_, wrapper->dictionary);
    }
    /* There is also kPreparedDictionaryMagic, but such instances should be
       constructed and destroyed by different means. */
  }
  wrapper->dictionary = NULL;
  BrotliDestroyManagedDictionary(wrapper);
}
1778
/* Attaches |dictionary| to |state| so subsequent compression can reference
   it. Managed wrappers are transparently unwrapped. Prepared/lean
   dictionaries are appended to the compound set; a shared dictionary merges
   its compound chunks and may replace the contextual part, but only before
   initialization and only when at most one side deviates from the built-in
   static dictionary. Returns BROTLI_FALSE when attachment is not possible. */
BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
    const BrotliEncoderPreparedDictionary* dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* dict = dictionary;
  uint32_t magic = *((const uint32_t*)dict);
  SharedEncoderDictionary* current = NULL;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap managed dictionary. */
    ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
    magic = *managed_dictionary->dictionary;
    dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
  }
  current = &state->params.dictionary;
  if (magic == kPreparedDictionaryMagic ||
      magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
    if (!AttachPreparedDictionary(&current->compound, prepared)) {
      return BROTLI_FALSE;
    }
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* attached =
        (const SharedEncoderDictionary*)dict;
    /* "Default" means: non-contextual single dictionary whose hash tables are
       the built-in static ones, i.e. nothing custom was installed yet. */
    BROTLI_BOOL was_default = !current->contextual.context_based &&
        current->contextual.num_dictionaries == 1 &&
        current->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        current->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    BROTLI_BOOL new_default = !attached->contextual.context_based &&
        attached->contextual.num_dictionaries == 1 &&
        attached->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        attached->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    size_t i;
    /* Shared dictionaries may only be attached before first use. */
    if (state->is_initialized_) return BROTLI_FALSE;
    current->max_quality =
        BROTLI_MIN(int, current->max_quality, attached->max_quality);
    for (i = 0; i < attached->compound.num_chunks; i++) {
      if (!AttachPreparedDictionary(&current->compound,
          attached->compound.chunks[i])) {
        return BROTLI_FALSE;
      }
    }
    if (!new_default) {
      /* Only one custom contextual dictionary is supported. */
      if (!was_default) return BROTLI_FALSE;
      /* Copy by value, but then set num_instances_ to 0 because their memory
         is managed by attached, not by current */
      current->contextual = attached->contextual;
      current->contextual.num_instances_ = 0;
    }
  } else {
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
1835
/* Estimates (in bytes) the peak heap memory the encoder would use for the
   given |quality|, |lgwin| and |input_size|. The figure mirrors the actual
   allocation logic of the fast (q0/q1) and generic paths; the histogram
   component for high qualities is only a rough empirical estimate. */
size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
                                            size_t input_size) {
  BrotliEncoderParams params;
  size_t memory_manager_slots = BROTLI_ENCODER_MEMORY_MANAGER_SLOTS;
  size_t memory_manager_size = memory_manager_slots * sizeof(void*);
  /* Replay the same parameter sanitation the real encoder performs, so the
     derived lgblock / hasher choice match actual allocations. */
  BrotliEncoderInitParams(&params);
  params.quality = quality;
  params.lgwin = lgwin;
  params.size_hint = input_size;
  params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  ChooseHasher(&params, &params.hasher);
  if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Fast path: state + hash table (+ command buffers for two-pass). */
    size_t state_size = sizeof(BrotliEncoderState);
    size_t block_size = BROTLI_MIN(size_t, input_size, (1ul << params.lgwin));
    size_t hash_table_size =
        HashTableSize(MaxHashTableSize(params.quality), block_size);
    /* Small tables live inside the state; only big ones are heap-allocated. */
    size_t hash_size =
        (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
    size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
        5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
    if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      state_size += sizeof(BrotliOnePassArena);
    } else {
      state_size += sizeof(BrotliTwoPassArena);
    }
    return hash_size + cmdbuf_size + state_size;
  } else {
    /* Generic path: ring buffer + hashers + command/output buffers
       (+ histograms for block-splitting qualities). */
    size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
    int ringbuffer_bits = ComputeRbBits(&params);
    size_t ringbuffer_size = input_size < short_ringbuffer_size ?
        input_size : (1u << ringbuffer_bits) + short_ringbuffer_size;
    size_t hash_size[4] = {0};
    size_t metablock_size =
        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
    size_t inputblock_size =
        BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
    size_t outbuf_size = metablock_size * 2 + 503;
    size_t histogram_size = 0;
    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
    }
    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      /* Only a very rough estimation, based on enwik8. */
      histogram_size = 200 << 20;
    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
      size_t literal_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t command_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t distance_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      histogram_size = literal_histograms * sizeof(HistogramLiteral) +
          command_histograms * sizeof(HistogramCommand) +
          distance_histograms * sizeof(HistogramDistance);
    }
    return (memory_manager_size + ringbuffer_size +
            hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
            cmdbuf_size +
            outbuf_size +
            histogram_size);
  }
}
/* Returns the total heap footprint (in bytes) of a prepared dictionary,
   including any managed-wrapper overhead; 0 signals an unrecognized magic
   (error). Shared dictionaries are accounted recursively over their compound
   chunks and contextual instances. */
size_t BrotliEncoderGetPreparedDictionarySize(
    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
  uint32_t magic = *((const uint32_t*)prepared);
  size_t overhead = 0;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap and remember the wrapper's own size. */
    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
    overhead = sizeof(ManagedDictionary);
    magic = *managed->dictionary;
    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
  }

  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + dictionary->source_size +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    /* Lean variant stores a pointer to the source instead of a copy. */
    return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* dictionary =
        (const SharedEncoderDictionary*)prepared;
    const CompoundDictionary* compound = &dictionary->compound;
    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
    size_t result = sizeof(*dictionary);
    size_t i;
    size_t num_instances;
    const BrotliEncoderDictionary* instances;
    /* Recurse into each compound chunk. */
    for (i = 0; i < compound->num_prepared_instances_; i++) {
      size_t size = BrotliEncoderGetPreparedDictionarySize(
          (const BrotliEncoderPreparedDictionary*)
          compound->prepared_instances_[i]);
      if (!size) return 0;  /* error */
      result += size;
    }
    if (contextual->context_based) {
      num_instances = contextual->num_instances_;
      instances = contextual->instances_;
      result += sizeof(*instances) * num_instances;
    } else {
      num_instances = 1;
      instances = &contextual->instance_;
    }
    for (i = 0; i < num_instances; i++) {
      const BrotliEncoderDictionary* dict = &instances[i];
      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
      if (dict->hash_table_data_words_) {
        result += sizeof(kStaticDictionaryHashWords);
      }
      if (dict->hash_table_data_lengths_) {
        result += sizeof(kStaticDictionaryHashLengths);
      }
      if (dict->buckets_data_) {
        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
      }
      if (dict->dict_words_data_) {
        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
      }
      if (dict->words_instance_) {
        result += sizeof(*dict->words_instance_);
        /* data_size not added here: it is never allocated by the
           SharedEncoderDictionary, instead it always points to the file
           already loaded in memory. So if the caller wants to include
           this memory as well, add the size of the loaded dictionary
           file to this. */
      }
    }
    return result + overhead;
  }
  return 0;  /* error */
}
1985
#if defined(BROTLI_TEST)
/* Test-only shim exposing the internal MakeUncompressedStream helper;
   compiled only when BROTLI_TEST is defined. */
size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
size_t MakeUncompressedStreamForTest(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  return MakeUncompressedStream(input, input_size, output);
}
#endif
1993
1994 #if defined(__cplusplus) || defined(c_plusplus)
1995 } /* extern "C" */
1996 #endif
1997