1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Implementation of Brotli compressor. */
8 
9 #include <brotli/encode.h>
10 #include <brotli/shared_dictionary.h>
11 #include <brotli/types.h>
12 
13 #include <stdlib.h>  /* free, malloc */
14 #include <string.h>  /* memcpy, memset */
15 
16 #include "../common/constants.h"
17 #include "../common/context.h"
18 #include "../common/platform.h"
19 #include "../common/version.h"
20 #include "backward_references.h"
21 #include "backward_references_hq.h"
22 #include "bit_cost.h"
23 #include "brotli_bit_stream.h"
24 #include "compress_fragment.h"
25 #include "compress_fragment_two_pass.h"
26 #include "dictionary_hash.h"
27 #include "encoder_dict.h"
28 #include "entropy_encode.h"
29 #include "fast_log.h"
30 #include "hash.h"
31 #include "histogram.h"
32 #include "memory.h"
33 #include "metablock.h"
34 #include "prefix.h"
35 #include "state.h"
36 #include "quality.h"
37 #include "ringbuffer.h"
38 #include "utf8_util.h"
39 #include "write_bits.h"
40 
41 #if defined(__cplusplus) || defined(c_plusplus)
42 extern "C" {
43 #endif
44 
45 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
46 
47 static size_t InputBlockSize(BrotliEncoderState* s) {
48   return (size_t)1 << s->params.lgblock;
49 }
50 
51 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
52   return s->input_pos_ - s->last_processed_pos_;
53 }
54 
55 static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
56   const uint64_t delta = UnprocessedInputSize(s);
57   size_t block_size = InputBlockSize(s);
58   if (delta >= block_size) return 0;
59   return block_size - (size_t)delta;
60 }
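
/* Worked example for the three helpers above: with s->params.lgblock == 16 the
   input block size is (size_t)1 << 16 == 65536 bytes; if 40000 bytes are
   buffered but not yet processed, RemainingInputBlockSize() reports that
   65536 - 40000 == 25536 more bytes may be appended before the block is full. */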
61 
62 BROTLI_BOOL BrotliEncoderSetParameter(
63     BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
64   /* Changing parameters on the fly is not implemented yet. */
65   if (state->is_initialized_) return BROTLI_FALSE;
66   /* TODO(eustas): Validate/clamp parameters here. */
67   switch (p) {
68     case BROTLI_PARAM_MODE:
69       state->params.mode = (BrotliEncoderMode)value;
70       return BROTLI_TRUE;
71 
72     case BROTLI_PARAM_QUALITY:
73       state->params.quality = (int)value;
74       return BROTLI_TRUE;
75 
76     case BROTLI_PARAM_LGWIN:
77       state->params.lgwin = (int)value;
78       return BROTLI_TRUE;
79 
80     case BROTLI_PARAM_LGBLOCK:
81       state->params.lgblock = (int)value;
82       return BROTLI_TRUE;
83 
84     case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
85       if ((value != 0) && (value != 1)) return BROTLI_FALSE;
86       state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
87       return BROTLI_TRUE;
88 
89     case BROTLI_PARAM_SIZE_HINT:
90       state->params.size_hint = value;
91       return BROTLI_TRUE;
92 
93     case BROTLI_PARAM_LARGE_WINDOW:
94       state->params.large_window = TO_BROTLI_BOOL(!!value);
95       return BROTLI_TRUE;
96 
97     case BROTLI_PARAM_NPOSTFIX:
98       state->params.dist.distance_postfix_bits = value;
99       return BROTLI_TRUE;
100 
101     case BROTLI_PARAM_NDIRECT:
102       state->params.dist.num_direct_distance_codes = value;
103       return BROTLI_TRUE;
104 
105     case BROTLI_PARAM_STREAM_OFFSET:
106       if (value > (1u << 30)) return BROTLI_FALSE;
107       state->params.stream_offset = value;
108       return BROTLI_TRUE;
109 
110     default: return BROTLI_FALSE;
111   }
112 }
113 
114 /* Wraps 64-bit input position to 32-bit ring-buffer position preserving
115    "not-a-first-lap" feature. */
116 static uint32_t WrapPosition(uint64_t position) {
117   uint32_t result = (uint32_t)position;
118   uint64_t gb = position >> 30;
119   if (gb > 2) {
120     /* Wrap every 2GiB; the first 3GiB are continuous. */
121     result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;
122   }
123   return result;
124 }
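
/* Worked examples for WrapPosition(): positions below 3GiB map to themselves.
   Beyond that, the low 30 bits are kept and the result alternates between the
   [1GiB, 2GiB) and [2GiB, 3GiB) ranges every 2GiB:
     position == 3GiB        -> result == 1GiB
     position == 4GiB        -> result == 2GiB
     position == 5GiB + 100  -> result == 1GiB + 100 */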
125 
126 static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
127   MemoryManager* m = &s->memory_manager_;
128   if (s->storage_size_ < size) {
129     BROTLI_FREE(m, s->storage_);
130     s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
131     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
132     s->storage_size_ = size;
133   }
134   return s->storage_;
135 }
136 
137 static size_t HashTableSize(size_t max_table_size, size_t input_size) {
138   size_t htsize = 256;
139   while (htsize < max_table_size && htsize < input_size) {
140     htsize <<= 1;
141   }
142   return htsize;
143 }
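
/* Example: HashTableSize(1 << 15, 5000) doubles 256 -> 512 -> ... -> 8192 and
   stops there, since 8192 >= 5000; the result is effectively the smallest
   power of two that covers the input, clamped to max_table_size. */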
144 
145 static int* GetHashTable(BrotliEncoderState* s, int quality,
146                          size_t input_size, size_t* table_size) {
147   /* Use smaller hash table when input.size() is smaller, since we
148      fill the table, incurring O(hash table size) overhead for
149      compression, and if the input is short, we won't need that
150      many hash table entries anyway. */
151   MemoryManager* m = &s->memory_manager_;
152   const size_t max_table_size = MaxHashTableSize(quality);
153   size_t htsize = HashTableSize(max_table_size, input_size);
154   int* table;
155   BROTLI_DCHECK(max_table_size >= 256);
156   if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
157     /* Only odd shifts are supported by fast-one-pass. */
158     if ((htsize & 0xAAAAA) == 0) {
159       htsize <<= 1;
160     }
161   }
162 
163   if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
164     table = s->small_table_;
165   } else {
166     if (htsize > s->large_table_size_) {
167       s->large_table_size_ = htsize;
168       BROTLI_FREE(m, s->large_table_);
169       s->large_table_ = BROTLI_ALLOC(m, int, htsize);
170       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
171     }
172     table = s->large_table_;
173   }
174 
175   *table_size = htsize;
176   memset(table, 0, htsize * sizeof(*table));
177   return table;
178 }
179 
180 static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
181     uint16_t* last_bytes, uint8_t* last_bytes_bits) {
182   if (large_window) {
183     *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
184     *last_bytes_bits = 14;
185   } else {
186     if (lgwin == 16) {
187       *last_bytes = 0;
188       *last_bytes_bits = 1;
189     } else if (lgwin == 17) {
190       *last_bytes = 1;
191       *last_bytes_bits = 7;
192     } else if (lgwin > 17) {
193       *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
194       *last_bytes_bits = 4;
195     } else {
196       *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
197       *last_bytes_bits = 7;
198     }
199   }
200 }
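
/* Example encodings produced above (matching the WBITS codes of RFC 7932):
   lgwin == 16 is the single bit '0'; lgwin == 17 uses the 7-bit value 1;
   lgwin == 22 becomes ((22 - 17) << 1) | 1 == 0x0B in 4 bits; and lgwin
   values from 10 to 15 use 7 bits. Large-window streams always spend 14 bits. */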
201 
202 /* TODO(eustas): move to compress_fragment.c? */
203 /* Initializes the command and distance prefix codes for the first block. */
204 static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
205   static const uint8_t kDefaultCommandDepths[128] = {
206     0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
207     0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
208     7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
209     7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
210     5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211     6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
212     4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
213     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
214   };
215   static const uint16_t kDefaultCommandBits[128] = {
216     0,   0,   8,   9,   3,  35,   7,   71,
217     39, 103,  23,  47, 175, 111, 239,   31,
218     0,   0,   0,   4,  12,   2,  10,    6,
219     13,  29,  11,  43,  27,  59,  87,   55,
220     15,  79, 319, 831, 191, 703, 447,  959,
221     0,  14,   1,  25,   5,  21,  19,   51,
222     119, 159,  95, 223, 479, 991,  63,  575,
223     127, 639, 383, 895, 255, 767, 511, 1023,
224     14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225     27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
226     2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
227     767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
228   };
229   static const uint8_t kDefaultCommandCode[] = {
230     0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
231     0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
232     0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
233     0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
234     0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
235   };
236   static const size_t kDefaultCommandCodeNumBits = 448;
237   COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
238   COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);
239 
240   /* Initialize the pre-compressed form of the command and distance prefix
241      codes. */
242   COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
243   s->cmd_code_numbits = kDefaultCommandCodeNumBits;
244 }
245 
246 /* Decide about the context map based on the prediction ability of the
247    previous byte UTF8-prefix on the next byte. The
248    prediction ability is calculated as Shannon entropy. Here we need
249    Shannon entropy instead of 'BitsEntropy' since the prefix will be
250    encoded with the remaining 6 bits of the following byte, and
251    BitsEntropy will assume that symbol to be stored alone using Huffman
252    coding. */
253 static void ChooseContextMap(int quality,
254                              uint32_t* bigram_histo,
255                              size_t* num_literal_contexts,
256                              const uint32_t** literal_context_map) {
257   static const uint32_t kStaticContextMapContinuation[64] = {
258     1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
259     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
260     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
261     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
262   };
263   static const uint32_t kStaticContextMapSimpleUTF8[64] = {
264     0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
266     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268   };
269 
270   uint32_t monogram_histo[3] = { 0 };
271   uint32_t two_prefix_histo[6] = { 0 };
272   size_t total;
273   size_t i;
274   size_t dummy;
275   double entropy[4];
276   for (i = 0; i < 9; ++i) {
277     monogram_histo[i % 3] += bigram_histo[i];
278     two_prefix_histo[i % 6] += bigram_histo[i];
279   }
280   entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
281   entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
282                 ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
283   entropy[3] = 0;
284   for (i = 0; i < 3; ++i) {
285     entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
286   }
287 
288   total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
289   BROTLI_DCHECK(total != 0);
290   entropy[0] = 1.0 / (double)total;
291   entropy[1] *= entropy[0];
292   entropy[2] *= entropy[0];
293   entropy[3] *= entropy[0];
294 
295   if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
296     /* 3 context models are a bit slower, so don't use them at lower qualities. */
297     entropy[3] = entropy[1] * 10;
298   }
299   /* If expected savings by symbol are less than 0.2 bits, skip the
300      context modeling -- in exchange for faster decoding speed. */
301   if (entropy[1] - entropy[2] < 0.2 &&
302       entropy[1] - entropy[3] < 0.2) {
303     *num_literal_contexts = 1;
304   } else if (entropy[2] - entropy[3] < 0.02) {
305     *num_literal_contexts = 2;
306     *literal_context_map = kStaticContextMapSimpleUTF8;
307   } else {
308     *num_literal_contexts = 3;
309     *literal_context_map = kStaticContextMapContinuation;
310   }
311 }
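
/* Summary of the decision above: entropy[1] is the per-symbol cost without
   context, entropy[2] with 2 literal contexts, entropy[3] with 3. A context
   map is selected only when 2 or 3 contexts save at least 0.2 bits per symbol
   over context-free coding, and the 3-context map is preferred only when it
   beats the 2-context map by a further 0.02 bits per symbol. */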
312 
313 /* Decide if we want to use a more complex static context map containing 13
314    context values, based on the entropy reduction of histograms over the
315    first 5 bits of literals. */
316 static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
317     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
318     size_t* num_literal_contexts, const uint32_t** literal_context_map,
319     uint32_t* arena) {
320   static const uint32_t kStaticContextMapComplexUTF8[64] = {
321     11, 11, 12, 12, /* 0 special */
322     0, 0, 0, 0, /* 4 lf */
323     1, 1, 9, 9, /* 8 space */
324     2, 2, 2, 2, /* !, first after space/lf and after something else. */
325     1, 1, 1, 1, /* " */
326     8, 3, 3, 3, /* % */
327     1, 1, 1, 1, /* ({[ */
328     2, 2, 2, 2, /* }]) */
329     8, 4, 4, 4, /* :; */
330     8, 7, 4, 4, /* . */
331     8, 0, 0, 0, /* > */
332     3, 3, 3, 3, /* [0..9] */
333     5, 5, 10, 5, /* [A-Z] */
334     5, 5, 10, 5,
335     6, 6, 6, 6, /* [a-z] */
336     6, 6, 6, 6,
337   };
338   BROTLI_UNUSED(quality);
339   /* Try the more complex static context map only for long data. */
340   if (size_hint < (1 << 20)) {
341     return BROTLI_FALSE;
342   } else {
343     const size_t end_pos = start_pos + length;
344     /* To make entropy calculations faster, we collect histograms
345        over the 5 most significant bits of literals. One histogram
346        without context and 13 additional histograms, one for each context value. */
347     uint32_t* BROTLI_RESTRICT const combined_histo = arena;
348     uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
349     uint32_t total = 0;
350     double entropy[3];
351     size_t dummy;
352     size_t i;
353     ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
354     memset(arena, 0, sizeof(arena[0]) * 32 * 14);
355     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
356       const size_t stride_end_pos = start_pos + 64;
357       uint8_t prev2 = input[start_pos & mask];
358       uint8_t prev1 = input[(start_pos + 1) & mask];
359       size_t pos;
360       /* To make the analysis of the data faster we only examine 64-byte-long
361          strides at 4kB intervals. */
362       for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
363         const uint8_t literal = input[pos & mask];
364         const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
365             BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
366         ++total;
367         ++combined_histo[literal >> 3];
368         ++context_histo[(context << 5) + (literal >> 3)];
369         prev2 = prev1;
370         prev1 = literal;
371       }
372     }
373     entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
374     entropy[2] = 0;
375     for (i = 0; i < 13; ++i) {
376       entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
377     }
378     entropy[0] = 1.0 / (double)total;
379     entropy[1] *= entropy[0];
380     entropy[2] *= entropy[0];
381     /* The triggering heuristics below were tuned by compressing the individual
382        files of the silesia corpus. Because we skip this kind of context modeling
383        for input that is not very compressible (i.e. the entropy with context
384        modeling is above 60% of the maximal entropy) or when the expected savings
385        per symbol are less than 0.2 bits, the final compression ratio improves in
386        every case where the heuristic does trigger. Note however that this
387        heuristic might be too strict for some cases and could be tuned further. */
388     if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
389       return BROTLI_FALSE;
390     } else {
391       *num_literal_contexts = 13;
392       *literal_context_map = kStaticContextMapComplexUTF8;
393       return BROTLI_TRUE;
394     }
395   }
396 }
397 
398 static void DecideOverLiteralContextModeling(const uint8_t* input,
399     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
400     size_t* num_literal_contexts, const uint32_t** literal_context_map,
401     uint32_t* arena) {
402   if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
403     return;
404   } else if (ShouldUseComplexStaticContextMap(
405       input, start_pos, length, mask, quality, size_hint,
406       num_literal_contexts, literal_context_map, arena)) {
407     /* Context map was already set, nothing else to do. */
408   } else {
409     /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
410        UTF8 data faster we only examine 64 byte long strides at every 4kB
411        intervals. */
412     const size_t end_pos = start_pos + length;
413     uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
414     memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
415     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
416       static const int lut[4] = { 0, 0, 1, 2 };
417       const size_t stride_end_pos = start_pos + 64;
418       int prev = lut[input[start_pos & mask] >> 6] * 3;
419       size_t pos;
420       for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
421         const uint8_t literal = input[pos & mask];
422         ++bigram_prefix_histo[prev + lut[literal >> 6]];
423         prev = lut[literal >> 6] * 3;
424       }
425     }
426     ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
427                      literal_context_map);
428   }
429 }
430 
431 static BROTLI_BOOL ShouldCompress(
432     const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
433     const size_t bytes, const size_t num_literals, const size_t num_commands) {
434   /* TODO(eustas): find more precise minimal block overhead. */
435   if (bytes <= 2) return BROTLI_FALSE;
436   if (num_commands < (bytes >> 8) + 2) {
437     if ((double)num_literals > 0.99 * (double)bytes) {
438       uint32_t literal_histo[256] = { 0 };
439       static const uint32_t kSampleRate = 13;
440       static const double kMinEntropy = 7.92;
441       const double bit_cost_threshold =
442           (double)bytes * kMinEntropy / kSampleRate;
443       size_t t = (bytes + kSampleRate - 1) / kSampleRate;
444       uint32_t pos = (uint32_t)last_flush_pos;
445       size_t i;
446       for (i = 0; i < t; i++) {
447         ++literal_histo[data[pos & mask]];
448         pos += kSampleRate;
449       }
450       if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
451         return BROTLI_FALSE;
452       }
453     }
454   }
455   return BROTLI_TRUE;
456 }
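
/* Rough reading of the heuristic above: the data is deemed incompressible
   (and will be stored as an uncompressed meta-block) when there are almost no
   copy commands (fewer than bytes / 256 + 2), nearly every byte is a literal
   (more than 99%), and a 1-in-13 sample of the literals shows an entropy above
   7.92 bits per byte, i.e. the content already looks essentially random. */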
457 
458 /* Chooses the literal context mode for a metablock */
459 static ContextType ChooseContextMode(const BrotliEncoderParams* params,
460     const uint8_t* data, const size_t pos, const size_t mask,
461     const size_t length) {
462   /* We only consider a context mode other than CONTEXT_UTF8 at the
463      highest qualities. */
464   if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
465       !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
466     return CONTEXT_SIGNED;
467   }
468   return CONTEXT_UTF8;
469 }
470 
471 static void WriteMetaBlockInternal(MemoryManager* m,
472                                    const uint8_t* data,
473                                    const size_t mask,
474                                    const uint64_t last_flush_pos,
475                                    const size_t bytes,
476                                    const BROTLI_BOOL is_last,
477                                    ContextType literal_context_mode,
478                                    const BrotliEncoderParams* params,
479                                    const uint8_t prev_byte,
480                                    const uint8_t prev_byte2,
481                                    const size_t num_literals,
482                                    const size_t num_commands,
483                                    Command* commands,
484                                    const int* saved_dist_cache,
485                                    int* dist_cache,
486                                    size_t* storage_ix,
487                                    uint8_t* storage) {
488   const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
489   uint16_t last_bytes;
490   uint8_t last_bytes_bits;
491   ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
492   BrotliEncoderParams block_params = *params;
493 
494   if (bytes == 0) {
495     /* Write the ISLAST and ISEMPTY bits. */
496     BrotliWriteBits(2, 3, storage_ix, storage);
497     *storage_ix = (*storage_ix + 7u) & ~7u;
498     return;
499   }
500 
501   if (!ShouldCompress(data, mask, last_flush_pos, bytes,
502                       num_literals, num_commands)) {
503     /* Restore the distance cache, as its last update by
504        CreateBackwardReferences is now unused. */
505     memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
506     BrotliStoreUncompressedMetaBlock(is_last, data,
507                                      wrapped_last_flush_pos, mask, bytes,
508                                      storage_ix, storage);
509     return;
510   }
511 
512   BROTLI_DCHECK(*storage_ix <= 14);
513   last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
514   last_bytes_bits = (uint8_t)(*storage_ix);
515   if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
516     BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
517                              bytes, mask, is_last, params,
518                              commands, num_commands,
519                              storage_ix, storage);
520     if (BROTLI_IS_OOM(m)) return;
521   } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
522     BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
523                                 bytes, mask, is_last, params,
524                                 commands, num_commands,
525                                 storage_ix, storage);
526     if (BROTLI_IS_OOM(m)) return;
527   } else {
528     MetaBlockSplit mb;
529     InitMetaBlockSplit(&mb);
530     if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
531       size_t num_literal_contexts = 1;
532       const uint32_t* literal_context_map = NULL;
533       if (!params->disable_literal_context_modeling) {
534         /* TODO(eustas): pull to higher level and reuse. */
535         uint32_t* arena = BROTLI_ALLOC(m, uint32_t, 14 * 32);
536         if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
537         DecideOverLiteralContextModeling(
538             data, wrapped_last_flush_pos, bytes, mask, params->quality,
539             params->size_hint, &num_literal_contexts,
540             &literal_context_map, arena);
541         BROTLI_FREE(m, arena);
542       }
543       BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
544           prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
545           literal_context_map, commands, num_commands, &mb);
546       if (BROTLI_IS_OOM(m)) return;
547     } else {
548       BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
549                            prev_byte, prev_byte2,
550                            commands, num_commands,
551                            literal_context_mode,
552                            &mb);
553       if (BROTLI_IS_OOM(m)) return;
554     }
555     if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
556       /* The number of distance symbols effectively used for distance
557          histograms. It might be less than distance alphabet size
558          for "Large Window Brotli" (32-bit). */
559       BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
560     }
561     BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
562                          prev_byte, prev_byte2,
563                          is_last,
564                          &block_params,
565                          literal_context_mode,
566                          commands, num_commands,
567                          &mb,
568                          storage_ix, storage);
569     if (BROTLI_IS_OOM(m)) return;
570     DestroyMetaBlockSplit(m, &mb);
571   }
572   if (bytes + 4 < (*storage_ix >> 3)) {
573     /* Restore the distance cache and last byte. */
574     memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
575     storage[0] = (uint8_t)last_bytes;
576     storage[1] = (uint8_t)(last_bytes >> 8);
577     *storage_ix = last_bytes_bits;
578     BrotliStoreUncompressedMetaBlock(is_last, data,
579                                      wrapped_last_flush_pos, mask,
580                                      bytes, storage_ix, storage);
581   }
582 }
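
/* Note on the final check in WriteMetaBlockInternal(): if the compressed
   meta-block turned out more than 4 bytes larger than the raw input
   (bytes + 4 < *storage_ix >> 3), the emitted bits are discarded, the distance
   cache and pending header bytes are restored, and the data is re-emitted as
   an uncompressed meta-block instead. */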
583 
584 static void ChooseDistanceParams(BrotliEncoderParams* params) {
585   uint32_t distance_postfix_bits = 0;
586   uint32_t num_direct_distance_codes = 0;
587 
588   if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
589     uint32_t ndirect_msb;
590     if (params->mode == BROTLI_MODE_FONT) {
591       distance_postfix_bits = 1;
592       num_direct_distance_codes = 12;
593     } else {
594       distance_postfix_bits = params->dist.distance_postfix_bits;
595       num_direct_distance_codes = params->dist.num_direct_distance_codes;
596     }
597     ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
598     if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
599         num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
600         (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
601       distance_postfix_bits = 0;
602       num_direct_distance_codes = 0;
603     }
604   }
605 
606   BrotliInitDistanceParams(&params->dist, distance_postfix_bits,
607                            num_direct_distance_codes, params->large_window);
608 }
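
/* In other words: user-supplied NPOSTFIX/NDIRECT are honored only when
   NPOSTFIX <= BROTLI_MAX_NPOSTFIX, NDIRECT <= BROTLI_MAX_NDIRECT and NDIRECT
   equals m << NPOSTFIX for some m in 0..15; anything else falls back to
   (0, 0). BROTLI_MODE_FONT pins them to postfix = 1, ndirect = 12. */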
609 
610 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
611   MemoryManager* m = &s->memory_manager_;
612   if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
613   if (s->is_initialized_) return BROTLI_TRUE;
614 
615   s->last_bytes_bits_ = 0;
616   s->last_bytes_ = 0;
617   s->flint_ = BROTLI_FLINT_DONE;
618   s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
619 
620   SanitizeParams(&s->params);
621   s->params.lgblock = ComputeLgBlock(&s->params);
622   ChooseDistanceParams(&s->params);
623 
624   if (s->params.stream_offset != 0) {
625     s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
626     /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
627     s->dist_cache_[0] = -16;
628     s->dist_cache_[1] = -16;
629     s->dist_cache_[2] = -16;
630     s->dist_cache_[3] = -16;
631     memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
632   }
633 
634   RingBufferSetup(&s->params, &s->ringbuffer_);
635 
636   /* Initialize last byte with stream header. */
637   {
638     int lgwin = s->params.lgwin;
639     if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
640         s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
641       lgwin = BROTLI_MAX(int, lgwin, 18);
642     }
643     if (s->params.stream_offset == 0) {
644       EncodeWindowBits(lgwin, s->params.large_window,
645                        &s->last_bytes_, &s->last_bytes_bits_);
646     } else {
647       /* Bigger values have the same effect, but could cause overflows. */
648       s->params.stream_offset = BROTLI_MIN(size_t,
649           s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
650     }
651   }
652 
653   if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
654     s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
655     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
656     InitCommandPrefixCodes(s->one_pass_arena_);
657   } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
658     s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
659     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
660   }
661 
662   s->is_initialized_ = BROTLI_TRUE;
663   return BROTLI_TRUE;
664 }
665 
666 static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
667   params->mode = BROTLI_DEFAULT_MODE;
668   params->large_window = BROTLI_FALSE;
669   params->quality = BROTLI_DEFAULT_QUALITY;
670   params->lgwin = BROTLI_DEFAULT_WINDOW;
671   params->lgblock = 0;
672   params->stream_offset = 0;
673   params->size_hint = 0;
674   params->disable_literal_context_modeling = BROTLI_FALSE;
675   BrotliInitSharedEncoderDictionary(&params->dictionary);
676   params->dist.distance_postfix_bits = 0;
677   params->dist.num_direct_distance_codes = 0;
678   params->dist.alphabet_size_max =
679       BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
680   params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
681   params->dist.max_distance = BROTLI_MAX_DISTANCE;
682 }
683 
684 static void BrotliEncoderCleanupParams(MemoryManager* m,
685     BrotliEncoderParams* params) {
686   BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
687 }
688 
689 static void BrotliEncoderInitState(BrotliEncoderState* s) {
690   BrotliEncoderInitParams(&s->params);
691   s->input_pos_ = 0;
692   s->num_commands_ = 0;
693   s->num_literals_ = 0;
694   s->last_insert_len_ = 0;
695   s->last_flush_pos_ = 0;
696   s->last_processed_pos_ = 0;
697   s->prev_byte_ = 0;
698   s->prev_byte2_ = 0;
699   s->storage_size_ = 0;
700   s->storage_ = 0;
701   HasherInit(&s->hasher_);
702   s->large_table_ = NULL;
703   s->large_table_size_ = 0;
704   s->one_pass_arena_ = NULL;
705   s->two_pass_arena_ = NULL;
706   s->command_buf_ = NULL;
707   s->literal_buf_ = NULL;
708   s->total_in_ = 0;
709   s->next_out_ = NULL;
710   s->available_out_ = 0;
711   s->total_out_ = 0;
712   s->stream_state_ = BROTLI_STREAM_PROCESSING;
713   s->is_last_block_emitted_ = BROTLI_FALSE;
714   s->is_initialized_ = BROTLI_FALSE;
715 
716   RingBufferInit(&s->ringbuffer_);
717 
718   s->commands_ = 0;
719   s->cmd_alloc_size_ = 0;
720 
721   /* Initialize distance cache. */
722   s->dist_cache_[0] = 4;
723   s->dist_cache_[1] = 11;
724   s->dist_cache_[2] = 15;
725   s->dist_cache_[3] = 16;
726   /* Save the state of the distance cache in case we need to restore it for
727      emitting an uncompressed block. */
728   memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
729 }
730 
731 BrotliEncoderState* BrotliEncoderCreateInstance(
732     brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
733   BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
734       sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
735   if (state == NULL) {
736     /* BROTLI_DUMP(); */
737     return 0;
738   }
739   BrotliInitMemoryManager(
740       &state->memory_manager_, alloc_func, free_func, opaque);
741   BrotliEncoderInitState(state);
742   return state;
743 }
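
/* Minimal usage sketch for the streaming API implemented in this file, with
   error handling omitted; `input`, `input_size`, `out` and `out_capacity` are
   caller-provided placeholders and the output buffer is assumed to be large
   enough for a single BROTLI_OPERATION_FINISH call:

     BrotliEncoderState* enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
     BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, 9);
     BrotliEncoderSetParameter(enc, BROTLI_PARAM_LGWIN, 22);
     size_t avail_in = input_size;
     const uint8_t* next_in = input;
     size_t avail_out = out_capacity;
     uint8_t* next_out = out;
     size_t total_out = 0;
     BrotliEncoderCompressStream(enc, BROTLI_OPERATION_FINISH,
                                 &avail_in, &next_in,
                                 &avail_out, &next_out, &total_out);
     (when BrotliEncoderIsFinished(enc) returns BROTLI_TRUE, total_out holds
      the size of the complete stream)
     BrotliEncoderDestroyInstance(enc);
*/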
744 
745 #ifdef BROTLI_REPORTING
746 /* When BROTLI_REPORTING is defined, an extra reporting module has to be linked. */
747 void BrotliEncoderOnFinish(const BrotliEncoderState* s);
748 #define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s);
749 #else
750 #if !defined(BROTLI_ENCODER_ON_FINISH)
751 #define BROTLI_ENCODER_ON_FINISH(s) (void)(s);
752 #endif
753 #endif
754 
755 static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
756   MemoryManager* m = &s->memory_manager_;
757 
758   BROTLI_ENCODER_ON_FINISH(s);
759 
760   if (BROTLI_IS_OOM(m)) {
761     BrotliWipeOutMemoryManager(m);
762     return;
763   }
764 
765   BROTLI_FREE(m, s->storage_);
766   BROTLI_FREE(m, s->commands_);
767   RingBufferFree(m, &s->ringbuffer_);
768   DestroyHasher(m, &s->hasher_);
769   BROTLI_FREE(m, s->large_table_);
770   BROTLI_FREE(m, s->one_pass_arena_);
771   BROTLI_FREE(m, s->two_pass_arena_);
772   BROTLI_FREE(m, s->command_buf_);
773   BROTLI_FREE(m, s->literal_buf_);
774   BrotliEncoderCleanupParams(m, &s->params);
775 }
776 
777 /* Deinitializes and frees BrotliEncoderState instance. */
778 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
779   if (!state) {
780     return;
781   } else {
782     BrotliEncoderCleanupState(state);
783     BrotliBootstrapFree(state, &state->memory_manager_);
784   }
785 }
786 
787 /*
788    Copies the given input data to the internal ring buffer of the compressor.
789    No processing of the data occurs at this time and this function can be
790    called multiple times before calling WriteBrotliData() to process the
791    accumulated input. At most input_block_size() bytes of input data can be
792    copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
793  */
794 static void CopyInputToRingBuffer(BrotliEncoderState* s,
795                                   const size_t input_size,
796                                   const uint8_t* input_buffer) {
797   RingBuffer* ringbuffer_ = &s->ringbuffer_;
798   MemoryManager* m = &s->memory_manager_;
799   RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
800   if (BROTLI_IS_OOM(m)) return;
801   s->input_pos_ += input_size;
802 
803   /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
804      hashing not depend on uninitialized data. This makes compression
805      deterministic and it prevents uninitialized memory warnings in Valgrind.
806      Even without erasing, the output would be valid (but nondeterministic).
807 
808      Background information: The compressor stores short (at most 8 bytes)
809      substrings of the input already read in a hash table, and detects
810      repetitions by looking up such substrings in the hash table. If it
811      can find a substring, it checks whether the substring is really there
812      in the ring buffer (or it's just a hash collision). Should the hash
813      table become corrupt, this check makes sure that the output is
814      still valid, albeit the compression ratio would be bad.
815 
816      The compressor populates the hash table from the ring buffer as it's
817      reading new bytes from the input. However, at the last few indexes of
818      the ring buffer, there are not enough bytes to build full-length
819      substrings from. Since the hash table always contains full-length
820      substrings, we erase with dummy zeros here to make sure that those
821      substrings will contain zeros at the end instead of uninitialized
822      data.
823 
824      Please note that erasing is not necessary (because the
825      memory region is already initialized, since the ring buffer
826      has a `tail' that holds a copy of the beginning), so we
827      skip erasing if we have already gone around at least once in
828      the ring buffer.
829 
830      Only clear during the first round of ring-buffer writes. On
831      subsequent rounds data in the ring-buffer would be affected. */
832   if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
833     /* This is the first time when the ring buffer is being written.
834        We clear 7 bytes just after the bytes that have been copied from
835        the input buffer.
836 
837        The ring-buffer has a "tail" that holds a copy of the beginning,
838        but only once the ring buffer has been fully written once, i.e.,
839        pos <= mask. For the first time, we need to write values
840        in this tail (where index may be larger than mask), so that
841        we have exactly defined behavior and don't read uninitialized
842        memory. Due to performance reasons, hashing reads data using a
843        LOAD64, which can go 7 bytes beyond the bytes written in the
844        ring-buffer. */
845     memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
846   }
847 }
848 
849 /* Marks all input as processed.
850    Returns true if position wrapping occurs. */
851 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
852   uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
853   uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
854   s->last_processed_pos_ = s->input_pos_;
855   return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
856 }
857 
858 static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
859                               uint32_t* wrapped_last_processed_pos) {
860   Command* last_command = &s->commands_[s->num_commands_ - 1];
861   const uint8_t* data = s->ringbuffer_.buffer_;
862   const uint32_t mask = s->ringbuffer_.mask_;
863   uint64_t max_backward_distance =
864       (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
865   uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
866   uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
867   uint64_t max_distance = last_processed_pos < max_backward_distance ?
868       last_processed_pos : max_backward_distance;
869   uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
870   uint32_t distance_code = CommandRestoreDistanceCode(last_command,
871                                                       &s->params.dist);
872   const CompoundDictionary* dict = &s->params.dictionary.compound;
873   size_t compound_dictionary_size = dict->total_size;
874   if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
875       distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
876     if (cmd_dist <= max_distance) {
877       while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
878              data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
879         last_command->copy_len_++;
880         (*bytes)--;
881         (*wrapped_last_processed_pos)++;
882       }
883     } else {
884       if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
885           last_copy_len < cmd_dist - max_distance) {
886         size_t address =
887             compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
888             (size_t)last_copy_len;
889         size_t br_index = 0;
890         size_t br_offset;
891         const uint8_t* chunk;
892         size_t chunk_length;
893         while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
894         br_offset = address - dict->chunk_offsets[br_index];
895         chunk = dict->chunk_source[br_index];
896         chunk_length =
897             dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
898         while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
899                chunk[br_offset]) {
900           last_command->copy_len_++;
901           (*bytes)--;
902           (*wrapped_last_processed_pos)++;
903           if (++br_offset == chunk_length) {
904             br_index++;
905             br_offset = 0;
906             if (br_index != dict->num_chunks) {
907               chunk = dict->chunk_source[br_index];
908               chunk_length = dict->chunk_offsets[br_index + 1] -
909                   dict->chunk_offsets[br_index];
910             } else {
911               break;
912             }
913           }
914         }
915       }
916     }
917     /* The copy length is at most the metablock size, and thus expressible. */
918     GetLengthCode(last_command->insert_len_,
919                   (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
920                            (int)(last_command->copy_len_ >> 25)),
921                   TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
922                   &last_command->cmd_prefix_);
923   }
924 }
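
/* Context for ExtendLastCommand(): when new input arrives and the previous
   block ended in a copy command with no trailing literal run, the copy is
   grown byte-by-byte for as long as the new bytes keep matching at the same
   distance, either inside the window or inside the compound-dictionary
   chunks, and the command's length code is recomputed. This lets a match
   continue across a block boundary instead of being cut short. */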
925 
926 /*
927    Processes the accumulated input data and sets |*out_size| to the length of
928    the new output meta-block, or to zero if no new output meta-block has been
929    created (in this case the processed input data is buffered internally).
930    If |*out_size| is positive, |*output| points to the start of the output
931    data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
932    always created. However, until |is_last| is BROTLI_TRUE the encoder may
933    retain up to 7 bits of the last byte of output. Byte-alignment could be
934    enforced by emitting an empty meta-data block.
935    Returns BROTLI_FALSE if the size of the input data is larger than
936    input_block_size().
937  */
938 static BROTLI_BOOL EncodeData(
939     BrotliEncoderState* s, const BROTLI_BOOL is_last,
940     const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
941   const uint64_t delta = UnprocessedInputSize(s);
942   uint32_t bytes = (uint32_t)delta;
943   uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
944   uint8_t* data;
945   uint32_t mask;
946   MemoryManager* m = &s->memory_manager_;
947   ContextType literal_context_mode;
948   ContextLut literal_context_lut;
949   BROTLI_BOOL fast_compress =
950       s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
951       s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;
952 
953   data = s->ringbuffer_.buffer_;
954   mask = s->ringbuffer_.mask_;
955 
956   if (delta == 0) {  /* No new input; still might want to flush or finish. */
957     if (!data) {  /* No input has been processed so far. */
958       if (is_last) {  /* Emit complete finalized stream. */
959         BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
960         s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
961         s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
962         s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
963         s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
964         *output = s->tiny_buf_.u8;
965         *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
966         return BROTLI_TRUE;
967       } else {  /* No data, not last -> no-op. */
968         *out_size = 0;
969         return BROTLI_TRUE;
970       }
971     } else {
972       /* Fast compress performs flush every block -> flush is no-op. */
973       if (!is_last && (!force_flush || fast_compress)) {  /* Another no-op. */
974         *out_size = 0;
975         return BROTLI_TRUE;
976       }
977     }
978   }
979   BROTLI_DCHECK(data);
980 
981   if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
982   /* Adding more blocks after "last" block is forbidden. */
983   if (s->is_last_block_emitted_) return BROTLI_FALSE;
984   if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
985 
986   if (delta > InputBlockSize(s)) {
987     return BROTLI_FALSE;
988   }
989   if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
990       !s->command_buf_) {
991     s->command_buf_ =
992         BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
993     s->literal_buf_ =
994         BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
995     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
996         BROTLI_IS_NULL(s->literal_buf_)) {
997       return BROTLI_FALSE;
998     }
999   }
1000 
1001   if (fast_compress) {
1002     uint8_t* storage;
1003     size_t storage_ix = s->last_bytes_bits_;
1004     size_t table_size;
1005     int* table;
1006 
1007     storage = GetBrotliStorage(s, 2 * bytes + 503);
1008     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1009     storage[0] = (uint8_t)s->last_bytes_;
1010     storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1011     table = GetHashTable(s, s->params.quality, bytes, &table_size);
1012     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1013     if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1014       BrotliCompressFragmentFast(
1015           s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
1016           bytes, is_last,
1017           table, table_size,
1018           &storage_ix, storage);
1019       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1020     } else {
1021       BrotliCompressFragmentTwoPass(
1022           s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
1023           bytes, is_last,
1024           s->command_buf_, s->literal_buf_,
1025           table, table_size,
1026           &storage_ix, storage);
1027       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1028     }
1029     s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1030     s->last_bytes_bits_ = storage_ix & 7u;
1031     UpdateLastProcessedPos(s);
1032     *output = &storage[0];
1033     *out_size = storage_ix >> 3;
1034     return BROTLI_TRUE;
1035   }
1036 
1037   {
1038     /* Theoretical max number of commands is 1 per 2 bytes. */
1039     size_t newsize = s->num_commands_ + bytes / 2 + 1;
1040     if (newsize > s->cmd_alloc_size_) {
1041       Command* new_commands;
1042       /* Reserve a bit more memory to allow merging with a next block
1043          without reallocation: that would impact speed. */
1044       newsize += (bytes / 4) + 16;
1045       s->cmd_alloc_size_ = newsize;
1046       new_commands = BROTLI_ALLOC(m, Command, newsize);
1047       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
1048       if (s->commands_) {
1049         memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
1050         BROTLI_FREE(m, s->commands_);
1051       }
1052       s->commands_ = new_commands;
1053     }
1054   }
1055 
1056   InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
1057       wrapped_last_processed_pos, bytes, is_last);
1058 
1059   literal_context_mode = ChooseContextMode(
1060       &s->params, data, WrapPosition(s->last_flush_pos_),
1061       mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
1062   literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1063 
1064   if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1065 
1066   if (s->num_commands_ && s->last_insert_len_ == 0) {
1067     ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
1068   }
1069 
1070   if (s->params.quality == ZOPFLIFICATION_QUALITY) {
1071     BROTLI_DCHECK(s->params.hasher.type == 10);
1072     BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1073         data, mask, literal_context_lut, &s->params,
1074         &s->hasher_, s->dist_cache_,
1075         &s->last_insert_len_, &s->commands_[s->num_commands_],
1076         &s->num_commands_, &s->num_literals_);
1077     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1078   } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
1079     BROTLI_DCHECK(s->params.hasher.type == 10);
1080     BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1081         data, mask, literal_context_lut, &s->params,
1082         &s->hasher_, s->dist_cache_,
1083         &s->last_insert_len_, &s->commands_[s->num_commands_],
1084         &s->num_commands_, &s->num_literals_);
1085     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1086   } else {
1087     BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
1088         data, mask, literal_context_lut, &s->params,
1089         &s->hasher_, s->dist_cache_,
1090         &s->last_insert_len_, &s->commands_[s->num_commands_],
1091         &s->num_commands_, &s->num_literals_);
1092   }
1093 
1094   {
1095     const size_t max_length = MaxMetablockSize(&s->params);
1096     const size_t max_literals = max_length / 8;
1097     const size_t max_commands = max_length / 8;
1098     const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
1099     /* If maximal possible additional block doesn't fit metablock, flush now. */
1100     /* TODO(eustas): Postpone decision until next block arrives? */
1101     const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
1102         processed_bytes + InputBlockSize(s) <= max_length);
1103     /* If block splitting is not used, then flush as soon as there is some
1104        amount of commands / literals produced. */
1105     const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
1106         s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
1107         s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
1108     if (!is_last && !force_flush && !should_flush &&
1109         next_input_fits_metablock &&
1110         s->num_literals_ < max_literals &&
1111         s->num_commands_ < max_commands) {
1112       /* Merge with next input block. Everything will happen later. */
1113       if (UpdateLastProcessedPos(s)) {
1114         HasherReset(&s->hasher_);
1115       }
1116       *out_size = 0;
1117       return BROTLI_TRUE;
1118     }
1119   }
1120 
1121   /* Create the last insert-only command. */
1122   if (s->last_insert_len_ > 0) {
1123     InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
1124     s->num_literals_ += s->last_insert_len_;
1125     s->last_insert_len_ = 0;
1126   }
1127 
1128   if (!is_last && s->input_pos_ == s->last_flush_pos_) {
1129     /* We have no new input data and we don't have to finish the stream, so
1130        nothing to do. */
1131     *out_size = 0;
1132     return BROTLI_TRUE;
1133   }
1134   BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
1135   BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
1136   BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1137   {
1138     const uint32_t metablock_size =
1139         (uint32_t)(s->input_pos_ - s->last_flush_pos_);
1140     uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
1141     size_t storage_ix = s->last_bytes_bits_;
1142     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1143     storage[0] = (uint8_t)s->last_bytes_;
1144     storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1145     WriteMetaBlockInternal(
1146         m, data, mask, s->last_flush_pos_, metablock_size, is_last,
1147         literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
1148         s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
1149         s->dist_cache_, &storage_ix, storage);
1150     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1151     s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1152     s->last_bytes_bits_ = storage_ix & 7u;
1153     s->last_flush_pos_ = s->input_pos_;
1154     if (UpdateLastProcessedPos(s)) {
1155       HasherReset(&s->hasher_);
1156     }
1157     if (s->last_flush_pos_ > 0) {
1158       s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
1159     }
1160     if (s->last_flush_pos_ > 1) {
1161       s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
1162     }
1163     s->num_commands_ = 0;
1164     s->num_literals_ = 0;
1165     /* Save the state of the distance cache in case we need to restore it for
1166        emitting an uncompressed block. */
1167     memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
1168     *output = &storage[0];
1169     *out_size = storage_ix >> 3;
1170     return BROTLI_TRUE;
1171   }
1172 }
1173 
1174 /* Dumps remaining output bits and metadata header to |header|.
1175    Returns number of produced bytes.
1176    REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
1177    REQUIRED: |block_size| <= (1 << 24). */
1178 static size_t WriteMetadataHeader(
1179     BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
1180   size_t storage_ix;
1181   storage_ix = s->last_bytes_bits_;
1182   header[0] = (uint8_t)s->last_bytes_;
1183   header[1] = (uint8_t)(s->last_bytes_ >> 8);
1184   s->last_bytes_ = 0;
1185   s->last_bytes_bits_ = 0;
1186 
1187   BrotliWriteBits(1, 0, &storage_ix, header);
1188   BrotliWriteBits(2, 3, &storage_ix, header);
1189   BrotliWriteBits(1, 0, &storage_ix, header);
1190   if (block_size == 0) {
1191     BrotliWriteBits(2, 0, &storage_ix, header);
1192   } else {
1193     uint32_t nbits = (block_size == 1) ? 1 :
1194         (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
1195     uint32_t nbytes = (nbits + 7) / 8;
1196     BrotliWriteBits(2, nbytes, &storage_ix, header);
1197     BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
1198   }
1199   return (storage_ix + 7u) >> 3;
1200 }
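
/* Example: for block_size == 100 the else-branch above computes
   nbits == Log2FloorNonZero(99) + 1 == 7, hence nbytes == 1, and emits the
   2-bit value 1 followed by the 8-bit value 99 (block_size - 1) after the
   fixed ISLAST=0 / metadata-signalling prefix bits. */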
1201 
1202 size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
1203   /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
1204   size_t num_large_blocks = input_size >> 14;
1205   size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
1206   size_t result = input_size + overhead;
1207   if (input_size == 0) return 2;
1208   return (result < input_size) ? 0 : result;
1209 }
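
/* Example: input_size == 1 << 20 gives num_large_blocks == 64 and
   overhead == 2 + 4 * 64 + 3 + 1 == 262, so a 1MiB input always fits into
   1048838 output bytes even when every meta-block is stored uncompressed. */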
1210 
1211 /* Wraps data to uncompressed brotli stream with minimal window size.
1212    |output| should point at region with at least BrotliEncoderMaxCompressedSize
1213    addressable bytes.
1214    Returns the length of stream. */
1215 static size_t MakeUncompressedStream(
1216     const uint8_t* input, size_t input_size, uint8_t* output) {
1217   size_t size = input_size;
1218   size_t result = 0;
1219   size_t offset = 0;
1220   if (input_size == 0) {
1221     output[0] = 6;
1222     return 1;
1223   }
1224   output[result++] = 0x21;  /* window bits = 10, is_last = false */
1225   output[result++] = 0x03;  /* empty metadata, padding */
1226   while (size > 0) {
1227     uint32_t nibbles = 0;
1228     uint32_t chunk_size;
1229     uint32_t bits;
1230     chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
1231     if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
1232     bits =
1233         (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
1234     output[result++] = (uint8_t)bits;
1235     output[result++] = (uint8_t)(bits >> 8);
1236     output[result++] = (uint8_t)(bits >> 16);
1237     if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
1238     memcpy(&output[result], &input[offset], chunk_size);
1239     result += chunk_size;
1240     offset += chunk_size;
1241     size -= chunk_size;
1242   }
1243   output[result++] = 3;
1244   return result;
1245 }
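
/* Layout of the stream produced above: a 2-byte header selecting the minimal
   10-bit window plus an empty metadata block for byte alignment, then one
   stored (uncompressed) meta-block per chunk of at most 16MiB, whose length
   field uses 4, 5 or 6 nibbles depending on the chunk size, and finally the
   single byte 3, which is the empty ISLAST meta-block. */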
1246 
1247 BROTLI_BOOL BrotliEncoderCompress(
1248     int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
1249     const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
1250     size_t* encoded_size,
1251     uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
1252   BrotliEncoderState* s;
1253   size_t out_size = *encoded_size;
1254   const uint8_t* input_start = input_buffer;
1255   uint8_t* output_start = encoded_buffer;
1256   size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
1257   if (out_size == 0) {
1258     /* Output buffer needs at least one byte. */
1259     return BROTLI_FALSE;
1260   }
1261   if (input_size == 0) {
1262     /* Handle the special case of empty input. */
1263     *encoded_size = 1;
1264     *encoded_buffer = 6;
1265     return BROTLI_TRUE;
1266   }
1267 
1268   s = BrotliEncoderCreateInstance(0, 0, 0);
1269   if (!s) {
1270     return BROTLI_FALSE;
1271   } else {
1272     size_t available_in = input_size;
1273     const uint8_t* next_in = input_buffer;
1274     size_t available_out = *encoded_size;
1275     uint8_t* next_out = encoded_buffer;
1276     size_t total_out = 0;
1277     BROTLI_BOOL result = BROTLI_FALSE;
1278     /* TODO(eustas): check that parameters are sane. */
1279     BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
1280     BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
1281     BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
1282     BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
1283     if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1284       BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
1285     }
1286     result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
1287         &available_in, &next_in, &available_out, &next_out, &total_out);
1288     if (!BrotliEncoderIsFinished(s)) result = 0;
1289     *encoded_size = total_out;
1290     BrotliEncoderDestroyInstance(s);
1291     if (!result || (max_out_size && *encoded_size > max_out_size)) {
1292       goto fallback;
1293     }
1294     return BROTLI_TRUE;
1295   }
1296 fallback:
1297   *encoded_size = 0;
1298   if (!max_out_size) return BROTLI_FALSE;
1299   if (out_size >= max_out_size) {
1300     *encoded_size =
1301         MakeUncompressedStream(input_start, input_size, output_start);
1302     return BROTLI_TRUE;
1303   }
1304   return BROTLI_FALSE;
1305 }
1306 
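/* Illustrative usage sketch (not part of the library): one-shot compression
   into a heap buffer sized with BrotliEncoderMaxCompressedSize. The helper
   name and ownership convention are assumptions of the sketch; on success the
   caller owns *out and *out_size holds the compressed length. */
#if 0
static BROTLI_BOOL ExampleCompressOneShot(const uint8_t* data, size_t data_size,
                                          uint8_t** out, size_t* out_size) {
  size_t bound = BrotliEncoderMaxCompressedSize(data_size);
  if (bound == 0) return BROTLI_FALSE;  /* size computation overflowed */
  *out = (uint8_t*)malloc(bound);
  if (*out == NULL) return BROTLI_FALSE;
  *out_size = bound;  /* in: buffer capacity, out: bytes actually written */
  if (!BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW,
      BROTLI_MODE_GENERIC, data_size, data, out_size, *out)) {
    free(*out);
    *out = NULL;
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
#endif
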
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
  uint32_t seal = s->last_bytes_;
  size_t seal_bits = s->last_bytes_bits_;
  uint8_t* destination;
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
  seal |= 0x6u << seal_bits;
  seal_bits += 6;
  /* If we have already created storage, then append to it.
     Storage is valid until next block is being compressed. */
  if (s->next_out_) {
    destination = s->next_out_ + s->available_out_;
  } else {
    destination = s->tiny_buf_.u8;
    s->next_out_ = destination;
  }
  destination[0] = (uint8_t)seal;
  if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
  if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
  s->available_out_ += (seal_bits + 7) >> 3;
}

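/* Added note: the 6-bit seal value 0x6 is 0b000110 read LSB-first, i.e.
   ISLAST = 0, MNIBBLES code = 11 (metadata block), reserved bit = 0 and
   MSKIPBYTES = 00 (no skipped bytes); this is the empty metadata block used
   here to pad the stream to a byte boundary. */
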
/* Fills the |total_out|, if it is not NULL. */
static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) {
  if (total_out) {
    /* Saturating conversion uint64_t -> size_t */
    size_t result = (size_t)-1;
    if (s->total_out_ < result) {
      result = (size_t)s->total_out_;
    }
    *total_out = result;
  }
}

/* Injects padding bits or pushes compressed data to output.
   Returns false if nothing is done. */
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
      s->last_bytes_bits_ != 0) {
    InjectBytePaddingBlock(s);
    return BROTLI_TRUE;
  }

  if (s->available_out_ != 0 && *available_out != 0) {
    size_t copy_output_size =
        BROTLI_MIN(size_t, s->available_out_, *available_out);
    memcpy(*next_out, s->next_out_, copy_output_size);
    *next_out += copy_output_size;
    *available_out -= copy_output_size;
    s->next_out_ += copy_output_size;
    s->available_out_ -= copy_output_size;
    s->total_out_ += copy_output_size;
    SetTotalOut(s, total_out);
    return BROTLI_TRUE;
  }

  return BROTLI_FALSE;
}

static void CheckFlushComplete(BrotliEncoderState* s) {
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
      s->available_out_ == 0) {
    s->stream_state_ = BROTLI_STREAM_PROCESSING;
    s->next_out_ = 0;
  }
}

static BROTLI_BOOL BrotliEncoderCompressStreamFast(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
      BROTLI_MIN(size_t, *available_in, block_size_limit));
  uint32_t* tmp_command_buf = NULL;
  uint32_t* command_buf = NULL;
  uint8_t* tmp_literal_buf = NULL;
  uint8_t* literal_buf = NULL;
  MemoryManager* m = &s->memory_manager_;
  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
      s->command_buf_ =
          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
      s->literal_buf_ =
          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
          BROTLI_IS_NULL(s->literal_buf_)) {
        return BROTLI_FALSE;
      }
    }
    if (s->command_buf_) {
      command_buf = s->command_buf_;
      literal_buf = s->literal_buf_;
    } else {
      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
          BROTLI_IS_NULL(tmp_literal_buf)) {
        return BROTLI_FALSE;
      }
      command_buf = tmp_command_buf;
      literal_buf = tmp_literal_buf;
    }
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress block only when internal output buffer is empty, stream is not
       finished, there is no pending flush request, and there is either
       additional input or pending operation. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
      BROTLI_BOOL is_last =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
      BROTLI_BOOL force_flush =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
      size_t max_out_size = 2 * block_size + 503;
      BROTLI_BOOL inplace = BROTLI_TRUE;
      uint8_t* storage = NULL;
      size_t storage_ix = s->last_bytes_bits_;
      size_t table_size;
      int* table;

      if (force_flush && block_size == 0) {
        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        continue;
      }
      if (max_out_size <= *available_out) {
        storage = *next_out;
      } else {
        inplace = BROTLI_FALSE;
        storage = GetBrotliStorage(s, max_out_size);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      storage[0] = (uint8_t)s->last_bytes_;
      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
      table = GetHashTable(s, s->params.quality, block_size, &table_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
            is_last, table, table_size, &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      } else {
        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
            is_last, command_buf, literal_buf, table, table_size,
            &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      if (block_size != 0) {
        *next_in += block_size;
        *available_in -= block_size;
        s->total_in_ += block_size;
      }
      if (inplace) {
        size_t out_bytes = storage_ix >> 3;
        BROTLI_DCHECK(out_bytes <= *available_out);
        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
        *next_out += out_bytes;
        *available_out -= out_bytes;
        s->total_out_ += out_bytes;
        SetTotalOut(s, total_out);
      } else {
        size_t out_bytes = storage_ix >> 3;
        s->next_out_ = storage;
        s->available_out_ = out_bytes;
      }
      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
      s->last_bytes_bits_ = storage_ix & 7u;

      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
      continue;
    }
    break;
  }
  BROTLI_FREE(m, tmp_command_buf);
  BROTLI_FREE(m, tmp_literal_buf);
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}

static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    if (s->available_out_ != 0) break;

    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->total_in_ += copy;  /* not actually data input, though */
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}

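/* Illustrative sketch (not part of the library): emitting an application
   metadata block via BROTLI_OPERATION_EMIT_METADATA. The encoder |enc|, the
   |meta| payload and the output handling are assumptions of the sketch; the
   payload is limited to (1 << 24) bytes and must be fed completely before
   regular compression continues. */
#if 0
static BROTLI_BOOL ExampleEmitMetadata(BrotliEncoderState* enc,
    const uint8_t* meta, size_t meta_size,
    uint8_t* out, size_t out_capacity, size_t* out_size) {
  size_t available_in = meta_size;
  const uint8_t* next_in = meta;
  size_t available_out = out_capacity;
  uint8_t* next_out = out;
  if (meta_size > ((size_t)1 << 24)) return BROTLI_FALSE;
  while (available_in != 0 || BrotliEncoderHasMoreOutput(enc)) {
    size_t in_before = available_in;
    size_t out_before = available_out;
    if (!BrotliEncoderCompressStream(enc, BROTLI_OPERATION_EMIT_METADATA,
        &available_in, &next_in, &available_out, &next_out, NULL)) {
      return BROTLI_FALSE;
    }
    if (available_in == in_before && available_out == out_before) {
      return BROTLI_FALSE;  /* |out| too small to make progress */
    }
  }
  *out_size = out_capacity - available_out;
  return BROTLI_TRUE;
}
#endif
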
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
  if (s->params.size_hint == 0) {
    uint64_t delta = UnprocessedInputSize(s);
    uint64_t tail = available_in;
    uint32_t limit = 1u << 30;
    uint32_t total;
    if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
      total = limit;
    } else {
      total = (uint32_t)(delta + tail);
    }
    s->params.size_hint = total;
  }
}

BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);
    /* Shorten input to flint size. */
    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
      remaining_block_size = (size_t)s->flint_;
    }

    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      s->total_in_ += copy_input_size;
      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
      continue;
    }

    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      /* Exit the "emit flint" workflow. */
      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
        CheckFlushComplete(s);
        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
          s->flint_ = BROTLI_FLINT_DONE;
        }
      }
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        /* Force emitting (uncompressed) piece containing flint. */
        if (!is_last && s->flint_ == 0) {
          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
          force_flush = BROTLI_TRUE;
        }
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}

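/* Illustrative usage sketch (not part of the library): a typical streaming
   loop around BrotliEncoderCompressStream. The file handles, buffer sizes and
   error handling are assumptions of the sketch, not library API. */
#if 0
#include <stdio.h>

static BROTLI_BOOL ExampleCompressFile(FILE* in, FILE* out) {
  uint8_t input[1 << 16];
  uint8_t output[1 << 16];
  size_t available_in = 0;
  const uint8_t* next_in = input;
  BROTLI_BOOL is_eof = BROTLI_FALSE;
  BROTLI_BOOL ok = BROTLI_TRUE;
  BrotliEncoderState* enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
  if (!enc) return BROTLI_FALSE;
  BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, 5);
  while (ok) {
    size_t available_out = sizeof(output);
    uint8_t* next_out = output;
    if (available_in == 0 && !is_eof) {
      available_in = fread(input, 1, sizeof(input), in);
      next_in = input;
      is_eof = TO_BROTLI_BOOL(feof(in) != 0);
    }
    ok = BrotliEncoderCompressStream(enc,
        is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
        &available_in, &next_in, &available_out, &next_out, NULL);
    if (ok && next_out != output) {
      size_t out_size = (size_t)(next_out - output);
      if (fwrite(output, 1, out_size, out) != out_size) ok = BROTLI_FALSE;
    }
    if (ok && BrotliEncoderIsFinished(enc)) break;
  }
  BrotliEncoderDestroyInstance(enc);
  return ok;
}
#endif
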
BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
  return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
      !BrotliEncoderHasMoreOutput(s));
}

BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
  return TO_BROTLI_BOOL(s->available_out_ != 0);
}

const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
  size_t consumed_size = s->available_out_;
  uint8_t* result = s->next_out_;
  if (*size) {
    consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
  }
  if (consumed_size) {
    s->next_out_ += consumed_size;
    s->available_out_ -= consumed_size;
    s->total_out_ += consumed_size;
    CheckFlushComplete(s);
    *size = consumed_size;
  } else {
    *size = 0;
    result = 0;
  }
  return result;
}

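/* Illustrative sketch (not part of the library): the "take output" workflow.
   Compressed bytes that were not copied to a caller buffer can be pulled
   from the encoder with BrotliEncoderTakeOutput; passing *size == 0 requests
   the whole pending chunk. The write_chunk callback is an assumption of the
   sketch. */
#if 0
static void ExampleDrainOutput(BrotliEncoderState* enc,
    void (*write_chunk)(const uint8_t* data, size_t size)) {
  while (BrotliEncoderHasMoreOutput(enc)) {
    size_t size = 0;  /* 0 means "give me everything that is pending" */
    const uint8_t* chunk = BrotliEncoderTakeOutput(enc, &size);
    if (chunk == NULL || size == 0) break;
    write_chunk(chunk, size);
  }
}
#endif
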
uint32_t BrotliEncoderVersion(void) {
  return BROTLI_VERSION;
}

BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
    BrotliSharedDictionaryType type, size_t size,
    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed_dictionary = NULL;
  BROTLI_BOOL type_is_known = BROTLI_FALSE;
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_RAW);
#if defined(BROTLI_EXPERIMENTAL)
  type_is_known |= (type == BROTLI_SHARED_DICTIONARY_SERIALIZED);
#endif  /* BROTLI_EXPERIMENTAL */
  if (!type_is_known) {
    return NULL;
  }
  managed_dictionary =
      BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
  if (managed_dictionary == NULL) {
    return NULL;
  }
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
        &managed_dictionary->memory_manager_, data, size);
  }
#if defined(BROTLI_EXPERIMENTAL)
  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
        &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
    managed_dictionary->dictionary = (uint32_t*)dict;
    if (dict != NULL) {
      BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
          &managed_dictionary->memory_manager_, data, size, quality, dict);
      if (!ok) {
        BrotliFree(&managed_dictionary->memory_manager_, dict);
        managed_dictionary->dictionary = NULL;
      }
    }
  }
#else  /* BROTLI_EXPERIMENTAL */
  (void)quality;
#endif  /* BROTLI_EXPERIMENTAL */
  if (managed_dictionary->dictionary == NULL) {
    BrotliDestroyManagedDictionary(managed_dictionary);
    return NULL;
  }
  return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}

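/* Illustrative sketch (not part of the library): preparing a raw LZ77
   dictionary and attaching it to a fresh encoder. The |dict_data| buffer and
   the helper's ownership convention are assumptions of the sketch; the raw
   dictionary bytes are conservatively kept alive for the lifetime of the
   prepared dictionary, and attachment happens before any compression call. */
#if 0
static BrotliEncoderState* ExampleCreateEncoderWithDictionary(
    const uint8_t* dict_data, size_t dict_size,
    BrotliEncoderPreparedDictionary** out_prepared) {
  BrotliEncoderState* enc = NULL;
  BrotliEncoderPreparedDictionary* prepared = BrotliEncoderPrepareDictionary(
      BROTLI_SHARED_DICTIONARY_RAW, dict_size, dict_data,
      BROTLI_MAX_QUALITY, NULL, NULL, NULL);
  if (!prepared) return NULL;
  enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
  if (!enc || !BrotliEncoderAttachPreparedDictionary(enc, prepared)) {
    if (enc) BrotliEncoderDestroyInstance(enc);
    BrotliEncoderDestroyPreparedDictionary(prepared);
    return NULL;
  }
  *out_prepared = prepared;  /* destroy after the encoder is destroyed */
  return enc;
}
#endif
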
void BrotliEncoderDestroyPreparedDictionary(
    BrotliEncoderPreparedDictionary* dictionary) {
  ManagedDictionary* dict = (ManagedDictionary*)dictionary;
  if (!dictionary) return;
  /* First field of dictionary structs. */
  /* Only managed dictionaries are eligible for destruction by this method. */
  if (dict->magic != kManagedDictionaryMagic) {
    return;
  }
  if (dict->dictionary == NULL) {
    /* This should never ever happen. */
  } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) {
    DestroyPreparedDictionary(
        &dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
  } else if (*dict->dictionary == kSharedDictionaryMagic) {
    BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
        (SharedEncoderDictionary*)dict->dictionary);
    BrotliFree(&dict->memory_manager_, dict->dictionary);
  } else {
    /* There is also kPreparedDictionaryMagic, but such instances should be
     * constructed and destroyed by different means. */
  }
  dict->dictionary = NULL;
  BrotliDestroyManagedDictionary(dict);
}

BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
    const BrotliEncoderPreparedDictionary* dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* dict = dictionary;
  uint32_t magic = *((const uint32_t*)dict);
  SharedEncoderDictionary* current = NULL;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap managed dictionary. */
    ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
    magic = *managed_dictionary->dictionary;
    dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
  }
  current = &state->params.dictionary;
  if (magic == kPreparedDictionaryMagic ||
      magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
    if (!AttachPreparedDictionary(&current->compound, prepared)) {
      return BROTLI_FALSE;
    }
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* attached =
        (const SharedEncoderDictionary*)dict;
    BROTLI_BOOL was_default = !current->contextual.context_based &&
        current->contextual.num_dictionaries == 1 &&
        current->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        current->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    BROTLI_BOOL new_default = !attached->contextual.context_based &&
        attached->contextual.num_dictionaries == 1 &&
        attached->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        attached->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    size_t i;
    if (state->is_initialized_) return BROTLI_FALSE;
    current->max_quality =
        BROTLI_MIN(int, current->max_quality, attached->max_quality);
    for (i = 0; i < attached->compound.num_chunks; i++) {
      if (!AttachPreparedDictionary(&current->compound,
          attached->compound.chunks[i])) {
        return BROTLI_FALSE;
      }
    }
    if (!new_default) {
      if (!was_default) return BROTLI_FALSE;
      /* Copy by value, but then set num_instances_ to 0 because their memory
         is managed by attached, not by current. */
      current->contextual = attached->contextual;
      current->contextual.num_instances_ = 0;
    }
  } else {
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}

size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
                                            size_t input_size) {
  BrotliEncoderParams params;
  size_t memory_manager_slots = BROTLI_ENCODER_MEMORY_MANAGER_SLOTS;
  size_t memory_manager_size = memory_manager_slots * sizeof(void*);
  BrotliEncoderInitParams(&params);
  params.quality = quality;
  params.lgwin = lgwin;
  params.size_hint = input_size;
  params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  ChooseHasher(&params, &params.hasher);
  if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    size_t state_size = sizeof(BrotliEncoderState);
    size_t block_size = BROTLI_MIN(size_t, input_size, (1ul << params.lgwin));
    size_t hash_table_size =
        HashTableSize(MaxHashTableSize(params.quality), block_size);
    size_t hash_size =
        (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
    size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
        5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
    if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      state_size += sizeof(BrotliOnePassArena);
    } else {
      state_size += sizeof(BrotliTwoPassArena);
    }
    return hash_size + cmdbuf_size + state_size;
  } else {
    size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
    int ringbuffer_bits = ComputeRbBits(&params);
    size_t ringbuffer_size = input_size < short_ringbuffer_size ?
        input_size : (1u << ringbuffer_bits) + short_ringbuffer_size;
    size_t hash_size[4] = {0};
    size_t metablock_size =
        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
    size_t inputblock_size =
        BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
    size_t outbuf_size = metablock_size * 2 + 503;
    size_t histogram_size = 0;
    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
    }
    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      /* Only a very rough estimation, based on enwik8. */
      histogram_size = 200 << 20;
    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
      size_t literal_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t command_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t distance_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      histogram_size = literal_histograms * sizeof(HistogramLiteral) +
                       command_histograms * sizeof(HistogramCommand) +
                       distance_histograms * sizeof(HistogramDistance);
    }
    return (memory_manager_size + ringbuffer_size +
            hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
            cmdbuf_size +
            outbuf_size +
            histogram_size);
  }
}
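
/* Illustrative sketch (not part of the library): checking the estimated peak
   encoder memory against an application budget before compressing. The
   budget value and helper name are assumptions of the sketch. */
#if 0
static BROTLI_BOOL ExampleFitsMemoryBudget(size_t input_size) {
  const size_t kBudget = (size_t)64 << 20;  /* 64 MiB, arbitrary for the sketch */
  size_t estimate = BrotliEncoderEstimatePeakMemoryUsage(
      BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, input_size);
  return TO_BROTLI_BOOL(estimate <= kBudget);
}
#endif
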
size_t BrotliEncoderGetPreparedDictionarySize(
    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
  uint32_t magic = *((const uint32_t*)prepared);
  size_t overhead = 0;
  if (magic == kManagedDictionaryMagic) {
    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
    overhead = sizeof(ManagedDictionary);
    magic = *managed->dictionary;
    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
  }

  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + dictionary->source_size +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kLeanPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->num_items) + overhead;
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* dictionary =
        (const SharedEncoderDictionary*)prepared;
    const CompoundDictionary* compound = &dictionary->compound;
    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
    size_t result = sizeof(*dictionary);
    size_t i;
    size_t num_instances;
    const BrotliEncoderDictionary* instances;
    for (i = 0; i < compound->num_prepared_instances_; i++) {
      size_t size = BrotliEncoderGetPreparedDictionarySize(
          (const BrotliEncoderPreparedDictionary*)
          compound->prepared_instances_[i]);
      if (!size) return 0;  /* error */
      result += size;
    }
    if (contextual->context_based) {
      num_instances = contextual->num_instances_;
      instances = contextual->instances_;
      result += sizeof(*instances) * num_instances;
    } else {
      num_instances = 1;
      instances = &contextual->instance_;
    }
    for (i = 0; i < num_instances; i++) {
      const BrotliEncoderDictionary* dict = &instances[i];
      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
      if (dict->hash_table_data_words_) {
        result += sizeof(kStaticDictionaryHashWords);
      }
      if (dict->hash_table_data_lengths_) {
        result += sizeof(kStaticDictionaryHashLengths);
      }
      if (dict->buckets_data_) {
        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
      }
      if (dict->dict_words_data_) {
        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
      }
      if (dict->words_instance_) {
        result += sizeof(*dict->words_instance_);
        /* data_size not added here: it is never allocated by the
           SharedEncoderDictionary, instead it always points to the file
           already loaded in memory. So if the caller wants to include
           this memory as well, add the size of the loaded dictionary
           file to this. */
      }
    }
    return result + overhead;
  }
  return 0;  /* error */
}

#if defined(BROTLI_TEST)
size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
size_t MakeUncompressedStreamForTest(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  return MakeUncompressedStream(input, input_size, output);
}
#endif

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif