• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Function for fast encoding of an input fragment, independently from the input
8    history. This function uses two-pass processing: in the first pass we save
9    the found backward matches and literal bytes into a buffer, and in the
10    second pass we emit them into the bit stream using prefix codes built based
11    on the actual command and literal byte histograms. */
12 
13 #include "compress_fragment_two_pass.h"
14 
15 #include <string.h>  /* memcmp, memcpy, memset */
16 
17 #include <brotli/types.h>
18 
19 #include "../common/constants.h"
20 #include "../common/platform.h"
21 #include "bit_cost.h"
22 #include "brotli_bit_stream.h"
23 #include "entropy_encode.h"
24 #include "fast_log.h"
25 #include "find_match_length.h"
26 #include "write_bits.h"
27 
28 #if defined(__cplusplus) || defined(c_plusplus)
29 extern "C" {
30 #endif
31 
32 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
33 
34 /* kHashMul32 multiplier has these properties:
35    * The multiplier must be odd. Otherwise we may lose the highest bit.
36    * No long streaks of ones or zeros.
37    * There is no effort to ensure that it is a prime, the oddity is enough
38      for this use.
39    * The number has been tuned heuristically against compression benchmarks. */
40 static const uint32_t kHashMul32 = 0x1E35A7BD;
41 
Hash(const uint8_t * p,size_t shift,size_t length)42 static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
43     size_t shift, size_t length) {
44   const uint64_t h =
45       (BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8)) * kHashMul32;
46   return (uint32_t)(h >> shift);
47 }
48 
HashBytesAtOffset(uint64_t v,size_t offset,size_t shift,size_t length)49 static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
50     size_t shift, size_t length) {
51   BROTLI_DCHECK(offset <= 8 - length);
52   {
53     const uint64_t h = ((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32;
54     return (uint32_t)(h >> shift);
55   }
56 }
57 
IsMatch(const uint8_t * p1,const uint8_t * p2,size_t length)58 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
59     size_t length) {
60   if (BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2)) {
61     if (length == 4) return BROTLI_TRUE;
62     return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
63   }
64   return BROTLI_FALSE;
65 }
66 
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
                                           size_t* storage_ix,
                                           uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  /* TODO(eustas): initialize once. */
  memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
  /* Build code depths separately for the 64 command symbols
     (cmd_histo[0..63]) and the 64 distance symbols (cmd_histo[64..127]). */
  BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
  BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
                          &s->cmd_depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  /* Gather depths into full-alphabet symbol order for bit assignment... */
  memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
  memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
  memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
  memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
  memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
  memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
  BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
  /* ...then scatter the resulting bit patterns back into Emit* order. */
  memcpy(s->cmd_bits, s->tmp_bits + 24, 16);
  memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
  memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
  memcpy(s->cmd_bits + 24, s->tmp_bits, 48);
  memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
  memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
  BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
    memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
    memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
    memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
    memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
    memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
    /* The 8-entry strides interleave with the 8-byte copies above;
       NOTE(review): layout presumably mirrors the spec's insert-and-copy
       code table — confirm against the full-alphabet ordering. */
    for (i = 0; i < 8; ++i) {
      s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
      s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
      s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
    }
    BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
                           s->tmp_tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
                         storage);
}
117 
EmitInsertLen(uint32_t insertlen,uint32_t ** commands)118 static BROTLI_INLINE void EmitInsertLen(
119     uint32_t insertlen, uint32_t** commands) {
120   if (insertlen < 6) {
121     **commands = insertlen;
122   } else if (insertlen < 130) {
123     const uint32_t tail = insertlen - 2;
124     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
125     const uint32_t prefix = tail >> nbits;
126     const uint32_t inscode = (nbits << 1) + prefix + 2;
127     const uint32_t extra = tail - (prefix << nbits);
128     **commands = inscode | (extra << 8);
129   } else if (insertlen < 2114) {
130     const uint32_t tail = insertlen - 66;
131     const uint32_t nbits = Log2FloorNonZero(tail);
132     const uint32_t code = nbits + 10;
133     const uint32_t extra = tail - (1u << nbits);
134     **commands = code | (extra << 8);
135   } else if (insertlen < 6210) {
136     const uint32_t extra = insertlen - 2114;
137     **commands = 21 | (extra << 8);
138   } else if (insertlen < 22594) {
139     const uint32_t extra = insertlen - 6210;
140     **commands = 22 | (extra << 8);
141   } else {
142     const uint32_t extra = insertlen - 22594;
143     **commands = 23 | (extra << 8);
144   }
145   ++(*commands);
146 }
147 
EmitCopyLen(size_t copylen,uint32_t ** commands)148 static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
149   if (copylen < 10) {
150     **commands = (uint32_t)(copylen + 38);
151   } else if (copylen < 134) {
152     const size_t tail = copylen - 6;
153     const size_t nbits = Log2FloorNonZero(tail) - 1;
154     const size_t prefix = tail >> nbits;
155     const size_t code = (nbits << 1) + prefix + 44;
156     const size_t extra = tail - (prefix << nbits);
157     **commands = (uint32_t)(code | (extra << 8));
158   } else if (copylen < 2118) {
159     const size_t tail = copylen - 70;
160     const size_t nbits = Log2FloorNonZero(tail);
161     const size_t code = nbits + 52;
162     const size_t extra = tail - ((size_t)1 << nbits);
163     **commands = (uint32_t)(code | (extra << 8));
164   } else {
165     const size_t extra = copylen - 2118;
166     **commands = (uint32_t)(63 | (extra << 8));
167   }
168   ++(*commands);
169 }
170 
EmitCopyLenLastDistance(size_t copylen,uint32_t ** commands)171 static BROTLI_INLINE void EmitCopyLenLastDistance(
172     size_t copylen, uint32_t** commands) {
173   if (copylen < 12) {
174     **commands = (uint32_t)(copylen + 20);
175     ++(*commands);
176   } else if (copylen < 72) {
177     const size_t tail = copylen - 8;
178     const size_t nbits = Log2FloorNonZero(tail) - 1;
179     const size_t prefix = tail >> nbits;
180     const size_t code = (nbits << 1) + prefix + 28;
181     const size_t extra = tail - (prefix << nbits);
182     **commands = (uint32_t)(code | (extra << 8));
183     ++(*commands);
184   } else if (copylen < 136) {
185     const size_t tail = copylen - 8;
186     const size_t code = (tail >> 5) + 54;
187     const size_t extra = tail & 31;
188     **commands = (uint32_t)(code | (extra << 8));
189     ++(*commands);
190     **commands = 64;
191     ++(*commands);
192   } else if (copylen < 2120) {
193     const size_t tail = copylen - 72;
194     const size_t nbits = Log2FloorNonZero(tail);
195     const size_t code = nbits + 52;
196     const size_t extra = tail - ((size_t)1 << nbits);
197     **commands = (uint32_t)(code | (extra << 8));
198     ++(*commands);
199     **commands = 64;
200     ++(*commands);
201   } else {
202     const size_t extra = copylen - 2120;
203     **commands = (uint32_t)(63 | (extra << 8));
204     ++(*commands);
205     **commands = 64;
206     ++(*commands);
207   }
208 }
209 
EmitDistance(uint32_t distance,uint32_t ** commands)210 static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
211   uint32_t d = distance + 3;
212   uint32_t nbits = Log2FloorNonZero(d) - 1;
213   const uint32_t prefix = (d >> nbits) & 1;
214   const uint32_t offset = (2 + prefix) << nbits;
215   const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
216   uint32_t extra = d - offset;
217   **commands = distcode | (extra << 8);
218   ++(*commands);
219 }
220 
221 /* REQUIRES: len <= 1 << 24. */
BrotliStoreMetaBlockHeader(size_t len,BROTLI_BOOL is_uncompressed,size_t * storage_ix,uint8_t * storage)222 static void BrotliStoreMetaBlockHeader(
223     size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
224     uint8_t* storage) {
225   size_t nibbles = 6;
226   /* ISLAST */
227   BrotliWriteBits(1, 0, storage_ix, storage);
228   if (len <= (1U << 16)) {
229     nibbles = 4;
230   } else if (len <= (1U << 20)) {
231     nibbles = 5;
232   }
233   BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
234   BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
235   /* ISUNCOMPRESSED */
236   BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
237 }
238 
CreateCommands(const uint8_t * input,size_t block_size,size_t input_size,const uint8_t * base_ip,int * table,size_t table_bits,size_t min_match,uint8_t ** literals,uint32_t ** commands)239 static BROTLI_INLINE void CreateCommands(const uint8_t* input,
240     size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
241     size_t table_bits, size_t min_match,
242     uint8_t** literals, uint32_t** commands) {
243   /* "ip" is the input pointer. */
244   const uint8_t* ip = input;
245   const size_t shift = 64u - table_bits;
246   const uint8_t* ip_end = input + block_size;
247   /* "next_emit" is a pointer to the first byte that is not covered by a
248      previous copy. Bytes between "next_emit" and the start of the next copy or
249      the end of the input will be emitted as literal bytes. */
250   const uint8_t* next_emit = input;
251 
252   int last_distance = -1;
253   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
254 
255   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
256     /* For the last block, we need to keep a 16 bytes margin so that we can be
257        sure that all distances are at most window size - 16.
258        For all other blocks, we only need to keep a margin of 5 bytes so that
259        we don't go over the block size with a copy. */
260     const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
261                                         input_size - kInputMarginBytes);
262     const uint8_t* ip_limit = input + len_limit;
263 
264     uint32_t next_hash;
265     for (next_hash = Hash(++ip, shift, min_match); ; ) {
266       /* Step 1: Scan forward in the input looking for a 6-byte-long match.
267          If we get close to exhausting the input then goto emit_remainder.
268 
269          Heuristic match skipping: If 32 bytes are scanned with no matches
270          found, start looking only at every other byte. If 32 more bytes are
271          scanned, look at every third byte, etc.. When a match is found,
272          immediately go back to looking at every byte. This is a small loss
273          (~5% performance, ~0.1% density) for compressible data due to more
274          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
275          win since the compressor quickly "realizes" the data is incompressible
276          and doesn't bother looking for matches everywhere.
277 
278          The "skip" variable keeps track of how many bytes there are since the
279          last match; dividing it by 32 (ie. right-shifting by five) gives the
280          number of bytes to move ahead for each iteration. */
281       uint32_t skip = 32;
282 
283       const uint8_t* next_ip = ip;
284       const uint8_t* candidate;
285 
286       BROTLI_DCHECK(next_emit < ip);
287 trawl:
288       do {
289         uint32_t hash = next_hash;
290         uint32_t bytes_between_hash_lookups = skip++ >> 5;
291         ip = next_ip;
292         BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
293         next_ip = ip + bytes_between_hash_lookups;
294         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
295           goto emit_remainder;
296         }
297         next_hash = Hash(next_ip, shift, min_match);
298         candidate = ip - last_distance;
299         if (IsMatch(ip, candidate, min_match)) {
300           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
301             table[hash] = (int)(ip - base_ip);
302             break;
303           }
304         }
305         candidate = base_ip + table[hash];
306         BROTLI_DCHECK(candidate >= base_ip);
307         BROTLI_DCHECK(candidate < ip);
308 
309         table[hash] = (int)(ip - base_ip);
310       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
311 
312       /* Check copy distance. If candidate is not feasible, continue search.
313          Checking is done outside of hot loop to reduce overhead. */
314       if (ip - candidate > MAX_DISTANCE) goto trawl;
315 
316       /* Step 2: Emit the found match together with the literal bytes from
317          "next_emit", and then see if we can find a next match immediately
318          afterwards. Repeat until we find no match for the input
319          without emitting some literal bytes. */
320 
321       {
322         /* We have a 6-byte match at ip, and we need to emit bytes in
323            [next_emit, ip). */
324         const uint8_t* base = ip;
325         size_t matched = min_match + FindMatchLengthWithLimit(
326             candidate + min_match, ip + min_match,
327             (size_t)(ip_end - ip) - min_match);
328         int distance = (int)(base - candidate);  /* > 0 */
329         int insert = (int)(base - next_emit);
330         ip += matched;
331         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
332         EmitInsertLen((uint32_t)insert, commands);
333         BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
334                     (int)(next_emit - base_ip), insert, 2));
335         memcpy(*literals, next_emit, (size_t)insert);
336         *literals += insert;
337         if (distance == last_distance) {
338           **commands = 64;
339           ++(*commands);
340         } else {
341           EmitDistance((uint32_t)distance, commands);
342           last_distance = distance;
343         }
344         EmitCopyLenLastDistance(matched, commands);
345         BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
346                     "[CompressFragment] pos = %d insert = %d copy = %d\n"
347                     "[CompressFragment] pos = %d distance = %d\n",
348                     (int)(base - base_ip), (int)distance,
349                     (int)(base - base_ip) + 2, 0, (int)matched - 2,
350                     (int)(base - base_ip) + 2, (int)distance));
351 
352         next_emit = ip;
353         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
354           goto emit_remainder;
355         }
356         {
357           /* We could immediately start working at ip now, but to improve
358              compression we first update "table" with the hashes of some
359              positions within the last copy. */
360           uint64_t input_bytes;
361           uint32_t cur_hash;
362           uint32_t prev_hash;
363           if (min_match == 4) {
364             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
365             cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
366             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
367             table[prev_hash] = (int)(ip - base_ip - 3);
368             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
369             table[prev_hash] = (int)(ip - base_ip - 2);
370             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
371             table[prev_hash] = (int)(ip - base_ip - 1);
372           } else {
373             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
374             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
375             table[prev_hash] = (int)(ip - base_ip - 5);
376             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
377             table[prev_hash] = (int)(ip - base_ip - 4);
378             prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
379             table[prev_hash] = (int)(ip - base_ip - 3);
380             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
381             cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
382             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
383             table[prev_hash] = (int)(ip - base_ip - 2);
384             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
385             table[prev_hash] = (int)(ip - base_ip - 1);
386           }
387 
388           candidate = base_ip + table[cur_hash];
389           table[cur_hash] = (int)(ip - base_ip);
390         }
391       }
392 
393       while (ip - candidate <= MAX_DISTANCE &&
394           IsMatch(ip, candidate, min_match)) {
395         /* We have a 6-byte match at ip, and no need to emit any
396            literal bytes prior to ip. */
397         const uint8_t* base = ip;
398         size_t matched = min_match + FindMatchLengthWithLimit(
399             candidate + min_match, ip + min_match,
400             (size_t)(ip_end - ip) - min_match);
401         ip += matched;
402         last_distance = (int)(base - candidate);  /* > 0 */
403         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
404         EmitCopyLen(matched, commands);
405         EmitDistance((uint32_t)last_distance, commands);
406         BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
407                     "[CompressFragment] pos = %d distance = %d\n",
408                     (int)(base - base_ip), 0, (int)matched,
409                     (int)(base - base_ip), (int)last_distance));
410 
411         next_emit = ip;
412         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
413           goto emit_remainder;
414         }
415         {
416           /* We could immediately start working at ip now, but to improve
417              compression we first update "table" with the hashes of some
418              positions within the last copy. */
419           uint64_t input_bytes;
420           uint32_t cur_hash;
421           uint32_t prev_hash;
422           if (min_match == 4) {
423             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
424             cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
425             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
426             table[prev_hash] = (int)(ip - base_ip - 3);
427             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
428             table[prev_hash] = (int)(ip - base_ip - 2);
429             prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
430             table[prev_hash] = (int)(ip - base_ip - 1);
431           } else {
432             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
433             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
434             table[prev_hash] = (int)(ip - base_ip - 5);
435             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
436             table[prev_hash] = (int)(ip - base_ip - 4);
437             prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
438             table[prev_hash] = (int)(ip - base_ip - 3);
439             input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
440             cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
441             prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
442             table[prev_hash] = (int)(ip - base_ip - 2);
443             prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
444             table[prev_hash] = (int)(ip - base_ip - 1);
445           }
446 
447           candidate = base_ip + table[cur_hash];
448           table[cur_hash] = (int)(ip - base_ip);
449         }
450       }
451 
452       next_hash = Hash(++ip, shift, min_match);
453     }
454   }
455 
456 emit_remainder:
457   BROTLI_DCHECK(next_emit <= ip_end);
458   /* Emit the remaining bytes as literals. */
459   if (next_emit < ip_end) {
460     const uint32_t insert = (uint32_t)(ip_end - next_emit);
461     EmitInsertLen(insert, commands);
462     BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
463                 (int)(next_emit - base_ip), insert, 2));
464     memcpy(*literals, next_emit, insert);
465     *literals += insert;
466   }
467 }
468 
/* Second pass: entropy-codes the collected commands and literal bytes into
   the bit stream. Builds a literal Huffman code from the literal histogram
   and a command/distance prefix code from the command histogram, then emits
   every command followed by the literals its insert part consumes. */
static void StoreCommands(BrotliTwoPassArena* s,
                          const uint8_t* literals, const size_t num_literals,
                          const uint32_t* commands, const size_t num_commands,
                          size_t* storage_ix, uint8_t* storage) {
  /* Number of extra bits carried by each of the 128 command codes. */
  static const uint32_t kNumExtraBits[128] = {
      0,  0,  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
      6,  7,  8,  9,  10, 12, 14, 24, 0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  7,  8,  9,  10, 24,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
      9,  9,  10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
      17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
  };
  /* Base insert length for each of the 24 insert codes. */
  static const uint32_t kInsertOffset[24] = {
      0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
      34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
  };

  size_t idx;
  const uint8_t* next_literal = literals;

  memset(s->lit_histo, 0, sizeof(s->lit_histo));
  /* TODO(eustas): is that necessary? */
  memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
  /* TODO(eustas): is that necessary? */
  memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
  memset(s->cmd_histo, 0, sizeof(s->cmd_histo));

  for (idx = 0; idx < num_literals; ++idx) {
    ++s->lit_histo[literals[idx]];
  }
  BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
                                     /* max_bits = */ 8, s->lit_depth,
                                     s->lit_bits, storage_ix, storage);

  for (idx = 0; idx < num_commands; ++idx) {
    const uint32_t code = commands[idx] & 0xFF;
    BROTLI_DCHECK(code < 128);
    ++s->cmd_histo[code];
  }
  /* Unconditionally add a count to a few codes so that the built prefix
     code always contains them. */
  s->cmd_histo[1] += 1;
  s->cmd_histo[2] += 1;
  s->cmd_histo[64] += 1;
  s->cmd_histo[84] += 1;
  BuildAndStoreCommandPrefixCode(s, storage_ix, storage);

  for (idx = 0; idx < num_commands; ++idx) {
    const uint32_t cmd = commands[idx];
    const uint32_t code = cmd & 0xFF;
    const uint32_t extra = cmd >> 8;
    BROTLI_DCHECK(code < 128);
    BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    /* Codes below 24 are insert codes; emit the literals they cover. */
    if (code < 24) {
      uint32_t remaining = kInsertOffset[code] + extra;
      while (remaining-- > 0) {
        const uint8_t lit = *next_literal++;
        BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
                        storage);
      }
    }
  }
}
532 
533 /* Acceptable loss for uncompressible speedup is 2% */
534 #define MIN_RATIO 0.98
535 #define SAMPLE_RATE 43
536 
ShouldCompress(BrotliTwoPassArena * s,const uint8_t * input,size_t input_size,size_t num_literals)537 static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
538     const uint8_t* input, size_t input_size, size_t num_literals) {
539   double corpus_size = (double)input_size;
540   if ((double)num_literals < MIN_RATIO * corpus_size) {
541     return BROTLI_TRUE;
542   } else {
543     const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
544     size_t i;
545     memset(s->lit_histo, 0, sizeof(s->lit_histo));
546     for (i = 0; i < input_size; i += SAMPLE_RATE) {
547       ++s->lit_histo[input[i]];
548     }
549     return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
550   }
551 }
552 
/* Truncates the output to "new_storage_ix" bits: clears the bits above the
   new position in its partial byte and rewinds the write cursor. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bits_in_last_byte = new_storage_ix & 7u;
  storage[new_storage_ix >> 3] &= (uint8_t)((1u << bits_in_last_byte) - 1);
  *storage_ix = new_storage_ix;
}
560 
/* Stores "input_size" bytes of "input" as a single uncompressed meta-block. */
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
                                      size_t* storage_ix, uint8_t* storage) {
  size_t pos;
  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
  pos = (*storage_ix + 7u) & ~7u;  /* raw bytes are byte-aligned */
  memcpy(&storage[pos >> 3], input, input_size);
  pos += input_size << 3;
  storage[pos >> 3] = 0;  /* pre-clear the byte the next bit-write touches */
  *storage_ix = pos;
}
569 
BrotliCompressFragmentTwoPassImpl(BrotliTwoPassArena * s,const uint8_t * input,size_t input_size,BROTLI_BOOL is_last,uint32_t * command_buf,uint8_t * literal_buf,int * table,size_t table_bits,size_t min_match,size_t * storage_ix,uint8_t * storage)570 static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
571     BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
572     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
573     int* table, size_t table_bits, size_t min_match,
574     size_t* storage_ix, uint8_t* storage) {
575   /* Save the start of the first block for position and distance computations.
576   */
577   const uint8_t* base_ip = input;
578   BROTLI_UNUSED(is_last);
579 
580   while (input_size > 0) {
581     size_t block_size =
582         BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
583     uint32_t* commands = command_buf;
584     uint8_t* literals = literal_buf;
585     size_t num_literals;
586     CreateCommands(input, block_size, input_size, base_ip, table,
587                    table_bits, min_match, &literals, &commands);
588     num_literals = (size_t)(literals - literal_buf);
589     if (ShouldCompress(s, input, block_size, num_literals)) {
590       const size_t num_commands = (size_t)(commands - command_buf);
591       BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
592       /* No block splits, no contexts. */
593       BrotliWriteBits(13, 0, storage_ix, storage);
594       StoreCommands(s, literal_buf, num_literals, command_buf, num_commands,
595                     storage_ix, storage);
596     } else {
597       /* Since we did not find many backward references and the entropy of
598          the data is close to 8 bits, we can simply emit an uncompressed block.
599          This makes compression speed of uncompressible data about 3x faster. */
600       EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
601     }
602     input += block_size;
603     input_size -= block_size;
604   }
605 }
606 
/* The set of hash-table sizes (in bits) for which a specialized entry point
   is instantiated below. */
#define FOR_TABLE_BITS_(X) \
  X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)

/* Generates one non-inlined wrapper per table-bits value B, baking the table
   size and the matching minimum match length (4 for tables up to 15 bits,
   6 otherwise) into the call to the shared inline implementation. */
#define BAKE_METHOD_PARAM_(B)                                                  \
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,            \
    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
    int* table, size_t* storage_ix, uint8_t* storage) {                        \
  size_t min_match = (B <= 15) ? 4 : 6;                                        \
  BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
      literal_buf, table, B, min_match, storage_ix, storage);                  \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
621 
/* Public entry point: compresses "input" into one or more meta-blocks
   appended at *storage_ix in "storage". log2(table_size) selects the
   specialized implementation (8..17 table bits). If the compressed result
   ends up larger than a single uncompressed meta-block, the output is
   rewound and re-emitted uncompressed. When "is_last", a final empty
   ISLAST meta-block is appended and the stream is byte-aligned. */
void BrotliCompressFragmentTwoPass(
    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);
  /* Dispatch to the wrapper baked for this table size. */
  switch (table_bits) {
#define CASE_(B)                                      \
    case B:                                           \
      BrotliCompressFragmentTwoPassImpl ## B(         \
          s, input, input_size, is_last, command_buf, \
          literal_buf, table, storage_ix, storage);   \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it.
     31 bits is the worst-case uncompressed meta-block header overhead. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    RewindBitPosition(initial_storage_ix, storage_ix, storage);
    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;  /* byte-align the stream end */
  }
}
652 
653 #undef FOR_TABLE_BITS_
654 
655 #if defined(__cplusplus) || defined(c_plusplus)
656 }  /* extern "C" */
657 #endif
658