• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Function for fast encoding of an input fragment, independently from the input
8    history. This function uses one-pass processing: when we find a backward
9    match, we immediately emit the corresponding command and literal codes to
10    the bit stream.
11 
12    Adapted from the CompressFragment() function in
13    https://github.com/google/snappy/blob/master/snappy.cc */
14 
15 #include "compress_fragment.h"
16 
17 #include <string.h>  /* memcmp, memcpy, memset */
18 
19 #include <brotli/types.h>
20 
21 #include "../common/platform.h"
22 #include "brotli_bit_stream.h"
23 #include "entropy_encode.h"
24 #include "fast_log.h"
25 #include "find_match_length.h"
26 #include "write_bits.h"
27 
28 #if defined(__cplusplus) || defined(c_plusplus)
29 extern "C" {
30 #endif
31 
/* Largest backward-copy distance this one-pass encoder will emit: the
   backward limit for 18 window bits (see BROTLI_MAX_BACKWARD_LIMIT). */
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
33 
/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks.
   Shared by Hash() and HashBytesAtOffset() below. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
41 
Hash(const uint8_t * p,size_t shift)42 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
43   const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
44   return (uint32_t)(h >> shift);
45 }
46 
HashBytesAtOffset(uint64_t v,int offset,size_t shift)47 static BROTLI_INLINE uint32_t HashBytesAtOffset(
48     uint64_t v, int offset, size_t shift) {
49   BROTLI_DCHECK(offset >= 0);
50   BROTLI_DCHECK(offset <= 3);
51   {
52     const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
53     return (uint32_t)(h >> shift);
54   }
55 }
56 
IsMatch(const uint8_t * p1,const uint8_t * p2)57 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
58   return TO_BROTLI_BOOL(
59       BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
60       p1[4] == p2[4]);
61 }
62 
63 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
64    of the "input" string and stores it into the bit stream.
65    Note that the prefix code here is built from the pre-LZ77 input, therefore
66    we can only approximate the statistics of the actual literal stream.
67    Moreover, for long inputs we build a histogram from a sample of the input
68    and thus have to assign a non-zero depth for each literal.
69    Returns estimated compression ratio millibytes/char for encoding given input
70    with generated code. */
BuildAndStoreLiteralPrefixCode(BrotliOnePassArena * s,const uint8_t * input,const size_t input_size,uint8_t depths[256],uint16_t bits[256],size_t * storage_ix,uint8_t * storage)71 static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
72                                              const uint8_t* input,
73                                              const size_t input_size,
74                                              uint8_t depths[256],
75                                              uint16_t bits[256],
76                                              size_t* storage_ix,
77                                              uint8_t* storage) {
78   uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
79   size_t histogram_total;
80   size_t i;
81   memset(histogram, 0, sizeof(s->histogram));
82 
83   if (input_size < (1 << 15)) {
84     for (i = 0; i < input_size; ++i) {
85       ++histogram[input[i]];
86     }
87     histogram_total = input_size;
88     for (i = 0; i < 256; ++i) {
89       /* We weigh the first 11 samples with weight 3 to account for the
90          balancing effect of the LZ77 phase on the histogram. */
91       const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
92       histogram[i] += adjust;
93       histogram_total += adjust;
94     }
95   } else {
96     static const size_t kSampleRate = 29;
97     for (i = 0; i < input_size; i += kSampleRate) {
98       ++histogram[input[i]];
99     }
100     histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
101     for (i = 0; i < 256; ++i) {
102       /* We add 1 to each population count to avoid 0 bit depths (since this is
103          only a sample and we don't know if the symbol appears or not), and we
104          weigh the first 11 samples with weight 3 to account for the balancing
105          effect of the LZ77 phase on the histogram (more frequent symbols are
106          more likely to be in backward references instead as literals). */
107       const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
108       histogram[i] += adjust;
109       histogram_total += adjust;
110     }
111   }
112   BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
113                                      /* max_bits = */ 8,
114                                      depths, bits, storage_ix, storage);
115   {
116     size_t literal_ratio = 0;
117     for (i = 0; i < 256; ++i) {
118       if (histogram[i]) literal_ratio += histogram[i] * depths[i];
119     }
120     /* Estimated encoding ratio, millibytes per symbol. */
121     return (literal_ratio * 125) / histogram_total;
122   }
123 }
124 
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
    size_t* storage_ix, uint8_t* storage) {
  const uint32_t* const histogram = s->cmd_histo;
  uint8_t* const depth = s->cmd_depth;
  uint16_t* const bits = s->cmd_bits;
  uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
  uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
  /* TODO(eustas): do only once on initialization. */
  memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);

  /* Two separate trees: command symbols 0..63 (max depth 15) and distance
     symbols 64..127 (max depth 14). */
  BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(tmp_depth, depth, 24);
  memcpy(tmp_depth + 24, depth + 40, 8);
  memcpy(tmp_depth + 32, depth + 24, 8);
  memcpy(tmp_depth + 40, depth + 48, 8);
  memcpy(tmp_depth + 48, depth + 32, 8);
  memcpy(tmp_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
  /* Copy the bit patterns back, undoing the permutation above
     (uint16_t entries, hence the doubled byte counts). */
  memcpy(bits, tmp_bits, 48);
  memcpy(bits + 24, tmp_bits + 32, 16);
  memcpy(bits + 32, tmp_bits + 48, 16);
  memcpy(bits + 40, tmp_bits + 24, 16);
  memcpy(bits + 48, tmp_bits + 40, 16);
  memcpy(bits + 56, tmp_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(tmp_depth, 0, 64);  /* only 64 first values were used */
    memcpy(tmp_depth, depth, 8);
    memcpy(tmp_depth + 64, depth + 8, 8);
    memcpy(tmp_depth + 128, depth + 16, 8);
    memcpy(tmp_depth + 192, depth + 24, 8);
    memcpy(tmp_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      tmp_depth[128 + 8 * i] = depth[40 + i];
      tmp_depth[256 + 8 * i] = depth[48 + i];
      tmp_depth[448 + 8 * i] = depth[56 + i];
    }
    /* TODO(eustas): could/should full-length machinery be avoided? */
    BrotliStoreHuffmanTree(
        tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
}
178 
179 /* REQUIRES: insertlen < 6210 */
EmitInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)180 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
181                                         const uint8_t depth[128],
182                                         const uint16_t bits[128],
183                                         uint32_t histo[128],
184                                         size_t* storage_ix,
185                                         uint8_t* storage) {
186   if (insertlen < 6) {
187     const size_t code = insertlen + 40;
188     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
189     ++histo[code];
190   } else if (insertlen < 130) {
191     const size_t tail = insertlen - 2;
192     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
193     const size_t prefix = tail >> nbits;
194     const size_t inscode = (nbits << 1) + prefix + 42;
195     BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
196     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
197     ++histo[inscode];
198   } else if (insertlen < 2114) {
199     const size_t tail = insertlen - 66;
200     const uint32_t nbits = Log2FloorNonZero(tail);
201     const size_t code = nbits + 50;
202     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
203     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
204     ++histo[code];
205   } else {
206     BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
207     BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
208     ++histo[61];
209   }
210 }
211 
EmitLongInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)212 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
213                                             const uint8_t depth[128],
214                                             const uint16_t bits[128],
215                                             uint32_t histo[128],
216                                             size_t* storage_ix,
217                                             uint8_t* storage) {
218   if (insertlen < 22594) {
219     BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
220     BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
221     ++histo[62];
222   } else {
223     BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
224     BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
225     ++histo[63];
226   }
227 }
228 
EmitCopyLen(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)229 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
230                                       const uint8_t depth[128],
231                                       const uint16_t bits[128],
232                                       uint32_t histo[128],
233                                       size_t* storage_ix,
234                                       uint8_t* storage) {
235   if (copylen < 10) {
236     BrotliWriteBits(
237         depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
238     ++histo[copylen + 14];
239   } else if (copylen < 134) {
240     const size_t tail = copylen - 6;
241     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
242     const size_t prefix = tail >> nbits;
243     const size_t code = (nbits << 1) + prefix + 20;
244     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
245     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
246     ++histo[code];
247   } else if (copylen < 2118) {
248     const size_t tail = copylen - 70;
249     const uint32_t nbits = Log2FloorNonZero(tail);
250     const size_t code = nbits + 28;
251     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
252     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
253     ++histo[code];
254   } else {
255     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
256     BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
257     ++histo[39];
258   }
259 }
260 
EmitCopyLenLastDistance(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)261 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
262                                                   const uint8_t depth[128],
263                                                   const uint16_t bits[128],
264                                                   uint32_t histo[128],
265                                                   size_t* storage_ix,
266                                                   uint8_t* storage) {
267   if (copylen < 12) {
268     BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
269     ++histo[copylen - 4];
270   } else if (copylen < 72) {
271     const size_t tail = copylen - 8;
272     const uint32_t nbits = Log2FloorNonZero(tail) - 1;
273     const size_t prefix = tail >> nbits;
274     const size_t code = (nbits << 1) + prefix + 4;
275     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
276     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
277     ++histo[code];
278   } else if (copylen < 136) {
279     const size_t tail = copylen - 8;
280     const size_t code = (tail >> 5) + 30;
281     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
282     BrotliWriteBits(5, tail & 31, storage_ix, storage);
283     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
284     ++histo[code];
285     ++histo[64];
286   } else if (copylen < 2120) {
287     const size_t tail = copylen - 72;
288     const uint32_t nbits = Log2FloorNonZero(tail);
289     const size_t code = nbits + 28;
290     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
291     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
292     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
293     ++histo[code];
294     ++histo[64];
295   } else {
296     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
297     BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
298     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
299     ++histo[39];
300     ++histo[64];
301   }
302 }
303 
EmitDistance(size_t distance,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)304 static BROTLI_INLINE void EmitDistance(size_t distance,
305                                        const uint8_t depth[128],
306                                        const uint16_t bits[128],
307                                        uint32_t histo[128],
308                                        size_t* storage_ix, uint8_t* storage) {
309   const size_t d = distance + 3;
310   const uint32_t nbits = Log2FloorNonZero(d) - 1u;
311   const size_t prefix = (d >> nbits) & 1;
312   const size_t offset = (2 + prefix) << nbits;
313   const size_t distcode = 2 * (nbits - 1) + prefix + 80;
314   BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
315   BrotliWriteBits(nbits, d - offset, storage_ix, storage);
316   ++histo[distcode];
317 }
318 
EmitLiterals(const uint8_t * input,const size_t len,const uint8_t depth[256],const uint16_t bits[256],size_t * storage_ix,uint8_t * storage)319 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
320                                        const uint8_t depth[256],
321                                        const uint16_t bits[256],
322                                        size_t* storage_ix, uint8_t* storage) {
323   size_t j;
324   for (j = 0; j < len; j++) {
325     const uint8_t lit = input[j];
326     BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
327   }
328 }
329 
330 /* REQUIRES: len <= 1 << 24. */
BrotliStoreMetaBlockHeader(size_t len,BROTLI_BOOL is_uncompressed,size_t * storage_ix,uint8_t * storage)331 static void BrotliStoreMetaBlockHeader(
332     size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
333     uint8_t* storage) {
334   size_t nibbles = 6;
335   /* ISLAST */
336   BrotliWriteBits(1, 0, storage_ix, storage);
337   if (len <= (1U << 16)) {
338     nibbles = 4;
339   } else if (len <= (1U << 20)) {
340     nibbles = 5;
341   }
342   BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
343   BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
344   /* ISUNCOMPRESSED */
345   BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
346 }
347 
/* Overwrites "n_bits" bits of "array" starting at bit position "pos" with
   the low bits of "bits", least-significant bit first within each byte
   (matching the bit writer). All other bits are preserved. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  while (n_bits > 0) {
    const size_t byte_pos = pos >> 3;
    const size_t bit_in_byte = pos & 7;
    const size_t room = 8 - bit_in_byte;
    /* Patch at most one byte per iteration. */
    const size_t chunk = n_bits < room ? n_bits : room;
    const uint32_t keep_mask =
        (~((1u << (bit_in_byte + chunk)) - 1u)) | ((1u << bit_in_byte) - 1u);
    const uint32_t kept = array[byte_pos] & keep_mask;
    const uint32_t fresh = bits & ((1u << chunk) - 1u);
    array[byte_pos] = (uint8_t)((fresh << bit_in_byte) | kept);
    n_bits -= chunk;
    bits >>= chunk;
    pos += chunk;
  }
}
366 
/* Truncates the bit stream back to "new_storage_ix" bits: clears every bit
   at or above the new position within its byte and resets the cursor. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bits_in_last_byte = new_storage_ix & 7;
  storage[new_storage_ix >> 3] &=
      (uint8_t)((1u << bits_in_last_byte) - 1);
  *storage_ix = new_storage_ix;
}
374 
ShouldMergeBlock(BrotliOnePassArena * s,const uint8_t * data,size_t len,const uint8_t * depths)375 static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
376     const uint8_t* data, size_t len, const uint8_t* depths) {
377   uint32_t* BROTLI_RESTRICT const histo = s->histogram;
378   static const size_t kSampleRate = 43;
379   size_t i;
380   memset(histo, 0, sizeof(s->histogram));
381   for (i = 0; i < len; i += kSampleRate) {
382     ++histo[data[i]];
383   }
384   {
385     const size_t total = (len + kSampleRate - 1) / kSampleRate;
386     double r = (FastLog2(total) + 0.5) * (double)total + 200;
387     for (i = 0; i < 256; ++i) {
388       r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
389     }
390     return TO_BROTLI_BOOL(r >= 0.0);
391   }
392 }
393 
/* Acceptable loss for uncompressible speedup is 2%: only fall back to an
   uncompressed meta-block when the estimated literal encoding ratio exceeds
   980 millibytes/char (see BuildAndStoreLiteralPrefixCode). */
#define MIN_RATIO 980
396 
ShouldUseUncompressedMode(const uint8_t * metablock_start,const uint8_t * next_emit,const size_t insertlen,const size_t literal_ratio)397 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
398     const uint8_t* metablock_start, const uint8_t* next_emit,
399     const size_t insertlen, const size_t literal_ratio) {
400   const size_t compressed = (size_t)(next_emit - metablock_start);
401   if (compressed * 50 > insertlen) {
402     return BROTLI_FALSE;
403   } else {
404     return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
405   }
406 }
407 
EmitUncompressedMetaBlock(const uint8_t * begin,const uint8_t * end,const size_t storage_ix_start,size_t * storage_ix,uint8_t * storage)408 static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
409                                       const size_t storage_ix_start,
410                                       size_t* storage_ix, uint8_t* storage) {
411   const size_t len = (size_t)(end - begin);
412   RewindBitPosition(storage_ix_start, storage_ix, storage);
413   BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
414   *storage_ix = (*storage_ix + 7u) & ~7u;
415   memcpy(&storage[*storage_ix >> 3], begin, len);
416   *storage_ix += len << 3;
417   storage[*storage_ix >> 3] = 0;
418 }
419 
/* Seed for the per-block command/distance histogram: one pseudo-count for
   each symbol, with zeros for symbols the Emit* helpers never produce.
   Only ever read (copied into s->cmd_histo), so keep it const / read-only. */
static const uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
428 
BrotliCompressFragmentFastImpl(BrotliOnePassArena * s,const uint8_t * input,size_t input_size,BROTLI_BOOL is_last,int * table,size_t table_bits,size_t * storage_ix,uint8_t * storage)429 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
430     BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
431     BROTLI_BOOL is_last, int* table, size_t table_bits,
432     size_t* storage_ix, uint8_t* storage) {
433   uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
434   uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
435   uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
436   uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
437   uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
438   const uint8_t* ip_end;
439 
440   /* "next_emit" is a pointer to the first byte that is not covered by a
441      previous copy. Bytes between "next_emit" and the start of the next copy or
442      the end of the input will be emitted as literal bytes. */
443   const uint8_t* next_emit = input;
444   /* Save the start of the first block for position and distance computations.
445   */
446   const uint8_t* base_ip = input;
447 
448   static const size_t kFirstBlockSize = 3 << 15;
449   static const size_t kMergeBlockSize = 1 << 16;
450 
451   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
452   const size_t kMinMatchLen = 5;
453 
454   const uint8_t* metablock_start = input;
455   size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
456   size_t total_block_size = block_size;
457   /* Save the bit position of the MLEN field of the meta-block header, so that
458      we can update it later if we decide to extend this meta-block. */
459   size_t mlen_storage_ix = *storage_ix + 3;
460 
461   size_t literal_ratio;
462 
463   const uint8_t* ip;
464   int last_distance;
465 
466   const size_t shift = 64u - table_bits;
467 
468   BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
469   /* No block splits, no contexts. */
470   BrotliWriteBits(13, 0, storage_ix, storage);
471 
472   literal_ratio = BuildAndStoreLiteralPrefixCode(
473       s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
474 
475   {
476     /* Store the pre-compressed command and distance prefix codes. */
477     size_t i;
478     for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
479       BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
480     }
481   }
482   BrotliWriteBits(s->cmd_code_numbits & 7,
483                   s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);
484 
485  emit_commands:
486   /* Initialize the command and distance histograms. We will gather
487      statistics of command and distance codes during the processing
488      of this block and use it to update the command and distance
489      prefix codes for the next block. */
490   memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
491 
492   /* "ip" is the input pointer. */
493   ip = input;
494   last_distance = -1;
495   ip_end = input + block_size;
496 
497   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
498     /* For the last block, we need to keep a 16 bytes margin so that we can be
499        sure that all distances are at most window size - 16.
500        For all other blocks, we only need to keep a margin of 5 bytes so that
501        we don't go over the block size with a copy. */
502     const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
503                                         input_size - kInputMarginBytes);
504     const uint8_t* ip_limit = input + len_limit;
505 
506     uint32_t next_hash;
507     for (next_hash = Hash(++ip, shift); ; ) {
508       /* Step 1: Scan forward in the input looking for a 5-byte-long match.
509          If we get close to exhausting the input then goto emit_remainder.
510 
511          Heuristic match skipping: If 32 bytes are scanned with no matches
512          found, start looking only at every other byte. If 32 more bytes are
513          scanned, look at every third byte, etc.. When a match is found,
514          immediately go back to looking at every byte. This is a small loss
515          (~5% performance, ~0.1% density) for compressible data due to more
516          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
517          win since the compressor quickly "realizes" the data is incompressible
518          and doesn't bother looking for matches everywhere.
519 
520          The "skip" variable keeps track of how many bytes there are since the
521          last match; dividing it by 32 (i.e. right-shifting by five) gives the
522          number of bytes to move ahead for each iteration. */
523       uint32_t skip = 32;
524 
525       const uint8_t* next_ip = ip;
526       const uint8_t* candidate;
527       BROTLI_DCHECK(next_emit < ip);
528 trawl:
529       do {
530         uint32_t hash = next_hash;
531         uint32_t bytes_between_hash_lookups = skip++ >> 5;
532         BROTLI_DCHECK(hash == Hash(next_ip, shift));
533         ip = next_ip;
534         next_ip = ip + bytes_between_hash_lookups;
535         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
536           goto emit_remainder;
537         }
538         next_hash = Hash(next_ip, shift);
539         candidate = ip - last_distance;
540         if (IsMatch(ip, candidate)) {
541           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
542             table[hash] = (int)(ip - base_ip);
543             break;
544           }
545         }
546         candidate = base_ip + table[hash];
547         BROTLI_DCHECK(candidate >= base_ip);
548         BROTLI_DCHECK(candidate < ip);
549 
550         table[hash] = (int)(ip - base_ip);
551       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
552 
553       /* Check copy distance. If candidate is not feasible, continue search.
554          Checking is done outside of hot loop to reduce overhead. */
555       if (ip - candidate > MAX_DISTANCE) goto trawl;
556 
557       /* Step 2: Emit the found match together with the literal bytes from
558          "next_emit" to the bit stream, and then see if we can find a next match
559          immediately afterwards. Repeat until we find no match for the input
560          without emitting some literal bytes. */
561 
562       {
563         /* We have a 5-byte match at ip, and we need to emit bytes in
564            [next_emit, ip). */
565         const uint8_t* base = ip;
566         size_t matched = 5 + FindMatchLengthWithLimit(
567             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
568         int distance = (int)(base - candidate);  /* > 0 */
569         size_t insert = (size_t)(base - next_emit);
570         ip += matched;
571         BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
572                     (int)(next_emit - base_ip), (unsigned long)insert, 2));
573         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
574         if (BROTLI_PREDICT_TRUE(insert < 6210)) {
575           EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
576                         storage_ix, storage);
577         } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
578                                              literal_ratio)) {
579           EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
580                                     storage_ix, storage);
581           input_size -= (size_t)(base - input);
582           input = base;
583           next_emit = input;
584           goto next_block;
585         } else {
586           EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
587                             storage_ix, storage);
588         }
589         EmitLiterals(next_emit, insert, lit_depth, lit_bits,
590                      storage_ix, storage);
591         if (distance == last_distance) {
592           BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
593           ++cmd_histo[64];
594         } else {
595           EmitDistance((size_t)distance, cmd_depth, cmd_bits,
596                        cmd_histo, storage_ix, storage);
597           last_distance = distance;
598         }
599         EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
600                                 storage_ix, storage);
601         BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
602                     "[CompressFragment] pos = %d insert = %d copy = %d\n"
603                     "[CompressFragment] pos = %d distance = %d\n",
604                     (int)(base - base_ip), (int)distance,
605                     (int)(base - base_ip) + 2, 0, (int)matched - 2,
606                     (int)(base - base_ip) + 2, (int)distance));
607 
608         next_emit = ip;
609         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
610           goto emit_remainder;
611         }
612         /* We could immediately start working at ip now, but to improve
613            compression we first update "table" with the hashes of some positions
614            within the last copy. */
615         {
616           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
617           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
618           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
619           table[prev_hash] = (int)(ip - base_ip - 3);
620           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
621           table[prev_hash] = (int)(ip - base_ip - 2);
622           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
623           table[prev_hash] = (int)(ip - base_ip - 1);
624 
625           candidate = base_ip + table[cur_hash];
626           table[cur_hash] = (int)(ip - base_ip);
627         }
628       }
629 
630       while (IsMatch(ip, candidate)) {
631         /* We have a 5-byte match at ip, and no need to emit any literal bytes
632            prior to ip. */
633         const uint8_t* base = ip;
634         size_t matched = 5 + FindMatchLengthWithLimit(
635             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
636         if (ip - candidate > MAX_DISTANCE) break;
637         ip += matched;
638         last_distance = (int)(base - candidate);  /* > 0 */
639         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
640         EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
641                     storage_ix, storage);
642         EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
643                      cmd_histo, storage_ix, storage);
644         BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
645                     "[CompressFragment] pos = %d distance = %d\n",
646                     (int)(base - base_ip), 0, (int)matched,
647                     (int)(base - base_ip), (int)last_distance));
648 
649         next_emit = ip;
650         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
651           goto emit_remainder;
652         }
653         /* We could immediately start working at ip now, but to improve
654            compression we first update "table" with the hashes of some positions
655            within the last copy. */
656         {
657           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
658           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
659           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
660           table[prev_hash] = (int)(ip - base_ip - 3);
661           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
662           table[prev_hash] = (int)(ip - base_ip - 2);
663           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
664           table[prev_hash] = (int)(ip - base_ip - 1);
665 
666           candidate = base_ip + table[cur_hash];
667           table[cur_hash] = (int)(ip - base_ip);
668         }
669       }
670 
671       next_hash = Hash(++ip, shift);
672     }
673   }
674 
675  emit_remainder:
676   BROTLI_DCHECK(next_emit <= ip_end);
677   input += block_size;
678   input_size -= block_size;
679   block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
680 
681   /* Decide if we want to continue this meta-block instead of emitting the
682      last insert-only command. */
683   if (input_size > 0 &&
684       total_block_size + block_size <= (1 << 20) &&
685       ShouldMergeBlock(s, input, block_size, lit_depth)) {
686     BROTLI_DCHECK(total_block_size > (1 << 16));
687     /* Update the size of the current meta-block and continue emitting commands.
688        We can do this because the current size and the new size both have 5
689        nibbles. */
690     total_block_size += block_size;
691     UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
692     goto emit_commands;
693   }
694 
695   /* Emit the remaining bytes as literals. */
696   if (next_emit < ip_end) {
697     const size_t insert = (size_t)(ip_end - next_emit);
698     BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
699                 (int)(next_emit - base_ip), (unsigned long)insert, 2));
700     if (BROTLI_PREDICT_TRUE(insert < 6210)) {
701       EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
702                     storage_ix, storage);
703       EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
704     } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
705                                          literal_ratio)) {
706       EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
707                                 storage_ix, storage);
708     } else {
709       EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
710                         storage_ix, storage);
711       EmitLiterals(next_emit, insert, lit_depth, lit_bits,
712                    storage_ix, storage);
713     }
714   }
715   next_emit = ip_end;
716 
717 next_block:
718   /* If we have more data, write a new meta-block header and prefix codes and
719      then continue emitting commands. */
720   if (input_size > 0) {
721     metablock_start = input;
722     block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
723     total_block_size = block_size;
724     /* Save the bit position of the MLEN field of the meta-block header, so that
725        we can update it later if we decide to extend this meta-block. */
726     mlen_storage_ix = *storage_ix + 3;
727     BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
728     /* No block splits, no contexts. */
729     BrotliWriteBits(13, 0, storage_ix, storage);
730     literal_ratio = BuildAndStoreLiteralPrefixCode(
731         s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
732     BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
733     goto emit_commands;
734   }
735 
736   if (!is_last) {
737     /* If this is not the last block, update the command and distance prefix
738        codes for the next block and store the compressed forms. */
739     s->cmd_code[0] = 0;
740     s->cmd_code_numbits = 0;
741     BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
742   }
743 }
744 
/* The set of hash-table sizes (log2 of the entry count) for which a dedicated
   compressor specialization is generated; must stay in sync with the switch in
   BrotliCompressFragmentFast below. */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

/* Stamps out a non-inlined wrapper BrotliCompressFragmentFastImpl<B> that
   calls the generic implementation with table_bits baked in as the constant B,
   so the hash shift inside the hot loop becomes a compile-time constant. */
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,          \
    BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
  BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B,    \
      storage_ix, storage);                                                  \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
756 
757 void BrotliCompressFragmentFast(
758     BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
759     BROTLI_BOOL is_last, int* table, size_t table_size,
760     size_t* storage_ix, uint8_t* storage) {
761   const size_t initial_storage_ix = *storage_ix;
762   const size_t table_bits = Log2FloorNonZero(table_size);
763 
764   if (input_size == 0) {
765     BROTLI_DCHECK(is_last);
766     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
767     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
768     *storage_ix = (*storage_ix + 7u) & ~7u;
769     return;
770   }
771 
772   switch (table_bits) {
773 #define CASE_(B)                                                     \
774     case B:                                                          \
775       BrotliCompressFragmentFastImpl ## B(                           \
776           s, input, input_size, is_last, table, storage_ix, storage);\
777       break;
778     FOR_TABLE_BITS_(CASE_)
779 #undef CASE_
780     default: BROTLI_DCHECK(0); break;
781   }
782 
783   /* If output is larger than single uncompressed block, rewrite it. */
784   if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
785     EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
786                               storage_ix, storage);
787   }
788 
789   if (is_last) {
790     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
791     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
792     *storage_ix = (*storage_ix + 7u) & ~7u;
793   }
794 }
795 
796 #undef FOR_TABLE_BITS_
797 
798 #if defined(__cplusplus) || defined(c_plusplus)
799 }  /* extern "C" */
800 #endif
801