1 /* Copyright 2015 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Function for fast encoding of an input fragment, independently from the input
8 history. This function uses one-pass processing: when we find a backward
9 match, we immediately emit the corresponding command and literal codes to
10 the bit stream.
11
12 Adapted from the CompressFragment() function in
13 https://github.com/google/snappy/blob/master/snappy.cc */
14
15 #include "compress_fragment.h"
16
17 #include <string.h> /* memcmp, memcpy, memset */
18
19 #include <brotli/types.h>
20
21 #include "../common/platform.h"
22 #include "brotli_bit_stream.h"
23 #include "entropy_encode.h"
24 #include "fast_log.h"
25 #include "find_match_length.h"
26 #include "write_bits.h"
27
28 #if defined(__cplusplus) || defined(c_plusplus)
29 extern "C" {
30 #endif
31
/* Largest backward-copy distance this one-shot compressor may emit; derived
   from the 18-bit LZ window it always signals (see BROTLI_MAX_BACKWARD_LIMIT
   in common/constants.h for the exact formula). */
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
41
Hash(const uint8_t * p,size_t shift)42 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
43 const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
44 return (uint32_t)(h >> shift);
45 }
46
HashBytesAtOffset(uint64_t v,int offset,size_t shift)47 static BROTLI_INLINE uint32_t HashBytesAtOffset(
48 uint64_t v, int offset, size_t shift) {
49 BROTLI_DCHECK(offset >= 0);
50 BROTLI_DCHECK(offset <= 3);
51 {
52 const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
53 return (uint32_t)(h >> shift);
54 }
55 }
56
IsMatch(const uint8_t * p1,const uint8_t * p2)57 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
58 return TO_BROTLI_BOOL(
59 BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
60 p1[4] == p2[4]);
61 }
62
63 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
64 of the "input" string and stores it into the bit stream.
65 Note that the prefix code here is built from the pre-LZ77 input, therefore
66 we can only approximate the statistics of the actual literal stream.
67 Moreover, for long inputs we build a histogram from a sample of the input
68 and thus have to assign a non-zero depth for each literal.
69 Returns estimated compression ratio millibytes/char for encoding given input
70 with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  /* Scratch histogram lives in the arena rather than on the stack. */
  uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
  size_t histogram_total;
  size_t i;
  memset(histogram, 0, sizeof(s->histogram));

  if (input_size < (1 << 15)) {
    /* Short input: exact histogram over every byte. */
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    /* Long input: sample every 29th byte to bound histogram-building cost. */
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  {
    /* Sum of depth-weighted counts approximates the encoded size in bits. */
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
    return (literal_ratio * 125) / histogram_total;
  }
}
124
125 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
126 "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
    size_t* storage_ix, uint8_t* storage) {
  const uint32_t* const histogram = s->cmd_histo;
  uint8_t* const depth = s->cmd_depth;
  uint16_t* const bits = s->cmd_bits;
  uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
  uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
  /* TODO(eustas): do only once on initialization. */
  memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);

  /* Symbols [0, 64): command codes (depth limit 15);
     symbols [64, 128): distance codes (depth limit 14). */
  BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(tmp_depth, depth, 24);
  memcpy(tmp_depth + 24, depth + 40, 8);
  memcpy(tmp_depth + 32, depth + 24, 8);
  memcpy(tmp_depth + 40, depth + 48, 8);
  memcpy(tmp_depth + 48, depth + 32, 8);
  memcpy(tmp_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
  /* "bits" entries are uint16_t, so byte counts here are 2x symbol counts. */
  memcpy(bits, tmp_bits, 48);
  memcpy(bits + 24, tmp_bits + 32, 16);
  memcpy(bits + 32, tmp_bits + 48, 16);
  memcpy(bits + 40, tmp_bits + 24, 16);
  memcpy(bits + 48, tmp_bits + 40, 16);
  memcpy(bits + 56, tmp_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(tmp_depth, 0, 64);  /* only 64 first values were used */
    memcpy(tmp_depth, depth, 8);
    memcpy(tmp_depth + 64, depth + 8, 8);
    memcpy(tmp_depth + 128, depth + 16, 8);
    memcpy(tmp_depth + 192, depth + 24, 8);
    memcpy(tmp_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      tmp_depth[128 + 8 * i] = depth[40 + i];
      tmp_depth[256 + 8 * i] = depth[48 + i];
      tmp_depth[448 + 8 * i] = depth[56 + i];
    }
    /* TODO(eustas): could/should full-length machinery be avoided? */
    BrotliStoreHuffmanTree(
        tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
}
178
179 /* REQUIRES: insertlen < 6210 */
EmitInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)180 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
181 const uint8_t depth[128],
182 const uint16_t bits[128],
183 uint32_t histo[128],
184 size_t* storage_ix,
185 uint8_t* storage) {
186 if (insertlen < 6) {
187 const size_t code = insertlen + 40;
188 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
189 ++histo[code];
190 } else if (insertlen < 130) {
191 const size_t tail = insertlen - 2;
192 const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
193 const size_t prefix = tail >> nbits;
194 const size_t inscode = (nbits << 1) + prefix + 42;
195 BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
196 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
197 ++histo[inscode];
198 } else if (insertlen < 2114) {
199 const size_t tail = insertlen - 66;
200 const uint32_t nbits = Log2FloorNonZero(tail);
201 const size_t code = nbits + 50;
202 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
203 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
204 ++histo[code];
205 } else {
206 BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
207 BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
208 ++histo[61];
209 }
210 }
211
EmitLongInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)212 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
213 const uint8_t depth[128],
214 const uint16_t bits[128],
215 uint32_t histo[128],
216 size_t* storage_ix,
217 uint8_t* storage) {
218 if (insertlen < 22594) {
219 BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
220 BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
221 ++histo[62];
222 } else {
223 BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
224 BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
225 ++histo[63];
226 }
227 }
228
EmitCopyLen(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)229 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
230 const uint8_t depth[128],
231 const uint16_t bits[128],
232 uint32_t histo[128],
233 size_t* storage_ix,
234 uint8_t* storage) {
235 if (copylen < 10) {
236 BrotliWriteBits(
237 depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
238 ++histo[copylen + 14];
239 } else if (copylen < 134) {
240 const size_t tail = copylen - 6;
241 const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
242 const size_t prefix = tail >> nbits;
243 const size_t code = (nbits << 1) + prefix + 20;
244 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
245 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
246 ++histo[code];
247 } else if (copylen < 2118) {
248 const size_t tail = copylen - 70;
249 const uint32_t nbits = Log2FloorNonZero(tail);
250 const size_t code = nbits + 28;
251 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
252 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
253 ++histo[code];
254 } else {
255 BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
256 BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
257 ++histo[39];
258 }
259 }
260
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  /* Emits the copy-length part of a command that reuses the previous
     distance. The first two branches use command codes that (per the bit
     stream format) imply "last distance" on their own; the remaining
     branches additionally emit symbol 64 — the explicit last-distance
     code, also used in the main loop at cmd_depth[64]/cmd_bits[64]. */
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    /* Explicit "use last distance" symbol. */
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[39];
    ++histo[64];
  }
}
303
EmitDistance(size_t distance,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)304 static BROTLI_INLINE void EmitDistance(size_t distance,
305 const uint8_t depth[128],
306 const uint16_t bits[128],
307 uint32_t histo[128],
308 size_t* storage_ix, uint8_t* storage) {
309 const size_t d = distance + 3;
310 const uint32_t nbits = Log2FloorNonZero(d) - 1u;
311 const size_t prefix = (d >> nbits) & 1;
312 const size_t offset = (2 + prefix) << nbits;
313 const size_t distcode = 2 * (nbits - 1) + prefix + 80;
314 BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
315 BrotliWriteBits(nbits, d - offset, storage_ix, storage);
316 ++histo[distcode];
317 }
318
EmitLiterals(const uint8_t * input,const size_t len,const uint8_t depth[256],const uint16_t bits[256],size_t * storage_ix,uint8_t * storage)319 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
320 const uint8_t depth[256],
321 const uint16_t bits[256],
322 size_t* storage_ix, uint8_t* storage) {
323 size_t j;
324 for (j = 0; j < len; j++) {
325 const uint8_t lit = input[j];
326 BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
327 }
328 }
329
330 /* REQUIRES: len <= 1 << 24. */
/* Writes a non-last meta-block header: ISLAST=0, MNIBBLES, MLEN-1 and the
   ISUNCOMPRESSED flag. REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  /* Pick the smallest nibble count that can represent len - 1. */
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  } else {
    nibbles = 6;
  }
  /* MNIBBLES is stored as nibbles - 4, followed by MLEN - 1 itself. */
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
347
/* Overwrites "n_bits" bits of "array" starting at bit position "pos" with
   the low-order bits of "bits" (LSB-first within each byte, matching
   BrotliWriteBits). Used to patch the MLEN field of an already-emitted
   meta-block header in place. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
                       uint8_t* array) {
  while (n_bits > 0) {
    const size_t byte_pos = pos >> 3;
    const size_t bit_in_byte = pos & 7;
    /* Number of the remaining bits that fit into the current byte. */
    size_t chunk = 8 - bit_in_byte;
    uint32_t field_mask;
    uint32_t kept;
    uint32_t fresh;
    if (chunk > n_bits) chunk = n_bits;
    field_mask = ((1u << chunk) - 1u) << bit_in_byte;
    kept = array[byte_pos] & ~field_mask;
    fresh = (bits & ((1u << chunk) - 1u)) << bit_in_byte;
    array[byte_pos] = (uint8_t)(kept | fresh);
    n_bits -= chunk;
    bits >>= chunk;
    pos += chunk;
  }
}
366
/* Moves the bit cursor back to "new_storage_ix" and clears the already-
   written high bits of the byte that position lands in, so subsequent
   bit writes OR into clean storage. Bytes past that one are left as-is
   (they will be overwritten). */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bits_kept_in_byte = new_storage_ix & 7;
  const uint8_t keep_mask = (uint8_t)((1u << bits_kept_in_byte) - 1u);
  storage[new_storage_ix >> 3] &= keep_mask;
  *storage_ix = new_storage_ix;
}
374
ShouldMergeBlock(BrotliOnePassArena * s,const uint8_t * data,size_t len,const uint8_t * depths)375 static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
376 const uint8_t* data, size_t len, const uint8_t* depths) {
377 uint32_t* BROTLI_RESTRICT const histo = s->histogram;
378 static const size_t kSampleRate = 43;
379 size_t i;
380 memset(histo, 0, sizeof(s->histogram));
381 for (i = 0; i < len; i += kSampleRate) {
382 ++histo[data[i]];
383 }
384 {
385 const size_t total = (len + kSampleRate - 1) / kSampleRate;
386 double r = (FastLog2(total) + 0.5) * (double)total + 200;
387 for (i = 0; i < 256; ++i) {
388 r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
389 }
390 return TO_BROTLI_BOOL(r >= 0.0);
391 }
392 }
393
394 /* Acceptable loss for uncompressible speedup is 2% */
395 #define MIN_RATIO 980
396
ShouldUseUncompressedMode(const uint8_t * metablock_start,const uint8_t * next_emit,const size_t insertlen,const size_t literal_ratio)397 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
398 const uint8_t* metablock_start, const uint8_t* next_emit,
399 const size_t insertlen, const size_t literal_ratio) {
400 const size_t compressed = (size_t)(next_emit - metablock_start);
401 if (compressed * 50 > insertlen) {
402 return BROTLI_FALSE;
403 } else {
404 return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
405 }
406 }
407
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  /* Rewinds the bit stream to "storage_ix_start", discarding whatever was
     emitted for [begin, end) so far, and re-emits that range verbatim as a
     single uncompressed meta-block. */
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Raw bytes are byte-aligned in the stream. */
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  /* Clear the byte after the copy so later bit writes can OR into it. */
  storage[*storage_ix >> 3] = 0;
}
419
/* Seed histogram for the command/distance codes of each block; copied into
   s->cmd_histo before processing (see memcpy in the main loop). Only ever
   read, so declare it const to keep it in read-only data and prevent
   accidental modification. */
static const uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
428
/* Compresses "input" (of "input_size" bytes) into one or more meta-blocks,
   appending the encoded bits at *storage_ix in "storage". "table" holds
   (1 << table_bits) int entries with positions relative to the fragment
   start. The final ISLAST/ISEMPTY terminator is NOT written here; the
   caller adds it when "is_last" is set. */
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits,
    size_t* storage_ix, uint8_t* storage) {
  uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
  uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
  uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
  uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
  uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance computations.
  */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
  size_t mlen_storage_ix = *storage_ix + 3;

  /* Millibytes-per-char estimate from BuildAndStoreLiteralPrefixCode; used
     by ShouldUseUncompressedMode. */
  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(s->cmd_code_numbits & 7,
                  s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);

 emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16 bytes margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc.. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        BROTLI_DCHECK(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        /* First try the previous distance: a hit there lets us reuse the
           cheap "last distance" command code. */
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
                    (int)(next_emit - base_ip), (unsigned long)insert, 2));
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          /* Compression is not paying off: restart this meta-block as
             uncompressed and continue from "base". */
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          /* Symbol 64: reuse the previous distance. */
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);
        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), (int)distance,
                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
                    (int)(base - base_ip) + 2, (int)distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), 0, (int)matched,
                    (int)(base - base_ip), (int)last_distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(s, input, block_size, lit_depth)) {
    BROTLI_DCHECK(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting commands.
       We can do this because the current size and the new size both have 5
       nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
                (int)(next_emit - base_ip), (unsigned long)insert, 2));
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

 next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so that
       we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    s->cmd_code[0] = 0;
    s->cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
  }
}
744
/* Stamp out one BROTLI_NOINLINE specialization of
   BrotliCompressFragmentFastImpl per supported hash-table size
   (table_bits in {9, 11, 13, 15}); baking "table_bits" in as a
   compile-time constant lets the compiler fold the hash shift. */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size, \
    BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
  BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B, \
      storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
756
/* Public entry point. Compresses "input" as a stand-alone fragment.
   "table_size" must be a power of two whose log2 is one of 9/11/13/15
   (matching FOR_TABLE_BITS_); other sizes only trip the debug assert.
   Handles the empty-input case, rewrites the output as a single
   uncompressed meta-block if compression expanded the data, and emits
   the stream terminator when "is_last". */
void BrotliCompressFragmentFast(
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    BROTLI_DCHECK(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  switch (table_bits) {
#define CASE_(B)                                                     \
    case B:                                                          \
      BrotliCompressFragmentFastImpl ## B(                           \
          s, input, input_size, is_last, table, storage_ix, storage);\
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}
795
796 #undef FOR_TABLE_BITS_
797
798 #if defined(__cplusplus) || defined(c_plusplus)
799 } /* extern "C" */
800 #endif
801