/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently from the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

#include "./compress_fragment.h"

#include <string.h>  /* memcmp, memcpy, memset */

#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./write_bits.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime; being odd is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;

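/* Hashes the five bytes at "p". The 64-bit little-endian load is shifted left
   by 24 so that only the low five input bytes influence the product's top
   bits; "shift" (64 - table_bits) then keeps the top "table_bits" bits as the
   table index. */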
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  BROTLI_DCHECK(offset >= 0);
  BROTLI_DCHECK(offset <= 3);
  {
    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
    return (uint32_t)(h >> shift);
  }
}

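/* A match must cover at least five bytes: four are compared with a single
   32-bit load and the fifth byte is compared separately. */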
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return TO_BROTLI_BOOL(
      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
      p1[4] == p2[4]);
}

/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth for each literal.
   Returns estimated compression ratio millibytes/char for encoding given input
   with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead of as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
  {
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
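    /* depths[] holds code lengths in bits; multiplying by 125 == 1000 / 8
       converts bits per symbol to millibytes per symbol. */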
    return (literal_ratio * 125) / histogram_total;
  }
}

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
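  /* The canonical code is assigned with the 8-symbol blocks permuted into the
     order 0..23, 40..47, 24..31, 48..55, 32..39, 56..63, and the resulting
     bit patterns are then copied back so that "bits" stays indexed in the
     original symbol order. */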
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only the first 64 values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}

/* REQUIRES: insertlen < 6210 */
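/* Emits one of the insert-length codes of the command alphabet (indices
   follow the reordered layout set up in BuildAndStoreCommandPrefixCode):
   lengths 0..5 map directly to codes 40..45; larger lengths select a bucket
   code and store the remainder in explicit extra bits. For illustration,
   insertlen == 100 gives tail == 98, nbits == 5 and prefix == 3, so code 55
   is emitted, followed by 98 - (3 << 5) == 2 in five extra bits. */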
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[61];
  }
}

static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[62];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[63];
  }
}

static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[39];
  }
}

static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[39];
    ++histo[64];
  }
}

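/* Encodes "distance" as a regular distance code; the last-distance short code
   is handled by the caller. d = distance + 3 is split into one prefix bit and
   nbits extra bits. The code index starts at 80 because the distance alphabet
   occupies entries 64..127 of the combined histogram and the first 16
   distance symbols are the short codes that this function does not emit. */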
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}

static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t j;
  for (j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

/* REQUIRES: len <= 1 << 24. */
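/* The meta-block header written here is: ISLAST (1 bit, always 0),
   MNIBBLES - 4 (2 bits), MLEN - 1 (MNIBBLES * 4 bits) and ISUNCOMPRESSED
   (1 bit). */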
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles = 6;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}

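/* Overwrites "n_bits" bits of "array" starting at bit position "pos" with the
   low bits of "bits", leaving the surrounding bits intact. Used to patch the
   MLEN field of an already emitted meta-block header when the meta-block is
   extended. */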
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
                       uint8_t* array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask =
        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    array[byte_pos] =
        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}

static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= (uint8_t)mask;
  *storage_ix = new_storage_ix;
}

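/* Decides from a sample of the next "len" bytes whether they should be merged
   into the current meta-block. The test below is equivalent to checking that
   coding the sample with the existing literal depths wastes at most about
   0.5 bits per sample plus a slack of 200 bits compared to the sample's own
   entropy. */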
static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  size_t i;
  for (i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  {
    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    for (i = 0; i < 256; ++i) {
      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    }
    return TO_BROTLI_BOOL(r >= 0.0);
  }
}

/* Acceptable loss for speeding up incompressible input is 2%. */
#define MIN_RATIO 980

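/* Uncompressed mode is used only for a long insert run: the pending literal
   run must be at least 50 times as long as the part of the meta-block that
   has already been compressed, and the estimated literal coding ratio must
   exceed MIN_RATIO (980 millibytes per byte, i.e. the literal code would save
   less than 2%). */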
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    const uint8_t* metablock_start, const uint8_t* next_emit,
    const size_t insertlen, const size_t literal_ratio) {
  const size_t compressed = (size_t)(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return BROTLI_FALSE;
  } else {
    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
  }
}

static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, BROTLI_TRUE, storage_ix, storage);
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

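/* Seed histogram for the command and distance prefix codes of the first
   block. Symbols seeded with 1 keep every code this encoder can emit at a
   non-zero count; symbols seeded with 0 (for example, most of the short
   distance codes) are never produced by the Emit* helpers above. */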
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};

static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance
     computations. */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
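  /* The +3 below skips the 1-bit ISLAST and 2-bit MNIBBLES fields that
     precede MLEN. */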
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, BROTLI_FALSE, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16-byte margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        BROTLI_DCHECK(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    BROTLI_DCHECK(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting
       commands. We can do this because the current size and the new size
       both have 5 nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so
       that we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, BROTLI_FALSE, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}

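/* One specialization of BrotliCompressFragmentFastImpl is baked per supported
   hash table size, so that "table_bits", and hence the hash shift, becomes a
   compile-time constant in each specialized copy. */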
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
    MemoryManager* m, const uint8_t* input, size_t input_size, \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128], \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code, \
    size_t* storage_ix, uint8_t* storage) { \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B, \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_

void BrotliCompressFragmentFast(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    BROTLI_DCHECK(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  switch (table_bits) {
#define CASE_(B) \
    case B: \
      BrotliCompressFragmentFastImpl ## B( \
          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
          cmd_code_numbits, cmd_code, storage_ix, storage); \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If the output is larger than a single uncompressed meta-block, rewrite it
     as an uncompressed meta-block instead. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}

#undef FOR_TABLE_BITS_

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif