• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2010 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* A (forgetful) hash table to the data seen by the compressor, to
8    help create backward references to previous data. */
9 
10 #ifndef BROTLI_ENC_HASH_H_
11 #define BROTLI_ENC_HASH_H_
12 
13 #include <stdlib.h>  /* exit */
14 #include <string.h>  /* memcmp, memset */
15 
16 #include <brotli/types.h>
17 
18 #include "../common/constants.h"
19 #include "../common/dictionary.h"
20 #include "../common/platform.h"
21 #include "compound_dictionary.h"
22 #include "encoder_dict.h"
23 #include "fast_log.h"
24 #include "find_match_length.h"
25 #include "memory.h"
26 #include "quality.h"
27 #include "static_dict.h"
28 
29 #if defined(__cplusplus) || defined(c_plusplus)
30 extern "C" {
31 #endif
32 
33 typedef struct {
34   /**
35    * Dynamically allocated areas; regular hasher uses one or two allocations;
36    * "composite" hasher uses up to 4 allocations.
37    */
38   void* extra[4];
39 
40   /**
41    * False before the fisrt invocation of HasherSetup (where "extra" memory)
42    * is allocated.
43    */
44   BROTLI_BOOL is_setup_;
45 
46   size_t dict_num_lookups;
47   size_t dict_num_matches;
48 
49   BrotliHasherParams params;
50 
51   /**
52    * False if hasher needs to be "prepared" before use (before the first
53    * invocation of HasherSetup or after HasherReset). "preparation" is hasher
54    * data initialization (using input ringbuffer).
55    */
56   BROTLI_BOOL is_prepared_;
57 } HasherCommon;
58 
59 #define score_t size_t
60 
61 static const uint32_t kCutoffTransformsCount = 10;
62 /*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
63 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
64 static const uint64_t kCutoffTransforms =
65     BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
66 
67 typedef struct HasherSearchResult {
68   size_t len;
69   size_t distance;
70   score_t score;
71   int len_code_delta; /* == len_code - len */
72 } HasherSearchResult;
73 
74 /* kHashMul32 multiplier has these properties:
75    * The multiplier must be odd. Otherwise we may lose the highest bit.
76    * No long streaks of ones or zeros.
77    * There is no effort to ensure that it is a prime, the oddity is enough
78      for this use.
79    * The number has been tuned heuristically against compression benchmarks. */
80 static const uint32_t kHashMul32 = 0x1E35A7BD;
81 static const uint64_t kHashMul64 =
82     BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
83 
Hash14(const uint8_t * data)84 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
85   uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
86   /* The higher bits contain more mixture from the multiplication,
87      so we take our results from there. */
88   return h >> (32 - 14);
89 }
90 
PrepareDistanceCache(int * BROTLI_RESTRICT distance_cache,const int num_distances)91 static BROTLI_INLINE void PrepareDistanceCache(
92     int* BROTLI_RESTRICT distance_cache, const int num_distances) {
93   if (num_distances > 4) {
94     int last_distance = distance_cache[0];
95     distance_cache[4] = last_distance - 1;
96     distance_cache[5] = last_distance + 1;
97     distance_cache[6] = last_distance - 2;
98     distance_cache[7] = last_distance + 2;
99     distance_cache[8] = last_distance - 3;
100     distance_cache[9] = last_distance + 3;
101     if (num_distances > 10) {
102       int next_last_distance = distance_cache[1];
103       distance_cache[10] = next_last_distance - 1;
104       distance_cache[11] = next_last_distance + 1;
105       distance_cache[12] = next_last_distance - 2;
106       distance_cache[13] = next_last_distance + 2;
107       distance_cache[14] = next_last_distance - 3;
108       distance_cache[15] = next_last_distance + 3;
109     }
110   }
111 }
112 
113 #define BROTLI_LITERAL_BYTE_SCORE 135
114 #define BROTLI_DISTANCE_BIT_PENALTY 30
115 /* Score must be positive after applying maximal penalty. */
116 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
117 
118 /* Usually, we always choose the longest backward reference. This function
119    allows for the exception of that rule.
120 
121    If we choose a backward reference that is further away, it will
122    usually be coded with more bits. We approximate this by assuming
123    log2(distance). If the distance can be expressed in terms of the
124    last four distances, we use some heuristic constants to estimate
125    the bits cost. For the first up to four literals we use the bit
126    cost of the literals from the literal cost model, after that we
127    use the average bit cost of the cost model.
128 
129    This function is used to sometimes discard a longer backward reference
130    when it is not much longer and the bit cost for encoding it is more
131    than the saved literals.
132 
133    backward_reference_offset MUST be positive. */
BackwardReferenceScore(size_t copy_length,size_t backward_reference_offset)134 static BROTLI_INLINE score_t BackwardReferenceScore(
135     size_t copy_length, size_t backward_reference_offset) {
136   return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
137       BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
138 }
139 
BackwardReferenceScoreUsingLastDistance(size_t copy_length)140 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
141     size_t copy_length) {
142   return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
143       BROTLI_SCORE_BASE + 15;
144 }
145 
BackwardReferencePenaltyUsingLastDistance(size_t distance_short_code)146 static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
147     size_t distance_short_code) {
148   return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
149 }
150 
TestStaticDictionaryItem(const BrotliEncoderDictionary * dictionary,size_t len,size_t word_idx,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out)151 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
152     const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
153     const uint8_t* data, size_t max_length, size_t max_backward,
154     size_t max_distance, HasherSearchResult* out) {
155   size_t offset;
156   size_t matchlen;
157   size_t backward;
158   score_t score;
159   offset = dictionary->words->offsets_by_length[len] + len * word_idx;
160   if (len > max_length) {
161     return BROTLI_FALSE;
162   }
163 
164   matchlen =
165       FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
166   if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
167     return BROTLI_FALSE;
168   }
169   {
170     size_t cut = len - matchlen;
171     size_t transform_id = (cut << 2) +
172         (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
173     backward = max_backward + 1 + word_idx +
174         (transform_id << dictionary->words->size_bits_by_length[len]);
175   }
176   if (backward > max_distance) {
177     return BROTLI_FALSE;
178   }
179   score = BackwardReferenceScore(matchlen, backward);
180   if (score < out->score) {
181     return BROTLI_FALSE;
182   }
183   out->len = matchlen;
184   out->len_code_delta = (int)len - (int)matchlen;
185   out->distance = backward;
186   out->score = score;
187   return BROTLI_TRUE;
188 }
189 
SearchInStaticDictionary(const BrotliEncoderDictionary * dictionary,HasherCommon * common,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out,BROTLI_BOOL shallow)190 static BROTLI_INLINE void SearchInStaticDictionary(
191     const BrotliEncoderDictionary* dictionary,
192     HasherCommon* common, const uint8_t* data, size_t max_length,
193     size_t max_backward, size_t max_distance,
194     HasherSearchResult* out, BROTLI_BOOL shallow) {
195   size_t key;
196   size_t i;
197   if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
198     return;
199   }
200   key = Hash14(data) << 1;
201   for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
202     common->dict_num_lookups++;
203     if (dictionary->hash_table_lengths[key] != 0) {
204       BROTLI_BOOL item_matches = TestStaticDictionaryItem(
205           dictionary, dictionary->hash_table_lengths[key],
206           dictionary->hash_table_words[key], data,
207           max_length, max_backward, max_distance, out);
208       if (item_matches) {
209         common->dict_num_matches++;
210       }
211     }
212   }
213 }
214 
215 typedef struct BackwardMatch {
216   uint32_t distance;
217   uint32_t length_and_code;
218 } BackwardMatch;
219 
InitBackwardMatch(BackwardMatch * self,size_t dist,size_t len)220 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
221     size_t dist, size_t len) {
222   self->distance = (uint32_t)dist;
223   self->length_and_code = (uint32_t)(len << 5);
224 }
225 
InitDictionaryBackwardMatch(BackwardMatch * self,size_t dist,size_t len,size_t len_code)226 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
227     size_t dist, size_t len, size_t len_code) {
228   self->distance = (uint32_t)dist;
229   self->length_and_code =
230       (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
231 }
232 
BackwardMatchLength(const BackwardMatch * self)233 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
234   return self->length_and_code >> 5;
235 }
236 
BackwardMatchLengthCode(const BackwardMatch * self)237 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
238   size_t code = self->length_and_code & 31;
239   return code ? code : BackwardMatchLength(self);
240 }
241 
242 #define EXPAND_CAT(a, b) CAT(a, b)
243 #define CAT(a, b) a ## b
244 #define FN(X) EXPAND_CAT(X, HASHER())
245 
246 #define HASHER() H10
247 #define BUCKET_BITS 17
248 #define MAX_TREE_SEARCH_DEPTH 64
249 #define MAX_TREE_COMP_LENGTH 128
250 #include "hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
251 #undef MAX_TREE_SEARCH_DEPTH
252 #undef MAX_TREE_COMP_LENGTH
253 #undef BUCKET_BITS
254 #undef HASHER
255 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
256 #define MAX_NUM_MATCHES_H10 128
257 
258 /* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
259    a little faster (0.5% - 1%) and it compresses 0.15% better on small text
260    and HTML inputs. */
261 
262 #define HASHER() H2
263 #define BUCKET_BITS 16
264 #define BUCKET_SWEEP_BITS 0
265 #define HASH_LEN 5
266 #define USE_DICTIONARY 1
267 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
268 #undef BUCKET_SWEEP_BITS
269 #undef USE_DICTIONARY
270 #undef HASHER
271 
272 #define HASHER() H3
273 #define BUCKET_SWEEP_BITS 1
274 #define USE_DICTIONARY 0
275 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
276 #undef USE_DICTIONARY
277 #undef BUCKET_SWEEP_BITS
278 #undef BUCKET_BITS
279 #undef HASHER
280 
281 #define HASHER() H4
282 #define BUCKET_BITS 17
283 #define BUCKET_SWEEP_BITS 2
284 #define USE_DICTIONARY 1
285 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
286 #undef USE_DICTIONARY
287 #undef HASH_LEN
288 #undef BUCKET_SWEEP_BITS
289 #undef BUCKET_BITS
290 #undef HASHER
291 
292 #define HASHER() H5
293 #include "hash_longest_match_inc.h"  /* NOLINT(build/include) */
294 #undef HASHER
295 
296 #define HASHER() H6
297 #include "hash_longest_match64_inc.h"  /* NOLINT(build/include) */
298 #undef HASHER
299 
300 #define BUCKET_BITS 15
301 
302 #define NUM_LAST_DISTANCES_TO_CHECK 4
303 #define NUM_BANKS 1
304 #define BANK_BITS 16
305 #define HASHER() H40
306 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
307 #undef HASHER
308 #undef NUM_LAST_DISTANCES_TO_CHECK
309 
310 #define NUM_LAST_DISTANCES_TO_CHECK 10
311 #define HASHER() H41
312 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
313 #undef HASHER
314 #undef NUM_LAST_DISTANCES_TO_CHECK
315 #undef NUM_BANKS
316 #undef BANK_BITS
317 
318 #define NUM_LAST_DISTANCES_TO_CHECK 16
319 #define NUM_BANKS 512
320 #define BANK_BITS 9
321 #define HASHER() H42
322 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
323 #undef HASHER
324 #undef NUM_LAST_DISTANCES_TO_CHECK
325 #undef NUM_BANKS
326 #undef BANK_BITS
327 
328 #undef BUCKET_BITS
329 
330 #define HASHER() H54
331 #define BUCKET_BITS 20
332 #define BUCKET_SWEEP_BITS 2
333 #define HASH_LEN 7
334 #define USE_DICTIONARY 0
335 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
336 #undef USE_DICTIONARY
337 #undef HASH_LEN
338 #undef BUCKET_SWEEP_BITS
339 #undef BUCKET_BITS
340 #undef HASHER
341 
342 /* fast large window hashers */
343 
344 #define HASHER() HROLLING_FAST
345 #define CHUNKLEN 32
346 #define JUMP 4
347 #define NUMBUCKETS 16777216
348 #define MASK ((NUMBUCKETS * 64) - 1)
349 #include "hash_rolling_inc.h"  /* NOLINT(build/include) */
350 #undef JUMP
351 #undef HASHER
352 
353 
354 #define HASHER() HROLLING
355 #define JUMP 1
356 #include "hash_rolling_inc.h"  /* NOLINT(build/include) */
357 #undef MASK
358 #undef NUMBUCKETS
359 #undef JUMP
360 #undef CHUNKLEN
361 #undef HASHER
362 
363 #define HASHER() H35
364 #define HASHER_A H3
365 #define HASHER_B HROLLING_FAST
366 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
367 #undef HASHER_A
368 #undef HASHER_B
369 #undef HASHER
370 
371 #define HASHER() H55
372 #define HASHER_A H54
373 #define HASHER_B HROLLING_FAST
374 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
375 #undef HASHER_A
376 #undef HASHER_B
377 #undef HASHER
378 
379 #define HASHER() H65
380 #define HASHER_A H6
381 #define HASHER_B HROLLING
382 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
383 #undef HASHER_A
384 #undef HASHER_B
385 #undef HASHER
386 
387 #undef FN
388 #undef CAT
389 #undef EXPAND_CAT
390 
391 #define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
392 #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
393 #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
394 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
395 
396 typedef struct {
397   HasherCommon common;
398 
399   union {
400 #define MEMBER_(N) \
401     H ## N _H ## N;
402     FOR_ALL_HASHERS(MEMBER_)
403 #undef MEMBER_
404   } privat;
405 } Hasher;
406 
407 /* MUST be invoked before any other method. */
HasherInit(Hasher * hasher)408 static BROTLI_INLINE void HasherInit(Hasher* hasher) {
409   hasher->common.is_setup_ = BROTLI_FALSE;
410   hasher->common.extra[0] = NULL;
411   hasher->common.extra[1] = NULL;
412   hasher->common.extra[2] = NULL;
413   hasher->common.extra[3] = NULL;
414 }
415 
DestroyHasher(MemoryManager * m,Hasher * hasher)416 static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
417   if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
418   if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
419   if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
420   if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
421 }
422 
HasherReset(Hasher * hasher)423 static BROTLI_INLINE void HasherReset(Hasher* hasher) {
424   hasher->common.is_prepared_ = BROTLI_FALSE;
425 }
426 
HasherSize(const BrotliEncoderParams * params,BROTLI_BOOL one_shot,const size_t input_size,size_t * alloc_size)427 static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
428     BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
429   switch (params->hasher.type) {
430 #define SIZE_(N)                                                           \
431     case N:                                                                \
432       HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
433       break;
434     FOR_ALL_HASHERS(SIZE_)
435 #undef SIZE_
436     default:
437       break;
438   }
439 }
440 
HasherSetup(MemoryManager * m,Hasher * hasher,BrotliEncoderParams * params,const uint8_t * data,size_t position,size_t input_size,BROTLI_BOOL is_last)441 static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
442     BrotliEncoderParams* params, const uint8_t* data, size_t position,
443     size_t input_size, BROTLI_BOOL is_last) {
444   BROTLI_BOOL one_shot = (position == 0 && is_last);
445   if (!hasher->common.is_setup_) {
446     size_t alloc_size[4] = {0};
447     size_t i;
448     ChooseHasher(params, &params->hasher);
449     hasher->common.params = params->hasher;
450     hasher->common.dict_num_lookups = 0;
451     hasher->common.dict_num_matches = 0;
452     HasherSize(params, one_shot, input_size, alloc_size);
453     for (i = 0; i < 4; ++i) {
454       if (alloc_size[i] == 0) continue;
455       hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
456       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
457     }
458     switch (hasher->common.params.type) {
459 #define INITIALIZE_(N)                        \
460       case N:                                 \
461         InitializeH ## N(&hasher->common,     \
462             &hasher->privat._H ## N, params); \
463         break;
464       FOR_ALL_HASHERS(INITIALIZE_);
465 #undef INITIALIZE_
466       default:
467         break;
468     }
469     HasherReset(hasher);
470     hasher->common.is_setup_ = BROTLI_TRUE;
471   }
472 
473   if (!hasher->common.is_prepared_) {
474     switch (hasher->common.params.type) {
475 #define PREPARE_(N)                      \
476       case N:                            \
477         PrepareH ## N(                   \
478             &hasher->privat._H ## N,     \
479             one_shot, input_size, data); \
480         break;
481       FOR_ALL_HASHERS(PREPARE_)
482 #undef PREPARE_
483       default: break;
484     }
485     hasher->common.is_prepared_ = BROTLI_TRUE;
486   }
487 }
488 
InitOrStitchToPreviousBlock(MemoryManager * m,Hasher * hasher,const uint8_t * data,size_t mask,BrotliEncoderParams * params,size_t position,size_t input_size,BROTLI_BOOL is_last)489 static BROTLI_INLINE void InitOrStitchToPreviousBlock(
490     MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
491     BrotliEncoderParams* params, size_t position, size_t input_size,
492     BROTLI_BOOL is_last) {
493   HasherSetup(m, hasher, params, data, position, input_size, is_last);
494   if (BROTLI_IS_OOM(m)) return;
495   switch (hasher->common.params.type) {
496 #define INIT_(N)                             \
497     case N:                                  \
498       StitchToPreviousBlockH ## N(           \
499           &hasher->privat._H ## N,           \
500           input_size, position, data, mask); \
501     break;
502     FOR_ALL_HASHERS(INIT_)
503 #undef INIT_
504     default: break;
505   }
506 }
507 
508 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
509        to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindCompoundDictionaryMatch(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t distance_offset,const size_t max_distance,HasherSearchResult * BROTLI_RESTRICT out)510 static BROTLI_INLINE void FindCompoundDictionaryMatch(
511     const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
512     const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
513     const size_t cur_ix, const size_t max_length, const size_t distance_offset,
514     const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
515   const uint32_t source_size = self->source_size;
516   const size_t boundary = distance_offset - source_size;
517   const uint32_t hash_bits = self->hash_bits;
518   const uint32_t bucket_bits = self->bucket_bits;
519   const uint32_t slot_bits = self->slot_bits;
520 
521   const uint32_t hash_shift = 64u - bucket_bits;
522   const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
523   const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524 
525   const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526   const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527   const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
528   const uint8_t* source = NULL;
529 
530   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
531   score_t best_score = out->score;
532   size_t best_len = out->len;
533   size_t i;
534   const uint64_t h =
535       (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
536       kPreparedDictionaryHashMul64Long;
537   const uint32_t key = (uint32_t)(h >> hash_shift);
538   const uint32_t slot = key & slot_mask;
539   const uint32_t head = heads[key];
540   const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
541   uint32_t item = (head == 0xFFFF) ? 1 : 0;
542 
543   const void* tail = (void*)&items[self->num_items];
544   if (self->magic == kPreparedDictionaryMagic) {
545     source = (const uint8_t*)tail;
546   } else {
547     /* kLeanPreparedDictionaryMagic */
548     source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
549   }
550 
551   for (i = 0; i < 4; ++i) {
552     const size_t distance = (size_t)distance_cache[i];
553     size_t offset;
554     size_t limit;
555     size_t len;
556     if (distance <= boundary || distance > distance_offset) continue;
557     offset = distance_offset - distance;
558     limit = source_size - offset;
559     limit = limit > max_length ? max_length : limit;
560     len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
561                                    limit);
562     if (len >= 2) {
563       score_t score = BackwardReferenceScoreUsingLastDistance(len);
564       if (best_score < score) {
565         if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
566         if (best_score < score) {
567           best_score = score;
568           if (len > best_len) best_len = len;
569           out->len = len;
570           out->len_code_delta = 0;
571           out->distance = distance;
572           out->score = best_score;
573         }
574       }
575     }
576   }
577   while (item == 0) {
578     size_t offset;
579     size_t distance;
580     size_t limit;
581     item = *chain;
582     chain++;
583     offset = item & 0x7FFFFFFF;
584     item &= 0x80000000;
585     distance = distance_offset - offset;
586     limit = source_size - offset;
587     limit = (limit > max_length) ? max_length : limit;
588     if (distance > max_distance) continue;
589     if (cur_ix_masked + best_len > ring_buffer_mask ||
590         best_len >= limit ||
591         data[cur_ix_masked + best_len] != source[offset + best_len]) {
592       continue;
593     }
594     {
595       const size_t len = FindMatchLengthWithLimit(&source[offset],
596                                                   &data[cur_ix_masked],
597                                                   limit);
598       if (len >= 4) {
599         score_t score = BackwardReferenceScore(len, distance);
600         if (best_score < score) {
601           best_score = score;
602           best_len = len;
603           out->len = best_len;
604           out->len_code_delta = 0;
605           out->distance = distance;
606           out->score = best_score;
607         }
608       }
609     }
610   }
611 }
612 
613 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
614        to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindAllCompoundDictionaryMatches(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,const size_t min_length,const size_t max_length,const size_t distance_offset,const size_t max_distance,BackwardMatch * matches,size_t match_limit)615 static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
616     const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
617     const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
618     const size_t max_length, const size_t distance_offset,
619     const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
620   const uint32_t source_size = self->source_size;
621   const uint32_t hash_bits = self->hash_bits;
622   const uint32_t bucket_bits = self->bucket_bits;
623   const uint32_t slot_bits = self->slot_bits;
624 
625   const uint32_t hash_shift = 64u - bucket_bits;
626   const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
627   const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
628 
629   const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
630   const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
631   const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
632   const uint8_t* source = NULL;
633 
634   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
635   size_t best_len = min_length;
636   const uint64_t h =
637       (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
638       kPreparedDictionaryHashMul64Long;
639   const uint32_t key = (uint32_t)(h >> hash_shift);
640   const uint32_t slot = key & slot_mask;
641   const uint32_t head = heads[key];
642   const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
643   uint32_t item = (head == 0xFFFF) ? 1 : 0;
644   size_t found = 0;
645 
646   const void* tail = (void*)&items[self->num_items];
647   if (self->magic == kPreparedDictionaryMagic) {
648     source = (const uint8_t*)tail;
649   } else {
650     /* kLeanPreparedDictionaryMagic */
651     source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
652   }
653 
654   while (item == 0) {
655     size_t offset;
656     size_t distance;
657     size_t limit;
658     size_t len;
659     item = *chain;
660     chain++;
661     offset = item & 0x7FFFFFFF;
662     item &= 0x80000000;
663     distance = distance_offset - offset;
664     limit = source_size - offset;
665     limit = (limit > max_length) ? max_length : limit;
666     if (distance > max_distance) continue;
667     if (cur_ix_masked + best_len > ring_buffer_mask ||
668         best_len >= limit ||
669         data[cur_ix_masked + best_len] != source[offset + best_len]) {
670       continue;
671     }
672     len = FindMatchLengthWithLimit(
673         &source[offset], &data[cur_ix_masked], limit);
674     if (len > best_len) {
675       best_len = len;
676       InitBackwardMatch(matches++, distance, len);
677       found++;
678       if (found == match_limit) break;
679     }
680   }
681   return found;
682 }
683 
LookupCompoundDictionaryMatch(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,HasherSearchResult * sr)684 static BROTLI_INLINE void LookupCompoundDictionaryMatch(
685     const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
686     const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
687     const size_t cur_ix, const size_t max_length,
688     const size_t max_ring_buffer_distance, const size_t max_distance,
689     HasherSearchResult* sr) {
690   size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
691   size_t d;
692   for (d = 0; d < addon->num_chunks; ++d) {
693     /* Only one prepared dictionary type is currently supported. */
694     FindCompoundDictionaryMatch(
695         (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
696         distance_cache, cur_ix, max_length,
697         base_offset - addon->chunk_offsets[d], max_distance, sr);
698   }
699 }
700 
LookupAllCompoundDictionaryMatches(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,size_t min_length,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,BackwardMatch * matches,size_t match_limit)701 static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
702     const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
703     const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
704     const size_t max_length, const size_t max_ring_buffer_distance,
705     const size_t max_distance, BackwardMatch* matches,
706     size_t match_limit) {
707   size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
708   size_t d;
709   size_t total_found = 0;
710   for (d = 0; d < addon->num_chunks; ++d) {
711     /* Only one prepared dictionary type is currently supported. */
712     total_found += FindAllCompoundDictionaryMatches(
713         (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
714         cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
715         max_distance, matches + total_found, match_limit - total_found);
716     if (total_found == match_limit) break;
717     if (total_found > 0) {
718       min_length = BackwardMatchLength(&matches[total_found - 1]);
719     }
720   }
721   return total_found;
722 }
723 
724 #if defined(__cplusplus) || defined(c_plusplus)
725 }  /* extern "C" */
726 #endif
727 
728 #endif  /* BROTLI_ENC_HASH_H_ */
729