• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* NOLINT(build/header_guard) */
2 /* Copyright 2016 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
9                         NUM_LAST_DISTANCES_TO_CHECK */
10 
11 /* A (forgetful) hash table to the data seen by the compressor, to
12    help create backward references to previous data.
13 
14    Hashes are stored in chains which are bucketed to groups. Group of chains
15    share a storage "bank". When more than "bank size" chain nodes are added,
16    oldest nodes are replaced; this way several chains may share a tail. */
17 
18 #define HashForgetfulChain HASHER()
19 
20 #define BANK_SIZE (1 << BANK_BITS)
21 
22 /* Number of hash buckets. */
23 #define BUCKET_SIZE (1 << BUCKET_BITS)
24 
25 #define CAPPED_CHAINS 0
26 
FN(HashTypeLength)27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
29 
30 /* HashBytes is the function that chooses the bucket to place the address in.*/
FN(HashBytes)31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
32   const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33   /* The higher bits contain more mixture from the multiplication,
34      so we take our results from there. */
35   return h >> (32 - BUCKET_BITS);
36 }
37 
38 typedef struct FN(Slot) {
39   uint16_t delta;
40   uint16_t next;
41 } FN(Slot);
42 
43 typedef struct FN(Bank) {
44   FN(Slot) slots[BANK_SIZE];
45 } FN(Bank);
46 
47 typedef struct HashForgetfulChain {
48   uint16_t free_slot_idx[NUM_BANKS];  /* Up to 1KiB. Move to dynamic? */
49   size_t max_hops;
50 
51   /* Shortcuts. */
52   void* extra[2];
53   HasherCommon* common;
54 
55   /* --- Dynamic size members --- */
56 
57   /* uint32_t addr[BUCKET_SIZE]; */
58 
59   /* uint16_t head[BUCKET_SIZE]; */
60 
61   /* Truncated hash used for quick rejection of "distance cache" candidates. */
62   /* uint8_t tiny_hash[65536];*/
63 
64   /* FN(Bank) banks[NUM_BANKS]; */
65 } HashForgetfulChain;
66 
FN(Addr)67 static uint32_t* FN(Addr)(void* extra) {
68   return (uint32_t*)extra;
69 }
70 
FN(Head)71 static uint16_t* FN(Head)(void* extra) {
72   return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
73 }
74 
FN(TinyHash)75 static uint8_t* FN(TinyHash)(void* extra) {
76   return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
77 }
78 
FN(Bank)79 static FN(Bank)* FN(Banks)(void* extra) {
80   return (FN(Bank)*)(extra);
81 }
82 
FN(Initialize)83 static void FN(Initialize)(
84     HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
85     const BrotliEncoderParams* params) {
86   self->common = common;
87   self->extra[0] = common->extra[0];
88   self->extra[1] = common->extra[1];
89 
90   self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
91 }
92 
FN(Prepare)93 static void FN(Prepare)(
94     HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
95     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
96   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
97   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
98   uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
99   /* Partial preparation is 100 times slower (per socket). */
100   size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
101   if (one_shot && input_size <= partial_prepare_threshold) {
102     size_t i;
103     for (i = 0; i < input_size; ++i) {
104       size_t bucket = FN(HashBytes)(&data[i]);
105       /* See InitEmpty comment. */
106       addr[bucket] = 0xCCCCCCCC;
107       head[bucket] = 0xCCCC;
108     }
109   } else {
110     /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
111        processed by hasher never reaches 3GB + 64M; this makes all new chains
112        to be terminated after the first node. */
113     memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
114     memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
115   }
116   memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
117   memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
118 }
119 
FN(HashMemAllocInBytes)120 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
121     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
122     size_t input_size, size_t* alloc_size) {
123   BROTLI_UNUSED(params);
124   BROTLI_UNUSED(one_shot);
125   BROTLI_UNUSED(input_size);
126   alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +
127                   sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
128   alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;
129 }
130 
131 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
132    node to corresponding chain; also update tiny_hash for current position. */
FN(Store)133 static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
134     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
135   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
136   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
137   uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
138   FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
139   const size_t key = FN(HashBytes)(&data[ix & mask]);
140   const size_t bank = key & (NUM_BANKS - 1);
141   const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
142   size_t delta = ix - addr[key];
143   tiny_hash[(uint16_t)ix] = (uint8_t)key;
144   if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
145   banks[bank].slots[idx].delta = (uint16_t)delta;
146   banks[bank].slots[idx].next = head[key];
147   addr[key] = (uint32_t)ix;
148   head[key] = (uint16_t)idx;
149 }
150 
FN(StoreRange)151 static BROTLI_INLINE void FN(StoreRange)(
152     HashForgetfulChain* BROTLI_RESTRICT self,
153     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
154     const size_t ix_start, const size_t ix_end) {
155   size_t i;
156   for (i = ix_start; i < ix_end; ++i) {
157     FN(Store)(self, data, mask, i);
158   }
159 }
160 
FN(StitchToPreviousBlock)161 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
162     HashForgetfulChain* BROTLI_RESTRICT self,
163     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
164     size_t ring_buffer_mask) {
165   if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
166     /* Prepare the hashes for three last bytes of the last write.
167        These could not be calculated before, since they require knowledge
168        of both the previous and the current block. */
169     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
170     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
171     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
172   }
173 }
174 
FN(PrepareDistanceCache)175 static BROTLI_INLINE void FN(PrepareDistanceCache)(
176     HashForgetfulChain* BROTLI_RESTRICT self,
177     int* BROTLI_RESTRICT distance_cache) {
178   BROTLI_UNUSED(self);
179   PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
180 }
181 
182 /* Find a longest backward match of &data[cur_ix] up to the length of
183    max_length and stores the position cur_ix in the hash table.
184 
185    REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
186              values; if this method is invoked repeatedly with the same distance
187              cache values, it is enough to invoke FN(PrepareDistanceCache) once.
188 
189    Does not look for matches longer than max_length.
190    Does not look for matches further away than max_backward.
191    Writes the best match into |out|.
192    |out|->score is updated only if a better match is found. */
FN(FindLongestMatch)193 static BROTLI_INLINE void FN(FindLongestMatch)(
194     HashForgetfulChain* BROTLI_RESTRICT self,
195     const BrotliEncoderDictionary* dictionary,
196     const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
197     const int* BROTLI_RESTRICT distance_cache,
198     const size_t cur_ix, const size_t max_length, const size_t max_backward,
199     const size_t dictionary_distance, const size_t max_distance,
200     HasherSearchResult* BROTLI_RESTRICT out) {
201   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
202   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
203   uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
204   FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
205   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
206   /* Don't accept a short copy from far away. */
207   score_t min_score = out->score;
208   score_t best_score = out->score;
209   size_t best_len = out->len;
210   size_t i;
211   const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
212   const uint8_t tiny_hash = (uint8_t)(key);
213   out->len = 0;
214   out->len_code_delta = 0;
215   /* Try last distance first. */
216   for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
217     const size_t backward = (size_t)distance_cache[i];
218     size_t prev_ix = (cur_ix - backward);
219     /* For distance code 0 we want to consider 2-byte matches. */
220     if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
221     if (prev_ix >= cur_ix || backward > max_backward) {
222       continue;
223     }
224     prev_ix &= ring_buffer_mask;
225     {
226       const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
227                                                   &data[cur_ix_masked],
228                                                   max_length);
229       if (len >= 2) {
230         score_t score = BackwardReferenceScoreUsingLastDistance(len);
231         if (best_score < score) {
232           if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
233           if (best_score < score) {
234             best_score = score;
235             best_len = len;
236             out->len = best_len;
237             out->distance = backward;
238             out->score = best_score;
239           }
240         }
241       }
242     }
243   }
244   {
245     const size_t bank = key & (NUM_BANKS - 1);
246     size_t backward = 0;
247     size_t hops = self->max_hops;
248     size_t delta = cur_ix - addr[key];
249     size_t slot = head[key];
250     while (hops--) {
251       size_t prev_ix;
252       size_t last = slot;
253       backward += delta;
254       if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
255       prev_ix = (cur_ix - backward) & ring_buffer_mask;
256       slot = banks[bank].slots[last].next;
257       delta = banks[bank].slots[last].delta;
258       if (cur_ix_masked + best_len > ring_buffer_mask ||
259           prev_ix + best_len > ring_buffer_mask ||
260           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
261         continue;
262       }
263       {
264         const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
265                                                     &data[cur_ix_masked],
266                                                     max_length);
267         if (len >= 4) {
268           /* Comparing for >= 3 does not change the semantics, but just saves
269              for a few unnecessary binary logarithms in backward reference
270              score, since we are not interested in such short matches. */
271           score_t score = BackwardReferenceScore(len, backward);
272           if (best_score < score) {
273             best_score = score;
274             best_len = len;
275             out->len = best_len;
276             out->distance = backward;
277             out->score = best_score;
278           }
279         }
280       }
281     }
282     FN(Store)(self, data, ring_buffer_mask, cur_ix);
283   }
284   if (out->score == min_score) {
285     SearchInStaticDictionary(dictionary,
286         self->common, &data[cur_ix_masked], max_length, dictionary_distance,
287         max_distance, out, BROTLI_FALSE);
288   }
289 }
290 
291 #undef BANK_SIZE
292 #undef BUCKET_SIZE
293 #undef CAPPED_CHAINS
294 
295 #undef HashForgetfulChain
296