• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* NOLINT(build/header_guard) */
2 /* Copyright 2010 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN */
9 
10 /* A (forgetful) hash table to the data seen by the compressor, to
11    help create backward references to previous data.
12 
13    This is a hash map of fixed size (bucket_size_) to a ring buffer of
14    fixed size (block_size_). The ring buffer contains the last block_size_
15    index positions of the given hash key in the compressed data. */
16 
17 #define HashLongestMatch HASHER()
18 
/* Number of input bytes the hash function consumes (one 64-bit load). */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
/* Bytes read beyond a stored position when hashing it (the 8-byte load
   in FN(HashBytes)); callers must keep this much lookahead available. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
21 
22 /* HashBytes is the function that chooses the bucket to place the address in. */
FN(HashBytes)23 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
24                                           uint64_t hash_mul) {
25   const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
26   /* The higher bits contain more mixture from the multiplication,
27      so we take our results from there. */
28   return (size_t)(h >> (64 - 15));
29 }
30 
/* Hasher state: a fixed-size table of buckets, each bucket a fixed-size
   ring buffer of recent positions sharing the same hash key. */
typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only block_size_ newest backward references are kept,
     and the older are forgotten. */
  size_t block_size_;
  /* Hash multiplier tuned to match length. */
  uint64_t hash_mul_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;

  /* log2(block_size_); shift applied to a key to locate its bucket. */
  int block_bits_;
  /* How many distance-cache entries FindLongestMatch tries first. */
  int num_last_distances_to_check_;

  /* Shortcuts. */
  HasherCommon* common_;

  /* --- Dynamic size members --- */

  /* Number of entries in a particular bucket. */
  uint16_t* num_;  /* uint16_t[bucket_size]; */

  /* Buckets containing block_size_ of backward references. */
  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
} HashLongestMatch;
56 
/* Wires the hasher state to preallocated memory (common->extra, sized by
   FN(HashMemAllocInBytes)) and derives the table geometry from params. */
static void FN(Initialize)(
    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderParams* params) {
  self->common_ = common;

  BROTLI_UNUSED(params);
  /* Shifting the multiplier left by 24 means only the low 40 bits of the
     loaded word (i.e. the first 5 bytes, little-endian) influence the
     bucket index. */
  self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
  BROTLI_DCHECK(common->params.bucket_bits == 15);
  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
  self->block_bits_ = common->params.block_bits;
  self->block_size_ = (size_t)1 << common->params.block_bits;
  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
  self->num_last_distances_to_check_ =
      common->params.num_last_distances_to_check;
  self->num_ = (uint16_t*)common->extra[0];
  self->buckets_ = (uint32_t*)common->extra[1];
}
74 
FN(Prepare)75 static void FN(Prepare)(
76     HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
77     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
78   uint16_t* BROTLI_RESTRICT num = self->num_;
79   /* Partial preparation is 100 times slower (per socket). */
80   size_t partial_prepare_threshold = self->bucket_size_ >> 6;
81   if (one_shot && input_size <= partial_prepare_threshold) {
82     size_t i;
83     for (i = 0; i < input_size; ++i) {
84       const size_t key = FN(HashBytes)(&data[i], self->hash_mul_);
85       num[key] = 0;
86     }
87   } else {
88     memset(num, 0, self->bucket_size_ * sizeof(num[0]));
89   }
90 }
91 
FN(HashMemAllocInBytes)92 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
93     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
94     size_t input_size, size_t* alloc_size) {
95   size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
96   size_t block_size = (size_t)1 << params->hasher.block_bits;
97   BROTLI_UNUSED(one_shot);
98   BROTLI_UNUSED(input_size);
99   alloc_size[0] = sizeof(uint16_t) * bucket_size;
100   alloc_size[1] = sizeof(uint32_t) * bucket_size * block_size;
101 }
102 
103 /* Look at 4 bytes at &data[ix & mask].
104    Compute a hash from these, and store the value of ix at that position. */
FN(Store)105 static BROTLI_INLINE void FN(Store)(
106     HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
107     const size_t mask, const size_t ix) {
108   uint16_t* BROTLI_RESTRICT num = self->num_;
109   uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
110   const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
111   const size_t minor_ix = num[key] & self->block_mask_;
112   const size_t offset = minor_ix + (key << self->block_bits_);
113   ++num[key];
114   buckets[offset] = (uint32_t)ix;
115 }
116 
FN(StoreRange)117 static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
118     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
119     const size_t ix_start, const size_t ix_end) {
120   size_t i;
121   for (i = ix_start; i < ix_end; ++i) {
122     FN(Store)(self, data, mask, i);
123   }
124 }
125 
FN(StitchToPreviousBlock)126 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
127     HashLongestMatch* BROTLI_RESTRICT self,
128     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
129     size_t ringbuffer_mask) {
130   if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
131     /* Prepare the hashes for three last bytes of the last write.
132        These could not be calculated before, since they require knowledge
133        of both the previous and the current block. */
134     FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
135     FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
136     FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
137   }
138 }
139 
/* Expands the caller's distance cache with the derived "near last distance"
   entries that FN(FindLongestMatch) will probe; must be called before it
   whenever the cache values change. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HashLongestMatch* BROTLI_RESTRICT self,
    int* BROTLI_RESTRICT distance_cache) {
  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
}
145 
/* Find a longest backward match of &data[cur_ix] up to the length of
   max_length and stores the position cur_ix in the hash table.

   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
             values; if this method is invoked repeatedly with the same distance
             cache values, it is enough to invoke FN(PrepareDistanceCache) once.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   |out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
    HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  uint16_t* BROTLI_RESTRICT num = self->num_;
  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  /* Don't accept a short copy from far away. */
  score_t min_score = out->score;
  score_t best_score = out->score;
  size_t best_len = out->len;
  size_t i;
  out->len = 0;
  out->len_code_delta = 0;
  /* Try last distance first. */
  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
    const size_t backward = (size_t)distance_cache[i];
    size_t prev_ix = (size_t)(cur_ix - backward);
    /* Unsigned wrap-around detects distances reaching before position 0. */
    if (prev_ix >= cur_ix) {
      continue;
    }
    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
      continue;
    }
    prev_ix &= ring_buffer_mask;

    /* Fast reject: a candidate can only beat best_len if the bytes at
       offset best_len already agree (and both reads stay in bounds). */
    if (cur_ix_masked + best_len > ring_buffer_mask ||
        prev_ix + best_len > ring_buffer_mask ||
        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
      continue;
    }
    {
      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                  &data[cur_ix_masked],
                                                  max_length);
      if (len >= 3 || (len == 2 && i < 2)) {
        /* Comparing for >= 2 does not change the semantics, but just saves for
           a few unnecessary binary logarithms in backward reference score,
           since we are not interested in such short matches. */
        score_t score = BackwardReferenceScoreUsingLastDistance(len);
        if (best_score < score) {
          /* Non-primary cache entries are slightly penalized; re-check the
             score after subtracting the penalty. */
          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
  }
  /* Second phase: scan this key's bucket, newest entry first. */
  {
    const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
    /* |down| is the oldest entry still present in the ring buffer; older
       ones have been overwritten. */
    const size_t down =
        (num[key] > self->block_size_) ?
        (num[key] - self->block_size_) : 0u;
    const uint32_t first4 = BrotliUnalignedRead32(data + cur_ix_masked);
    const size_t max_length_m4 = max_length - 4;
    i = num[key];
    for (; i > down;) {
      size_t prev_ix = bucket[--i & self->block_mask_];
      uint32_t current4;
      const size_t backward = cur_ix - prev_ix;
      /* Entries get older as i decreases, so the first too-far entry ends
         the scan. */
      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
        break;
      }
      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
      /* Cheap 4-byte prefix comparison before the full length scan. */
      current4 = BrotliUnalignedRead32(data + prev_ix);
      if (first4 != current4) continue;
      {
        /* First 4 bytes already matched; continue the scan from there. */
        const size_t len = FindMatchLengthWithLimit(&data[prev_ix + 4],
                                                    &data[cur_ix_masked + 4],
                                                    max_length_m4) + 4;
        const score_t score = BackwardReferenceScore(len, backward);
        if (best_score < score) {
          best_score = score;
          best_len = len;
          out->len = best_len;
          out->distance = backward;
          out->score = best_score;
        }
      }
    }
    /* Store cur_ix in the table (done after the scan so the position does
       not match itself). */
    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    ++num[key];
  }
  /* Nothing improved on the incoming score: try the static dictionary. */
  if (min_score == out->score) {
    SearchInStaticDictionary(dictionary,
        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
        max_distance, out, BROTLI_FALSE);
  }
}
261 
262 #undef HashLongestMatch
263