/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* template parameters: FN */

/* A (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.

   This is a hash map of fixed size (bucket_size_) to a ring buffer of
   fixed size (block_size_). The ring buffer contains the last block_size_
   index positions of the given hash key in the compressed data. */
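
/* Illustrative layout (the names refer to HashLongestMatch members defined
   below; the actual sizes are chosen at run time from the hasher parameters):

     num_[key]                                    - number of positions ever
                                                    stored for |key|
     buckets_[(key << block_bits_) + (n & block_mask_)]
                                                  - the n-th stored position

   Each bucket therefore acts as a ring buffer holding only the most recent
   block_size_ positions whose data hashed to |key|. */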

#define HashLongestMatch HASHER()

static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }

/* HashBytes is the function that chooses the bucket to place the address in. */
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
                                            const uint64_t mask,
                                            const int shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
  /* The higher bits contain more mixture from the multiplication,
     so we take our results from there. */
  return (uint32_t)(h >> shift);
}
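
/* For example (the value 15 below is only illustrative, not fixed by this
   file): with common->params.bucket_bits == 15, FN(Initialize)() sets
   hash_shift_ == 64 - 15 == 49, so the value returned here is the top 15 bits
   of the 64-bit product and always indexes within the bucket_size_ buckets. */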

typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only block_size_ newest backward references are kept,
     and the older are forgotten. */
  size_t block_size_;
  /* Right-shift for computing hash bucket index from hash value. */
  int hash_shift_;
  /* Mask for selecting the next 4-8 bytes of input */
  uint64_t hash_mask_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;

  int block_bits_;
  int num_last_distances_to_check_;

  /* Shortcuts. */
  HasherCommon* common_;

  /* --- Dynamic size members --- */

  /* Number of entries in a particular bucket. */
  uint16_t* num_;  /* uint16_t[bucket_size]; */

  /* Buckets containing block_size_ of backward references. */
  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
} HashLongestMatch;

static void FN(Initialize)(
    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderParams* params) {
  self->common_ = common;

  BROTLI_UNUSED(params);
  self->hash_shift_ = 64 - common->params.bucket_bits;
  self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
  self->block_bits_ = common->params.block_bits;
  self->block_size_ = (size_t)1 << common->params.block_bits;
  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
  self->num_last_distances_to_check_ =
      common->params.num_last_distances_to_check;
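  /* The hasher's single |extra| allocation (see FN(HashMemAllocInBytes)) is
     carved into the two dynamic arrays: bucket_size_ uint16_t counters first,
     followed by bucket_size_ * block_size_ uint32_t positions. */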
  self->num_ = (uint16_t*)common->extra;
  self->buckets_ = (uint32_t*)&self->num_[self->bucket_size_];
}

static void FN(Prepare)(
    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
  uint16_t* BROTLI_RESTRICT num = self->num_;
  /* Partial preparation is 100 times slower (per socket). */
  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
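  /* For tiny one-shot inputs it is cheaper to reset only the buckets that the
     input can actually touch than to clear the whole num_ array. */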
  if (one_shot && input_size <= partial_prepare_threshold) {
    size_t i;
    for (i = 0; i < input_size; ++i) {
      const uint32_t key = FN(HashBytes)(&data[i], self->hash_mask_,
                                         self->hash_shift_);
      num[key] = 0;
    }
  } else {
    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
  }
}

static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
  size_t block_size = (size_t)1 << params->hasher.block_bits;
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  return sizeof(uint16_t) * bucket_size +
         sizeof(uint32_t) * bucket_size * block_size;
}
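
/* For example (parameter values here are only illustrative): with
   bucket_bits == 17 and block_bits == 6 this is
   (1 << 17) * sizeof(uint16_t) + (1 << 17) * (1 << 6) * sizeof(uint32_t)
   = 256 KiB + 32 MiB. */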

/* Look at the next 4-8 bytes (as selected by hash_mask_) at &data[ix & mask].
   Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(
    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
    const size_t mask, const size_t ix) {
  uint16_t* BROTLI_RESTRICT num = self->num_;
  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
                                     self->hash_shift_);
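  /* num[key] counts every position ever stored for this bucket; only its low
     block_bits_ bits select the slot, so once the bucket is full the oldest
     entries are overwritten in ring-buffer fashion. */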
  const size_t minor_ix = num[key] & self->block_mask_;
  const size_t offset = minor_ix + (key << self->block_bits_);
  ++num[key];
  buckets[offset] = (uint32_t)ix;
}

static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    const size_t ix_start, const size_t ix_end) {
  size_t i;
  for (i = ix_start; i < ix_end; ++i) {
    FN(Store)(self, data, mask, i);
  }
}

static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    HashLongestMatch* BROTLI_RESTRICT self,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ringbuffer_mask) {
  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    /* Prepare the hashes for the last three bytes of the last write.
       These could not be calculated before, since they require knowledge
       of both the previous and the current block. */
    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
  }
}

static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HashLongestMatch* BROTLI_RESTRICT self,
    int* BROTLI_RESTRICT distance_cache) {
  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
}

/* Finds the longest backward match of &data[cur_ix] up to the length of
   max_length and stores the position cur_ix in the hash table.

   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
     values; if this method is invoked repeatedly with the same distance
     cache values, it is enough to invoke FN(PrepareDistanceCache) once.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   |out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
    HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  uint16_t* BROTLI_RESTRICT num = self->num_;
  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  /* Don't accept a short copy from far away. */
  score_t min_score = out->score;
  score_t best_score = out->score;
  size_t best_len = out->len;
  size_t i;
  out->len = 0;
  out->len_code_delta = 0;
  /* Try last distance first. */
  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
    const size_t backward = (size_t)distance_cache[i];
    size_t prev_ix = (size_t)(cur_ix - backward);
    if (prev_ix >= cur_ix) {
      continue;
    }
    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
      continue;
    }
    prev_ix &= ring_buffer_mask;

    if (cur_ix_masked + best_len > ring_buffer_mask ||
        prev_ix + best_len > ring_buffer_mask ||
        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
      continue;
    }
    {
      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                  &data[cur_ix_masked],
                                                  max_length);
      if (len >= 3 || (len == 2 && i < 2)) {
        /* Comparing for >= 2 does not change the semantics, but just saves a
           few unnecessary binary logarithms in the backward reference score,
           since we are not interested in such short matches. */
        score_t score = BackwardReferenceScoreUsingLastDistance(len);
        if (best_score < score) {
          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
  }
  {
    const uint32_t key = FN(HashBytes)(
        &data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
    const size_t down =
        (num[key] > self->block_size_) ?
        (num[key] - self->block_size_) : 0u;
    for (i = num[key]; i > down;) {
      size_t prev_ix = bucket[--i & self->block_mask_];
      const size_t backward = cur_ix - prev_ix;
      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
        break;
      }
      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
      {
        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                    &data[cur_ix_masked],
                                                    max_length);
        if (len >= 4) {
          /* Comparing for >= 3 does not change the semantics, but just saves a
             few unnecessary binary logarithms in the backward reference score,
             since we are not interested in such short matches. */
          score_t score = BackwardReferenceScore(len, backward);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    ++num[key];
  }
  if (min_score == out->score) {
    SearchInStaticDictionary(dictionary,
        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
        max_distance, out, BROTLI_FALSE);
  }
}
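
/* A minimal, compiled-out sketch of how a caller might drive this hasher.
   ExampleSearch and its parameters are illustrative only (not part of the
   encoder); the real search loop lives elsewhere in the encoder and is
   considerably more involved (lazy matching, cost model, command emission). */
#if 0
static void FN(ExampleSearch)(HashLongestMatch* self,
                              const BrotliEncoderDictionary* dictionary,
                              const uint8_t* ringbuffer,
                              size_t ringbuffer_mask,
                              size_t position, size_t end_position,
                              size_t max_backward,
                              int* distance_cache) {
  /* Needs to be redone only when the distance cache changes. */
  FN(PrepareDistanceCache)(self, distance_cache);
  while (position + FN(HashTypeLength)() <= end_position) {
    HasherSearchResult result;
    result.len = 0;
    result.score = 0;  /* The encoder starts from a minimal score instead. */
    /* FindLongestMatch also stores |position| into the table. */
    FN(FindLongestMatch)(self, dictionary, ringbuffer, ringbuffer_mask,
                         distance_cache, position, end_position - position,
                         max_backward, /* dictionary_distance */ 0,
                         /* max_distance */ max_backward, &result);
    if (result.len != 0) {
      /* A copy command of length result.len at distance result.distance could
         be emitted here, and the skipped positions stored via
         FN(StoreRange)(). */
    }
    ++position;
  }
}
#endif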

#undef HashLongestMatch