• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* NOLINT(build/header_guard) */
2 /* Copyright 2016 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
9                         MAX_TREE_SEARCH_DEPTH */
10 
11 /* A (forgetful) hash table where each hash bucket contains a binary tree of
12    sequences whose first 4 bytes share the same hash code.
13    Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
14    position in the input data. The binary tree is sorted by the lexicographic
15    order of the sequences, and it is also a max-heap with respect to the
16    starting positions. */
17 
18 #define HashToBinaryTree HASHER()
19 
20 #define BUCKET_SIZE (1 << BUCKET_BITS)
21 
FN(HashTypeLength)22 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)23 static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
24   return MAX_TREE_COMP_LENGTH;
25 }
26 
FN(HashBytes)27 static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
28   uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
29   /* The higher bits contain more mixture from the multiplication,
30      so we take our results from there. */
31   return h >> (32 - BUCKET_BITS);
32 }
33 
34 typedef struct HashToBinaryTree {
35   /* The window size minus 1 */
36   size_t window_mask_;
37 
38   /* Hash table that maps the 4-byte hashes of the sequence to the last
39      position where this hash was found, which is the root of the binary
40      tree of sequences that share this hash bucket. */
41   uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
42 
43   /* A position used to mark a non-existent sequence, i.e. a tree is empty if
44      its root is at invalid_pos_ and a node is a leaf if both its children
45      are at invalid_pos_. */
46   uint32_t invalid_pos_;
47 
48   /* --- Dynamic size members --- */
49 
50   /* The union of the binary trees of each hash bucket. The root of the tree
51      corresponding to a hash is a sequence starting at buckets_[hash] and
52      the left and right children of a sequence starting at pos are
53      forest_[2 * pos] and forest_[2 * pos + 1]. */
54   uint32_t* forest_;  /* uint32_t[2 * num_nodes] */
55 } HashToBinaryTree;
56 
FN(Initialize)57 static void FN(Initialize)(
58     HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
59     const BrotliEncoderParams* params) {
60   self->buckets_ = (uint32_t*)common->extra[0];
61   self->forest_ = (uint32_t*)common->extra[1];
62 
63   self->window_mask_ = (1u << params->lgwin) - 1u;
64   self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
65 }
66 
FN(Prepare)67 static void FN(Prepare)
68     (HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
69     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
70   uint32_t invalid_pos = self->invalid_pos_;
71   uint32_t i;
72   uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
73   BROTLI_UNUSED(data);
74   BROTLI_UNUSED(one_shot);
75   BROTLI_UNUSED(input_size);
76   for (i = 0; i < BUCKET_SIZE; i++) {
77     buckets[i] = invalid_pos;
78   }
79 }
80 
FN(HashMemAllocInBytes)81 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
82     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
83     size_t input_size, size_t* alloc_size) {
84   size_t num_nodes = (size_t)1 << params->lgwin;
85   if (one_shot && input_size < num_nodes) {
86     num_nodes = input_size;
87   }
88   alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE;
89   alloc_size[1] = 2 * sizeof(uint32_t) * num_nodes;
90 }
91 
FN(LeftChildIndex)92 static BROTLI_INLINE size_t FN(LeftChildIndex)(
93     HashToBinaryTree* BROTLI_RESTRICT self,
94     const size_t pos) {
95   return 2 * (pos & self->window_mask_);
96 }
97 
FN(RightChildIndex)98 static BROTLI_INLINE size_t FN(RightChildIndex)(
99     HashToBinaryTree* BROTLI_RESTRICT self,
100     const size_t pos) {
101   return 2 * (pos & self->window_mask_) + 1;
102 }
103 
104 /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
105    hash bucket's binary tree is searched for matches and is re-rooted at the
106    current position.
107 
108    If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
109    current position is searched for matches, but the state of the hash table
110    is not changed, since we can not know the final sorting order of the
111    current (incomplete) sequence.
112 
113    This function must be called with increasing cur_ix positions. */
FN(StoreAndFindMatches)114 static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
115     HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
116     const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
117     const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
118     BackwardMatch* BROTLI_RESTRICT matches) {
119   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
120   const size_t max_comp_len =
121       BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
122   const BROTLI_BOOL should_reroot_tree =
123       TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
124   const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
125   uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
126   uint32_t* BROTLI_RESTRICT forest = self->forest_;
127   size_t prev_ix = buckets[key];
128   /* The forest index of the rightmost node of the left subtree of the new
129      root, updated as we traverse and re-root the tree of the hash bucket. */
130   size_t node_left = FN(LeftChildIndex)(self, cur_ix);
131   /* The forest index of the leftmost node of the right subtree of the new
132      root, updated as we traverse and re-root the tree of the hash bucket. */
133   size_t node_right = FN(RightChildIndex)(self, cur_ix);
134   /* The match length of the rightmost node of the left subtree of the new
135      root, updated as we traverse and re-root the tree of the hash bucket. */
136   size_t best_len_left = 0;
137   /* The match length of the leftmost node of the right subtree of the new
138      root, updated as we traverse and re-root the tree of the hash bucket. */
139   size_t best_len_right = 0;
140   size_t depth_remaining;
141   if (should_reroot_tree) {
142     buckets[key] = (uint32_t)cur_ix;
143   }
144   for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
145     const size_t backward = cur_ix - prev_ix;
146     const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
147     if (backward == 0 || backward > max_backward || depth_remaining == 0) {
148       if (should_reroot_tree) {
149         forest[node_left] = self->invalid_pos_;
150         forest[node_right] = self->invalid_pos_;
151       }
152       break;
153     }
154     {
155       const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
156       size_t len;
157       BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
158       len = cur_len +
159           FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
160                                    &data[prev_ix_masked + cur_len],
161                                    max_length - cur_len);
162       BROTLI_DCHECK(
163           0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
164       if (matches && len > *best_len) {
165         *best_len = len;
166         InitBackwardMatch(matches++, backward, len);
167       }
168       if (len >= max_comp_len) {
169         if (should_reroot_tree) {
170           forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
171           forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
172         }
173         break;
174       }
175       if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
176         best_len_left = len;
177         if (should_reroot_tree) {
178           forest[node_left] = (uint32_t)prev_ix;
179         }
180         node_left = FN(RightChildIndex)(self, prev_ix);
181         prev_ix = forest[node_left];
182       } else {
183         best_len_right = len;
184         if (should_reroot_tree) {
185           forest[node_right] = (uint32_t)prev_ix;
186         }
187         node_right = FN(LeftChildIndex)(self, prev_ix);
188         prev_ix = forest[node_right];
189       }
190     }
191   }
192   return matches;
193 }
194 
195 /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
196    length of max_length and stores the position cur_ix in the hash table.
197 
198    Sets *num_matches to the number of matches found, and stores the found
199    matches in matches[0] to matches[*num_matches - 1]. The matches will be
200    sorted by strictly increasing length and (non-strictly) increasing
201    distance. */
FN(FindAllMatches)202 static BROTLI_INLINE size_t FN(FindAllMatches)(
203     HashToBinaryTree* BROTLI_RESTRICT self,
204     const BrotliEncoderDictionary* dictionary,
205     const uint8_t* BROTLI_RESTRICT data,
206     const size_t ring_buffer_mask, const size_t cur_ix,
207     const size_t max_length, const size_t max_backward,
208     const size_t dictionary_distance, const BrotliEncoderParams* params,
209     BackwardMatch* matches) {
210   BackwardMatch* const orig_matches = matches;
211   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
212   size_t best_len = 1;
213   const size_t short_match_max_backward =
214       params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
215   size_t stop = cur_ix - short_match_max_backward;
216   uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
217   size_t i;
218   if (cur_ix < short_match_max_backward) { stop = 0; }
219   for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
220     size_t prev_ix = i;
221     const size_t backward = cur_ix - prev_ix;
222     if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
223       break;
224     }
225     prev_ix &= ring_buffer_mask;
226     if (data[cur_ix_masked] != data[prev_ix] ||
227         data[cur_ix_masked + 1] != data[prev_ix + 1]) {
228       continue;
229     }
230     {
231       const size_t len =
232           FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
233                                    max_length);
234       if (len > best_len) {
235         best_len = len;
236         InitBackwardMatch(matches++, backward, len);
237       }
238     }
239   }
240   if (best_len < max_length) {
241     matches = FN(StoreAndFindMatches)(self, data, cur_ix,
242         ring_buffer_mask, max_length, max_backward, &best_len, matches);
243   }
244   for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
245     dict_matches[i] = kInvalidMatch;
246   }
247   {
248     size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
249     if (BrotliFindAllStaticDictionaryMatches(dictionary,
250         &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
251       size_t maxlen = BROTLI_MIN(
252           size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
253       size_t l;
254       for (l = minlen; l <= maxlen; ++l) {
255         uint32_t dict_id = dict_matches[l];
256         if (dict_id < kInvalidMatch) {
257           size_t distance = dictionary_distance + (dict_id >> 5) + 1;
258           if (distance <= params->dist.max_distance) {
259             InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
260           }
261         }
262       }
263     }
264   }
265   return (size_t)(matches - orig_matches);
266 }
267 
268 /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
269    current sequence, without returning any matches.
270    REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
FN(Store)271 static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
272     const uint8_t* BROTLI_RESTRICT data,
273     const size_t mask, const size_t ix) {
274   /* Maximum distance is window size - 16, see section 9.1. of the spec. */
275   const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
276   FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
277       max_backward, NULL, NULL);
278 }
279 
FN(StoreRange)280 static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
281     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
282     const size_t ix_start, const size_t ix_end) {
283   size_t i = ix_start;
284   size_t j = ix_start;
285   if (ix_start + 63 <= ix_end) {
286     i = ix_end - 63;
287   }
288   if (ix_start + 512 <= i) {
289     for (; j < i; j += 8) {
290       FN(Store)(self, data, mask, j);
291     }
292   }
293   for (; i < ix_end; ++i) {
294     FN(Store)(self, data, mask, i);
295   }
296 }
297 
FN(StitchToPreviousBlock)298 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
299     HashToBinaryTree* BROTLI_RESTRICT self,
300     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
301     size_t ringbuffer_mask) {
302   if (num_bytes >= FN(HashTypeLength)() - 1 &&
303       position >= MAX_TREE_COMP_LENGTH) {
304     /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
305        These could not be calculated before, since they require knowledge
306        of both the previous and the current block. */
307     const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
308     const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
309     size_t i;
310     for (i = i_start; i < i_end; ++i) {
311       /* Maximum distance is window size - 16, see section 9.1. of the spec.
312          Furthermore, we have to make sure that we don't look further back
313          from the start of the next block than the window size, otherwise we
314          could access already overwritten areas of the ring-buffer. */
315       const size_t max_backward =
316           self->window_mask_ - BROTLI_MAX(size_t,
317                                           BROTLI_WINDOW_GAP - 1,
318                                           position - i);
319       /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
320          end of the current block and that we have at least
321          MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
322       FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
323           MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
324     }
325   }
326 }
327 
328 #undef BUCKET_SIZE
329 
330 #undef HashToBinaryTree
331