1 /* NOLINT(build/header_guard) */
2 /* Copyright 2018 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7
8 /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
9 /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
10 /* JUMP = skip bytes for speedup */
11
12 /* Rolling hash for long distance long string matches. Stores one position
13 per bucket, bucket key is computed over a long region. */
14
15 #define HashRolling HASHER()
16
17 static const uint32_t FN(kRollingHashMul32) = 69069;
18 static const uint32_t FN(kInvalidPos) = 0xffffffff;
19
20 /* This hasher uses a longer forward length, but returning a higher value here
21 will hurt compression by the main hasher when combined with a composite
22 hasher. The hasher tests for forward itself instead. */
FN(HashTypeLength)23 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)24 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
25
26 /* Computes a code from a single byte. A lookup table of 256 values could be
27 used, but simply adding 1 works about as good. */
FN(HashByte)28 static uint32_t FN(HashByte)(uint8_t byte) {
29 return (uint32_t)byte + 1u;
30 }
31
FN(HashRollingFunctionInitial)32 static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
33 uint32_t factor) {
34 return (uint32_t)(factor * state + FN(HashByte)(add));
35 }
36
FN(HashRollingFunction)37 static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
38 uint8_t rem, uint32_t factor,
39 uint32_t factor_remove) {
40 return (uint32_t)(factor * state +
41 FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
42 }
43
44 typedef struct HashRolling {
45 uint32_t state;
46 uint32_t* table;
47 size_t next_ix;
48
49 uint32_t chunk_len;
50 uint32_t factor;
51 uint32_t factor_remove;
52 } HashRolling;
53
FN(Self)54 static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
55 return (HashRolling*)&(GetHasherCommon(handle)[1]);
56 }
57
FN(Initialize)58 static void FN(Initialize)(
59 HasherHandle handle, const BrotliEncoderParams* params) {
60 HashRolling* self = FN(Self)(handle);
61 size_t i;
62 self->state = 0;
63 self->next_ix = 0;
64
65 self->factor = FN(kRollingHashMul32);
66
67 /* Compute the factor of the oldest byte to remove: factor**steps modulo
68 0xffffffff (the multiplications rely on 32-bit overflow) */
69 self->factor_remove = 1;
70 for (i = 0; i < CHUNKLEN; i += JUMP) {
71 self->factor_remove *= self->factor;
72 }
73
74 self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
75 for (i = 0; i < NUMBUCKETS; i++) {
76 self->table[i] = FN(kInvalidPos);
77 }
78
79 BROTLI_UNUSED(params);
80 }
81
FN(Prepare)82 static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
83 size_t input_size, const uint8_t* data) {
84 HashRolling* self = FN(Self)(handle);
85 size_t i;
86 /* Too small size, cannot use this hasher. */
87 if (input_size < CHUNKLEN) return;
88 self->state = 0;
89 for (i = 0; i < CHUNKLEN; i += JUMP) {
90 self->state = FN(HashRollingFunctionInitial)(
91 self->state, data[i], self->factor);
92 }
93 BROTLI_UNUSED(one_shot);
94 }
95
FN(HashMemAllocInBytes)96 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
97 const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
98 size_t input_size) {
99 return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
100 BROTLI_UNUSED(params);
101 BROTLI_UNUSED(one_shot);
102 BROTLI_UNUSED(input_size);
103 }
104
FN(Store)105 static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
106 const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
107 BROTLI_UNUSED(handle);
108 BROTLI_UNUSED(data);
109 BROTLI_UNUSED(mask);
110 BROTLI_UNUSED(ix);
111 }
112
FN(StoreRange)113 static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
114 const uint8_t* data, const size_t mask, const size_t ix_start,
115 const size_t ix_end) {
116 BROTLI_UNUSED(handle);
117 BROTLI_UNUSED(data);
118 BROTLI_UNUSED(mask);
119 BROTLI_UNUSED(ix_start);
120 BROTLI_UNUSED(ix_end);
121 }
122
FN(StitchToPreviousBlock)123 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
124 size_t num_bytes, size_t position, const uint8_t* ringbuffer,
125 size_t ring_buffer_mask) {
126 /* In this case we must re-initialize the hasher from scratch from the
127 current position. */
128 HashRolling* self = FN(Self)(handle);
129 size_t position_masked;
130 size_t available = num_bytes;
131 if ((position & (JUMP - 1)) != 0) {
132 size_t diff = JUMP - (position & (JUMP - 1));
133 available = (diff > available) ? 0 : (available - diff);
134 position += diff;
135 }
136 position_masked = position & ring_buffer_mask;
137 /* wrapping around ringbuffer not handled. */
138 if (available > ring_buffer_mask - position_masked) {
139 available = ring_buffer_mask - position_masked;
140 }
141
142 FN(Prepare)(handle, BROTLI_FALSE, available,
143 ringbuffer + (position & ring_buffer_mask));
144 self->next_ix = position;
145 BROTLI_UNUSED(num_bytes);
146 }
147
FN(PrepareDistanceCache)148 static BROTLI_INLINE void FN(PrepareDistanceCache)(
149 HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
150 BROTLI_UNUSED(handle);
151 BROTLI_UNUSED(distance_cache);
152 }
153
FN(FindLongestMatch)154 static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
155 const BrotliEncoderDictionary* dictionary,
156 const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
157 const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
158 const size_t max_length, const size_t max_backward,
159 const size_t gap, const size_t max_distance,
160 HasherSearchResult* BROTLI_RESTRICT out) {
161 HashRolling* self = FN(Self)(handle);
162 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
163 size_t pos = self->next_ix;
164
165 if ((cur_ix & (JUMP - 1)) != 0) return;
166
167 /* Not enough lookahead */
168 if (max_length < CHUNKLEN) return;
169
170 for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
171 uint32_t code = self->state & MASK;
172
173 uint8_t rem = data[pos & ring_buffer_mask];
174 uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
175 size_t found_ix = FN(kInvalidPos);
176
177 self->state = FN(HashRollingFunction)(
178 self->state, add, rem, self->factor, self->factor_remove);
179
180 if (code < NUMBUCKETS) {
181 found_ix = self->table[code];
182 self->table[code] = (uint32_t)pos;
183 if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
184 /* The cast to 32-bit makes backward distances up to 4GB work even
185 if cur_ix is above 4GB, despite using 32-bit values in the table. */
186 size_t backward = (uint32_t)(cur_ix - found_ix);
187 if (backward <= max_backward) {
188 const size_t found_ix_masked = found_ix & ring_buffer_mask;
189 const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
190 &data[cur_ix_masked],
191 max_length);
192 if (len >= 4 && len > out->len) {
193 score_t score = BackwardReferenceScore(len, backward);
194 if (score > out->score) {
195 out->len = len;
196 out->distance = backward;
197 out->score = score;
198 out->len_code_delta = 0;
199 }
200 }
201 }
202 }
203 }
204 }
205
206 self->next_ix = cur_ix + JUMP;
207
208 /* NOTE: this hasher does not search in the dictionary. It is used as
209 backup-hasher, the main hasher already searches in it. */
210 BROTLI_UNUSED(dictionary);
211 BROTLI_UNUSED(distance_cache);
212 BROTLI_UNUSED(gap);
213 BROTLI_UNUSED(max_distance);
214 }
215
216 #undef HashRolling
217