1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "net/extras/preload_data/decoder.h"
11 #include "base/check_op.h"
12 #include "base/notreached.h"
13
14 namespace net::extras {
15
BitReader(const uint8_t * bytes,size_t num_bits)16 PreloadDecoder::BitReader::BitReader(const uint8_t* bytes, size_t num_bits)
17 : bytes_(bytes), num_bits_(num_bits), num_bytes_((num_bits + 7) / 8) {}
18
19 // Next sets |*out| to the next bit from the input. It returns false if no
20 // more bits are available or true otherwise.
Next(bool * out)21 bool PreloadDecoder::BitReader::Next(bool* out) {
22 if (num_bits_used_ == 8) {
23 if (current_byte_index_ >= num_bytes_) {
24 return false;
25 }
26 current_byte_ = bytes_[current_byte_index_++];
27 num_bits_used_ = 0;
28 }
29
30 *out = 1 & (current_byte_ >> (7 - num_bits_used_));
31 num_bits_used_++;
32 return true;
33 }
34
35 // Read sets the |num_bits| least-significant bits of |*out| to the value of
36 // the next |num_bits| bits from the input. It returns false if there are
37 // insufficient bits in the input or true otherwise.
Read(unsigned num_bits,uint32_t * out)38 bool PreloadDecoder::BitReader::Read(unsigned num_bits, uint32_t* out) {
39 DCHECK_LE(num_bits, 32u);
40
41 uint32_t ret = 0;
42 for (unsigned i = 0; i < num_bits; ++i) {
43 bool bit;
44 if (!Next(&bit)) {
45 return false;
46 }
47 ret |= static_cast<uint32_t>(bit) << (num_bits - 1 - i);
48 }
49
50 *out = ret;
51 return true;
52 }
53
54 namespace {
55
56 // Reads one bit from |reader|, shifts |*bits| left by 1, and adds the read bit
57 // to the end of |*bits|.
ReadBit(PreloadDecoder::BitReader * reader,uint8_t * bits)58 bool ReadBit(PreloadDecoder::BitReader* reader, uint8_t* bits) {
59 bool bit;
60 if (!reader->Next(&bit)) {
61 return false;
62 }
63 *bits <<= 1;
64 if (bit) {
65 (*bits)++;
66 }
67 return true;
68 }
69
70 } // namespace
71
DecodeSize(size_t * out)72 bool PreloadDecoder::BitReader::DecodeSize(size_t* out) {
73 uint8_t bits = 0;
74 if (!ReadBit(this, &bits) || !ReadBit(this, &bits)) {
75 return false;
76 }
77 if (bits == 0) {
78 *out = 0;
79 return true;
80 }
81 if (!ReadBit(this, &bits)) {
82 return false;
83 }
84 // We've parsed 3 bits so far. Check all possible combinations:
85 bool is_even;
86 switch (bits) {
87 case 0b000:
88 case 0b001:
89 // This should have been handled in the if (bits == 0) check.
90 NOTREACHED();
91 case 0b010:
92 // A specialization of the 0b01 prefix for unary-like even numbers.
93 *out = 4;
94 return true;
95 case 0b011:
96 // This will be handled with the prefixes for unary-like encoding below.
97 is_even = true;
98 break;
99 case 0b100:
100 *out = 1;
101 return true;
102 case 0b101:
103 *out = 2;
104 return true;
105 case 0b110:
106 *out = 3;
107 return true;
108 case 0b111:
109 // This will be handled with the prefixes for unary-like encoding below.
110 is_even = false;
111 break;
112 default:
113 // All cases should be covered above.
114 NOTREACHED();
115 }
116 size_t bit_length = 3;
117 while (true) {
118 bit_length++;
119 bool bit;
120 if (!Next(&bit)) {
121 return false;
122 }
123 if (!bit) {
124 break;
125 }
126 }
127 size_t ret = (bit_length - 2) * 2;
128 if (!is_even) {
129 ret--;
130 }
131 *out = ret;
132 return true;
133 }
134
135 // Seek sets the current offest in the input to bit number |offset|. It
136 // returns true if |offset| is within the range of the input and false
137 // otherwise.
Seek(size_t offset)138 bool PreloadDecoder::BitReader::Seek(size_t offset) {
139 if (offset >= num_bits_) {
140 return false;
141 }
142 current_byte_index_ = offset / 8;
143 current_byte_ = bytes_[current_byte_index_++];
144 num_bits_used_ = offset % 8;
145 return true;
146 }
147
HuffmanDecoder(const uint8_t * tree,size_t tree_bytes)148 PreloadDecoder::HuffmanDecoder::HuffmanDecoder(const uint8_t* tree,
149 size_t tree_bytes)
150 : tree_(tree), tree_bytes_(tree_bytes) {}
151
Decode(PreloadDecoder::BitReader * reader,char * out) const152 bool PreloadDecoder::HuffmanDecoder::Decode(PreloadDecoder::BitReader* reader,
153 char* out) const {
154 const uint8_t* current = &tree_[tree_bytes_ - 2];
155
156 for (;;) {
157 bool bit;
158 if (!reader->Next(&bit)) {
159 return false;
160 }
161
162 uint8_t b = current[bit];
163 if (b & 0x80) {
164 *out = static_cast<char>(b & 0x7f);
165 return true;
166 }
167
168 unsigned offset = static_cast<unsigned>(b) * 2;
169 DCHECK_LT(offset, tree_bytes_);
170 if (offset >= tree_bytes_) {
171 return false;
172 }
173
174 current = &tree_[offset];
175 }
176 }
177
PreloadDecoder(const uint8_t * huffman_tree,size_t huffman_tree_size,const uint8_t * trie,size_t trie_bits,size_t trie_root_position)178 PreloadDecoder::PreloadDecoder(const uint8_t* huffman_tree,
179 size_t huffman_tree_size,
180 const uint8_t* trie,
181 size_t trie_bits,
182 size_t trie_root_position)
183 : huffman_decoder_(huffman_tree, huffman_tree_size),
184 bit_reader_(trie, trie_bits),
185 trie_root_position_(trie_root_position) {}
186
187 PreloadDecoder::~PreloadDecoder() = default;
188
Decode(const std::string & search,bool * out_found)189 bool PreloadDecoder::Decode(const std::string& search, bool* out_found) {
190 size_t bit_offset = trie_root_position_;
191 *out_found = false;
192
193 // current_search_offset contains one more than the index of the current
194 // character in the search keyword that is being considered. It's one greater
195 // so that we can represent the position just before the beginning (with
196 // zero).
197 size_t current_search_offset = search.size();
198
199 for (;;) {
200 // Seek to the desired location.
201 if (!bit_reader_.Seek(bit_offset)) {
202 return false;
203 }
204
205 // Decode the length of the common prefix.
206 size_t prefix_length;
207 if (!bit_reader_.DecodeSize(&prefix_length)) {
208 return false;
209 }
210
211 // Match each character in the prefix.
212 for (size_t i = 0; i < prefix_length; ++i) {
213 if (current_search_offset == 0) {
214 // We can't match the terminator with a prefix string.
215 return true;
216 }
217
218 char c;
219 if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
220 return false;
221 }
222 if (search[current_search_offset - 1] != c) {
223 return true;
224 }
225 current_search_offset--;
226 }
227
228 bool is_first_offset = true;
229 size_t current_offset = 0;
230
231 // Next is the dispatch table.
232 for (;;) {
233 char c;
234 if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
235 return false;
236 }
237 if (c == kEndOfTable) {
238 // No exact match.
239 return true;
240 }
241
242 if (c == kEndOfString) {
243 if (!ReadEntry(&bit_reader_, search, current_search_offset,
244 out_found)) {
245 return false;
246 }
247 if (current_search_offset == 0) {
248 CHECK(*out_found);
249 return true;
250 }
251 continue;
252 }
253
254 // The entries in a dispatch table are in order thus we can tell if there
255 // will be no match if the current character past the one that we want.
256 if (current_search_offset == 0 || search[current_search_offset - 1] < c) {
257 return true;
258 }
259
260 if (is_first_offset) {
261 // The first offset is backwards from the current position.
262 uint32_t jump_delta_bits;
263 uint32_t jump_delta;
264 if (!bit_reader_.Read(5, &jump_delta_bits) ||
265 !bit_reader_.Read(jump_delta_bits, &jump_delta)) {
266 return false;
267 }
268
269 if (bit_offset < jump_delta) {
270 return false;
271 }
272
273 current_offset = bit_offset - jump_delta;
274 is_first_offset = false;
275 } else {
276 // Subsequent offsets are forward from the target of the first offset.
277 uint32_t is_long_jump;
278 if (!bit_reader_.Read(1, &is_long_jump)) {
279 return false;
280 }
281
282 uint32_t jump_delta;
283 if (!is_long_jump) {
284 if (!bit_reader_.Read(7, &jump_delta)) {
285 return false;
286 }
287 } else {
288 uint32_t jump_delta_bits;
289 if (!bit_reader_.Read(4, &jump_delta_bits) ||
290 !bit_reader_.Read(jump_delta_bits + 8, &jump_delta)) {
291 return false;
292 }
293 }
294
295 current_offset += jump_delta;
296 if (current_offset >= bit_offset) {
297 return false;
298 }
299 }
300
301 DCHECK_LT(0u, current_search_offset);
302 if (search[current_search_offset - 1] == c) {
303 bit_offset = current_offset;
304 current_search_offset--;
305 break;
306 }
307 }
308 }
309 NOTREACHED();
310 }
311
312 } // namespace net::extras
313