• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "net/extras/preload_data/decoder.h"
11 #include "base/check_op.h"
12 #include "base/notreached.h"
13 
14 namespace net::extras {
15 
BitReader(const uint8_t * bytes,size_t num_bits)16 PreloadDecoder::BitReader::BitReader(const uint8_t* bytes, size_t num_bits)
17     : bytes_(bytes), num_bits_(num_bits), num_bytes_((num_bits + 7) / 8) {}
18 
19 // Next sets |*out| to the next bit from the input. It returns false if no
20 // more bits are available or true otherwise.
Next(bool * out)21 bool PreloadDecoder::BitReader::Next(bool* out) {
22   if (num_bits_used_ == 8) {
23     if (current_byte_index_ >= num_bytes_) {
24       return false;
25     }
26     current_byte_ = bytes_[current_byte_index_++];
27     num_bits_used_ = 0;
28   }
29 
30   *out = 1 & (current_byte_ >> (7 - num_bits_used_));
31   num_bits_used_++;
32   return true;
33 }
34 
35 // Read sets the |num_bits| least-significant bits of |*out| to the value of
36 // the next |num_bits| bits from the input. It returns false if there are
37 // insufficient bits in the input or true otherwise.
Read(unsigned num_bits,uint32_t * out)38 bool PreloadDecoder::BitReader::Read(unsigned num_bits, uint32_t* out) {
39   DCHECK_LE(num_bits, 32u);
40 
41   uint32_t ret = 0;
42   for (unsigned i = 0; i < num_bits; ++i) {
43     bool bit;
44     if (!Next(&bit)) {
45       return false;
46     }
47     ret |= static_cast<uint32_t>(bit) << (num_bits - 1 - i);
48   }
49 
50   *out = ret;
51   return true;
52 }
53 
54 namespace {
55 
56 // Reads one bit from |reader|, shifts |*bits| left by 1, and adds the read bit
57 // to the end of |*bits|.
ReadBit(PreloadDecoder::BitReader * reader,uint8_t * bits)58 bool ReadBit(PreloadDecoder::BitReader* reader, uint8_t* bits) {
59   bool bit;
60   if (!reader->Next(&bit)) {
61     return false;
62   }
63   *bits <<= 1;
64   if (bit) {
65     (*bits)++;
66   }
67   return true;
68 }
69 
70 }  // namespace
71 
DecodeSize(size_t * out)72 bool PreloadDecoder::BitReader::DecodeSize(size_t* out) {
73   uint8_t bits = 0;
74   if (!ReadBit(this, &bits) || !ReadBit(this, &bits)) {
75     return false;
76   }
77   if (bits == 0) {
78     *out = 0;
79     return true;
80   }
81   if (!ReadBit(this, &bits)) {
82     return false;
83   }
84   // We've parsed 3 bits so far. Check all possible combinations:
85   bool is_even;
86   switch (bits) {
87     case 0b000:
88     case 0b001:
89       // This should have been handled in the if (bits == 0) check.
90       NOTREACHED();
91     case 0b010:
92       // A specialization of the 0b01 prefix for unary-like even numbers.
93       *out = 4;
94       return true;
95     case 0b011:
96       // This will be handled with the prefixes for unary-like encoding below.
97       is_even = true;
98       break;
99     case 0b100:
100       *out = 1;
101       return true;
102     case 0b101:
103       *out = 2;
104       return true;
105     case 0b110:
106       *out = 3;
107       return true;
108     case 0b111:
109       // This will be handled with the prefixes for unary-like encoding below.
110       is_even = false;
111       break;
112     default:
113       // All cases should be covered above.
114       NOTREACHED();
115   }
116   size_t bit_length = 3;
117   while (true) {
118     bit_length++;
119     bool bit;
120     if (!Next(&bit)) {
121       return false;
122     }
123     if (!bit) {
124       break;
125     }
126   }
127   size_t ret = (bit_length - 2) * 2;
128   if (!is_even) {
129     ret--;
130   }
131   *out = ret;
132   return true;
133 }
134 
135 // Seek sets the current offest in the input to bit number |offset|. It
136 // returns true if |offset| is within the range of the input and false
137 // otherwise.
Seek(size_t offset)138 bool PreloadDecoder::BitReader::Seek(size_t offset) {
139   if (offset >= num_bits_) {
140     return false;
141   }
142   current_byte_index_ = offset / 8;
143   current_byte_ = bytes_[current_byte_index_++];
144   num_bits_used_ = offset % 8;
145   return true;
146 }
147 
HuffmanDecoder(const uint8_t * tree,size_t tree_bytes)148 PreloadDecoder::HuffmanDecoder::HuffmanDecoder(const uint8_t* tree,
149                                                size_t tree_bytes)
150     : tree_(tree), tree_bytes_(tree_bytes) {}
151 
Decode(PreloadDecoder::BitReader * reader,char * out) const152 bool PreloadDecoder::HuffmanDecoder::Decode(PreloadDecoder::BitReader* reader,
153                                             char* out) const {
154   const uint8_t* current = &tree_[tree_bytes_ - 2];
155 
156   for (;;) {
157     bool bit;
158     if (!reader->Next(&bit)) {
159       return false;
160     }
161 
162     uint8_t b = current[bit];
163     if (b & 0x80) {
164       *out = static_cast<char>(b & 0x7f);
165       return true;
166     }
167 
168     unsigned offset = static_cast<unsigned>(b) * 2;
169     DCHECK_LT(offset, tree_bytes_);
170     if (offset >= tree_bytes_) {
171       return false;
172     }
173 
174     current = &tree_[offset];
175   }
176 }
177 
PreloadDecoder(const uint8_t * huffman_tree,size_t huffman_tree_size,const uint8_t * trie,size_t trie_bits,size_t trie_root_position)178 PreloadDecoder::PreloadDecoder(const uint8_t* huffman_tree,
179                                size_t huffman_tree_size,
180                                const uint8_t* trie,
181                                size_t trie_bits,
182                                size_t trie_root_position)
183     : huffman_decoder_(huffman_tree, huffman_tree_size),
184       bit_reader_(trie, trie_bits),
185       trie_root_position_(trie_root_position) {}
186 
187 PreloadDecoder::~PreloadDecoder() = default;
188 
Decode(const std::string & search,bool * out_found)189 bool PreloadDecoder::Decode(const std::string& search, bool* out_found) {
190   size_t bit_offset = trie_root_position_;
191   *out_found = false;
192 
193   // current_search_offset contains one more than the index of the current
194   // character in the search keyword that is being considered. It's one greater
195   // so that we can represent the position just before the beginning (with
196   // zero).
197   size_t current_search_offset = search.size();
198 
199   for (;;) {
200     // Seek to the desired location.
201     if (!bit_reader_.Seek(bit_offset)) {
202       return false;
203     }
204 
205     // Decode the length of the common prefix.
206     size_t prefix_length;
207     if (!bit_reader_.DecodeSize(&prefix_length)) {
208       return false;
209     }
210 
211     // Match each character in the prefix.
212     for (size_t i = 0; i < prefix_length; ++i) {
213       if (current_search_offset == 0) {
214         // We can't match the terminator with a prefix string.
215         return true;
216       }
217 
218       char c;
219       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
220         return false;
221       }
222       if (search[current_search_offset - 1] != c) {
223         return true;
224       }
225       current_search_offset--;
226     }
227 
228     bool is_first_offset = true;
229     size_t current_offset = 0;
230 
231     // Next is the dispatch table.
232     for (;;) {
233       char c;
234       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
235         return false;
236       }
237       if (c == kEndOfTable) {
238         // No exact match.
239         return true;
240       }
241 
242       if (c == kEndOfString) {
243         if (!ReadEntry(&bit_reader_, search, current_search_offset,
244                        out_found)) {
245           return false;
246         }
247         if (current_search_offset == 0) {
248           CHECK(*out_found);
249           return true;
250         }
251         continue;
252       }
253 
254       // The entries in a dispatch table are in order thus we can tell if there
255       // will be no match if the current character past the one that we want.
256       if (current_search_offset == 0 || search[current_search_offset - 1] < c) {
257         return true;
258       }
259 
260       if (is_first_offset) {
261         // The first offset is backwards from the current position.
262         uint32_t jump_delta_bits;
263         uint32_t jump_delta;
264         if (!bit_reader_.Read(5, &jump_delta_bits) ||
265             !bit_reader_.Read(jump_delta_bits, &jump_delta)) {
266           return false;
267         }
268 
269         if (bit_offset < jump_delta) {
270           return false;
271         }
272 
273         current_offset = bit_offset - jump_delta;
274         is_first_offset = false;
275       } else {
276         // Subsequent offsets are forward from the target of the first offset.
277         uint32_t is_long_jump;
278         if (!bit_reader_.Read(1, &is_long_jump)) {
279           return false;
280         }
281 
282         uint32_t jump_delta;
283         if (!is_long_jump) {
284           if (!bit_reader_.Read(7, &jump_delta)) {
285             return false;
286           }
287         } else {
288           uint32_t jump_delta_bits;
289           if (!bit_reader_.Read(4, &jump_delta_bits) ||
290               !bit_reader_.Read(jump_delta_bits + 8, &jump_delta)) {
291             return false;
292           }
293         }
294 
295         current_offset += jump_delta;
296         if (current_offset >= bit_offset) {
297           return false;
298         }
299       }
300 
301       DCHECK_LT(0u, current_search_offset);
302       if (search[current_search_offset - 1] == c) {
303         bit_offset = current_offset;
304         current_search_offset--;
305         break;
306       }
307     }
308   }
309   NOTREACHED();
310 }
311 
312 }  // namespace net::extras
313