• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_
6 #define NET_EXTRAS_PRELOAD_DATA_DECODER_H_
7 
8 #include <stdint.h>
9 
10 #include <string>
11 
12 #include "base/memory/raw_ptr.h"
13 
14 namespace net::extras {
15 
16 // Decodes an entry from preloaded data.
17 // Clients must implement ReadEntry() method to read the specific type of data
18 // they are interested in.
19 class PreloadDecoder {
20  public:
21   // These must match the values in net/tools/huffman_trie/trie/trie_writer.h.
22   enum : char { kEndOfString = 0, kEndOfTable = 127 };
23 
24   // BitReader is a class that allows a bytestring to be read bit-by-bit.
25   class BitReader {
26    public:
27     BitReader(const uint8_t* bytes, size_t num_bits);
28 
29     BitReader(const BitReader&) = delete;
30     BitReader& operator=(const BitReader&) = delete;
31 
32     // Next sets |*out| to the next bit from the input. It returns false if no
33     // more bits are available or true otherwise.
34     bool Next(bool* out);
35 
36     // Read sets the |num_bits| least-significant bits of |*out| to the value of
37     // the next |num_bits| bits from the input. It returns false if there are
38     // insufficient bits in the input or true otherwise.
39     bool Read(unsigned num_bits, uint32_t* out);
40 
41     // Decodes a size_t from the reader, putting the resulting value in |*out|.
42     // Returns false if there are insufficient bits to read and true otherwise.
43     //
44     // This function's inverse is TrieBitBuffer::WriteSize.
45     //
46     // The encoding is a prefix code optimized for small values (less than 4).
47     // It is designed for the lengths of prefixes in the HSTS Preload list trie.
48     // Compared to the unary encoding that was previously used (where the number
49     // of bits used is one plus the value being encoded), this uses one more bit
50     // for encoding 0 and 1, and the same number of bits for encoding 2, and
51     // fewer bits for encoding values greater than 2. At the time of writing,
52     // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the
53     // remaining 54% were greater than 2.
54     //
55     // This encoding scheme uses a variable number of bits to encode each value.
56     // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used
57     // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed
58     // values are as follows:
59     //
60     //   0: 0b00
61     //   1: 0b100
62     //   2: 0b101
63     //   3: 0b110
64     //
65     // Note that none of the fixed values are prefixed with 0b01 or 0b111. These
66     // prefixes are used with a unary-like encoding for values 4 and above.
67     // Zero or more 1s, followed by a 0, are appended to one of those prefixes.
68     // Even values use the prefix 0b01, and odd values use the prefix 0b111. The
69     // number of 1s to append is half the value (rounded down) minus 1.
70     bool DecodeSize(size_t* out);
71 
72     // Seek sets the current offest in the input to bit number |offset|. It
73     // returns true if |offset| is within the range of the input and false
74     // otherwise.
75     bool Seek(size_t offset);
76 
77    private:
78     const raw_ptr<const uint8_t, AllowPtrArithmetic> bytes_;
79     const size_t num_bits_;
80     const size_t num_bytes_;
81     // current_byte_index_ contains the current byte offset in |bytes_|.
82     size_t current_byte_index_ = 0;
83     // current_byte_ contains the current byte of the input.
84     uint8_t current_byte_;
85     // num_bits_used_ contains the number of bits of |current_byte_| that have
86     // been read.
87     unsigned num_bits_used_ = 8;
88   };
89 
90   // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is
91   // simply encoded as a series of two-byte structures. The first byte
92   // determines the "0" pointer for that node and the second the "1" pointer.
93   // Each byte either has the MSB set, in which case the bottom 7 bits are the
94   // value for that position, or else the bottom seven bits contain the index of
95   // a node.
96   //
97   // The tree is decoded by walking rather than a table-driven approach.
98   class HuffmanDecoder {
99    public:
100     HuffmanDecoder(const uint8_t* tree, size_t tree_bytes);
101 
102     HuffmanDecoder(const HuffmanDecoder&) = delete;
103     HuffmanDecoder& operator=(const HuffmanDecoder&) = delete;
104 
105     bool Decode(PreloadDecoder::BitReader* reader, char* out) const;
106 
107    private:
108     const raw_ptr<const uint8_t, AllowPtrArithmetic> tree_;
109     const size_t tree_bytes_;
110   };
111 
112   PreloadDecoder(const uint8_t* huffman_tree,
113                  size_t huffman_tree_size,
114                  const uint8_t* trie,
115                  size_t trie_bits,
116                  size_t trie_root_position);
117 
118   PreloadDecoder(const PreloadDecoder&) = delete;
119   PreloadDecoder& operator=(const PreloadDecoder&) = delete;
120 
121   virtual ~PreloadDecoder();
122 
123   // Resolves search keyword given by |search| in the preloaded data. Returns
124   // false on internal error and true otherwise. After a successful return,
125   // |*out_found| is true iff a relevant entry has been found. In the case of
126   // HSTS data, |search| is the hostname being searched.
127   //
128   // Although this code should be robust, it never processes attacker-controlled
129   // data -- it only operates on the preloaded data built into the binary.
130   //
131   // The preloaded data is represented as a trie and matches |search|
132   // backwards. Each node in the trie starts with a number of characters, which
133   // must match exactly. After that is a dispatch table which maps the next
134   // character in the search keyword to another node in the trie.
135   //
136   // In the dispatch table, the zero character represents the "end of string"
137   // (which is the *beginning* of the search keyword since we process it
138   // backwards). The value in that case is special -- rather than an offset to
139   // another trie node, it contains the searched entry (for HSTS data, it
140   // contains whether subdomains are included, pinsets etc.). Clients must
141   // implement ReadEntry to read the entry at this location.
142   //
143   // Dispatch tables are always given in order, but the "end of string" (zero)
144   // value always comes before an entry for '.'.
145   bool Decode(const std::string& search, bool* out_found);
146 
147  protected:
148   virtual bool ReadEntry(BitReader* reader,
149                          const std::string& search,
150                          size_t current_search_offset,
151                          bool* out_found) = 0;
152 
huffman_decoder()153   const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; }
154 
155  private:
156   HuffmanDecoder huffman_decoder_;
157   BitReader bit_reader_;
158 
159   const size_t trie_root_position_;
160 };
161 
162 }  // namespace net::extras
163 
164 #endif  // NET_EXTRAS_PRELOAD_DATA_DECODER_H_
165