• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SRC_HUFFMAN_TABLE_H_
6 #define SRC_HUFFMAN_TABLE_H_
7 
8 #include <cstddef>
9 #include <cstdint>
10 #include <string>
11 #include <vector>
12 
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/bit_writer.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/errors.h"
17 #include "puffin/src/set_errors.h"
18 
19 namespace puffin {
20 
21 // Maximum Huffman code length based on RFC1951.
22 constexpr size_t kMaxHuffmanBits = 15;
23 
24 // Permutations of input Huffman code lengths (used only to read
25 // |dynamic_code_lens_|).
26 extern const uint8_t kPermutations[];
27 
28 // The bases of each alphabet which is added to the integer value of extra
29 // bits that comes after the Huffman code in the input to create the given
30 // length value. The last element is a guard.
31 extern const uint16_t kLengthBases[];
32 
33 // Number of extra bits that comes after the associating Huffman code.
34 extern const uint8_t kLengthExtraBits[];
35 
36 // same as |kLengthBases| except for the the distances instead of lengths.
37 // The last element is a guard.
38 extern const uint16_t kDistanceBases[];
39 
40 // Same as |kLengthExtraBits| except for distances instead of lengths.
41 extern const uint8_t kDistanceExtraBits[];
42 
43 class HuffmanTable {
44  public:
45   HuffmanTable();
46   virtual ~HuffmanTable() = default;
47 
48   // Checks the lengths of Huffman length arrays for correctness
49   //
50   // |num_lit_len|  IN  The number of literal/lengths code lengths
51   // |num_distance| IN  The number of distance code lengths
52   // |num_codes|    IN  The number of code lengths for reading Huffman table.
CheckHuffmanArrayLengths(size_t num_lit_len,size_t num_distance,size_t num_codes)53   inline bool CheckHuffmanArrayLengths(size_t num_lit_len,
54                                        size_t num_distance,
55                                        size_t num_codes) {
56     if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) {
57       LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: "
58                  << "num_lit_len(" << num_lit_len << ") "
59                  << "num_distance(" << num_distance << ") "
60                  << "num_codes(" << num_codes << ")";
61       return false;
62     }
63     return true;
64   }
65 
66   // Returns the maximum number of bits used in the current literal/length
67   // Huffman codes.
LitLenMaxBits()68   inline size_t LitLenMaxBits() { return lit_len_max_bits_; }
69 
70   // Returns the maximum number of bits used in the current distance Huffman
71   // codes.
DistanceMaxBits()72   inline size_t DistanceMaxBits() { return distance_max_bits_; }
73 
74   // Returns the alphabet associated with the set of input bits for the code
75   // length array.
76   //
77   // |bits|     IN   The input Huffman bits read from the deflate stream.
78   // |alphabet| OUT  The alphabet associated with the given |bits|.
79   // |nbits|    OUT  The number of bits in the Huffman code of alphabet.
80   // Returns true if there is an alphabet associated with |bits|.
CodeAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)81   inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
82     auto hc = code_hcodes_[bits];
83     TEST_AND_RETURN_FALSE(hc & 0x8000);
84     *alphabet = hc & 0x7FFF;
85     *nbits = code_lens_[*alphabet];
86     return true;
87   }
88 
89   // Returns the alphabet associated with the set of input bits for the
90   // literal/length code length array.
91   //
92   // |bits|     IN   The input Huffman bits read from the deflate stream.
93   // |alphabet| OUT  The alphabet associated with the given |bits|.
94   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
95   // Returns true if there is an alphabet associated with |bits|.
LitLenAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)96   inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
97     auto hc = lit_len_hcodes_[bits];
98     TEST_AND_RETURN_FALSE(hc & 0x8000);
99     *alphabet = hc & 0x7FFF;
100     *nbits = lit_len_lens_[*alphabet];
101     return true;
102   }
103 
104   // Returns the alphabet associated with the set of input bits for the
105   // distance code length array.
106   //
107   // |bits|     IN   The input Huffman bits read from the deflate stream.
108   // |alphabet| OUT  The alphabet associated with the given |bits|.
109   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
110   // Returns true if there is an alphabet associated with |bits|.
DistanceAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)111   inline bool DistanceAlphabet(uint32_t bits,
112                                uint16_t* alphabet,
113                                size_t* nbits) {
114     auto hc = distance_hcodes_[bits];
115     TEST_AND_RETURN_FALSE(hc & 0x8000);
116     *alphabet = hc & 0x7FFF;
117     *nbits = distance_lens_[*alphabet];
118     return true;
119   }
120 
121   // Returns the Huffman code of a give alphabet for Huffman table codes.
122   //
123   // |alphabet| IN   The alphabet.
124   // |huffman|  OUT  The Huffman code for |alphabet|.
125   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
126   //                 |alphabet|.
CodeHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)127   inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) {
128     TEST_AND_RETURN_FALSE(alphabet < code_lens_.size());
129     *huffman = code_rcodes_[alphabet];
130     *nbits = code_lens_[alphabet];
131     return true;
132   }
133 
134   // Returns the Huffman code of a give alphabet for literal/length codes.
135   //
136   // |alphabet| IN   The alphabet.
137   // |huffman|  OUT  The Huffman code for |alphabet|.
138   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
139   //                 |alphabet|.
LitLenHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)140   inline bool LitLenHuffman(uint16_t alphabet,
141                             uint16_t* huffman,
142                             size_t* nbits) {
143     TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size());
144     *huffman = lit_len_rcodes_[alphabet];
145     *nbits = lit_len_lens_[alphabet];
146     return true;
147   }
148 
EndOfBlockBitLength(size_t * nbits)149   inline bool EndOfBlockBitLength(size_t* nbits) {
150     TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size());
151     *nbits = lit_len_lens_[256];
152     return true;
153   }
154 
155   // Returns the Huffman code of a give alphabet for distance codes.
156   //
157   // |alphabet| IN   The alphabet.
158   // |huffman|  OUT  The Huffman code for |alphabet|.
159   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
160   //                 |alphabet|.
DistanceHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)161   inline bool DistanceHuffman(uint16_t alphabet,
162                               uint16_t* huffman,
163                               size_t* nbits) {
164     TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size());
165     *huffman = distance_rcodes_[alphabet];
166     *nbits = distance_lens_[alphabet];
167     return true;
168   }
169 
170   // This populates the object with fixed huffman table parameters.
171   // TODO(ahassani): Revamp the use of this function to be initiliazed once in
172   // the lifetime of the program and only one instance needed.
173   bool BuildFixedHuffmanTable();
174 
175   // This functions first reads the Huffman code length arrays from the input
176   // deflate stream, then builds both literal/length and distance Huffman
177   // code arrays. It also writes the Huffman table into the puffed stream.
178   //
179   // |br|      IN      The |BitReaderInterface| reading the deflate stream.
180   // |buffer|  OUT     The object to write the Huffman table.
181   // |length|  IN/OUT  The length available in the |buffer| and in return it
182   //                   will be the length of Huffman table data written into
183   //                   the |buffer|.
184   bool BuildDynamicHuffmanTable(BitReaderInterface* br,
185                                 uint8_t* buffer,
186                                 size_t* length,
187                                 Error* error);
188 
189   // This functions first reads the Huffman code length arrays from the input
190   // puffed |buffer|, then builds both literal/length and distance Huffman code
191   // arrays. It also writes the coded Huffman table arrays into the deflate
192   // stream.
193   //
194   // |buffer| IN      The array to read the Huffman table from.
195   // |length| IN      The length available in the |buffer|.
196   // |bw|     IN/OUT  The |BitWriterInterface| for writing into the deflate
197   //                  stream.
198   // |error|  OUT     The error code.
199   bool BuildDynamicHuffmanTable(const uint8_t* buffer,
200                                 size_t length,
201                                 BitWriterInterface* bw,
202                                 Error* error);
203 
204  protected:
205   // Initializes the Huffman codes from an array of lengths.
206   //
207   // |lens|     IN   The input array of code lengths.
208   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
209   bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits);
210 
211   // Creates the Huffman code to alphabet array.
212   // |lens|     IN   The input array of code lengths.
213   // |hcodes|   OUT  The Huffman to alphabet array.
214   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
215   bool BuildHuffmanCodes(const Buffer& lens,
216                          std::vector<uint16_t>* hcodes,
217                          size_t* max_bits);
218 
219   // Creates the alphabet to Huffman code array.
220   // |lens|     IN   The input array of code lengths.
221   // |rcodes|   OUT  The Huffman to Huffman array.
222   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
223   bool BuildHuffmanReverseCodes(const Buffer& lens,
224                                 std::vector<uint16_t>* rcodes,
225                                 size_t* max_bits);
226 
227   // Reads a specific Huffman code length array from input. At the same time
228   // writes the array into the puffed stream. The Huffman code length array is
229   // either the literal/lengths or distance codes.
230   //
231   // |br|        IN      The |BitReaderInterface| for reading the deflate
232   //                      stream.
233   // |buffer|    OUT     The array to write the Huffman table.
234   // |length|    IN/OUT  The length available in the |buffer| and in return it
235   //                     will be the length of data written into the |buffer|.
236   // |max_bits|  IN      The maximum number of bits in the Huffman codes.
237   // |num_codes| IN      The size of the Huffman code length array in the input.
238   // |lens|      OUT     The resulting Huffman code length array.
239   // |error|     OUT     The error code.
240   bool BuildHuffmanCodeLengths(BitReaderInterface* br,
241                                uint8_t* buffer,
242                                size_t* length,
243                                size_t max_bits,
244                                size_t num_codes,
245                                Buffer* lens,
246                                Error* error);
247 
248   // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and
249   // writing into deflate stream.
250   //
251   // |buffer|    IN      The array to read the Huffman table from.
252   // |length|    IN      The length available in the |buffer|.
253   // |bw|        IN/OUT  The |BitWriterInterface| for writing into the deflate
254   //                     stream.
255   // |num_codes| IN      Number of Huffman code lengths to read from the
256   //                     |buffer|.
257   // |lens|      OUT     The Huffman code lengths array.
258   // |error|     OUT     The error code.
259   bool BuildHuffmanCodeLengths(const uint8_t* buffer,
260                                size_t* length,
261                                BitWriterInterface* bw,
262                                size_t num_codes,
263                                Buffer* lens,
264                                Error* error);
265 
266  private:
267   // A utility struct used to create Huffman codes.
268   struct CodeIndexPair {
269     uint16_t code;   // The Huffman code
270     uint16_t index;  // The alphabet
271   };
272   // A vector with maximum size of 286 that is only uses as temporary space for
273   // building Huffman codes.
274   std::vector<CodeIndexPair> codeindexpairs_;
275 
276   // Used in building Huffman codes for literals/lengths and distances.
277   std::vector<uint8_t> lit_len_lens_;
278   std::vector<uint16_t> lit_len_hcodes_;
279   std::vector<uint16_t> lit_len_rcodes_;
280   size_t lit_len_max_bits_;
281   std::vector<uint8_t> distance_lens_;
282   std::vector<uint16_t> distance_hcodes_;
283   std::vector<uint16_t> distance_rcodes_;
284   size_t distance_max_bits_;
285 
286   // The reason for keeping a temporary buffer here is to avoid reallocing each
287   // time.
288   std::vector<uint8_t> tmp_lens_;
289 
290   // Used in building Huffman codes for reading and decoding literal/length and
291   // distance Huffman code length arrays.
292   std::vector<uint8_t> code_lens_;
293   std::vector<uint16_t> code_hcodes_;
294   std::vector<uint16_t> code_rcodes_;
295   size_t code_max_bits_;
296 
297   bool initialized_;
298 
299   DISALLOW_COPY_AND_ASSIGN(HuffmanTable);
300 };
301 
302 // The type of a block in a deflate stream.
303 enum class BlockType : uint8_t {
304   kUncompressed = 0x00,
305   kFixed = 0x01,
306   kDynamic = 0x02,
307 };
308 
309 std::string BlockTypeToString(BlockType type);
310 
311 }  // namespace puffin
312 
313 #endif  // SRC_HUFFMAN_TABLE_H_
314