1 // Copyright 2017 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef SRC_HUFFMAN_TABLE_H_ 6 #define SRC_HUFFMAN_TABLE_H_ 7 8 #include <cstddef> 9 #include <cstdint> 10 #include <string> 11 #include <vector> 12 13 #include "puffin/src/bit_reader.h" 14 #include "puffin/src/bit_writer.h" 15 #include "puffin/src/include/puffin/common.h" 16 #include "puffin/src/include/puffin/errors.h" 17 #include "puffin/src/set_errors.h" 18 19 namespace puffin { 20 21 // Maximum Huffman code length based on RFC1951. 22 constexpr size_t kMaxHuffmanBits = 15; 23 24 // Permutations of input Huffman code lengths (used only to read 25 // |dynamic_code_lens_|). 26 extern const uint8_t kPermutations[]; 27 28 // The bases of each alphabet which is added to the integer value of extra 29 // bits that comes after the Huffman code in the input to create the given 30 // length value. The last element is a guard. 31 extern const uint16_t kLengthBases[]; 32 33 // Number of extra bits that comes after the associating Huffman code. 34 extern const uint8_t kLengthExtraBits[]; 35 36 // same as |kLengthBases| except for the the distances instead of lengths. 37 // The last element is a guard. 38 extern const uint16_t kDistanceBases[]; 39 40 // Same as |kLengthExtraBits| except for distances instead of lengths. 41 extern const uint8_t kDistanceExtraBits[]; 42 43 class HuffmanTable { 44 public: 45 HuffmanTable(); 46 virtual ~HuffmanTable() = default; 47 48 // Checks the lengths of Huffman length arrays for correctness 49 // 50 // |num_lit_len| IN The number of literal/lengths code lengths 51 // |num_distance| IN The number of distance code lengths 52 // |num_codes| IN The number of code lengths for reading Huffman table. CheckHuffmanArrayLengths(size_t num_lit_len,size_t num_distance,size_t num_codes)53 inline bool CheckHuffmanArrayLengths(size_t num_lit_len, 54 size_t num_distance, 55 size_t num_codes) { 56 if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) { 57 LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: " 58 << "num_lit_len(" << num_lit_len << ") " 59 << "num_distance(" << num_distance << ") " 60 << "num_codes(" << num_codes << ")"; 61 return false; 62 } 63 return true; 64 } 65 66 // Returns the maximum number of bits used in the current literal/length 67 // Huffman codes. LitLenMaxBits()68 inline size_t LitLenMaxBits() { return lit_len_max_bits_; } 69 70 // Returns the maximum number of bits used in the current distance Huffman 71 // codes. DistanceMaxBits()72 inline size_t DistanceMaxBits() { return distance_max_bits_; } 73 74 // Returns the alphabet associated with the set of input bits for the code 75 // length array. 76 // 77 // |bits| IN The input Huffman bits read from the deflate stream. 78 // |alphabet| OUT The alphabet associated with the given |bits|. 79 // |nbits| OUT The number of bits in the Huffman code of alphabet. 80 // Returns true if there is an alphabet associated with |bits|. CodeAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)81 inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 82 auto hc = code_hcodes_[bits]; 83 TEST_AND_RETURN_FALSE(hc & 0x8000); 84 *alphabet = hc & 0x7FFF; 85 *nbits = code_lens_[*alphabet]; 86 return true; 87 } 88 89 // Returns the alphabet associated with the set of input bits for the 90 // literal/length code length array. 91 // 92 // |bits| IN The input Huffman bits read from the deflate stream. 93 // |alphabet| OUT The alphabet associated with the given |bits|. 94 // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 95 // Returns true if there is an alphabet associated with |bits|. LitLenAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)96 inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 97 auto hc = lit_len_hcodes_[bits]; 98 TEST_AND_RETURN_FALSE(hc & 0x8000); 99 *alphabet = hc & 0x7FFF; 100 *nbits = lit_len_lens_[*alphabet]; 101 return true; 102 } 103 104 // Returns the alphabet associated with the set of input bits for the 105 // distance code length array. 106 // 107 // |bits| IN The input Huffman bits read from the deflate stream. 108 // |alphabet| OUT The alphabet associated with the given |bits|. 109 // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 110 // Returns true if there is an alphabet associated with |bits|. DistanceAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)111 inline bool DistanceAlphabet(uint32_t bits, 112 uint16_t* alphabet, 113 size_t* nbits) { 114 auto hc = distance_hcodes_[bits]; 115 TEST_AND_RETURN_FALSE(hc & 0x8000); 116 *alphabet = hc & 0x7FFF; 117 *nbits = distance_lens_[*alphabet]; 118 return true; 119 } 120 121 // Returns the Huffman code of a give alphabet for Huffman table codes. 122 // 123 // |alphabet| IN The alphabet. 124 // |huffman| OUT The Huffman code for |alphabet|. 125 // |nbits| OUT The maximum number of bits in the Huffman code of the 126 // |alphabet|. CodeHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)127 inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { 128 TEST_AND_RETURN_FALSE(alphabet < code_lens_.size()); 129 *huffman = code_rcodes_[alphabet]; 130 *nbits = code_lens_[alphabet]; 131 return true; 132 } 133 134 // Returns the Huffman code of a give alphabet for literal/length codes. 135 // 136 // |alphabet| IN The alphabet. 137 // |huffman| OUT The Huffman code for |alphabet|. 138 // |nbits| OUT The maximum number of bits in the Huffman code of the 139 // |alphabet|. LitLenHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)140 inline bool LitLenHuffman(uint16_t alphabet, 141 uint16_t* huffman, 142 size_t* nbits) { 143 TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size()); 144 *huffman = lit_len_rcodes_[alphabet]; 145 *nbits = lit_len_lens_[alphabet]; 146 return true; 147 } 148 EndOfBlockBitLength(size_t * nbits)149 inline bool EndOfBlockBitLength(size_t* nbits) { 150 TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size()); 151 *nbits = lit_len_lens_[256]; 152 return true; 153 } 154 155 // Returns the Huffman code of a give alphabet for distance codes. 156 // 157 // |alphabet| IN The alphabet. 158 // |huffman| OUT The Huffman code for |alphabet|. 159 // |nbits| OUT The maximum number of bits in the Huffman code of the 160 // |alphabet|. DistanceHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)161 inline bool DistanceHuffman(uint16_t alphabet, 162 uint16_t* huffman, 163 size_t* nbits) { 164 TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size()); 165 *huffman = distance_rcodes_[alphabet]; 166 *nbits = distance_lens_[alphabet]; 167 return true; 168 } 169 170 // This populates the object with fixed huffman table parameters. 171 // TODO(ahassani): Revamp the use of this function to be initiliazed once in 172 // the lifetime of the program and only one instance needed. 173 bool BuildFixedHuffmanTable(); 174 175 // This functions first reads the Huffman code length arrays from the input 176 // deflate stream, then builds both literal/length and distance Huffman 177 // code arrays. It also writes the Huffman table into the puffed stream. 178 // 179 // |br| IN The |BitReaderInterface| reading the deflate stream. 180 // |buffer| OUT The object to write the Huffman table. 181 // |length| IN/OUT The length available in the |buffer| and in return it 182 // will be the length of Huffman table data written into 183 // the |buffer|. 184 bool BuildDynamicHuffmanTable(BitReaderInterface* br, 185 uint8_t* buffer, 186 size_t* length, 187 Error* error); 188 189 // This functions first reads the Huffman code length arrays from the input 190 // puffed |buffer|, then builds both literal/length and distance Huffman code 191 // arrays. It also writes the coded Huffman table arrays into the deflate 192 // stream. 193 // 194 // |buffer| IN The array to read the Huffman table from. 195 // |length| IN The length available in the |buffer|. 196 // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 197 // stream. 198 // |error| OUT The error code. 199 bool BuildDynamicHuffmanTable(const uint8_t* buffer, 200 size_t length, 201 BitWriterInterface* bw, 202 Error* error); 203 204 protected: 205 // Initializes the Huffman codes from an array of lengths. 206 // 207 // |lens| IN The input array of code lengths. 208 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 209 bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits); 210 211 // Creates the Huffman code to alphabet array. 212 // |lens| IN The input array of code lengths. 213 // |hcodes| OUT The Huffman to alphabet array. 214 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 215 bool BuildHuffmanCodes(const Buffer& lens, 216 std::vector<uint16_t>* hcodes, 217 size_t* max_bits); 218 219 // Creates the alphabet to Huffman code array. 220 // |lens| IN The input array of code lengths. 221 // |rcodes| OUT The Huffman to Huffman array. 222 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 223 bool BuildHuffmanReverseCodes(const Buffer& lens, 224 std::vector<uint16_t>* rcodes, 225 size_t* max_bits); 226 227 // Reads a specific Huffman code length array from input. At the same time 228 // writes the array into the puffed stream. The Huffman code length array is 229 // either the literal/lengths or distance codes. 230 // 231 // |br| IN The |BitReaderInterface| for reading the deflate 232 // stream. 233 // |buffer| OUT The array to write the Huffman table. 234 // |length| IN/OUT The length available in the |buffer| and in return it 235 // will be the length of data written into the |buffer|. 236 // |max_bits| IN The maximum number of bits in the Huffman codes. 237 // |num_codes| IN The size of the Huffman code length array in the input. 238 // |lens| OUT The resulting Huffman code length array. 239 // |error| OUT The error code. 240 bool BuildHuffmanCodeLengths(BitReaderInterface* br, 241 uint8_t* buffer, 242 size_t* length, 243 size_t max_bits, 244 size_t num_codes, 245 Buffer* lens, 246 Error* error); 247 248 // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and 249 // writing into deflate stream. 250 // 251 // |buffer| IN The array to read the Huffman table from. 252 // |length| IN The length available in the |buffer|. 253 // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 254 // stream. 255 // |num_codes| IN Number of Huffman code lengths to read from the 256 // |buffer|. 257 // |lens| OUT The Huffman code lengths array. 258 // |error| OUT The error code. 259 bool BuildHuffmanCodeLengths(const uint8_t* buffer, 260 size_t* length, 261 BitWriterInterface* bw, 262 size_t num_codes, 263 Buffer* lens, 264 Error* error); 265 266 private: 267 // A utility struct used to create Huffman codes. 268 struct CodeIndexPair { 269 uint16_t code; // The Huffman code 270 uint16_t index; // The alphabet 271 }; 272 // A vector with maximum size of 286 that is only uses as temporary space for 273 // building Huffman codes. 274 std::vector<CodeIndexPair> codeindexpairs_; 275 276 // Used in building Huffman codes for literals/lengths and distances. 277 std::vector<uint8_t> lit_len_lens_; 278 std::vector<uint16_t> lit_len_hcodes_; 279 std::vector<uint16_t> lit_len_rcodes_; 280 size_t lit_len_max_bits_; 281 std::vector<uint8_t> distance_lens_; 282 std::vector<uint16_t> distance_hcodes_; 283 std::vector<uint16_t> distance_rcodes_; 284 size_t distance_max_bits_; 285 286 // The reason for keeping a temporary buffer here is to avoid reallocing each 287 // time. 288 std::vector<uint8_t> tmp_lens_; 289 290 // Used in building Huffman codes for reading and decoding literal/length and 291 // distance Huffman code length arrays. 292 std::vector<uint8_t> code_lens_; 293 std::vector<uint16_t> code_hcodes_; 294 std::vector<uint16_t> code_rcodes_; 295 size_t code_max_bits_; 296 297 bool initialized_; 298 299 DISALLOW_COPY_AND_ASSIGN(HuffmanTable); 300 }; 301 302 // The type of a block in a deflate stream. 303 enum class BlockType : uint8_t { 304 kUncompressed = 0x00, 305 kFixed = 0x01, 306 kDynamic = 0x02, 307 }; 308 309 std::string BlockTypeToString(BlockType type); 310 311 } // namespace puffin 312 313 #endif // SRC_HUFFMAN_TABLE_H_ 314