1 // Copyright 2017 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef SRC_HUFFMAN_TABLE_H_ 6 #define SRC_HUFFMAN_TABLE_H_ 7 8 #include <cstddef> 9 #include <cstdint> 10 #include <string> 11 #include <vector> 12 13 #include "puffin/src/bit_reader.h" 14 #include "puffin/src/bit_writer.h" 15 #include "puffin/src/include/puffin/common.h" 16 #include "puffin/src/logging.h" 17 18 namespace puffin { 19 20 // Maximum Huffman code length based on RFC1951. 21 constexpr size_t kMaxHuffmanBits = 15; 22 23 // Permutations of input Huffman code lengths (used only to read 24 // |dynamic_code_lens_|). 25 extern const uint8_t kPermutations[]; 26 27 // The bases of each alphabet which is added to the integer value of extra 28 // bits that comes after the Huffman code in the input to create the given 29 // length value. The last element is a guard. 30 extern const uint16_t kLengthBases[]; 31 32 // Number of extra bits that comes after the associating Huffman code. 33 extern const uint8_t kLengthExtraBits[]; 34 35 // same as |kLengthBases| except for the the distances instead of lengths. 36 // The last element is a guard. 37 extern const uint16_t kDistanceBases[]; 38 39 // Same as |kLengthExtraBits| except for distances instead of lengths. 40 extern const uint8_t kDistanceExtraBits[]; 41 42 class HuffmanTable { 43 public: 44 HuffmanTable(); 45 virtual ~HuffmanTable() = default; 46 47 // Checks the lengths of Huffman length arrays for correctness 48 // 49 // |num_lit_len| IN The number of literal/lengths code lengths 50 // |num_distance| IN The number of distance code lengths 51 // |num_codes| IN The number of code lengths for reading Huffman table. CheckHuffmanArrayLengths(size_t num_lit_len,size_t num_distance,size_t num_codes)52 inline bool CheckHuffmanArrayLengths(size_t num_lit_len, 53 size_t num_distance, 54 size_t num_codes) { 55 if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) { 56 LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: " 57 << "num_lit_len(" << num_lit_len << ") " 58 << "num_distance(" << num_distance << ") " 59 << "num_codes(" << num_codes << ")"; 60 return false; 61 } 62 return true; 63 } 64 65 // Returns the maximum number of bits used in the current literal/length 66 // Huffman codes. LitLenMaxBits()67 inline size_t LitLenMaxBits() { return lit_len_max_bits_; } 68 69 // Returns the maximum number of bits used in the current distance Huffman 70 // codes. DistanceMaxBits()71 inline size_t DistanceMaxBits() { return distance_max_bits_; } 72 73 // Returns the alphabet associated with the set of input bits for the code 74 // length array. 75 // 76 // |bits| IN The input Huffman bits read from the deflate stream. 77 // |alphabet| OUT The alphabet associated with the given |bits|. 78 // |nbits| OUT The number of bits in the Huffman code of alphabet. 79 // Returns true if there is an alphabet associated with |bits|. CodeAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)80 inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 81 auto hc = code_hcodes_[bits]; 82 TEST_AND_RETURN_FALSE(hc & 0x8000); 83 *alphabet = hc & 0x7FFF; 84 *nbits = code_lens_[*alphabet]; 85 return true; 86 } 87 88 // Returns the alphabet associated with the set of input bits for the 89 // literal/length code length array. 90 // 91 // |bits| IN The input Huffman bits read from the deflate stream. 92 // |alphabet| OUT The alphabet associated with the given |bits|. 93 // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 94 // Returns true if there is an alphabet associated with |bits|. LitLenAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)95 inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 96 auto hc = lit_len_hcodes_[bits]; 97 TEST_AND_RETURN_FALSE(hc & 0x8000); 98 *alphabet = hc & 0x7FFF; 99 *nbits = lit_len_lens_[*alphabet]; 100 return true; 101 } 102 103 // Returns the alphabet associated with the set of input bits for the 104 // distance code length array. 105 // 106 // |bits| IN The input Huffman bits read from the deflate stream. 107 // |alphabet| OUT The alphabet associated with the given |bits|. 108 // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 109 // Returns true if there is an alphabet associated with |bits|. DistanceAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)110 inline bool DistanceAlphabet(uint32_t bits, 111 uint16_t* alphabet, 112 size_t* nbits) { 113 auto hc = distance_hcodes_[bits]; 114 TEST_AND_RETURN_FALSE(hc & 0x8000); 115 *alphabet = hc & 0x7FFF; 116 *nbits = distance_lens_[*alphabet]; 117 return true; 118 } 119 120 // Returns the Huffman code of a give alphabet for Huffman table codes. 121 // 122 // |alphabet| IN The alphabet. 123 // |huffman| OUT The Huffman code for |alphabet|. 124 // |nbits| OUT The maximum number of bits in the Huffman code of the 125 // |alphabet|. CodeHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)126 inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { 127 TEST_AND_RETURN_FALSE(alphabet < code_lens_.size()); 128 *huffman = code_rcodes_[alphabet]; 129 *nbits = code_lens_[alphabet]; 130 return true; 131 } 132 133 // Returns the Huffman code of a give alphabet for literal/length codes. 134 // 135 // |alphabet| IN The alphabet. 136 // |huffman| OUT The Huffman code for |alphabet|. 137 // |nbits| OUT The maximum number of bits in the Huffman code of the 138 // |alphabet|. LitLenHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)139 inline bool LitLenHuffman(uint16_t alphabet, 140 uint16_t* huffman, 141 size_t* nbits) { 142 TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size()); 143 *huffman = lit_len_rcodes_[alphabet]; 144 *nbits = lit_len_lens_[alphabet]; 145 return true; 146 } 147 EndOfBlockBitLength(size_t * nbits)148 inline bool EndOfBlockBitLength(size_t* nbits) { 149 TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size()); 150 *nbits = lit_len_lens_[256]; 151 return true; 152 } 153 154 // Returns the Huffman code of a give alphabet for distance codes. 155 // 156 // |alphabet| IN The alphabet. 157 // |huffman| OUT The Huffman code for |alphabet|. 158 // |nbits| OUT The maximum number of bits in the Huffman code of the 159 // |alphabet|. DistanceHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)160 inline bool DistanceHuffman(uint16_t alphabet, 161 uint16_t* huffman, 162 size_t* nbits) { 163 TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size()); 164 *huffman = distance_rcodes_[alphabet]; 165 *nbits = distance_lens_[alphabet]; 166 return true; 167 } 168 169 // This populates the object with fixed huffman table parameters. 170 // TODO(ahassani): Revamp the use of this function to be initiliazed once in 171 // the lifetime of the program and only one instance needed. 172 bool BuildFixedHuffmanTable(); 173 174 // This functions first reads the Huffman code length arrays from the input 175 // deflate stream, then builds both literal/length and distance Huffman 176 // code arrays. It also writes the Huffman table into the puffed stream. 177 // 178 // |br| IN The |BitReaderInterface| reading the deflate stream. 179 // |buffer| OUT The object to write the Huffman table. 180 // |length| IN/OUT The length available in the |buffer| and in return it 181 // will be the length of Huffman table data written into 182 // the |buffer|. 183 bool BuildDynamicHuffmanTable(BitReaderInterface* br, 184 uint8_t* buffer, 185 size_t* length); 186 187 // This functions first reads the Huffman code length arrays from the input 188 // puffed |buffer|, then builds both literal/length and distance Huffman code 189 // arrays. It also writes the coded Huffman table arrays into the deflate 190 // stream. 191 // 192 // |buffer| IN The array to read the Huffman table from. 193 // |length| IN The length available in the |buffer|. 194 // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 195 // stream. 196 bool BuildDynamicHuffmanTable(const uint8_t* buffer, 197 size_t length, 198 BitWriterInterface* bw); 199 200 protected: 201 // Initializes the Huffman codes from an array of lengths. 202 // 203 // |lens| IN The input array of code lengths. 204 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 205 bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits); 206 207 // Creates the Huffman code to alphabet array. 208 // |lens| IN The input array of code lengths. 209 // |hcodes| OUT The Huffman to alphabet array. 210 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 211 bool BuildHuffmanCodes(const Buffer& lens, 212 std::vector<uint16_t>* hcodes, 213 size_t* max_bits); 214 215 // Creates the alphabet to Huffman code array. 216 // |lens| IN The input array of code lengths. 217 // |rcodes| OUT The Huffman to Huffman array. 218 // |max_bits| OUT The maximum number of bits used for the Huffman codes. 219 bool BuildHuffmanReverseCodes(const Buffer& lens, 220 std::vector<uint16_t>* rcodes, 221 size_t* max_bits); 222 223 // Reads a specific Huffman code length array from input. At the same time 224 // writes the array into the puffed stream. The Huffman code length array is 225 // either the literal/lengths or distance codes. 226 // 227 // |br| IN The |BitReaderInterface| for reading the deflate 228 // stream. 229 // |buffer| OUT The array to write the Huffman table. 230 // |length| IN/OUT The length available in the |buffer| and in return it 231 // will be the length of data written into the |buffer|. 232 // |max_bits| IN The maximum number of bits in the Huffman codes. 233 // |num_codes| IN The size of the Huffman code length array in the input. 234 // |lens| OUT The resulting Huffman code length array. 235 bool BuildHuffmanCodeLengths(BitReaderInterface* br, 236 uint8_t* buffer, 237 size_t* length, 238 size_t max_bits, 239 size_t num_codes, 240 Buffer* lens); 241 242 // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and 243 // writing into deflate stream. 244 // 245 // |buffer| IN The array to read the Huffman table from. 246 // |length| IN The length available in the |buffer|. 247 // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 248 // stream. 249 // |num_codes| IN Number of Huffman code lengths to read from the 250 // |buffer|. 251 // |lens| OUT The Huffman code lengths array. 252 bool BuildHuffmanCodeLengths(const uint8_t* buffer, 253 size_t* length, 254 BitWriterInterface* bw, 255 size_t num_codes, 256 Buffer* lens); 257 258 private: 259 // A utility struct used to create Huffman codes. 260 struct CodeIndexPair { 261 uint16_t code; // The Huffman code 262 uint16_t index; // The alphabet 263 }; 264 // A vector with maximum size of 286 that is only uses as temporary space for 265 // building Huffman codes. 266 std::vector<CodeIndexPair> codeindexpairs_; 267 268 // Used in building Huffman codes for literals/lengths and distances. 269 std::vector<uint8_t> lit_len_lens_; 270 std::vector<uint16_t> lit_len_hcodes_; 271 std::vector<uint16_t> lit_len_rcodes_; 272 size_t lit_len_max_bits_; 273 std::vector<uint8_t> distance_lens_; 274 std::vector<uint16_t> distance_hcodes_; 275 std::vector<uint16_t> distance_rcodes_; 276 size_t distance_max_bits_; 277 278 // The reason for keeping a temporary buffer here is to avoid reallocing each 279 // time. 280 std::vector<uint8_t> tmp_lens_; 281 282 // Used in building Huffman codes for reading and decoding literal/length and 283 // distance Huffman code length arrays. 284 std::vector<uint8_t> code_lens_; 285 std::vector<uint16_t> code_hcodes_; 286 std::vector<uint16_t> code_rcodes_; 287 size_t code_max_bits_; 288 289 bool initialized_; 290 291 DISALLOW_COPY_AND_ASSIGN(HuffmanTable); 292 }; 293 294 // The type of a block in a deflate stream. 295 enum class BlockType : uint8_t { 296 kUncompressed = 0x00, 297 kFixed = 0x01, 298 kDynamic = 0x02, 299 }; 300 301 std::string BlockTypeToString(BlockType type); 302 303 } // namespace puffin 304 305 #endif // SRC_HUFFMAN_TABLE_H_ 306