• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/huffman_table.h"
6 
7 #include <algorithm>
8 #include <vector>
9 
10 #include "puffin/src/include/puffin/errors.h"
11 #include "puffin/src/set_errors.h"
12 
13 namespace puffin {
14 
15 // clang-format off
16 // Permutations of input Huffman code lengths (used only to read code lengths
17 // necessary for reading Huffman table.)
18 const uint8_t kPermutations[19] = {
19   16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
20 
21 // The bases of each alphabet which is added to the integer value of extra
22 // bits that comes after the Huffman code in the input to create the given
23 // length value. The last element is a guard.
24 const uint16_t kLengthBases[30] = {
25   3,  4,  5,  6,  7,  8,  9,  10, 11,  13,  15,  17,  19,  23,  27, 31, 35, 43,
26   51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
27 
28 // Number of extra bits that comes after the associating Huffman code.
29 const uint8_t kLengthExtraBits[29] = {
30   0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5,
31   5, 5, 0};
32 
33 // Same as |kLengthBases| but for the distances instead of lengths. The last
34 // element is a guard.
35 const uint16_t kDistanceBases[31] = {
36   1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
37   1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
38 
39 // Same as |kLengthExtraBits| but for distances instead of lengths.
40 const uint8_t kDistanceExtraBits[30] = {
41   0, 0, 0,  0,  1,  1,  2,  2,  3,  3,  4, 4, 5,  5,  6,  6,  7,  7,  8,  8, 9,
42   9, 10, 10, 11, 11, 12, 12, 13, 13};
43 // clang-format on
44 
45 // 288 is the maximum number of needed huffman codes for an alphabet. Fixed
46 // huffman table needs 288 and dynamic huffman table needs maximum 286.
47 // 286 = 256 (coding a byte) +
48 //         1 (coding the end of block symbole) +
49 //        29 (coding the lengths)
HuffmanTable()50 HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
51 
InitHuffmanCodes(const Buffer & lens,size_t * max_bits)52 bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
53   // Temporary buffers used in |InitHuffmanCodes|.
54   uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
55   uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
56 
57   // 1. Count the number of codes for each length;
58   for (auto len : lens) {
59     len_count_[len]++;
60   }
61 
62   for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
63     if (len_count_[*max_bits] != 0) {
64       break;
65     }
66   }
67 
68   // Check for oversubscribed code lengths. (A code with length 'L' cannot have
69   // more than 2^L items.
70   for (size_t idx = 1; idx <= *max_bits; idx++) {
71     if (len_count_[idx] > (1 << idx)) {
72       LOG(ERROR) << "Oversubscribed code lengths error!";
73       return false;
74     }
75   }
76 
77   // 2. Compute the coding of the first element for each length.
78   uint16_t code = 0;
79   len_count_[0] = 0;
80   for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
81     code = (code + len_count_[bits - 1]) << 1;
82     next_code_[bits] = code;
83   }
84 
85   codeindexpairs_.clear();
86   // 3. Calculate all the code values.
87   for (size_t idx = 0; idx < lens.size(); idx++) {
88     auto len = lens[idx];
89     if (len == 0) {
90       continue;
91     }
92 
93     // Reverse the code
94     CodeIndexPair cip;
95     cip.code = 0;
96     auto tmp_code = next_code_[len];
97     for (size_t r = 0; r < len; r++) {
98       cip.code <<= 1;
99       cip.code |= tmp_code & 1U;
100       tmp_code >>= 1;
101     }
102     cip.index = idx;
103     codeindexpairs_.push_back(cip);
104     next_code_[len]++;
105   }
106   return true;
107 }
108 
BuildHuffmanCodes(const Buffer & lens,std::vector<uint16_t> * hcodes,size_t * max_bits)109 bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
110                                      std::vector<uint16_t>* hcodes,
111                                      size_t* max_bits) {
112   TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
113   // Sort descending based on the bit-length of the code.
114   std::sort(codeindexpairs_.begin(),
115             codeindexpairs_.end(),
116             [&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
117               return lens[a.index] > lens[b.index];
118             });
119 
120   // Only zero out the part of hcodes which is valuable.
121   memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
122   for (const auto& cip : codeindexpairs_) {
123     // The MSB bit of the code in hcodes is set if it is a valid code and its
124     // code exists in the input Huffman table.
125     (*hcodes)[cip.code] = cip.index | 0x8000;
126     auto fill_bits = *max_bits - lens[cip.index];
127     for (auto idx = 1; idx < (1 << fill_bits); idx++) {
128       auto location = (idx << lens[cip.index]) | cip.code;
129       if (!((*hcodes)[location] & 0x8000)) {
130         (*hcodes)[location] = cip.index | 0x8000;
131       }
132     }
133   }
134   return true;
135 }
136 
BuildHuffmanReverseCodes(const Buffer & lens,std::vector<uint16_t> * rcodes,size_t * max_bits)137 bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
138                                             std::vector<uint16_t>* rcodes,
139                                             size_t* max_bits) {
140   TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
141   // Sort ascending based on the index.
142   std::sort(codeindexpairs_.begin(),
143             codeindexpairs_.end(),
144             [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
145               return a.index < b.index;
146             });
147 
148   size_t index = 0;
149   for (size_t idx = 0; idx < rcodes->size(); idx++) {
150     if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
151       (*rcodes)[idx] = codeindexpairs_[index].code;
152       index++;
153     } else {
154       (*rcodes)[idx] = 0;
155     }
156   }
157   return true;
158 }
159 
BuildFixedHuffmanTable()160 bool HuffmanTable::BuildFixedHuffmanTable() {
161   if (!initialized_) {
162     // For all the vectors used in this class, we set the size in the
163     // constructor and we do not change the size later. This is for optimization
164     // purposes. The total size of data in this class is approximately
165     // 2KB. Because it is a constructor return values cannot be checked.
166     lit_len_lens_.resize(288);
167     lit_len_rcodes_.resize(288);
168     lit_len_hcodes_.resize(1 << 9);
169 
170     distance_lens_.resize(30);
171     distance_rcodes_.resize(30);
172     distance_hcodes_.resize(1 << 5);
173 
174     size_t i = 0;
175     while (i < 144) {
176       lit_len_lens_[i++] = 8;
177     }
178     while (i < 256) {
179       lit_len_lens_[i++] = 9;
180     }
181     while (i < 280) {
182       lit_len_lens_[i++] = 7;
183     }
184     while (i < 288) {
185       lit_len_lens_[i++] = 8;
186     }
187 
188     i = 0;
189     while (i < 30) {
190       distance_lens_[i++] = 5;
191     }
192 
193     TEST_AND_RETURN_FALSE(
194         BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
195 
196     TEST_AND_RETURN_FALSE(BuildHuffmanCodes(
197         distance_lens_, &distance_hcodes_, &distance_max_bits_));
198 
199     TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
200         lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
201 
202     TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
203         distance_lens_, &distance_rcodes_, &distance_max_bits_));
204 
205     initialized_ = true;
206   }
207   return true;
208 }
209 
BuildDynamicHuffmanTable(BitReaderInterface * br,uint8_t * buffer,size_t * length,Error * error)210 bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
211                                             uint8_t* buffer,
212                                             size_t* length,
213                                             Error* error) {
214   // Initilize only once and reuse.
215   if (!initialized_) {
216     // Only resizing the arrays needed.
217     code_lens_.resize(19);
218     code_hcodes_.resize(1 << 7);
219 
220     lit_len_lens_.resize(286);
221     lit_len_hcodes_.resize(1 << 15);
222 
223     distance_lens_.resize(30);
224     distance_hcodes_.resize(1 << 15);
225 
226     // 286: Maximum number of literal/lengths symbols.
227     // 30: Maximum number of distance symbols.
228     // The reason we reserve this to the sum of both maximum sizes is that we
229     // need to calculate both huffman codes contiguously. See b/72815313.
230     tmp_lens_.resize(286 + 30);
231     initialized_ = true;
232   }
233 
234   // Read the header. Reads the first portion of the Huffman data from input and
235   // writes it into the puff |buffer|. The first portion includes the size
236   // (|num_lit_len|) of the literals/lengths Huffman code length array
237   // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
238   // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
239   // Huffman code length array (dynamic_code_lens_) for reading
240   // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
241   // reading |dynamic_code_lens_|.
242 
243   TEST_AND_RETURN_FALSE_SET_ERROR(*length >= 3, Error::kInsufficientOutput);
244   size_t index = 0;
245   TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(14), Error::kInsufficientInput);
246   buffer[index++] = br->ReadBits(5);  // HLIST
247   auto num_lit_len = br->ReadBits(5) + 257;
248   br->DropBits(5);
249 
250   buffer[index++] = br->ReadBits(5);  // HDIST
251   auto num_distance = br->ReadBits(5) + 1;
252   br->DropBits(5);
253 
254   buffer[index++] = br->ReadBits(4);  // HCLEN
255   auto num_codes = br->ReadBits(4) + 4;
256   br->DropBits(4);
257 
258   TEST_AND_RETURN_FALSE_SET_ERROR(
259       CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes),
260       Error::kInvalidInput);
261 
262   bool checked = false;
263   size_t idx = 0;
264   TEST_AND_RETURN_FALSE_SET_ERROR(*length - index >= (num_codes + 1) / 2,
265                                   Error::kInsufficientOutput);
266   // Two codes per byte
267   for (; idx < num_codes; idx++) {
268     TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3),
269                                     Error::kInsufficientInput);
270     code_lens_[kPermutations[idx]] = br->ReadBits(3);
271     if (checked) {
272       buffer[index++] |= br->ReadBits(3);
273     } else {
274       buffer[index] = br->ReadBits(3) << 4;
275     }
276     checked = !checked;
277     br->DropBits(3);
278   }
279   // Pad the last byte if odd number of codes.
280   if (checked) {
281     index++;
282   }
283   for (; idx < 19; idx++) {
284     code_lens_[kPermutations[idx]] = 0;
285   }
286 
287   TEST_AND_RETURN_FALSE_SET_ERROR(
288       BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_),
289       Error::kInvalidInput);
290 
291   // Build literals/lengths and distance Huffman code length arrays.
292   auto bytes_available = (*length - index);
293   tmp_lens_.clear();
294   TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
295       br, buffer + index, &bytes_available, code_max_bits_,
296       num_lit_len + num_distance, &tmp_lens_, error));
297   index += bytes_available;
298 
299   // TODO(ahassani): Optimize this so the memcpy is not needed anymore.
300   lit_len_lens_.clear();
301   lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
302                        tmp_lens_.begin() + num_lit_len);
303 
304   distance_lens_.clear();
305   distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
306                         tmp_lens_.end());
307 
308   TEST_AND_RETURN_FALSE_SET_ERROR(
309       BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_),
310       Error::kInvalidInput);
311 
312   // Build distance Huffman codes.
313   TEST_AND_RETURN_FALSE_SET_ERROR(
314       BuildHuffmanCodes(distance_lens_, &distance_hcodes_, &distance_max_bits_),
315       Error::kInvalidInput);
316 
317   *length = index;
318   return true;
319 }
320 
BuildHuffmanCodeLengths(BitReaderInterface * br,uint8_t * buffer,size_t * length,size_t max_bits,size_t num_codes,Buffer * lens,Error * error)321 bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
322                                            uint8_t* buffer,
323                                            size_t* length,
324                                            size_t max_bits,
325                                            size_t num_codes,
326                                            Buffer* lens,
327                                            Error* error) {
328   size_t index = 0;
329   lens->clear();
330   for (size_t idx = 0; idx < num_codes;) {
331     TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(max_bits),
332                                     Error::kInsufficientInput);
333     auto bits = br->ReadBits(max_bits);
334     uint16_t code;
335     size_t nbits;
336     TEST_AND_RETURN_FALSE_SET_ERROR(CodeAlphabet(bits, &code, &nbits),
337                                     Error::kInvalidInput);
338     TEST_AND_RETURN_FALSE_SET_ERROR(index < *length,
339                                     Error::kInsufficientOutput);
340     br->DropBits(nbits);
341     if (code < 16) {
342       buffer[index++] = code;
343       lens->push_back(code);
344       idx++;
345     } else {
346       TEST_AND_RETURN_FALSE_SET_ERROR(code < 19, Error::kInvalidInput);
347       size_t copy_num = 0;
348       uint8_t copy_val;
349       switch (code) {
350         case 16:
351           TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput);
352           TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(2),
353                                           Error::kInsufficientInput);
354           copy_num = 3 + br->ReadBits(2);
355           buffer[index++] = 16 + br->ReadBits(2);  // 3 - 6 times
356           copy_val = (*lens)[idx - 1];
357           br->DropBits(2);
358           break;
359 
360         case 17:
361           TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3),
362                                           Error::kInsufficientInput);
363           copy_num = 3 + br->ReadBits(3);
364           buffer[index++] = 20 + br->ReadBits(3);  // 3 - 10 times
365           copy_val = 0;
366           br->DropBits(3);
367           break;
368 
369         case 18:
370           TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(7),
371                                           Error::kInsufficientInput);
372           copy_num = 11 + br->ReadBits(7);
373           buffer[index++] = 28 + br->ReadBits(7);  // 11 - 138 times
374           copy_val = 0;
375           br->DropBits(7);
376           break;
377 
378         default:
379           LOG(ERROR) << "Invalid code!";
380           *error = Error::kInvalidInput;
381           return false;
382           break;
383       }
384       idx += copy_num;
385       while (copy_num--) {
386         lens->push_back(copy_val);
387       }
388     }
389   }
390   TEST_AND_RETURN_FALSE(lens->size() == num_codes);
391   *length = index;
392   return true;
393 }
394 
BuildDynamicHuffmanTable(const uint8_t * buffer,size_t length,BitWriterInterface * bw,Error * error)395 bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
396                                             size_t length,
397                                             BitWriterInterface* bw,
398                                             Error* error) {
399   if (!initialized_) {
400     // Only resizing the arrays needed.
401     code_lens_.resize(19);
402     code_rcodes_.resize(19);
403 
404     lit_len_lens_.resize(286);
405     lit_len_rcodes_.resize(286);
406 
407     distance_lens_.resize(30);
408     distance_rcodes_.resize(30);
409 
410     tmp_lens_.resize(286 + 30);
411 
412     initialized_ = true;
413   }
414 
415   TEST_AND_RETURN_FALSE_SET_ERROR(length >= 3, Error::kInsufficientInput);
416   size_t index = 0;
417   // Write the header.
418   size_t num_lit_len = buffer[index] + 257;
419   TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]),
420                                   Error::kInsufficientOutput);
421 
422   size_t num_distance = buffer[index] + 1;
423   TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]),
424                                   Error::kInsufficientOutput);
425 
426   size_t num_codes = buffer[index] + 4;
427   TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(4, buffer[index++]),
428                                   Error::kInsufficientOutput);
429 
430   TEST_AND_RETURN_FALSE_SET_ERROR(
431       CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes),
432       Error::kInvalidInput);
433 
434   TEST_AND_RETURN_FALSE_SET_ERROR(length - index >= (num_codes + 1) / 2,
435                                   Error::kInsufficientInput);
436   bool checked = false;
437   size_t idx = 0;
438   for (; idx < num_codes; idx++) {
439     uint8_t len;
440     if (checked) {
441       len = buffer[index++] & 0x0F;
442     } else {
443       len = buffer[index] >> 4;
444     }
445     checked = !checked;
446     code_lens_[kPermutations[idx]] = len;
447     TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, len),
448                                     Error::kInsufficientOutput);
449   }
450   if (checked) {
451     index++;
452   }
453   for (; idx < 19; idx++) {
454     code_lens_[kPermutations[idx]] = 0;
455   }
456 
457   TEST_AND_RETURN_FALSE_SET_ERROR(
458       BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_),
459       Error::kInvalidInput);
460 
461   // Build literal/lengths and distance Huffman code length arrays.
462   auto bytes_available = length - index;
463   TEST_AND_RETURN_FALSE(
464       BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
465                               num_lit_len + num_distance, &tmp_lens_, error));
466   index += bytes_available;
467 
468   lit_len_lens_.clear();
469   lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
470                        tmp_lens_.begin() + num_lit_len);
471 
472   distance_lens_.clear();
473   distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
474                         tmp_lens_.end());
475 
476   // Build literal/lengths Huffman reverse codes.
477   TEST_AND_RETURN_FALSE_SET_ERROR(
478       BuildHuffmanReverseCodes(
479           lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_),
480       Error::kInvalidInput);
481 
482   // Build distance Huffman reverse codes.
483   TEST_AND_RETURN_FALSE_SET_ERROR(
484       BuildHuffmanReverseCodes(
485           distance_lens_, &distance_rcodes_, &distance_max_bits_),
486       Error::kInvalidInput);
487 
488   TEST_AND_RETURN_FALSE_SET_ERROR(length == index, Error::kInvalidInput);
489 
490   return true;
491 }
492 
BuildHuffmanCodeLengths(const uint8_t * buffer,size_t * length,BitWriterInterface * bw,size_t num_codes,Buffer * lens,Error * error)493 bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
494                                            size_t* length,
495                                            BitWriterInterface* bw,
496                                            size_t num_codes,
497                                            Buffer* lens,
498                                            Error* error) {
499   lens->clear();
500   uint16_t hcode;
501   size_t nbits;
502   size_t index = 0;
503   for (size_t idx = 0; idx < num_codes;) {
504     TEST_AND_RETURN_FALSE_SET_ERROR(index < *length, Error::kInsufficientInput);
505     auto pcode = buffer[index++];
506     TEST_AND_RETURN_FALSE_SET_ERROR(pcode <= 155, Error::kInvalidInput);
507 
508     auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
509     TEST_AND_RETURN_FALSE_SET_ERROR(CodeHuffman(code, &hcode, &nbits),
510                                     Error::kInvalidInput);
511     TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(nbits, hcode),
512                                     Error::kInsufficientOutput);
513     if (code < 16) {
514       lens->push_back(code);
515       idx++;
516     } else {
517       size_t copy_num = 0;
518       uint8_t copy_val;
519       switch (code) {
520         case 16:
521           // Cannot repeat a non-existent last code if idx == 0.
522           TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput);
523           TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(2, pcode - 16),
524                                           Error::kInsufficientOutput);
525           copy_num = 3 + pcode - 16;
526           copy_val = (*lens)[idx - 1];
527           break;
528 
529         case 17:
530           TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, pcode - 20),
531                                           Error::kInsufficientOutput);
532           copy_num = 3 + pcode - 20;
533           copy_val = 0;
534           break;
535 
536         case 18:
537           TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(7, pcode - 28),
538                                           Error::kInsufficientOutput);
539           copy_num = 11 + pcode - 28;
540           copy_val = 0;
541           break;
542 
543         default:
544           break;
545       }
546       idx += copy_num;
547       while (copy_num--) {
548         lens->push_back(copy_val);
549       }
550     }
551   }
552   TEST_AND_RETURN_FALSE(lens->size() == num_codes);
553   *length = index;
554   return true;
555 }
556 
BlockTypeToString(BlockType type)557 std::string BlockTypeToString(BlockType type) {
558   switch (type) {
559     case BlockType::kUncompressed:
560       return "Uncompressed";
561 
562     case BlockType::kFixed:
563       return "Fixed";
564 
565     case BlockType::kDynamic:
566       return "Dynamic";
567 
568     default:
569       return "Unknown";
570   }
571 }
572 
573 }  // namespace puffin
574