1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/huffman_table.h"
6
7 #include <algorithm>
8 #include <vector>
9
10 #include "puffin/src/include/puffin/errors.h"
11 #include "puffin/src/set_errors.h"
12
13 namespace puffin {
14
15 // clang-format off
16 // Permutations of input Huffman code lengths (used only to read code lengths
17 // necessary for reading Huffman table.)
18 const uint8_t kPermutations[19] = {
19 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
20
21 // The bases of each alphabet which is added to the integer value of extra
22 // bits that comes after the Huffman code in the input to create the given
23 // length value. The last element is a guard.
24 const uint16_t kLengthBases[30] = {
25 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43,
26 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
27
28 // Number of extra bits that comes after the associating Huffman code.
29 const uint8_t kLengthExtraBits[29] = {
30 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5,
31 5, 5, 0};
32
33 // Same as |kLengthBases| but for the distances instead of lengths. The last
34 // element is a guard.
35 const uint16_t kDistanceBases[31] = {
36 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
37 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
38
39 // Same as |kLengthExtraBits| but for distances instead of lengths.
40 const uint8_t kDistanceExtraBits[30] = {
41 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9,
42 9, 10, 10, 11, 11, 12, 12, 13, 13};
43 // clang-format on
44
45 // 288 is the maximum number of needed huffman codes for an alphabet. Fixed
46 // huffman table needs 288 and dynamic huffman table needs maximum 286.
47 // 286 = 256 (coding a byte) +
48 // 1 (coding the end of block symbole) +
49 // 29 (coding the lengths)
HuffmanTable()50 HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
51
InitHuffmanCodes(const Buffer & lens,size_t * max_bits)52 bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
53 // Temporary buffers used in |InitHuffmanCodes|.
54 uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
55 uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
56
57 // 1. Count the number of codes for each length;
58 for (auto len : lens) {
59 len_count_[len]++;
60 }
61
62 for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
63 if (len_count_[*max_bits] != 0) {
64 break;
65 }
66 }
67
68 // Check for oversubscribed code lengths. (A code with length 'L' cannot have
69 // more than 2^L items.
70 for (size_t idx = 1; idx <= *max_bits; idx++) {
71 if (len_count_[idx] > (1 << idx)) {
72 LOG(ERROR) << "Oversubscribed code lengths error!";
73 return false;
74 }
75 }
76
77 // 2. Compute the coding of the first element for each length.
78 uint16_t code = 0;
79 len_count_[0] = 0;
80 for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
81 code = (code + len_count_[bits - 1]) << 1;
82 next_code_[bits] = code;
83 }
84
85 codeindexpairs_.clear();
86 // 3. Calculate all the code values.
87 for (size_t idx = 0; idx < lens.size(); idx++) {
88 auto len = lens[idx];
89 if (len == 0) {
90 continue;
91 }
92
93 // Reverse the code
94 CodeIndexPair cip;
95 cip.code = 0;
96 auto tmp_code = next_code_[len];
97 for (size_t r = 0; r < len; r++) {
98 cip.code <<= 1;
99 cip.code |= tmp_code & 1U;
100 tmp_code >>= 1;
101 }
102 cip.index = idx;
103 codeindexpairs_.push_back(cip);
104 next_code_[len]++;
105 }
106 return true;
107 }
108
BuildHuffmanCodes(const Buffer & lens,std::vector<uint16_t> * hcodes,size_t * max_bits)109 bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
110 std::vector<uint16_t>* hcodes,
111 size_t* max_bits) {
112 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
113 // Sort descending based on the bit-length of the code.
114 std::sort(codeindexpairs_.begin(),
115 codeindexpairs_.end(),
116 [&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
117 return lens[a.index] > lens[b.index];
118 });
119
120 // Only zero out the part of hcodes which is valuable.
121 memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
122 for (const auto& cip : codeindexpairs_) {
123 // The MSB bit of the code in hcodes is set if it is a valid code and its
124 // code exists in the input Huffman table.
125 (*hcodes)[cip.code] = cip.index | 0x8000;
126 auto fill_bits = *max_bits - lens[cip.index];
127 for (auto idx = 1; idx < (1 << fill_bits); idx++) {
128 auto location = (idx << lens[cip.index]) | cip.code;
129 if (!((*hcodes)[location] & 0x8000)) {
130 (*hcodes)[location] = cip.index | 0x8000;
131 }
132 }
133 }
134 return true;
135 }
136
BuildHuffmanReverseCodes(const Buffer & lens,std::vector<uint16_t> * rcodes,size_t * max_bits)137 bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
138 std::vector<uint16_t>* rcodes,
139 size_t* max_bits) {
140 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
141 // Sort ascending based on the index.
142 std::sort(codeindexpairs_.begin(),
143 codeindexpairs_.end(),
144 [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
145 return a.index < b.index;
146 });
147
148 size_t index = 0;
149 for (size_t idx = 0; idx < rcodes->size(); idx++) {
150 if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
151 (*rcodes)[idx] = codeindexpairs_[index].code;
152 index++;
153 } else {
154 (*rcodes)[idx] = 0;
155 }
156 }
157 return true;
158 }
159
BuildFixedHuffmanTable()160 bool HuffmanTable::BuildFixedHuffmanTable() {
161 if (!initialized_) {
162 // For all the vectors used in this class, we set the size in the
163 // constructor and we do not change the size later. This is for optimization
164 // purposes. The total size of data in this class is approximately
165 // 2KB. Because it is a constructor return values cannot be checked.
166 lit_len_lens_.resize(288);
167 lit_len_rcodes_.resize(288);
168 lit_len_hcodes_.resize(1 << 9);
169
170 distance_lens_.resize(30);
171 distance_rcodes_.resize(30);
172 distance_hcodes_.resize(1 << 5);
173
174 size_t i = 0;
175 while (i < 144) {
176 lit_len_lens_[i++] = 8;
177 }
178 while (i < 256) {
179 lit_len_lens_[i++] = 9;
180 }
181 while (i < 280) {
182 lit_len_lens_[i++] = 7;
183 }
184 while (i < 288) {
185 lit_len_lens_[i++] = 8;
186 }
187
188 i = 0;
189 while (i < 30) {
190 distance_lens_[i++] = 5;
191 }
192
193 TEST_AND_RETURN_FALSE(
194 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
195
196 TEST_AND_RETURN_FALSE(BuildHuffmanCodes(
197 distance_lens_, &distance_hcodes_, &distance_max_bits_));
198
199 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
200 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
201
202 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
203 distance_lens_, &distance_rcodes_, &distance_max_bits_));
204
205 initialized_ = true;
206 }
207 return true;
208 }
209
BuildDynamicHuffmanTable(BitReaderInterface * br,uint8_t * buffer,size_t * length,Error * error)210 bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
211 uint8_t* buffer,
212 size_t* length,
213 Error* error) {
214 // Initilize only once and reuse.
215 if (!initialized_) {
216 // Only resizing the arrays needed.
217 code_lens_.resize(19);
218 code_hcodes_.resize(1 << 7);
219
220 lit_len_lens_.resize(286);
221 lit_len_hcodes_.resize(1 << 15);
222
223 distance_lens_.resize(30);
224 distance_hcodes_.resize(1 << 15);
225
226 // 286: Maximum number of literal/lengths symbols.
227 // 30: Maximum number of distance symbols.
228 // The reason we reserve this to the sum of both maximum sizes is that we
229 // need to calculate both huffman codes contiguously. See b/72815313.
230 tmp_lens_.resize(286 + 30);
231 initialized_ = true;
232 }
233
234 // Read the header. Reads the first portion of the Huffman data from input and
235 // writes it into the puff |buffer|. The first portion includes the size
236 // (|num_lit_len|) of the literals/lengths Huffman code length array
237 // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
238 // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
239 // Huffman code length array (dynamic_code_lens_) for reading
240 // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
241 // reading |dynamic_code_lens_|.
242
243 TEST_AND_RETURN_FALSE_SET_ERROR(*length >= 3, Error::kInsufficientOutput);
244 size_t index = 0;
245 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(14), Error::kInsufficientInput);
246 buffer[index++] = br->ReadBits(5); // HLIST
247 auto num_lit_len = br->ReadBits(5) + 257;
248 br->DropBits(5);
249
250 buffer[index++] = br->ReadBits(5); // HDIST
251 auto num_distance = br->ReadBits(5) + 1;
252 br->DropBits(5);
253
254 buffer[index++] = br->ReadBits(4); // HCLEN
255 auto num_codes = br->ReadBits(4) + 4;
256 br->DropBits(4);
257
258 TEST_AND_RETURN_FALSE_SET_ERROR(
259 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes),
260 Error::kInvalidInput);
261
262 bool checked = false;
263 size_t idx = 0;
264 TEST_AND_RETURN_FALSE_SET_ERROR(*length - index >= (num_codes + 1) / 2,
265 Error::kInsufficientOutput);
266 // Two codes per byte
267 for (; idx < num_codes; idx++) {
268 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3),
269 Error::kInsufficientInput);
270 code_lens_[kPermutations[idx]] = br->ReadBits(3);
271 if (checked) {
272 buffer[index++] |= br->ReadBits(3);
273 } else {
274 buffer[index] = br->ReadBits(3) << 4;
275 }
276 checked = !checked;
277 br->DropBits(3);
278 }
279 // Pad the last byte if odd number of codes.
280 if (checked) {
281 index++;
282 }
283 for (; idx < 19; idx++) {
284 code_lens_[kPermutations[idx]] = 0;
285 }
286
287 TEST_AND_RETURN_FALSE_SET_ERROR(
288 BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_),
289 Error::kInvalidInput);
290
291 // Build literals/lengths and distance Huffman code length arrays.
292 auto bytes_available = (*length - index);
293 tmp_lens_.clear();
294 TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
295 br, buffer + index, &bytes_available, code_max_bits_,
296 num_lit_len + num_distance, &tmp_lens_, error));
297 index += bytes_available;
298
299 // TODO(ahassani): Optimize this so the memcpy is not needed anymore.
300 lit_len_lens_.clear();
301 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
302 tmp_lens_.begin() + num_lit_len);
303
304 distance_lens_.clear();
305 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
306 tmp_lens_.end());
307
308 TEST_AND_RETURN_FALSE_SET_ERROR(
309 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_),
310 Error::kInvalidInput);
311
312 // Build distance Huffman codes.
313 TEST_AND_RETURN_FALSE_SET_ERROR(
314 BuildHuffmanCodes(distance_lens_, &distance_hcodes_, &distance_max_bits_),
315 Error::kInvalidInput);
316
317 *length = index;
318 return true;
319 }
320
BuildHuffmanCodeLengths(BitReaderInterface * br,uint8_t * buffer,size_t * length,size_t max_bits,size_t num_codes,Buffer * lens,Error * error)321 bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
322 uint8_t* buffer,
323 size_t* length,
324 size_t max_bits,
325 size_t num_codes,
326 Buffer* lens,
327 Error* error) {
328 size_t index = 0;
329 lens->clear();
330 for (size_t idx = 0; idx < num_codes;) {
331 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(max_bits),
332 Error::kInsufficientInput);
333 auto bits = br->ReadBits(max_bits);
334 uint16_t code;
335 size_t nbits;
336 TEST_AND_RETURN_FALSE_SET_ERROR(CodeAlphabet(bits, &code, &nbits),
337 Error::kInvalidInput);
338 TEST_AND_RETURN_FALSE_SET_ERROR(index < *length,
339 Error::kInsufficientOutput);
340 br->DropBits(nbits);
341 if (code < 16) {
342 buffer[index++] = code;
343 lens->push_back(code);
344 idx++;
345 } else {
346 TEST_AND_RETURN_FALSE_SET_ERROR(code < 19, Error::kInvalidInput);
347 size_t copy_num = 0;
348 uint8_t copy_val;
349 switch (code) {
350 case 16:
351 TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput);
352 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(2),
353 Error::kInsufficientInput);
354 copy_num = 3 + br->ReadBits(2);
355 buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times
356 copy_val = (*lens)[idx - 1];
357 br->DropBits(2);
358 break;
359
360 case 17:
361 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3),
362 Error::kInsufficientInput);
363 copy_num = 3 + br->ReadBits(3);
364 buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times
365 copy_val = 0;
366 br->DropBits(3);
367 break;
368
369 case 18:
370 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(7),
371 Error::kInsufficientInput);
372 copy_num = 11 + br->ReadBits(7);
373 buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times
374 copy_val = 0;
375 br->DropBits(7);
376 break;
377
378 default:
379 LOG(ERROR) << "Invalid code!";
380 *error = Error::kInvalidInput;
381 return false;
382 break;
383 }
384 idx += copy_num;
385 while (copy_num--) {
386 lens->push_back(copy_val);
387 }
388 }
389 }
390 TEST_AND_RETURN_FALSE(lens->size() == num_codes);
391 *length = index;
392 return true;
393 }
394
BuildDynamicHuffmanTable(const uint8_t * buffer,size_t length,BitWriterInterface * bw,Error * error)395 bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
396 size_t length,
397 BitWriterInterface* bw,
398 Error* error) {
399 if (!initialized_) {
400 // Only resizing the arrays needed.
401 code_lens_.resize(19);
402 code_rcodes_.resize(19);
403
404 lit_len_lens_.resize(286);
405 lit_len_rcodes_.resize(286);
406
407 distance_lens_.resize(30);
408 distance_rcodes_.resize(30);
409
410 tmp_lens_.resize(286 + 30);
411
412 initialized_ = true;
413 }
414
415 TEST_AND_RETURN_FALSE_SET_ERROR(length >= 3, Error::kInsufficientInput);
416 size_t index = 0;
417 // Write the header.
418 size_t num_lit_len = buffer[index] + 257;
419 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]),
420 Error::kInsufficientOutput);
421
422 size_t num_distance = buffer[index] + 1;
423 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]),
424 Error::kInsufficientOutput);
425
426 size_t num_codes = buffer[index] + 4;
427 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(4, buffer[index++]),
428 Error::kInsufficientOutput);
429
430 TEST_AND_RETURN_FALSE_SET_ERROR(
431 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes),
432 Error::kInvalidInput);
433
434 TEST_AND_RETURN_FALSE_SET_ERROR(length - index >= (num_codes + 1) / 2,
435 Error::kInsufficientInput);
436 bool checked = false;
437 size_t idx = 0;
438 for (; idx < num_codes; idx++) {
439 uint8_t len;
440 if (checked) {
441 len = buffer[index++] & 0x0F;
442 } else {
443 len = buffer[index] >> 4;
444 }
445 checked = !checked;
446 code_lens_[kPermutations[idx]] = len;
447 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, len),
448 Error::kInsufficientOutput);
449 }
450 if (checked) {
451 index++;
452 }
453 for (; idx < 19; idx++) {
454 code_lens_[kPermutations[idx]] = 0;
455 }
456
457 TEST_AND_RETURN_FALSE_SET_ERROR(
458 BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_),
459 Error::kInvalidInput);
460
461 // Build literal/lengths and distance Huffman code length arrays.
462 auto bytes_available = length - index;
463 TEST_AND_RETURN_FALSE(
464 BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
465 num_lit_len + num_distance, &tmp_lens_, error));
466 index += bytes_available;
467
468 lit_len_lens_.clear();
469 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
470 tmp_lens_.begin() + num_lit_len);
471
472 distance_lens_.clear();
473 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
474 tmp_lens_.end());
475
476 // Build literal/lengths Huffman reverse codes.
477 TEST_AND_RETURN_FALSE_SET_ERROR(
478 BuildHuffmanReverseCodes(
479 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_),
480 Error::kInvalidInput);
481
482 // Build distance Huffman reverse codes.
483 TEST_AND_RETURN_FALSE_SET_ERROR(
484 BuildHuffmanReverseCodes(
485 distance_lens_, &distance_rcodes_, &distance_max_bits_),
486 Error::kInvalidInput);
487
488 TEST_AND_RETURN_FALSE_SET_ERROR(length == index, Error::kInvalidInput);
489
490 return true;
491 }
492
BuildHuffmanCodeLengths(const uint8_t * buffer,size_t * length,BitWriterInterface * bw,size_t num_codes,Buffer * lens,Error * error)493 bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
494 size_t* length,
495 BitWriterInterface* bw,
496 size_t num_codes,
497 Buffer* lens,
498 Error* error) {
499 lens->clear();
500 uint16_t hcode;
501 size_t nbits;
502 size_t index = 0;
503 for (size_t idx = 0; idx < num_codes;) {
504 TEST_AND_RETURN_FALSE_SET_ERROR(index < *length, Error::kInsufficientInput);
505 auto pcode = buffer[index++];
506 TEST_AND_RETURN_FALSE_SET_ERROR(pcode <= 155, Error::kInvalidInput);
507
508 auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
509 TEST_AND_RETURN_FALSE_SET_ERROR(CodeHuffman(code, &hcode, &nbits),
510 Error::kInvalidInput);
511 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(nbits, hcode),
512 Error::kInsufficientOutput);
513 if (code < 16) {
514 lens->push_back(code);
515 idx++;
516 } else {
517 size_t copy_num = 0;
518 uint8_t copy_val;
519 switch (code) {
520 case 16:
521 // Cannot repeat a non-existent last code if idx == 0.
522 TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput);
523 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(2, pcode - 16),
524 Error::kInsufficientOutput);
525 copy_num = 3 + pcode - 16;
526 copy_val = (*lens)[idx - 1];
527 break;
528
529 case 17:
530 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, pcode - 20),
531 Error::kInsufficientOutput);
532 copy_num = 3 + pcode - 20;
533 copy_val = 0;
534 break;
535
536 case 18:
537 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(7, pcode - 28),
538 Error::kInsufficientOutput);
539 copy_num = 11 + pcode - 28;
540 copy_val = 0;
541 break;
542
543 default:
544 break;
545 }
546 idx += copy_num;
547 while (copy_num--) {
548 lens->push_back(copy_val);
549 }
550 }
551 }
552 TEST_AND_RETURN_FALSE(lens->size() == num_codes);
553 *length = index;
554 return true;
555 }
556
BlockTypeToString(BlockType type)557 std::string BlockTypeToString(BlockType type) {
558 switch (type) {
559 case BlockType::kUncompressed:
560 return "Uncompressed";
561
562 case BlockType::kFixed:
563 return "Fixed";
564
565 case BlockType::kDynamic:
566 return "Dynamic";
567
568 default:
569 return "Unknown";
570 }
571 }
572
573 } // namespace puffin
574