1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/huffman_table.h"
6
7 #include <algorithm>
8 #include <vector>
9
10 #include "puffin/src/logging.h"
11
12 using std::string;
13 using std::vector;
14
15 namespace puffin {
16
17 // Permutations of input Huffman code lengths (used only to read code lengths
18 // necessary for reading Huffman table.)
19 const uint8_t kPermutations[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
20 11, 4, 12, 3, 13, 2, 14, 1, 15};
21
22 // The bases of each alphabet which is added to the integer value of extra
23 // bits that comes after the Huffman code in the input to create the given
24 // length value. The last element is a guard.
25 const uint16_t kLengthBases[30] = {
26 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
27 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
28
29 // Number of extra bits that comes after the associating Huffman code.
30 const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
31 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
32 4, 4, 4, 4, 5, 5, 5, 5, 0};
33
34 // Same as |kLengthBases| but for the distances instead of lengths. The last
35 // element is a guard.
36 const uint16_t kDistanceBases[31] = {
37 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
38 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
39 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
40
41 // Same as |kLengthExtraBits| but for distances instead of lengths.
42 const uint8_t kDistanceExtraBits[30] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
43 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
44 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
45
46 // 288 is the maximum number of needed huffman codes for an alphabet. Fixed
47 // huffman table needs 288 and dynamic huffman table needs maximum 286.
48 // 286 = 256 (coding a byte) +
49 // 1 (coding the end of block symbole) +
50 // 29 (coding the lengths)
HuffmanTable()51 HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
52
InitHuffmanCodes(const Buffer & lens,size_t * max_bits)53 bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
54 // Temporary buffers used in |InitHuffmanCodes|.
55 uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
56 uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
57
58 // 1. Count the number of codes for each length;
59 for (auto len : lens) {
60 len_count_[len]++;
61 }
62
63 for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
64 if (len_count_[*max_bits] != 0) {
65 break;
66 }
67 }
68
69 // Check for oversubscribed code lengths. (A code with length 'L' cannot have
70 // more than 2^L items.
71 for (size_t idx = 1; idx <= *max_bits; idx++) {
72 if (len_count_[idx] > (1 << idx)) {
73 LOG(ERROR) << "Oversubscribed code lengths error!";
74 return false;
75 }
76 }
77
78 // 2. Compute the coding of the first element for each length.
79 uint16_t code = 0;
80 len_count_[0] = 0;
81 for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
82 code = (code + len_count_[bits - 1]) << 1;
83 next_code_[bits] = code;
84 }
85
86 codeindexpairs_.clear();
87 // 3. Calculate all the code values.
88 for (size_t idx = 0; idx < lens.size(); idx++) {
89 auto len = lens[idx];
90 if (len == 0) {
91 continue;
92 }
93
94 // Reverse the code
95 CodeIndexPair cip;
96 cip.code = 0;
97 auto tmp_code = next_code_[len];
98 for (size_t r = 0; r < len; r++) {
99 cip.code <<= 1;
100 cip.code |= tmp_code & 1U;
101 tmp_code >>= 1;
102 }
103 cip.index = idx;
104 codeindexpairs_.push_back(cip);
105 next_code_[len]++;
106 }
107 return true;
108 }
109
BuildHuffmanCodes(const Buffer & lens,vector<uint16_t> * hcodes,size_t * max_bits)110 bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
111 vector<uint16_t>* hcodes,
112 size_t* max_bits) {
113 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
114 // Sort descending based on the bit-length of the code.
115 std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
116 [&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
117 return lens[a.index] > lens[b.index];
118 });
119
120 // Only zero out the part of hcodes which is valuable.
121 memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
122 for (const auto& cip : codeindexpairs_) {
123 // The MSB bit of the code in hcodes is set if it is a valid code and its
124 // code exists in the input Huffman table.
125 (*hcodes)[cip.code] = cip.index | 0x8000;
126 auto fill_bits = *max_bits - lens[cip.index];
127 for (auto idx = 1; idx < (1 << fill_bits); idx++) {
128 auto location = (idx << lens[cip.index]) | cip.code;
129 if (!((*hcodes)[location] & 0x8000)) {
130 (*hcodes)[location] = cip.index | 0x8000;
131 }
132 }
133 }
134 return true;
135 }
136
BuildHuffmanReverseCodes(const Buffer & lens,vector<uint16_t> * rcodes,size_t * max_bits)137 bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
138 vector<uint16_t>* rcodes,
139 size_t* max_bits) {
140 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
141 // Sort ascending based on the index.
142 std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
143 [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
144 return a.index < b.index;
145 });
146
147 size_t index = 0;
148 for (size_t idx = 0; idx < rcodes->size(); idx++) {
149 if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
150 (*rcodes)[idx] = codeindexpairs_[index].code;
151 index++;
152 } else {
153 (*rcodes)[idx] = 0;
154 }
155 }
156 return true;
157 }
158
BuildFixedHuffmanTable()159 bool HuffmanTable::BuildFixedHuffmanTable() {
160 if (!initialized_) {
161 // For all the vectors used in this class, we set the size in the
162 // constructor and we do not change the size later. This is for optimization
163 // purposes. The total size of data in this class is approximately
164 // 2KB. Because it is a constructor return values cannot be checked.
165 lit_len_lens_.resize(288);
166 lit_len_rcodes_.resize(288);
167 lit_len_hcodes_.resize(1 << 9);
168
169 distance_lens_.resize(30);
170 distance_rcodes_.resize(30);
171 distance_hcodes_.resize(1 << 5);
172
173 size_t i = 0;
174 while (i < 144) {
175 lit_len_lens_[i++] = 8;
176 }
177 while (i < 256) {
178 lit_len_lens_[i++] = 9;
179 }
180 while (i < 280) {
181 lit_len_lens_[i++] = 7;
182 }
183 while (i < 288) {
184 lit_len_lens_[i++] = 8;
185 }
186
187 i = 0;
188 while (i < 30) {
189 distance_lens_[i++] = 5;
190 }
191
192 TEST_AND_RETURN_FALSE(
193 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
194
195 TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
196 &distance_max_bits_));
197
198 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
199 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
200
201 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
202 distance_lens_, &distance_rcodes_, &distance_max_bits_));
203
204 initialized_ = true;
205 }
206 return true;
207 }
208
BuildDynamicHuffmanTable(BitReaderInterface * br,uint8_t * buffer,size_t * length)209 bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
210 uint8_t* buffer,
211 size_t* length) {
212 // Initilize only once and reuse.
213 if (!initialized_) {
214 // Only resizing the arrays needed.
215 code_lens_.resize(19);
216 code_hcodes_.resize(1 << 7);
217
218 lit_len_lens_.resize(286);
219 lit_len_hcodes_.resize(1 << 15);
220
221 distance_lens_.resize(30);
222 distance_hcodes_.resize(1 << 15);
223
224 // 286: Maximum number of literal/lengths symbols.
225 // 30: Maximum number of distance symbols.
226 // The reason we reserve this to the sum of both maximum sizes is that we
227 // need to calculate both huffman codes contiguously. See b/72815313.
228 tmp_lens_.resize(286 + 30);
229 initialized_ = true;
230 }
231
232 // Read the header. Reads the first portion of the Huffman data from input and
233 // writes it into the puff |buffer|. The first portion includes the size
234 // (|num_lit_len|) of the literals/lengths Huffman code length array
235 // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
236 // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
237 // Huffman code length array (dynamic_code_lens_) for reading
238 // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
239 // reading |dynamic_code_lens_|.
240
241 TEST_AND_RETURN_FALSE(*length >= 3);
242 size_t index = 0;
243 TEST_AND_RETURN_FALSE(br->CacheBits(14));
244 buffer[index++] = br->ReadBits(5); // HLIST
245 auto num_lit_len = br->ReadBits(5) + 257;
246 br->DropBits(5);
247
248 buffer[index++] = br->ReadBits(5); // HDIST
249 auto num_distance = br->ReadBits(5) + 1;
250 br->DropBits(5);
251
252 buffer[index++] = br->ReadBits(4); // HCLEN
253 auto num_codes = br->ReadBits(4) + 4;
254 br->DropBits(4);
255
256 TEST_AND_RETURN_FALSE(
257 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
258
259 bool checked = false;
260 size_t idx = 0;
261 TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2);
262 // Two codes per byte
263 for (; idx < num_codes; idx++) {
264 TEST_AND_RETURN_FALSE(br->CacheBits(3));
265 code_lens_[kPermutations[idx]] = br->ReadBits(3);
266 if (checked) {
267 buffer[index++] |= br->ReadBits(3);
268 } else {
269 buffer[index] = br->ReadBits(3) << 4;
270 }
271 checked = !checked;
272 br->DropBits(3);
273 }
274 // Pad the last byte if odd number of codes.
275 if (checked) {
276 index++;
277 }
278 for (; idx < 19; idx++) {
279 code_lens_[kPermutations[idx]] = 0;
280 }
281
282 TEST_AND_RETURN_FALSE(
283 BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_));
284
285 // Build literals/lengths and distance Huffman code length arrays.
286 auto bytes_available = (*length - index);
287 tmp_lens_.clear();
288 TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
289 br, buffer + index, &bytes_available, code_max_bits_,
290 num_lit_len + num_distance, &tmp_lens_));
291 index += bytes_available;
292
293 // TODO(ahassani): Optimize this so the memcpy is not needed anymore.
294 lit_len_lens_.clear();
295 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
296 tmp_lens_.begin() + num_lit_len);
297
298 distance_lens_.clear();
299 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
300 tmp_lens_.end());
301
302 TEST_AND_RETURN_FALSE(
303 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
304
305 // Build distance Huffman codes.
306 TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
307 &distance_max_bits_));
308
309 *length = index;
310 return true;
311 }
312
BuildHuffmanCodeLengths(BitReaderInterface * br,uint8_t * buffer,size_t * length,size_t max_bits,size_t num_codes,Buffer * lens)313 bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
314 uint8_t* buffer,
315 size_t* length,
316 size_t max_bits,
317 size_t num_codes,
318 Buffer* lens) {
319 size_t index = 0;
320 lens->clear();
321 for (size_t idx = 0; idx < num_codes;) {
322 TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
323 auto bits = br->ReadBits(max_bits);
324 uint16_t code;
325 size_t nbits;
326 TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits));
327 TEST_AND_RETURN_FALSE(index < *length);
328 br->DropBits(nbits);
329 if (code < 16) {
330 buffer[index++] = code;
331 lens->push_back(code);
332 idx++;
333 } else {
334 TEST_AND_RETURN_FALSE(code < 19);
335 size_t copy_num = 0;
336 uint8_t copy_val;
337 switch (code) {
338 case 16:
339 TEST_AND_RETURN_FALSE(idx != 0);
340 TEST_AND_RETURN_FALSE(br->CacheBits(2));
341 copy_num = 3 + br->ReadBits(2);
342 buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times
343 copy_val = (*lens)[idx - 1];
344 br->DropBits(2);
345 break;
346
347 case 17:
348 TEST_AND_RETURN_FALSE(br->CacheBits(3));
349 copy_num = 3 + br->ReadBits(3);
350 buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times
351 copy_val = 0;
352 br->DropBits(3);
353 break;
354
355 case 18:
356 TEST_AND_RETURN_FALSE(br->CacheBits(7));
357 copy_num = 11 + br->ReadBits(7);
358 buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times
359 copy_val = 0;
360 br->DropBits(7);
361 break;
362
363 default:
364 LOG(ERROR) << "Invalid code!";
365 return false;
366 }
367 idx += copy_num;
368 while (copy_num--) {
369 lens->push_back(copy_val);
370 }
371 }
372 }
373 TEST_AND_RETURN_FALSE(lens->size() == num_codes);
374 *length = index;
375 return true;
376 }
377
BuildDynamicHuffmanTable(const uint8_t * buffer,size_t length,BitWriterInterface * bw)378 bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
379 size_t length,
380 BitWriterInterface* bw) {
381 if (!initialized_) {
382 // Only resizing the arrays needed.
383 code_lens_.resize(19);
384 code_rcodes_.resize(19);
385
386 lit_len_lens_.resize(286);
387 lit_len_rcodes_.resize(286);
388
389 distance_lens_.resize(30);
390 distance_rcodes_.resize(30);
391
392 tmp_lens_.resize(286 + 30);
393
394 initialized_ = true;
395 }
396
397 TEST_AND_RETURN_FALSE(length >= 3);
398 size_t index = 0;
399 // Write the header.
400 size_t num_lit_len = buffer[index] + 257;
401 TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
402
403 size_t num_distance = buffer[index] + 1;
404 TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
405
406 size_t num_codes = buffer[index] + 4;
407 TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++]));
408
409 TEST_AND_RETURN_FALSE(
410 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
411
412 TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2);
413 bool checked = false;
414 size_t idx = 0;
415 for (; idx < num_codes; idx++) {
416 uint8_t len;
417 if (checked) {
418 len = buffer[index++] & 0x0F;
419 } else {
420 len = buffer[index] >> 4;
421 }
422 checked = !checked;
423 code_lens_[kPermutations[idx]] = len;
424 TEST_AND_RETURN_FALSE(bw->WriteBits(3, len));
425 }
426 if (checked) {
427 index++;
428 }
429 for (; idx < 19; idx++) {
430 code_lens_[kPermutations[idx]] = 0;
431 }
432
433 TEST_AND_RETURN_FALSE(
434 BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_));
435
436 // Build literal/lengths and distance Huffman code length arrays.
437 auto bytes_available = length - index;
438 TEST_AND_RETURN_FALSE(
439 BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
440 num_lit_len + num_distance, &tmp_lens_));
441 index += bytes_available;
442
443 lit_len_lens_.clear();
444 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
445 tmp_lens_.begin() + num_lit_len);
446
447 distance_lens_.clear();
448 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
449 tmp_lens_.end());
450
451 // Build literal/lengths Huffman reverse codes.
452 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
453 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
454
455 // Build distance Huffman reverse codes.
456 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
457 distance_lens_, &distance_rcodes_, &distance_max_bits_));
458
459 TEST_AND_RETURN_FALSE(length == index);
460
461 return true;
462 }
463
BuildHuffmanCodeLengths(const uint8_t * buffer,size_t * length,BitWriterInterface * bw,size_t num_codes,Buffer * lens)464 bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
465 size_t* length,
466 BitWriterInterface* bw,
467 size_t num_codes,
468 Buffer* lens) {
469 lens->clear();
470 uint16_t hcode;
471 size_t nbits;
472 size_t index = 0;
473 for (size_t idx = 0; idx < num_codes;) {
474 TEST_AND_RETURN_FALSE(index < *length);
475 auto pcode = buffer[index++];
476 TEST_AND_RETURN_FALSE(pcode <= 155);
477
478 auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
479 TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits));
480 TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode));
481 if (code < 16) {
482 lens->push_back(code);
483 idx++;
484 } else {
485 size_t copy_num = 0;
486 uint8_t copy_val;
487 switch (code) {
488 case 16:
489 // Cannot repeat a non-existent last code if idx == 0.
490 TEST_AND_RETURN_FALSE(idx != 0);
491 TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16));
492 copy_num = 3 + pcode - 16;
493 copy_val = (*lens)[idx - 1];
494 break;
495
496 case 17:
497 TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20));
498 copy_num = 3 + pcode - 20;
499 copy_val = 0;
500 break;
501
502 case 18:
503 TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28));
504 copy_num = 11 + pcode - 28;
505 copy_val = 0;
506 break;
507
508 default:
509 break;
510 }
511 idx += copy_num;
512 while (copy_num--) {
513 lens->push_back(copy_val);
514 }
515 }
516 }
517 TEST_AND_RETURN_FALSE(lens->size() == num_codes);
518 *length = index;
519 return true;
520 }
521
BlockTypeToString(BlockType type)522 string BlockTypeToString(BlockType type) {
523 switch (type) {
524 case BlockType::kUncompressed:
525 return "Uncompressed";
526
527 case BlockType::kFixed:
528 return "Fixed";
529
530 case BlockType::kDynamic:
531 return "Dynamic";
532
533 default:
534 return "Unknown";
535 }
536 }
537
538 } // namespace puffin
539