1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/include/puffin/puffer.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <utility>
11 #include <vector>
12
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/huffman_table.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/stream.h"
17 #include "puffin/src/logging.h"
18 #include "puffin/src/puff_data.h"
19 #include "puffin/src/puff_writer.h"
20
21 using std::string;
22 using std::vector;
23
24 namespace puffin {
25
Puffer(bool exclude_bad_distance_caches)26 Puffer::Puffer(bool exclude_bad_distance_caches)
27 : dyn_ht_(new HuffmanTable()),
28 fix_ht_(new HuffmanTable()),
29 exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
30
Puffer()31 Puffer::Puffer() : Puffer(false) {}
32
~Puffer()33 Puffer::~Puffer() {}
34
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const35 bool Puffer::PuffDeflate(BitReaderInterface* br,
36 PuffWriterInterface* pw,
37 vector<BitExtent>* deflates) const {
38 PuffData pd;
39 HuffmanTable* cur_ht;
40 bool end_loop = false;
41 // No bits left to read, return. We try to cache at least eight bits because
42 // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
43 // bits header + 5 bits just one len/dist symbol.
44 while (!end_loop && br->CacheBits(8)) {
45 auto start_bit_offset = br->OffsetInBits();
46
47 TEST_AND_RETURN_FALSE(br->CacheBits(3));
48 uint8_t final_bit = br->ReadBits(1); // BFINAL
49 br->DropBits(1);
50 uint8_t type = br->ReadBits(2); // BTYPE
51 br->DropBits(2);
52 DVLOG(2) << "Read block type: "
53 << BlockTypeToString(static_cast<BlockType>(type));
54
55 // If it is the final block and we are just looking for deflate locations,
56 // we consider this the end of the search.
57 if (deflates != nullptr && final_bit) {
58 end_loop = true;
59 }
60
61 // Header structure
62 // +-+-+-+-+-+-+-+-+
63 // |F| TP| SKIP |
64 // +-+-+-+-+-+-+-+-+
65 // F -> final_bit
66 // TP -> type
67 // SKIP -> skipped_bits (only in kUncompressed type)
68 auto block_header = (final_bit << 7) | (type << 5);
69 switch (static_cast<BlockType>(type)) {
70 case BlockType::kUncompressed: {
71 auto skipped_bits = br->ReadBoundaryBits();
72 br->SkipBoundaryBits();
73 TEST_AND_RETURN_FALSE(br->CacheBits(32));
74 auto len = br->ReadBits(16); // LEN
75 br->DropBits(16);
76 auto nlen = br->ReadBits(16); // NLEN
77 br->DropBits(16);
78
79 if ((len ^ nlen) != 0xFFFF) {
80 LOG(ERROR) << "Length of uncompressed data is invalid;"
81 << " LEN(" << len << ") NLEN(" << nlen << ")";
82 return false;
83 }
84
85 // Put skipped bits into header.
86 block_header |= skipped_bits;
87
88 // Insert block header.
89 pd.type = PuffData::Type::kBlockMetadata;
90 pd.block_metadata[0] = block_header;
91 pd.length = 1;
92 TEST_AND_RETURN_FALSE(pw->Insert(pd));
93
94 // Insert all the raw literals.
95 pd.type = PuffData::Type::kLiterals;
96 pd.length = len;
97 TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
98 TEST_AND_RETURN_FALSE(pw->Insert(pd));
99
100 pd.type = PuffData::Type::kEndOfBlock;
101 TEST_AND_RETURN_FALSE(pw->Insert(pd));
102
103 // There is no need to insert the location of uncompressed deflates
104 // because we do not want the uncompressed blocks when trying to find
105 // the bit-addressed location of deflates. They better be ignored.
106
107 // continue the loop. Do not read any literal/length/distance.
108 continue;
109 }
110
111 case BlockType::kFixed:
112 fix_ht_->BuildFixedHuffmanTable();
113 cur_ht = fix_ht_.get();
114 pd.type = PuffData::Type::kBlockMetadata;
115 pd.block_metadata[0] = block_header;
116 pd.length = 1;
117 TEST_AND_RETURN_FALSE(pw->Insert(pd));
118 break;
119
120 case BlockType::kDynamic:
121 pd.type = PuffData::Type::kBlockMetadata;
122 pd.block_metadata[0] = block_header;
123 pd.length = sizeof(pd.block_metadata) - 1;
124 TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
125 br, &pd.block_metadata[1], &pd.length));
126 pd.length += 1; // For the header.
127 TEST_AND_RETURN_FALSE(pw->Insert(pd));
128 cur_ht = dyn_ht_.get();
129 break;
130
131 default:
132 LOG(ERROR) << "Invalid block compression type: "
133 << static_cast<int>(type);
134 return false;
135 }
136
137 // If true and the list of output |deflates| is non-null, the current
138 // deflate location will be added to that list.
139 bool include_deflate = true;
140
141 while (true) { // Breaks when the end of block is reached.
142 auto max_bits = cur_ht->LitLenMaxBits();
143 if (!br->CacheBits(max_bits)) {
144 // It could be the end of buffer and the bit length of the end_of_block
145 // symbol has less than maximum bit length of current Huffman table. So
146 // only asking for the size of end of block symbol (256).
147 TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
148 }
149 TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
150 auto bits = br->ReadBits(max_bits);
151 uint16_t lit_len_alphabet;
152 size_t nbits;
153 TEST_AND_RETURN_FALSE(
154 cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
155 br->DropBits(nbits);
156 if (lit_len_alphabet < 256) {
157 pd.type = PuffData::Type::kLiteral;
158 pd.byte = lit_len_alphabet;
159 TEST_AND_RETURN_FALSE(pw->Insert(pd));
160
161 } else if (256 == lit_len_alphabet) {
162 pd.type = PuffData::Type::kEndOfBlock;
163 TEST_AND_RETURN_FALSE(pw->Insert(pd));
164 if (deflates != nullptr && include_deflate) {
165 deflates->emplace_back(start_bit_offset,
166 br->OffsetInBits() - start_bit_offset);
167 }
168 break; // Breaks the loop.
169 } else {
170 TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
171 // Reading length.
172 auto len_code_start = lit_len_alphabet - 257;
173 auto extra_bits_len = kLengthExtraBits[len_code_start];
174 uint16_t extra_bits_value = 0;
175 if (extra_bits_len) {
176 TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
177 extra_bits_value = br->ReadBits(extra_bits_len);
178 br->DropBits(extra_bits_len);
179 }
180 auto length = kLengthBases[len_code_start] + extra_bits_value;
181
182 auto bits_to_cache = cur_ht->DistanceMaxBits();
183 if (!br->CacheBits(bits_to_cache)) {
184 // This is a corner case that is present in the older versions of the
185 // puffin. So we need to catch it and correctly discard this kind of
186 // deflate when we encounter it. See crbug.com/915559 for more info.
187 bits_to_cache = br->BitsRemaining();
188 TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
189 if (exclude_bad_distance_caches_) {
190 include_deflate = false;
191 }
192 LOG(WARNING) << "A rare condition that older puffin clients fail to"
193 << " recognize happened. Nothing to worry about."
194 << " See crbug.com/915559";
195 }
196 auto bits = br->ReadBits(bits_to_cache);
197 uint16_t distance_alphabet;
198 size_t nbits;
199 TEST_AND_RETURN_FALSE(
200 cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
201 br->DropBits(nbits);
202
203 // Reading distance.
204 extra_bits_len = kDistanceExtraBits[distance_alphabet];
205 extra_bits_value = 0;
206 if (extra_bits_len) {
207 TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
208 extra_bits_value = br->ReadBits(extra_bits_len);
209 br->DropBits(extra_bits_len);
210 }
211
212 pd.type = PuffData::Type::kLenDist;
213 pd.length = length;
214 pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
215 TEST_AND_RETURN_FALSE(pw->Insert(pd));
216 }
217 }
218 }
219 TEST_AND_RETURN_FALSE(pw->Flush());
220 return true;
221 }
222
223 } // namespace puffin
224