1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/puffin_stream.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <utility>
11 #include <vector>
12
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/bit_writer.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/huffer.h"
17 #include "puffin/src/include/puffin/puffer.h"
18 #include "puffin/src/include/puffin/stream.h"
19 #include "puffin/src/logging.h"
20 #include "puffin/src/puff_reader.h"
21 #include "puffin/src/puff_writer.h"
22
23 using std::shared_ptr;
24 using std::unique_ptr;
25 using std::vector;
26
27 namespace puffin {
28
29 namespace {
30
CheckArgsIntegrity(uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs)31 bool CheckArgsIntegrity(uint64_t puff_size,
32 const vector<BitExtent>& deflates,
33 const vector<ByteExtent>& puffs) {
34 TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
35 // Check if the |puff_size| is actually greater than the last byte of the last
36 // puff in |puffs|.
37 if (!puffs.empty()) {
38 TEST_AND_RETURN_FALSE(puff_size >=
39 puffs.back().offset + puffs.back().length);
40 }
41
42 // Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
43 auto is_overlap = [](const auto& a, const auto& b) {
44 return (a.offset + a.length) > b.offset;
45 };
46 TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
47 deflates.end(),
48 is_overlap));
49 TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
50 puffs.end(),
51 is_overlap));
52 return true;
53 }
54
55 } // namespace
56
CreateForPuff(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)57 UniqueStreamPtr PuffinStream::CreateForPuff(UniqueStreamPtr stream,
58 shared_ptr<Puffer> puffer,
59 uint64_t puff_size,
60 const vector<BitExtent>& deflates,
61 const vector<ByteExtent>& puffs,
62 size_t max_cache_size) {
63 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
64 nullptr);
65 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
66
67 UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
68 nullptr, puff_size, deflates,
69 puffs, max_cache_size));
70 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
71 return puffin_stream;
72 }
73
CreateForHuff(UniqueStreamPtr stream,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs)74 UniqueStreamPtr PuffinStream::CreateForHuff(UniqueStreamPtr stream,
75 shared_ptr<Huffer> huffer,
76 uint64_t puff_size,
77 const vector<BitExtent>& deflates,
78 const vector<ByteExtent>& puffs) {
79 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
80 nullptr);
81 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
82
83 UniqueStreamPtr puffin_stream(new PuffinStream(
84 std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
85 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
86 return puffin_stream;
87 }
88
PuffinStream(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)89 PuffinStream::PuffinStream(UniqueStreamPtr stream,
90 shared_ptr<Puffer> puffer,
91 shared_ptr<Huffer> huffer,
92 uint64_t puff_size,
93 const vector<BitExtent>& deflates,
94 const vector<ByteExtent>& puffs,
95 size_t max_cache_size)
96 : stream_(std::move(stream)),
97 puffer_(puffer),
98 huffer_(huffer),
99 puff_stream_size_(puff_size),
100 deflates_(deflates),
101 puffs_(puffs),
102 puff_pos_(0),
103 skip_bytes_(0),
104 deflate_bit_pos_(0),
105 last_byte_(0),
106 extra_byte_(0),
107 is_for_puff_(puffer_ ? true : false),
108 closed_(false),
109 max_cache_size_(max_cache_size),
110 cur_cache_size_(0) {
111 // Building upper bounds for faster seek.
112 upper_bounds_.reserve(puffs.size());
113 for (const auto& puff : puffs) {
114 upper_bounds_.emplace_back(puff.offset + puff.length);
115 }
116 upper_bounds_.emplace_back(puff_stream_size_ + 1);
117
118 // We can pass the size of the deflate stream too, but it is not necessary
119 // yet. We cannot get the size of stream from itself, because we might be
120 // writing into it and its size is not defined yet.
121 uint64_t deflate_stream_size = puff_stream_size_;
122 if (!puffs.empty()) {
123 deflate_stream_size =
124 ((deflates.back().offset + deflates.back().length) / 8) +
125 puff_stream_size_ - (puffs.back().offset + puffs.back().length);
126 }
127
128 deflates_.emplace_back(deflate_stream_size * 8, 0);
129 puffs_.emplace_back(puff_stream_size_, 0);
130
131 // Look for the largest puff and deflate extents and get proper size buffers.
132 uint64_t max_puff_length = 0;
133 for (const auto& puff : puffs) {
134 max_puff_length = std::max(max_puff_length, puff.length);
135 }
136 puff_buffer_.reset(new Buffer(max_puff_length + 1));
137 if (max_cache_size_ < max_puff_length) {
138 max_cache_size_ = 0; // It means we are not caching puffs.
139 }
140
141 uint64_t max_deflate_length = 0;
142 for (const auto& deflate : deflates) {
143 max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
144 }
145 deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
146 }
147
GetSize(uint64_t * size) const148 bool PuffinStream::GetSize(uint64_t* size) const {
149 *size = puff_stream_size_;
150 return true;
151 }
152
GetOffset(uint64_t * offset) const153 bool PuffinStream::GetOffset(uint64_t* offset) const {
154 *offset = puff_pos_ + skip_bytes_;
155 return true;
156 }
157
Seek(uint64_t offset)158 bool PuffinStream::Seek(uint64_t offset) {
159 TEST_AND_RETURN_FALSE(!closed_);
160 if (!is_for_puff_) {
161 // For huffing we should not seek, only seek to zero is accepted.
162 TEST_AND_RETURN_FALSE(offset == 0);
163 }
164
165 TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
166
167 // We are searching first available puff which either includes the |offset| or
168 // it is the next available puff after |offset|.
169 auto next_puff_iter =
170 std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
171 TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
172 auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
173 cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
174 cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
175
176 if (offset < cur_puff_->offset) {
177 // between two puffs.
178 puff_pos_ = offset;
179 auto back_track_bytes = cur_puff_->offset - puff_pos_;
180 deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
181 if (cur_puff_ != puffs_.begin()) {
182 auto prev_deflate = std::prev(cur_deflate_);
183 if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
184 deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
185 }
186 }
187 } else {
188 // Inside a puff.
189 puff_pos_ = cur_puff_->offset;
190 deflate_bit_pos_ = cur_deflate_->offset;
191 }
192 skip_bytes_ = offset - puff_pos_;
193 if (!is_for_puff_ && offset == 0) {
194 TEST_AND_RETURN_FALSE(stream_->Seek(0));
195 TEST_AND_RETURN_FALSE(SetExtraByte());
196 }
197 return true;
198 }
199
Close()200 bool PuffinStream::Close() {
201 closed_ = true;
202 return stream_->Close();
203 }
204
Read(void * buffer,size_t count)205 bool PuffinStream::Read(void* buffer, size_t count) {
206 TEST_AND_RETURN_FALSE(!closed_);
207 TEST_AND_RETURN_FALSE(is_for_puff_);
208 if (cur_puff_ == puffs_.end()) {
209 TEST_AND_RETURN_FALSE(count == 0);
210 }
211 auto bytes = static_cast<uint8_t*>(buffer);
212 uint64_t length = count;
213 uint64_t bytes_read = 0;
214 while (bytes_read < length) {
215 if (puff_pos_ < cur_puff_->offset) {
216 // Reading between two deflates. We also read bytes that have at least one
217 // bit of a deflate bit stream. The byte which has both deflate and raw
218 // data will be shifted or masked off the deflate bits and the remaining
219 // value will be saved in the puff stream as an byte integer.
220 uint64_t start_byte = (deflate_bit_pos_ / 8);
221 uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
222 auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
223 TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
224
225 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
226 TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
227
228 // If true, we read the first byte of the curret deflate. So we have to
229 // mask out the deflate bits (which are most significant bits.)
230 if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
231 bytes[bytes_read + bytes_to_read - 1] &=
232 (1 << (cur_deflate_->offset & 7)) - 1;
233 }
234
235 // If true, we read the last byte of the previous deflate and we have to
236 // shift it out. The least significat bits belongs to the deflate
237 // stream. The order of these last two conditions are important because a
238 // byte can contain a finishing deflate and a starting deflate with some
239 // bits between them so we have to modify correctly. Keep in mind that in
240 // this situation both are modifying the same byte.
241 if (start_byte * 8 < deflate_bit_pos_) {
242 bytes[bytes_read] >>= deflate_bit_pos_ & 7;
243 }
244
245 // Pass |deflate_bit_pos_| for all the read bytes.
246 deflate_bit_pos_ -= deflate_bit_pos_ & 7;
247 deflate_bit_pos_ += bytes_to_read * 8;
248 if (deflate_bit_pos_ > cur_deflate_->offset) {
249 // In case it reads into the first byte of the current deflate.
250 deflate_bit_pos_ = cur_deflate_->offset;
251 }
252
253 bytes_read += bytes_to_read;
254 puff_pos_ += bytes_to_read;
255 TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
256 } else {
257 // Reading the deflate itself. We read all bytes including the first and
258 // last byte (which may partially include a deflate bit). Here we keep the
259 // |puff_pos_| point to the first byte of the puffed stream and
260 // |skip_bytes_| shows how many bytes in the puff we have copied till now.
261 auto start_byte = (cur_deflate_->offset / 8);
262 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
263 auto bytes_to_read = end_byte - start_byte;
264 // Puff directly to buffer if it has space.
265 bool puff_directly_into_buffer =
266 max_cache_size_ == 0 && (skip_bytes_ == 0) &&
267 (length - bytes_read >= cur_puff_->length);
268
269 auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
270 if (max_cache_size_ == 0 ||
271 !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
272 // Did not find the puff buffer in cache. We have to build it.
273 deflate_buffer_->resize(bytes_to_read);
274 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
275 TEST_AND_RETURN_FALSE(
276 stream_->Read(deflate_buffer_->data(), bytes_to_read));
277 BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
278
279 BufferPuffWriter puff_writer(puff_directly_into_buffer
280 ? bytes + bytes_read
281 : puff_buffer_->data(),
282 cur_puff_->length);
283
284 // Drop the first unused bits.
285 size_t extra_bits_len = cur_deflate_->offset & 7;
286 TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
287 bit_reader.DropBits(extra_bits_len);
288
289 TEST_AND_RETURN_FALSE(
290 puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr));
291 TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
292 TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
293 } else {
294 // Just seek to proper location.
295 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
296 }
297 // Copy from puff buffer to output if needed.
298 auto bytes_to_copy =
299 std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
300 if (!puff_directly_into_buffer) {
301 memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
302 bytes_to_copy);
303 }
304
305 skip_bytes_ += bytes_to_copy;
306 bytes_read += bytes_to_copy;
307
308 // Move to next puff.
309 if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
310 puff_pos_ += skip_bytes_;
311 skip_bytes_ = 0;
312 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
313 cur_puff_++;
314 cur_deflate_++;
315 if (cur_puff_ == puffs_.end()) {
316 break;
317 }
318 }
319 }
320 }
321
322 TEST_AND_RETURN_FALSE(bytes_read == length);
323 return true;
324 }
325
Write(const void * buffer,size_t count)326 bool PuffinStream::Write(const void* buffer, size_t count) {
327 TEST_AND_RETURN_FALSE(!closed_);
328 TEST_AND_RETURN_FALSE(!is_for_puff_);
329 auto bytes = static_cast<const uint8_t*>(buffer);
330 uint64_t length = count;
331 uint64_t bytes_wrote = 0;
332 while (bytes_wrote < length) {
333 if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
334 // Between two puffs or before the first puff. We know that we are
335 // starting from the byte boundary because we have already processed the
336 // non-deflate bits of the last byte of the last deflate. Here we don't
337 // process any byte that has deflate bit.
338 TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
339 auto copy_len =
340 std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
341 length - bytes_wrote);
342 TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
343 bytes_wrote += copy_len;
344 puff_pos_ += copy_len;
345 deflate_bit_pos_ += copy_len * 8;
346 } else {
347 // We are in a puff. We have to buffer incoming bytes until we reach the
348 // size of the current puff so we can huff :). If the last bit of the
349 // current deflate does not end in a byte boundary, then we have to read
350 // one more byte to fill up the last byte of the deflate stream before
351 // doing anything else.
352
353 // |deflate_bit_pos_| now should be in the same byte as
354 // |cur_deflate->offset|.
355 if (deflate_bit_pos_ < cur_deflate_->offset) {
356 last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
357 skip_bytes_ = 0;
358 deflate_bit_pos_ = cur_deflate_->offset;
359 puff_pos_++;
360 TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
361 }
362
363 auto copy_len = std::min(length - bytes_wrote,
364 cur_puff_->length + extra_byte_ - skip_bytes_);
365 TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
366 memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
367 skip_bytes_ += copy_len;
368 bytes_wrote += copy_len;
369
370 if (skip_bytes_ == cur_puff_->length + extra_byte_) {
371 // |puff_buffer_| is full, now huff into the |deflate_buffer_|.
372 auto start_byte = cur_deflate_->offset / 8;
373 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
374 auto bytes_to_write = end_byte - start_byte;
375
376 deflate_buffer_->resize(bytes_to_write);
377 BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
378 BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
379
380 // Write last byte if it has any.
381 TEST_AND_RETURN_FALSE(
382 bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
383 last_byte_ = 0;
384
385 TEST_AND_RETURN_FALSE(huffer_->HuffDeflate(&puff_reader, &bit_writer));
386 TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
387 TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
388
389 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
390 if (extra_byte_ == 1) {
391 deflate_buffer_->data()[bytes_to_write - 1] |=
392 puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
393 deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
394 } else if ((deflate_bit_pos_ & 7) != 0) {
395 // This happens if current and next deflate finish and end on the same
396 // byte, then we cannot write into output until we have huffed the
397 // next puff buffer, so untill then we cache it into |last_byte_| and
398 // we won't write it out.
399 last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
400 bytes_to_write--;
401 }
402
403 // Write |deflate_buffer_| into output.
404 TEST_AND_RETURN_FALSE(
405 stream_->Write(deflate_buffer_->data(), bytes_to_write));
406
407 // Move to the next deflate/puff.
408 puff_pos_ += skip_bytes_;
409 skip_bytes_ = 0;
410 cur_puff_++;
411 cur_deflate_++;
412 if (cur_puff_ == puffs_.end()) {
413 break;
414 }
415 // Find if need an extra byte to cache at the end.
416 TEST_AND_RETURN_FALSE(SetExtraByte());
417 }
418 }
419 }
420
421 TEST_AND_RETURN_FALSE(bytes_wrote == length);
422 return true;
423 }
424
SetExtraByte()425 bool PuffinStream::SetExtraByte() {
426 TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
427 if ((cur_deflate_ + 1) == deflates_.end()) {
428 extra_byte_ = 0;
429 return true;
430 }
431 uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
432 if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
433 extra_byte_ = 1;
434 } else {
435 extra_byte_ = 0;
436 }
437 return true;
438 }
439
GetPuffCache(int puff_id,uint64_t puff_size,shared_ptr<Buffer> * buffer)440 bool PuffinStream::GetPuffCache(int puff_id,
441 uint64_t puff_size,
442 shared_ptr<Buffer>* buffer) {
443 bool found = false;
444 // Search for it.
445 std::pair<int, shared_ptr<Buffer>> cache;
446 // TODO(*): Find a faster way of doing this? Maybe change the data structure
447 // that supports faster search.
448 for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
449 if (iter->first == puff_id) {
450 cache = std::move(*iter);
451 found = true;
452 // Remove it so later we can add it to the begining of the list.
453 caches_.erase(iter);
454 break;
455 }
456 }
457
458 // If not found, either create one or get one from the list.
459 if (!found) {
460 // If |caches_| were full, remove last ones in the list (least used), until
461 // we have enough space for the new cache.
462 while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
463 cache = std::move(caches_.back());
464 caches_.pop_back(); // Remove it from the list.
465 cur_cache_size_ -= cache.second->capacity();
466 }
467 // If we have not populated the cache yet, create one.
468 if (!cache.second) {
469 cache.second.reset(new Buffer(puff_size));
470 }
471 cache.second->resize(puff_size);
472
473 constexpr uint64_t kMaxSizeDifference = 20 * 1024;
474 if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
475 cache.second->shrink_to_fit();
476 }
477
478 cur_cache_size_ += cache.second->capacity();
479 cache.first = puff_id;
480 }
481
482 *buffer = cache.second;
483 // By now we have either removed a cache or created new one. Now we have to
484 // insert it in the front of the list so it becomes the most recently used
485 // one.
486 caches_.push_front(std::move(cache));
487 return found;
488 }
489
490 } // namespace puffin
491