1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/puffin_stream.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <utility>
11 #include <vector>
12
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/bit_writer.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/huffer.h"
17 #include "puffin/src/include/puffin/puffer.h"
18 #include "puffin/src/include/puffin/stream.h"
19 #include "puffin/src/puff_reader.h"
20 #include "puffin/src/puff_writer.h"
21 #include "puffin/src/set_errors.h"
22
23 namespace puffin {
24
25 using std::vector;
26 using std::unique_ptr;
27 using std::shared_ptr;
28
29 namespace {
30
CheckArgsIntegrity(uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs)31 bool CheckArgsIntegrity(uint64_t puff_size,
32 const std::vector<BitExtent>& deflates,
33 const std::vector<ByteExtent>& puffs) {
34 TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
35 // Check if the |puff_size| is actually greater than the last byte of the last
36 // puff in |puffs|.
37 if (!puffs.empty()) {
38 TEST_AND_RETURN_FALSE(puff_size >=
39 puffs.back().offset + puffs.back().length);
40 }
41
42 // Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
43 auto is_overlap = [](const auto& a, const auto& b) {
44 return (a.offset + a.length) > b.offset;
45 };
46 TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
47 deflates.end(),
48 is_overlap));
49 TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
50 puffs.end(),
51 is_overlap));
52 return true;
53 }
54
55 } // namespace
56
CreateForPuff(UniqueStreamPtr stream,std::shared_ptr<Puffer> puffer,uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs,size_t max_cache_size)57 UniqueStreamPtr PuffinStream::CreateForPuff(
58 UniqueStreamPtr stream,
59 std::shared_ptr<Puffer> puffer,
60 uint64_t puff_size,
61 const std::vector<BitExtent>& deflates,
62 const std::vector<ByteExtent>& puffs,
63 size_t max_cache_size) {
64 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
65 nullptr);
66 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
67
68 UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
69 nullptr, puff_size, deflates,
70 puffs, max_cache_size));
71 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
72 return puffin_stream;
73 }
74
CreateForHuff(UniqueStreamPtr stream,std::shared_ptr<Huffer> huffer,uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs)75 UniqueStreamPtr PuffinStream::CreateForHuff(
76 UniqueStreamPtr stream,
77 std::shared_ptr<Huffer> huffer,
78 uint64_t puff_size,
79 const std::vector<BitExtent>& deflates,
80 const std::vector<ByteExtent>& puffs) {
81 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
82 nullptr);
83 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
84
85 UniqueStreamPtr puffin_stream(new PuffinStream(
86 std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
87 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
88 return puffin_stream;
89 }
90
PuffinStream(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)91 PuffinStream::PuffinStream(UniqueStreamPtr stream,
92 shared_ptr<Puffer> puffer,
93 shared_ptr<Huffer> huffer,
94 uint64_t puff_size,
95 const vector<BitExtent>& deflates,
96 const vector<ByteExtent>& puffs,
97 size_t max_cache_size)
98 : stream_(std::move(stream)),
99 puffer_(puffer),
100 huffer_(huffer),
101 puff_stream_size_(puff_size),
102 deflates_(deflates),
103 puffs_(puffs),
104 puff_pos_(0),
105 skip_bytes_(0),
106 deflate_bit_pos_(0),
107 last_byte_(0),
108 extra_byte_(0),
109 is_for_puff_(puffer_ ? true : false),
110 closed_(false),
111 max_cache_size_(max_cache_size),
112 cur_cache_size_(0) {
113 // Building upper bounds for faster seek.
114 upper_bounds_.reserve(puffs.size());
115 for (const auto& puff : puffs) {
116 upper_bounds_.emplace_back(puff.offset + puff.length);
117 }
118 upper_bounds_.emplace_back(puff_stream_size_ + 1);
119
120 // We can pass the size of the deflate stream too, but it is not necessary
121 // yet. We cannot get the size of stream from itself, because we might be
122 // writing into it and its size is not defined yet.
123 uint64_t deflate_stream_size = puff_stream_size_;
124 if (!puffs.empty()) {
125 deflate_stream_size =
126 ((deflates.back().offset + deflates.back().length) / 8) +
127 puff_stream_size_ - (puffs.back().offset + puffs.back().length);
128 }
129
130 deflates_.emplace_back(deflate_stream_size * 8, 0);
131 puffs_.emplace_back(puff_stream_size_, 0);
132
133 // Look for the largest puff and deflate extents and get proper size buffers.
134 uint64_t max_puff_length = 0;
135 for (const auto& puff : puffs) {
136 max_puff_length = std::max(max_puff_length, puff.length);
137 }
138 puff_buffer_.reset(new Buffer(max_puff_length + 1));
139 if (max_cache_size_ < max_puff_length) {
140 max_cache_size_ = 0; // It means we are not caching puffs.
141 }
142
143 uint64_t max_deflate_length = 0;
144 for (const auto& deflate : deflates) {
145 max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
146 }
147 deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
148 }
149
GetSize(uint64_t * size) const150 bool PuffinStream::GetSize(uint64_t* size) const {
151 *size = puff_stream_size_;
152 return true;
153 }
154
GetOffset(uint64_t * offset) const155 bool PuffinStream::GetOffset(uint64_t* offset) const {
156 *offset = puff_pos_ + skip_bytes_;
157 return true;
158 }
159
Seek(uint64_t offset)160 bool PuffinStream::Seek(uint64_t offset) {
161 TEST_AND_RETURN_FALSE(!closed_);
162 if (!is_for_puff_) {
163 // For huffing we should not seek, only seek to zero is accepted.
164 TEST_AND_RETURN_FALSE(offset == 0);
165 }
166
167 TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
168
169 // We are searching first available puff which either includes the |offset| or
170 // it is the next available puff after |offset|.
171 auto next_puff_iter =
172 std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
173 TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
174 auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
175 cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
176 cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
177
178 if (offset < cur_puff_->offset) {
179 // between two puffs.
180 puff_pos_ = offset;
181 auto back_track_bytes = cur_puff_->offset - puff_pos_;
182 deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
183 if (cur_puff_ != puffs_.begin()) {
184 auto prev_deflate = std::prev(cur_deflate_);
185 if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
186 deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
187 }
188 }
189 } else {
190 // Inside a puff.
191 puff_pos_ = cur_puff_->offset;
192 deflate_bit_pos_ = cur_deflate_->offset;
193 }
194 skip_bytes_ = offset - puff_pos_;
195 if (!is_for_puff_ && offset == 0) {
196 TEST_AND_RETURN_FALSE(stream_->Seek(0));
197 TEST_AND_RETURN_FALSE(SetExtraByte());
198 }
199 return true;
200 }
201
Close()202 bool PuffinStream::Close() {
203 closed_ = true;
204 return stream_->Close();
205 }
206
Read(void * buffer,size_t count)207 bool PuffinStream::Read(void* buffer, size_t count) {
208 TEST_AND_RETURN_FALSE(!closed_);
209 TEST_AND_RETURN_FALSE(is_for_puff_);
210 if (cur_puff_ == puffs_.end()) {
211 TEST_AND_RETURN_FALSE(count == 0);
212 }
213 auto bytes = static_cast<uint8_t*>(buffer);
214 uint64_t length = count;
215 uint64_t bytes_read = 0;
216 while (bytes_read < length) {
217 if (puff_pos_ < cur_puff_->offset) {
218 // Reading between two deflates. We also read bytes that have at least one
219 // bit of a deflate bit stream. The byte which has both deflate and raw
220 // data will be shifted or masked off the deflate bits and the remaining
221 // value will be saved in the puff stream as an byte integer.
222 uint64_t start_byte = (deflate_bit_pos_ / 8);
223 uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
224 auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
225 TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
226
227 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
228 TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
229
230 // If true, we read the first byte of the curret deflate. So we have to
231 // mask out the deflate bits (which are most significant bits.)
232 if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
233 bytes[bytes_read + bytes_to_read - 1] &=
234 (1 << (cur_deflate_->offset & 7)) - 1;
235 }
236
237 // If true, we read the last byte of the previous deflate and we have to
238 // shift it out. The least significat bits belongs to the deflate
239 // stream. The order of these last two conditions are important because a
240 // byte can contain a finishing deflate and a starting deflate with some
241 // bits between them so we have to modify correctly. Keep in mind that in
242 // this situation both are modifying the same byte.
243 if (start_byte * 8 < deflate_bit_pos_) {
244 bytes[bytes_read] >>= deflate_bit_pos_ & 7;
245 }
246
247 // Pass |deflate_bit_pos_| for all the read bytes.
248 deflate_bit_pos_ -= deflate_bit_pos_ & 7;
249 deflate_bit_pos_ += bytes_to_read * 8;
250 if (deflate_bit_pos_ > cur_deflate_->offset) {
251 // In case it reads into the first byte of the current deflate.
252 deflate_bit_pos_ = cur_deflate_->offset;
253 }
254
255 bytes_read += bytes_to_read;
256 puff_pos_ += bytes_to_read;
257 TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
258 } else {
259 // Reading the deflate itself. We read all bytes including the first and
260 // last byte (which may partially include a deflate bit). Here we keep the
261 // |puff_pos_| point to the first byte of the puffed stream and
262 // |skip_bytes_| shows how many bytes in the puff we have copied till now.
263 auto start_byte = (cur_deflate_->offset / 8);
264 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
265 auto bytes_to_read = end_byte - start_byte;
266 // Puff directly to buffer if it has space.
267 bool puff_directly_into_buffer =
268 max_cache_size_ == 0 && (skip_bytes_ == 0) &&
269 (length - bytes_read >= cur_puff_->length);
270
271 auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
272 if (max_cache_size_ == 0 ||
273 !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
274 // Did not find the puff buffer in cache. We have to build it.
275 deflate_buffer_->resize(bytes_to_read);
276 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
277 TEST_AND_RETURN_FALSE(
278 stream_->Read(deflate_buffer_->data(), bytes_to_read));
279 BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
280
281 BufferPuffWriter puff_writer(puff_directly_into_buffer
282 ? bytes + bytes_read
283 : puff_buffer_->data(),
284 cur_puff_->length);
285
286 // Drop the first unused bits.
287 size_t extra_bits_len = cur_deflate_->offset & 7;
288 TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
289 bit_reader.DropBits(extra_bits_len);
290
291 Error error;
292 TEST_AND_RETURN_FALSE(
293 puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr, &error));
294 TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
295 TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
296 } else {
297 // Just seek to proper location.
298 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
299 }
300 // Copy from puff buffer to output if needed.
301 auto bytes_to_copy =
302 std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
303 if (!puff_directly_into_buffer) {
304 memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
305 bytes_to_copy);
306 }
307
308 skip_bytes_ += bytes_to_copy;
309 bytes_read += bytes_to_copy;
310
311 // Move to next puff.
312 if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
313 puff_pos_ += skip_bytes_;
314 skip_bytes_ = 0;
315 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
316 cur_puff_++;
317 cur_deflate_++;
318 if (cur_puff_ == puffs_.end()) {
319 break;
320 }
321 }
322 }
323 }
324
325 TEST_AND_RETURN_FALSE(bytes_read == length);
326 return true;
327 }
328
Write(const void * buffer,size_t count)329 bool PuffinStream::Write(const void* buffer, size_t count) {
330 TEST_AND_RETURN_FALSE(!closed_);
331 TEST_AND_RETURN_FALSE(!is_for_puff_);
332 auto bytes = static_cast<const uint8_t*>(buffer);
333 uint64_t length = count;
334 uint64_t bytes_wrote = 0;
335 while (bytes_wrote < length) {
336 if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
337 // Between two puffs or before the first puff. We know that we are
338 // starting from the byte boundary because we have already processed the
339 // non-deflate bits of the last byte of the last deflate. Here we don't
340 // process any byte that has deflate bit.
341 TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
342 auto copy_len =
343 std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
344 length - bytes_wrote);
345 TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
346 bytes_wrote += copy_len;
347 puff_pos_ += copy_len;
348 deflate_bit_pos_ += copy_len * 8;
349 } else {
350 // We are in a puff. We have to buffer incoming bytes until we reach the
351 // size of the current puff so we can huff :). If the last bit of the
352 // current deflate does not end in a byte boundary, then we have to read
353 // one more byte to fill up the last byte of the deflate stream before
354 // doing anything else.
355
356 // |deflate_bit_pos_| now should be in the same byte as
357 // |cur_deflate->offset|.
358 if (deflate_bit_pos_ < cur_deflate_->offset) {
359 last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
360 skip_bytes_ = 0;
361 deflate_bit_pos_ = cur_deflate_->offset;
362 puff_pos_++;
363 TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
364 }
365
366 auto copy_len = std::min(length - bytes_wrote,
367 cur_puff_->length + extra_byte_ - skip_bytes_);
368 TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
369 memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
370 skip_bytes_ += copy_len;
371 bytes_wrote += copy_len;
372
373 if (skip_bytes_ == cur_puff_->length + extra_byte_) {
374 // |puff_buffer_| is full, now huff into the |deflate_buffer_|.
375 auto start_byte = cur_deflate_->offset / 8;
376 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
377 auto bytes_to_write = end_byte - start_byte;
378
379 deflate_buffer_->resize(bytes_to_write);
380 BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
381 BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
382
383 // Write last byte if it has any.
384 TEST_AND_RETURN_FALSE(
385 bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
386 last_byte_ = 0;
387
388 Error error;
389 TEST_AND_RETURN_FALSE(
390 huffer_->HuffDeflate(&puff_reader, &bit_writer, &error));
391 TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
392 TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
393
394 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
395 if (extra_byte_ == 1) {
396 deflate_buffer_->data()[bytes_to_write - 1] |=
397 puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
398 deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
399 } else if ((deflate_bit_pos_ & 7) != 0) {
400 // This happens if current and next deflate finish and end on the same
401 // byte, then we cannot write into output until we have huffed the
402 // next puff buffer, so untill then we cache it into |last_byte_| and
403 // we won't write it out.
404 last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
405 bytes_to_write--;
406 }
407
408 // Write |deflate_buffer_| into output.
409 TEST_AND_RETURN_FALSE(
410 stream_->Write(deflate_buffer_->data(), bytes_to_write));
411
412 // Move to the next deflate/puff.
413 puff_pos_ += skip_bytes_;
414 skip_bytes_ = 0;
415 cur_puff_++;
416 cur_deflate_++;
417 if (cur_puff_ == puffs_.end()) {
418 break;
419 }
420 // Find if need an extra byte to cache at the end.
421 TEST_AND_RETURN_FALSE(SetExtraByte());
422 }
423 }
424 }
425
426 TEST_AND_RETURN_FALSE(bytes_wrote == length);
427 return true;
428 }
429
SetExtraByte()430 bool PuffinStream::SetExtraByte() {
431 TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
432 if ((cur_deflate_ + 1) == deflates_.end()) {
433 extra_byte_ = 0;
434 return true;
435 }
436 uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
437 if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
438 extra_byte_ = 1;
439 } else {
440 extra_byte_ = 0;
441 }
442 return true;
443 }
444
GetPuffCache(int puff_id,uint64_t puff_size,SharedBufferPtr * buffer)445 bool PuffinStream::GetPuffCache(int puff_id,
446 uint64_t puff_size,
447 SharedBufferPtr* buffer) {
448 bool found = false;
449 // Search for it.
450 std::pair<int, SharedBufferPtr> cache;
451 // TODO(*): Find a faster way of doing this? Maybe change the data structure
452 // that supports faster search.
453 for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
454 if (iter->first == puff_id) {
455 cache = std::move(*iter);
456 found = true;
457 // Remove it so later we can add it to the begining of the list.
458 caches_.erase(iter);
459 break;
460 }
461 }
462
463 // If not found, either create one or get one from the list.
464 if (!found) {
465 // If |caches_| were full, remove last ones in the list (least used), until
466 // we have enough space for the new cache.
467 while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
468 cache = std::move(caches_.back());
469 caches_.pop_back(); // Remove it from the list.
470 cur_cache_size_ -= cache.second->capacity();
471 }
472 // If we have not populated the cache yet, create one.
473 if (!cache.second) {
474 cache.second.reset(new Buffer(puff_size));
475 }
476 cache.second->resize(puff_size);
477
478 constexpr uint64_t kMaxSizeDifference = 20 * 1024;
479 if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
480 cache.second->shrink_to_fit();
481 }
482
483 cur_cache_size_ += cache.second->capacity();
484 cache.first = puff_id;
485 }
486
487 *buffer = cache.second;
488 // By now we have either removed a cache or created new one. Now we have to
489 // insert it in the front of the list so it becomes the most recently used
490 // one.
491 caches_.push_front(std::move(cache));
492 return found;
493 }
494
495 } // namespace puffin
496