• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/puffin_stream.h"
6 
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <utility>
11 #include <vector>
12 
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/bit_writer.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/huffer.h"
17 #include "puffin/src/include/puffin/puffer.h"
18 #include "puffin/src/include/puffin/stream.h"
19 #include "puffin/src/puff_reader.h"
20 #include "puffin/src/puff_writer.h"
21 #include "puffin/src/set_errors.h"
22 
23 namespace puffin {
24 
25 using std::vector;
26 using std::unique_ptr;
27 using std::shared_ptr;
28 
29 namespace {
30 
CheckArgsIntegrity(uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs)31 bool CheckArgsIntegrity(uint64_t puff_size,
32                         const std::vector<BitExtent>& deflates,
33                         const std::vector<ByteExtent>& puffs) {
34   TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
35   // Check if the |puff_size| is actually greater than the last byte of the last
36   // puff in |puffs|.
37   if (!puffs.empty()) {
38     TEST_AND_RETURN_FALSE(puff_size >=
39                           puffs.back().offset + puffs.back().length);
40   }
41 
42   // Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
43   auto is_overlap = [](const auto& a, const auto& b) {
44     return (a.offset + a.length) > b.offset;
45   };
46   TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
47                                                              deflates.end(),
48                                                              is_overlap));
49   TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
50                                                           puffs.end(),
51                                                           is_overlap));
52   return true;
53 }
54 
55 }  // namespace
56 
CreateForPuff(UniqueStreamPtr stream,std::shared_ptr<Puffer> puffer,uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs,size_t max_cache_size)57 UniqueStreamPtr PuffinStream::CreateForPuff(
58     UniqueStreamPtr stream,
59     std::shared_ptr<Puffer> puffer,
60     uint64_t puff_size,
61     const std::vector<BitExtent>& deflates,
62     const std::vector<ByteExtent>& puffs,
63     size_t max_cache_size) {
64   TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
65                         nullptr);
66   TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
67 
68   UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
69                                                  nullptr, puff_size, deflates,
70                                                  puffs, max_cache_size));
71   TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
72   return puffin_stream;
73 }
74 
CreateForHuff(UniqueStreamPtr stream,std::shared_ptr<Huffer> huffer,uint64_t puff_size,const std::vector<BitExtent> & deflates,const std::vector<ByteExtent> & puffs)75 UniqueStreamPtr PuffinStream::CreateForHuff(
76     UniqueStreamPtr stream,
77     std::shared_ptr<Huffer> huffer,
78     uint64_t puff_size,
79     const std::vector<BitExtent>& deflates,
80     const std::vector<ByteExtent>& puffs) {
81   TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
82                         nullptr);
83   TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
84 
85   UniqueStreamPtr puffin_stream(new PuffinStream(
86       std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
87   TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
88   return puffin_stream;
89 }
90 
PuffinStream(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)91 PuffinStream::PuffinStream(UniqueStreamPtr stream,
92                            shared_ptr<Puffer> puffer,
93                            shared_ptr<Huffer> huffer,
94                            uint64_t puff_size,
95                            const vector<BitExtent>& deflates,
96                            const vector<ByteExtent>& puffs,
97                            size_t max_cache_size)
98     : stream_(std::move(stream)),
99       puffer_(puffer),
100       huffer_(huffer),
101       puff_stream_size_(puff_size),
102       deflates_(deflates),
103       puffs_(puffs),
104       puff_pos_(0),
105       skip_bytes_(0),
106       deflate_bit_pos_(0),
107       last_byte_(0),
108       extra_byte_(0),
109       is_for_puff_(puffer_ ? true : false),
110       closed_(false),
111       max_cache_size_(max_cache_size),
112       cur_cache_size_(0) {
113   // Building upper bounds for faster seek.
114   upper_bounds_.reserve(puffs.size());
115   for (const auto& puff : puffs) {
116     upper_bounds_.emplace_back(puff.offset + puff.length);
117   }
118   upper_bounds_.emplace_back(puff_stream_size_ + 1);
119 
120   // We can pass the size of the deflate stream too, but it is not necessary
121   // yet. We cannot get the size of stream from itself, because we might be
122   // writing into it and its size is not defined yet.
123   uint64_t deflate_stream_size = puff_stream_size_;
124   if (!puffs.empty()) {
125     deflate_stream_size =
126         ((deflates.back().offset + deflates.back().length) / 8) +
127         puff_stream_size_ - (puffs.back().offset + puffs.back().length);
128   }
129 
130   deflates_.emplace_back(deflate_stream_size * 8, 0);
131   puffs_.emplace_back(puff_stream_size_, 0);
132 
133   // Look for the largest puff and deflate extents and get proper size buffers.
134   uint64_t max_puff_length = 0;
135   for (const auto& puff : puffs) {
136     max_puff_length = std::max(max_puff_length, puff.length);
137   }
138   puff_buffer_.reset(new Buffer(max_puff_length + 1));
139   if (max_cache_size_ < max_puff_length) {
140     max_cache_size_ = 0;  // It means we are not caching puffs.
141   }
142 
143   uint64_t max_deflate_length = 0;
144   for (const auto& deflate : deflates) {
145     max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
146   }
147   deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
148 }
149 
GetSize(uint64_t * size) const150 bool PuffinStream::GetSize(uint64_t* size) const {
151   *size = puff_stream_size_;
152   return true;
153 }
154 
GetOffset(uint64_t * offset) const155 bool PuffinStream::GetOffset(uint64_t* offset) const {
156   *offset = puff_pos_ + skip_bytes_;
157   return true;
158 }
159 
Seek(uint64_t offset)160 bool PuffinStream::Seek(uint64_t offset) {
161   TEST_AND_RETURN_FALSE(!closed_);
162   if (!is_for_puff_) {
163     // For huffing we should not seek, only seek to zero is accepted.
164     TEST_AND_RETURN_FALSE(offset == 0);
165   }
166 
167   TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
168 
169   // We are searching first available puff which either includes the |offset| or
170   // it is the next available puff after |offset|.
171   auto next_puff_iter =
172       std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
173   TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
174   auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
175   cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
176   cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
177 
178   if (offset < cur_puff_->offset) {
179     // between two puffs.
180     puff_pos_ = offset;
181     auto back_track_bytes = cur_puff_->offset - puff_pos_;
182     deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
183     if (cur_puff_ != puffs_.begin()) {
184       auto prev_deflate = std::prev(cur_deflate_);
185       if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
186         deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
187       }
188     }
189   } else {
190     // Inside a puff.
191     puff_pos_ = cur_puff_->offset;
192     deflate_bit_pos_ = cur_deflate_->offset;
193   }
194   skip_bytes_ = offset - puff_pos_;
195   if (!is_for_puff_ && offset == 0) {
196     TEST_AND_RETURN_FALSE(stream_->Seek(0));
197     TEST_AND_RETURN_FALSE(SetExtraByte());
198   }
199   return true;
200 }
201 
Close()202 bool PuffinStream::Close() {
203   closed_ = true;
204   return stream_->Close();
205 }
206 
Read(void * buffer,size_t count)207 bool PuffinStream::Read(void* buffer, size_t count) {
208   TEST_AND_RETURN_FALSE(!closed_);
209   TEST_AND_RETURN_FALSE(is_for_puff_);
210   if (cur_puff_ == puffs_.end()) {
211     TEST_AND_RETURN_FALSE(count == 0);
212   }
213   auto bytes = static_cast<uint8_t*>(buffer);
214   uint64_t length = count;
215   uint64_t bytes_read = 0;
216   while (bytes_read < length) {
217     if (puff_pos_ < cur_puff_->offset) {
218       // Reading between two deflates. We also read bytes that have at least one
219       // bit of a deflate bit stream. The byte which has both deflate and raw
220       // data will be shifted or masked off the deflate bits and the remaining
221       // value will be saved in the puff stream as an byte integer.
222       uint64_t start_byte = (deflate_bit_pos_ / 8);
223       uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
224       auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
225       TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
226 
227       TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
228       TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
229 
230       // If true, we read the first byte of the curret deflate. So we have to
231       // mask out the deflate bits (which are most significant bits.)
232       if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
233         bytes[bytes_read + bytes_to_read - 1] &=
234             (1 << (cur_deflate_->offset & 7)) - 1;
235       }
236 
237       // If true, we read the last byte of the previous deflate and we have to
238       // shift it out. The least significat bits belongs to the deflate
239       // stream. The order of these last two conditions are important because a
240       // byte can contain a finishing deflate and a starting deflate with some
241       // bits between them so we have to modify correctly. Keep in mind that in
242       // this situation both are modifying the same byte.
243       if (start_byte * 8 < deflate_bit_pos_) {
244         bytes[bytes_read] >>= deflate_bit_pos_ & 7;
245       }
246 
247       // Pass |deflate_bit_pos_| for all the read bytes.
248       deflate_bit_pos_ -= deflate_bit_pos_ & 7;
249       deflate_bit_pos_ += bytes_to_read * 8;
250       if (deflate_bit_pos_ > cur_deflate_->offset) {
251         // In case it reads into the first byte of the current deflate.
252         deflate_bit_pos_ = cur_deflate_->offset;
253       }
254 
255       bytes_read += bytes_to_read;
256       puff_pos_ += bytes_to_read;
257       TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
258     } else {
259       // Reading the deflate itself. We read all bytes including the first and
260       // last byte (which may partially include a deflate bit). Here we keep the
261       // |puff_pos_| point to the first byte of the puffed stream and
262       // |skip_bytes_| shows how many bytes in the puff we have copied till now.
263       auto start_byte = (cur_deflate_->offset / 8);
264       auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
265       auto bytes_to_read = end_byte - start_byte;
266       // Puff directly to buffer if it has space.
267       bool puff_directly_into_buffer =
268           max_cache_size_ == 0 && (skip_bytes_ == 0) &&
269           (length - bytes_read >= cur_puff_->length);
270 
271       auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
272       if (max_cache_size_ == 0 ||
273           !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
274         // Did not find the puff buffer in cache. We have to build it.
275         deflate_buffer_->resize(bytes_to_read);
276         TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
277         TEST_AND_RETURN_FALSE(
278             stream_->Read(deflate_buffer_->data(), bytes_to_read));
279         BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
280 
281         BufferPuffWriter puff_writer(puff_directly_into_buffer
282                                          ? bytes + bytes_read
283                                          : puff_buffer_->data(),
284                                      cur_puff_->length);
285 
286         // Drop the first unused bits.
287         size_t extra_bits_len = cur_deflate_->offset & 7;
288         TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
289         bit_reader.DropBits(extra_bits_len);
290 
291         Error error;
292         TEST_AND_RETURN_FALSE(
293             puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr, &error));
294         TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
295         TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
296       } else {
297         // Just seek to proper location.
298         TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
299       }
300       // Copy from puff buffer to output if needed.
301       auto bytes_to_copy =
302           std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
303       if (!puff_directly_into_buffer) {
304         memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
305                bytes_to_copy);
306       }
307 
308       skip_bytes_ += bytes_to_copy;
309       bytes_read += bytes_to_copy;
310 
311       // Move to next puff.
312       if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
313         puff_pos_ += skip_bytes_;
314         skip_bytes_ = 0;
315         deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
316         cur_puff_++;
317         cur_deflate_++;
318         if (cur_puff_ == puffs_.end()) {
319           break;
320         }
321       }
322     }
323   }
324 
325   TEST_AND_RETURN_FALSE(bytes_read == length);
326   return true;
327 }
328 
Write(const void * buffer,size_t count)329 bool PuffinStream::Write(const void* buffer, size_t count) {
330   TEST_AND_RETURN_FALSE(!closed_);
331   TEST_AND_RETURN_FALSE(!is_for_puff_);
332   auto bytes = static_cast<const uint8_t*>(buffer);
333   uint64_t length = count;
334   uint64_t bytes_wrote = 0;
335   while (bytes_wrote < length) {
336     if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
337       // Between two puffs or before the first puff. We know that we are
338       // starting from the byte boundary because we have already processed the
339       // non-deflate bits of the last byte of the last deflate. Here we don't
340       // process any byte that has deflate bit.
341       TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
342       auto copy_len =
343           std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
344                    length - bytes_wrote);
345       TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
346       bytes_wrote += copy_len;
347       puff_pos_ += copy_len;
348       deflate_bit_pos_ += copy_len * 8;
349     } else {
350       // We are in a puff. We have to buffer incoming bytes until we reach the
351       // size of the current puff so we can huff :). If the last bit of the
352       // current deflate does not end in a byte boundary, then we have to read
353       // one more byte to fill up the last byte of the deflate stream before
354       // doing anything else.
355 
356       // |deflate_bit_pos_| now should be in the same byte as
357       // |cur_deflate->offset|.
358       if (deflate_bit_pos_ < cur_deflate_->offset) {
359         last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
360         skip_bytes_ = 0;
361         deflate_bit_pos_ = cur_deflate_->offset;
362         puff_pos_++;
363         TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
364       }
365 
366       auto copy_len = std::min(length - bytes_wrote,
367                                cur_puff_->length + extra_byte_ - skip_bytes_);
368       TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
369       memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
370       skip_bytes_ += copy_len;
371       bytes_wrote += copy_len;
372 
373       if (skip_bytes_ == cur_puff_->length + extra_byte_) {
374         // |puff_buffer_| is full, now huff into the |deflate_buffer_|.
375         auto start_byte = cur_deflate_->offset / 8;
376         auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
377         auto bytes_to_write = end_byte - start_byte;
378 
379         deflate_buffer_->resize(bytes_to_write);
380         BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
381         BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
382 
383         // Write last byte if it has any.
384         TEST_AND_RETURN_FALSE(
385             bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
386         last_byte_ = 0;
387 
388         Error error;
389         TEST_AND_RETURN_FALSE(
390             huffer_->HuffDeflate(&puff_reader, &bit_writer, &error));
391         TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
392         TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
393 
394         deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
395         if (extra_byte_ == 1) {
396           deflate_buffer_->data()[bytes_to_write - 1] |=
397               puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
398           deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
399         } else if ((deflate_bit_pos_ & 7) != 0) {
400           // This happens if current and next deflate finish and end on the same
401           // byte, then we cannot write into output until we have huffed the
402           // next puff buffer, so untill then we cache it into |last_byte_| and
403           // we won't write it out.
404           last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
405           bytes_to_write--;
406         }
407 
408         // Write |deflate_buffer_| into output.
409         TEST_AND_RETURN_FALSE(
410             stream_->Write(deflate_buffer_->data(), bytes_to_write));
411 
412         // Move to the next deflate/puff.
413         puff_pos_ += skip_bytes_;
414         skip_bytes_ = 0;
415         cur_puff_++;
416         cur_deflate_++;
417         if (cur_puff_ == puffs_.end()) {
418           break;
419         }
420         // Find if need an extra byte to cache at the end.
421         TEST_AND_RETURN_FALSE(SetExtraByte());
422       }
423     }
424   }
425 
426   TEST_AND_RETURN_FALSE(bytes_wrote == length);
427   return true;
428 }
429 
SetExtraByte()430 bool PuffinStream::SetExtraByte() {
431   TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
432   if ((cur_deflate_ + 1) == deflates_.end()) {
433     extra_byte_ = 0;
434     return true;
435   }
436   uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
437   if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
438     extra_byte_ = 1;
439   } else {
440     extra_byte_ = 0;
441   }
442   return true;
443 }
444 
GetPuffCache(int puff_id,uint64_t puff_size,SharedBufferPtr * buffer)445 bool PuffinStream::GetPuffCache(int puff_id,
446                                 uint64_t puff_size,
447                                 SharedBufferPtr* buffer) {
448   bool found = false;
449   // Search for it.
450   std::pair<int, SharedBufferPtr> cache;
451   // TODO(*): Find a faster way of doing this? Maybe change the data structure
452   // that supports faster search.
453   for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
454     if (iter->first == puff_id) {
455       cache = std::move(*iter);
456       found = true;
457       // Remove it so later we can add it to the begining of the list.
458       caches_.erase(iter);
459       break;
460     }
461   }
462 
463   // If not found, either create one or get one from the list.
464   if (!found) {
465     // If |caches_| were full, remove last ones in the list (least used), until
466     // we have enough space for the new cache.
467     while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
468       cache = std::move(caches_.back());
469       caches_.pop_back();  // Remove it from the list.
470       cur_cache_size_ -= cache.second->capacity();
471     }
472     // If we have not populated the cache yet, create one.
473     if (!cache.second) {
474       cache.second.reset(new Buffer(puff_size));
475     }
476     cache.second->resize(puff_size);
477 
478     constexpr uint64_t kMaxSizeDifference = 20 * 1024;
479     if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
480       cache.second->shrink_to_fit();
481     }
482 
483     cur_cache_size_ += cache.second->capacity();
484     cache.first = puff_id;
485   }
486 
487   *buffer = cache.second;
488   // By now we have either removed a cache or created new one. Now we have to
489   // insert it in the front of the list so it becomes the most recently used
490   // one.
491   caches_.push_front(std::move(cache));
492   return found;
493 }
494 
495 }  // namespace puffin
496