• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/include/puffin/utils.h"
6 
7 #include <inttypes.h>
8 
9 #include <string>
10 #include <vector>
11 
12 #include <zlib.h>
13 
14 #include "puffin/src/bit_reader.h"
15 #include "puffin/src/file_stream.h"
16 #include "puffin/src/include/puffin/common.h"
17 #include "puffin/src/include/puffin/errors.h"
18 #include "puffin/src/include/puffin/puffer.h"
19 #include "puffin/src/memory_stream.h"
20 #include "puffin/src/puff_writer.h"
21 #include "puffin/src/set_errors.h"
22 
23 namespace {
// Reads a value of type |T| from |address| by copying sizeof(T) bytes with
// memcpy, so |address| does not have to be aligned for |T|.
template <typename T>
inline T get_unaligned(const void* address) {
  T value;
  memcpy(&value, address, sizeof(value));
  return value;
}
31 
32 // Calculate both the compressed size and uncompressed size of the deflate
33 // block that starts from the offset |start| of buffer |data|.
CalculateSizeOfDeflateBlock(const puffin::Buffer & data,uint64_t start,uint64_t * compressed_size,uint64_t * uncompressed_size)34 bool CalculateSizeOfDeflateBlock(const puffin::Buffer& data,
35                                  uint64_t start,
36                                  uint64_t* compressed_size,
37                                  uint64_t* uncompressed_size) {
38   TEST_AND_RETURN_FALSE(compressed_size != nullptr &&
39                         uncompressed_size != nullptr);
40 
41   TEST_AND_RETURN_FALSE(start < data.size());
42 
43   z_stream strm = {};
44   strm.avail_in = data.size() - start;
45   strm.next_in = data.data() + start;
46 
47   // -15 means we are decoding a 'raw' stream without zlib headers.
48   if (inflateInit2(&strm, -15)) {
49     LOG(ERROR) << "Failed to initialize inflate: " << strm.msg;
50     return false;
51   }
52 
53   const unsigned int kBufferSize = 32768;
54   std::vector<uint8_t> uncompressed_data(kBufferSize);
55   *uncompressed_size = 0;
56   int status = Z_OK;
57   do {
58     // Overwrite the same buffer since we don't need the uncompressed data.
59     strm.avail_out = kBufferSize;
60     strm.next_out = uncompressed_data.data();
61     status = inflate(&strm, Z_NO_FLUSH);
62     if (status < 0) {
63       LOG(ERROR) << "Inflate failed: " << strm.msg << ", has decompressed "
64                  << *uncompressed_size << " bytes.";
65       return false;
66     }
67     *uncompressed_size += kBufferSize - strm.avail_out;
68   } while (status != Z_STREAM_END);
69 
70   *compressed_size = data.size() - start - strm.avail_in;
71   TEST_AND_RETURN_FALSE(inflateEnd(&strm) == Z_OK);
72   return true;
73 }
74 
75 }  // namespace
76 
77 namespace puffin {
78 
79 using std::string;
80 using std::vector;
81 
BytesInByteExtents(const vector<ByteExtent> & extents)82 uint64_t BytesInByteExtents(const vector<ByteExtent>& extents) {
83   uint64_t bytes = 0;
84   for (const auto& extent : extents) {
85     bytes += extent.length;
86   }
87   return bytes;
88 }
89 
90 // This function uses RFC1950 (https://www.ietf.org/rfc/rfc1950.txt) for the
91 // definition of a zlib stream.  For finding the deflate blocks, we relying on
92 // the proper size of the zlib stream in |data|. Basically the size of the zlib
93 // stream should be known before hand. Otherwise we need to parse the stream and
94 // find the location of compressed blocks using CalculateSizeOfDeflateBlock().
LocateDeflatesInZlib(const Buffer & data,std::vector<ByteExtent> * deflate_blocks)95 bool LocateDeflatesInZlib(const Buffer& data,
96                           std::vector<ByteExtent>* deflate_blocks) {
97   // A zlib stream has the following format:
98   // 0           1     compression method and flag
99   // 1           1     flag
100   // 2           4     preset dictionary (optional)
101   // 2 or 6      n     compressed data
102   // n+(2 or 6)  4     Adler-32 checksum
103   TEST_AND_RETURN_FALSE(data.size() >= 6 + 4);  // Header + Footer
104   uint16_t cmf = data[0];
105   auto compression_method = cmf & 0x0F;
106   // For deflate compression_method should be 8.
107   TEST_AND_RETURN_FALSE(compression_method == 8);
108 
109   auto cinfo = (cmf & 0xF0) >> 4;
110   // Value greater than 7 is not allowed in deflate.
111   TEST_AND_RETURN_FALSE(cinfo <= 7);
112 
113   auto flag = data[1];
114   TEST_AND_RETURN_FALSE(((cmf << 8) + flag) % 31 == 0);
115 
116   uint64_t header_len = 2;
117   if (flag & 0x20) {
118     header_len += 4;  // 4 bytes for the preset dictionary.
119   }
120 
121   // 4 is for ADLER32.
122   deflate_blocks->emplace_back(header_len, data.size() - header_len - 4);
123   return true;
124 }
125 
FindDeflateSubBlocks(const UniqueStreamPtr & src,const vector<ByteExtent> & deflates,vector<BitExtent> * subblock_deflates)126 bool FindDeflateSubBlocks(const UniqueStreamPtr& src,
127                           const vector<ByteExtent>& deflates,
128                           vector<BitExtent>* subblock_deflates) {
129   Puffer puffer;
130   Buffer deflate_buffer;
131   for (const auto& deflate : deflates) {
132     TEST_AND_RETURN_FALSE(src->Seek(deflate.offset));
133     // Read from src into deflate_buffer.
134     deflate_buffer.resize(deflate.length);
135     TEST_AND_RETURN_FALSE(src->Read(deflate_buffer.data(), deflate.length));
136 
137     // Find all the subblocks.
138     BufferBitReader bit_reader(deflate_buffer.data(), deflate.length);
139     BufferPuffWriter puff_writer(nullptr, 0);
140     Error error;
141     vector<BitExtent> subblocks;
142     TEST_AND_RETURN_FALSE(
143         puffer.PuffDeflate(&bit_reader, &puff_writer, &subblocks, &error));
144     TEST_AND_RETURN_FALSE(deflate.length == bit_reader.Offset());
145     for (const auto& subblock : subblocks) {
146       subblock_deflates->emplace_back(subblock.offset + deflate.offset * 8,
147                                       subblock.length);
148     }
149   }
150   return true;
151 }
152 
LocateDeflatesInZlibBlocks(const string & file_path,const vector<ByteExtent> & zlibs,vector<BitExtent> * deflates)153 bool LocateDeflatesInZlibBlocks(const string& file_path,
154                                 const vector<ByteExtent>& zlibs,
155                                 vector<BitExtent>* deflates) {
156   auto src = FileStream::Open(file_path, true, false);
157   TEST_AND_RETURN_FALSE(src);
158 
159   Buffer buffer;
160   for (auto& zlib : zlibs) {
161     buffer.resize(zlib.length);
162     TEST_AND_RETURN_FALSE(src->Seek(zlib.offset));
163     TEST_AND_RETURN_FALSE(src->Read(buffer.data(), buffer.size()));
164 
165     vector<ByteExtent> deflate_blocks;
166     TEST_AND_RETURN_FALSE(LocateDeflatesInZlib(buffer, &deflate_blocks));
167 
168     vector<BitExtent> deflate_subblocks;
169     auto zlib_blc_src = MemoryStream::CreateForRead(buffer);
170     TEST_AND_RETURN_FALSE(
171         FindDeflateSubBlocks(zlib_blc_src, deflate_blocks, &deflate_subblocks));
172 
173     // Relocated based on the offset of the zlib.
174     for (const auto& def : deflate_subblocks) {
175       deflates->emplace_back(zlib.offset * 8 + def.offset, def.length);
176     }
177   }
178   return true;
179 }
180 
181 // For more information about gzip format, refer to RFC 1952 located at:
182 // https://www.ietf.org/rfc/rfc1952.txt
LocateDeflatesInGzip(const Buffer & data,vector<ByteExtent> * deflate_blocks)183 bool LocateDeflatesInGzip(const Buffer& data,
184                           vector<ByteExtent>* deflate_blocks) {
185   uint64_t member_start = 0;
186   while (member_start < data.size()) {
187     // Each member entry has the following format
188     // 0      1     0x1F
189     // 1      1     0x8B
190     // 2      1     compression method (8 denotes deflate)
191     // 3      1     set of flags
192     // 4      4     modification time
193     // 8      1     extra flags
194     // 9      1     operating system
195     TEST_AND_RETURN_FALSE(member_start + 10 <= data.size());
196     TEST_AND_RETURN_FALSE(data[member_start + 0] == 0x1F);
197     TEST_AND_RETURN_FALSE(data[member_start + 1] == 0x8B);
198     TEST_AND_RETURN_FALSE(data[member_start + 2] == 8);
199 
200     uint64_t offset = member_start + 10;
201     int flag = data[member_start + 3];
202     // Extra field
203     if (flag & 4) {
204       TEST_AND_RETURN_FALSE(offset + 2 <= data.size());
205       uint16_t extra_length = data[offset++];
206       extra_length |= static_cast<uint16_t>(data[offset++]) << 8;
207       TEST_AND_RETURN_FALSE(offset + extra_length <= data.size());
208       offset += extra_length;
209     }
210     // File name field
211     if (flag & 8) {
212       while (true) {
213         TEST_AND_RETURN_FALSE(offset + 1 <= data.size());
214         if (data[offset++] == 0) {
215           break;
216         }
217       }
218     }
219     // File comment field
220     if (flag & 16) {
221       while (true) {
222         TEST_AND_RETURN_FALSE(offset + 1 <= data.size());
223         if (data[offset++] == 0) {
224           break;
225         }
226       }
227     }
228     // CRC16 field
229     if (flag & 2) {
230       offset += 2;
231     }
232 
233     uint64_t compressed_size, uncompressed_size;
234     TEST_AND_RETURN_FALSE(CalculateSizeOfDeflateBlock(
235         data, offset, &compressed_size, &uncompressed_size));
236     TEST_AND_RETURN_FALSE(offset + compressed_size <= data.size());
237     deflate_blocks->push_back(ByteExtent(offset, compressed_size));
238     offset += compressed_size;
239 
240     // Ignore CRC32;
241     TEST_AND_RETURN_FALSE(offset + 8 <= data.size());
242     offset += 4;
243     uint32_t u_size = 0;
244     for (size_t i = 0; i < 4; i++) {
245       u_size |= static_cast<uint32_t>(data[offset++]) << (i * 8);
246     }
247     TEST_AND_RETURN_FALSE(uncompressed_size % (1 << 31) == u_size);
248     member_start = offset;
249   }
250   return true;
251 }
252 
253 // For more information about the zip format, refer to
254 // https://support.pkware.com/display/PKZIP/APPNOTE
LocateDeflatesInZipArchive(const Buffer & data,vector<ByteExtent> * deflate_blocks)255 bool LocateDeflatesInZipArchive(const Buffer& data,
256                                 vector<ByteExtent>* deflate_blocks) {
257   uint64_t pos = 0;
258   while (pos <= data.size() - 30) {
259     // TODO(xunchang) add support for big endian system when searching for
260     // magic numbers.
261     if (get_unaligned<uint32_t>(data.data() + pos) != 0x04034b50) {
262       pos++;
263       continue;
264     }
265 
266     // local file header format
267     // 0      4     0x04034b50
268     // 4      2     minimum version needed to extract
269     // 6      2     general purpose bit flag
270     // 8      2     compression method
271     // 10     4     file last modification date & time
272     // 14     4     CRC-32
273     // 18     4     compressed size
274     // 22     4     uncompressed size
275     // 26     2     file name length
276     // 28     2     extra field length
277     // 30     n     file name
278     // 30+n   m     extra field
279     auto compression_method = get_unaligned<uint16_t>(data.data() + pos + 8);
280     if (compression_method != 8) {  // non-deflate type
281       pos += 4;
282       continue;
283     }
284 
285     auto compressed_size = get_unaligned<uint32_t>(data.data() + pos + 18);
286     auto uncompressed_size = get_unaligned<uint32_t>(data.data() + pos + 22);
287     auto file_name_length = get_unaligned<uint16_t>(data.data() + pos + 26);
288     auto extra_field_length = get_unaligned<uint16_t>(data.data() + pos + 28);
289     uint64_t header_size = 30 + file_name_length + extra_field_length;
290 
291     // sanity check
292     if (static_cast<uint64_t>(header_size) + compressed_size > data.size() ||
293         pos > data.size() - header_size - compressed_size) {
294       pos += 4;
295       continue;
296     }
297 
298     uint64_t calculated_compressed_size;
299     uint64_t calculated_uncompressed_size;
300     if (!CalculateSizeOfDeflateBlock(data, pos + header_size,
301                                      &calculated_compressed_size,
302                                      &calculated_uncompressed_size)) {
303       LOG(ERROR) << "Failed to decompress the zip entry starting from: " << pos
304                  << ", skip adding deflates for this entry.";
305       pos += 4;
306       continue;
307     }
308 
309     // Double check the compressed size and uncompressed size if they are
310     // available in the file header.
311     if (compressed_size > 0 && compressed_size != calculated_compressed_size) {
312       LOG(WARNING) << "Compressed size in the file header: " << compressed_size
313                    << " doesn't equal the real size: "
314                    << calculated_compressed_size;
315     }
316 
317     if (uncompressed_size > 0 &&
318         uncompressed_size != calculated_uncompressed_size) {
319       LOG(WARNING) << "Uncompressed size in the file header: "
320                    << uncompressed_size << " doesn't equal the real size: "
321                    << calculated_uncompressed_size;
322     }
323 
324     deflate_blocks->emplace_back(pos + header_size, calculated_compressed_size);
325     pos += header_size + calculated_compressed_size;
326   }
327 
328   return true;
329 }
330 
LocateDeflateSubBlocksInZipArchive(const Buffer & data,vector<BitExtent> * deflates)331 bool LocateDeflateSubBlocksInZipArchive(const Buffer& data,
332                                         vector<BitExtent>* deflates) {
333   vector<ByteExtent> deflate_blocks;
334   if (!LocateDeflatesInZipArchive(data, &deflate_blocks)) {
335     return false;
336   }
337 
338   auto src = MemoryStream::CreateForRead(data);
339   return FindDeflateSubBlocks(src, deflate_blocks, deflates);
340 }
341 
FindPuffLocations(const UniqueStreamPtr & src,const vector<BitExtent> & deflates,vector<ByteExtent> * puffs,uint64_t * out_puff_size)342 bool FindPuffLocations(const UniqueStreamPtr& src,
343                        const vector<BitExtent>& deflates,
344                        vector<ByteExtent>* puffs,
345                        uint64_t* out_puff_size) {
346   Puffer puffer;
347   Buffer deflate_buffer;
348 
349   // Here accumulate the size difference between each corresponding deflate and
350   // puff. At the end we add this cummulative size difference to the size of the
351   // deflate stream to get the size of the puff stream. We use signed size
352   // because puff size could be smaller than deflate size.
353   int64_t total_size_difference = 0;
354   for (auto deflate = deflates.begin(); deflate != deflates.end(); ++deflate) {
355     // Read from src into deflate_buffer.
356     auto start_byte = deflate->offset / 8;
357     auto end_byte = (deflate->offset + deflate->length + 7) / 8;
358     deflate_buffer.resize(end_byte - start_byte);
359     TEST_AND_RETURN_FALSE(src->Seek(start_byte));
360     TEST_AND_RETURN_FALSE(
361         src->Read(deflate_buffer.data(), deflate_buffer.size()));
362     // Find the size of the puff.
363     BufferBitReader bit_reader(deflate_buffer.data(), deflate_buffer.size());
364     uint64_t bits_to_skip = deflate->offset % 8;
365     TEST_AND_RETURN_FALSE(bit_reader.CacheBits(bits_to_skip));
366     bit_reader.DropBits(bits_to_skip);
367 
368     BufferPuffWriter puff_writer(nullptr, 0);
369     Error error;
370     TEST_AND_RETURN_FALSE(
371         puffer.PuffDeflate(&bit_reader, &puff_writer, nullptr, &error));
372     TEST_AND_RETURN_FALSE(deflate_buffer.size() == bit_reader.Offset());
373 
374     // 1 if a deflate ends at the same byte that the next deflate starts and
375     // there is a few bits gap between them. In practice this may never happen,
376     // but it is a good idea to support it anyways. If there is a gap, the value
377     // of the gap will be saved as an integer byte to the puff stream. The parts
378     // of the byte that belogs to the deflates are shifted out.
379     int gap = 0;
380     if (deflate != deflates.begin()) {
381       auto prev_deflate = std::prev(deflate);
382       if ((prev_deflate->offset + prev_deflate->length == deflate->offset)
383           // If deflates are on byte boundary the gap will not be counted later,
384           // so we won't worry about it.
385           && (deflate->offset % 8 != 0)) {
386         gap = 1;
387       }
388     }
389 
390     start_byte = ((deflate->offset + 7) / 8);
391     end_byte = (deflate->offset + deflate->length) / 8;
392     int64_t deflate_length_in_bytes = end_byte - start_byte;
393 
394     // If there was no gap bits between the current and previous deflates, there
395     // will be no extra gap byte, so the offset will be shifted one byte back.
396     auto puff_offset = start_byte - gap + total_size_difference;
397     auto puff_size = puff_writer.Size();
398     // Add the location into puff.
399     puffs->emplace_back(puff_offset, puff_size);
400     total_size_difference +=
401         static_cast<int64_t>(puff_size) - deflate_length_in_bytes - gap;
402   }
403 
404   uint64_t src_size;
405   TEST_AND_RETURN_FALSE(src->GetSize(&src_size));
406   auto final_size = static_cast<int64_t>(src_size) + total_size_difference;
407   TEST_AND_RETURN_FALSE(final_size >= 0);
408   *out_puff_size = final_size;
409   return true;
410 }
411 
412 }  // namespace puffin
413