• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/zip_reader.h"
18 
19 #include <time.h>
20 
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/base/logging.h"
23 #include "perfetto/base/time.h"
24 #include "perfetto/ext/base/utils.h"
25 #include "src/trace_processor/util/gzip_utils.h"
26 #include "src/trace_processor/util/streaming_line_reader.h"
27 
28 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
29 #include <zlib.h>  // For crc32().
30 #endif
31 
32 namespace perfetto {
33 namespace trace_processor {
34 namespace util {
35 
36 namespace {
37 
// Signatures that open the two record kinds this parser distinguishes.
constexpr uint32_t kFileHeaderSig = 0x04034b50;
constexpr uint32_t kCentralDirectorySig = 0x02014b50;

// Values of the header's compression field that are recognised.
constexpr uint16_t kNoCompression = 0;
constexpr uint16_t kDeflate = 8;
45 
46 template <typename T>
ReadAndAdvance(const uint8_t ** ptr)47 T ReadAndAdvance(const uint8_t** ptr) {
48   T res{};
49   memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
50   *ptr += sizeof(T);
51   return res;
52 }
53 
54 }  // namespace
55 
56 ZipReader::ZipReader() = default;
57 ZipReader::~ZipReader() = default;
58 
Parse(const void * data,size_t len)59 base::Status ZipReader::Parse(const void* data, size_t len) {
60   const uint8_t* input = static_cast<const uint8_t*>(data);
61   const uint8_t* const input_begin = input;
62   const uint8_t* const input_end = input + len;
63   auto input_avail = [&] { return static_cast<size_t>(input_end - input); };
64 
65   // .zip file sequence:
66   // [ File 1 header (30 bytes) ]
67   // [ File 1 name ]
68   // [ File 1 extra fields (optional) ]
69   // [ File 1 compressed payload ]
70   //
71   // [ File 2 header (30 bytes) ]
72   // [ File 2 name ]
73   // [ File 2 extra fields (optional) ]
74   // [ File 2 compressed payload ]
75   //
76   // [ Central directory (ignored) ]
77   while (input < input_end) {
78     // Initial state, we are building up the file header.
79     if (cur_.raw_hdr_size < kZipFileHdrSize) {
80       size_t copy_size =
81           std::min(input_avail(), kZipFileHdrSize - cur_.raw_hdr_size);
82       memcpy(&cur_.raw_hdr[cur_.raw_hdr_size], input, copy_size);
83       cur_.raw_hdr_size += copy_size;
84       input += copy_size;
85 
86       // If we got all the kZipFileHdrSize bytes, parse the zip file header now.
87       if (cur_.raw_hdr_size == kZipFileHdrSize) {
88         const uint8_t* hdr_it = &cur_.raw_hdr[0];
89         cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
90         if (cur_.hdr.signature == kCentralDirectorySig) {
91           // We reached the central directory at the end of file.
92           // We don't make any use here of the central directory, so we just
93           // ignore everything else after this point.
94           // Here we abuse the ZipFile class a bit. The Central Directory header
95           // has a different layout. The first 4 bytes (signature) match, the
96           // rest don't but the sizeof(central dir) is >> sizeof(file header) so
97           // we are fine.
98           // We do this rather than retuning because we could have further
99           // Parse() calls (imagine parsing bytes one by one), and we need a way
100           // to keep track of the "keep eating input without doing anything".
101           cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
102           input = input_end;
103           break;
104         }
105         if (cur_.hdr.signature != kFileHeaderSig) {
106           return base::ErrStatus(
107               "Invalid signature found at offset 0x%zx. Actual=%x, expected=%x",
108               static_cast<size_t>(input - input_begin) - kZipFileHdrSize,
109               cur_.hdr.signature, kFileHeaderSig);
110         }
111 
112         cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
113         cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
114         cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
115         cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
116         cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
117         cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
118         cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
119         cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
120         cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
121         cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
122         PERFETTO_DCHECK(static_cast<size_t>(hdr_it - cur_.raw_hdr) ==
123                         kZipFileHdrSize);
124 
125         // We support only up to version 2.0 (20). Higher versions define
126         // more advanced features that we don't support (zip64 extensions,
127         // encryption).
128         // Flag bits 1,2 define the compression strength for deflating (which
129         // zlib supports transparently). Other bits define other compression
130         // methods that we don't support.
131         if ((cur_.hdr.version > 20) || (cur_.hdr.flags & ~3) != 0) {
132           return base::ErrStatus(
133               "Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
134               static_cast<size_t>(input - input_begin) - kZipFileHdrSize,
135               cur_.hdr.version, cur_.hdr.flags);
136         }
137         cur_.compressed_data.reset(new uint8_t[cur_.hdr.compressed_size]);
138         cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
139       }
140       continue;
141     }
142 
143     // Build up the file name.
144     if (cur_.hdr.fname.size() < cur_.hdr.fname_len) {
145       size_t name_left = cur_.hdr.fname_len - cur_.hdr.fname.size();
146       size_t copy_size = std::min(name_left, input_avail());
147       cur_.hdr.fname.append(reinterpret_cast<const char*>(input), copy_size);
148       input += copy_size;
149       continue;
150     }
151 
152     // Skip any bytes if extra fields were present.
153     if (cur_.ignore_bytes_after_fname > 0) {
154       size_t skip_size = std::min(input_avail(), cur_.ignore_bytes_after_fname);
155       cur_.ignore_bytes_after_fname -= skip_size;
156       input += skip_size;
157       continue;
158     }
159 
160     // Build up the compressed payload
161     if (cur_.compressed_data_written < cur_.hdr.compressed_size) {
162       size_t needed = cur_.hdr.compressed_size - cur_.compressed_data_written;
163       size_t copy_size = std::min(needed, input_avail());
164       memcpy(&cur_.compressed_data[cur_.compressed_data_written], input,
165              copy_size);
166       cur_.compressed_data_written += copy_size;
167       input += copy_size;
168       continue;
169     }
170 
171     // We have accumulated the whole header, file name and compressed payload.
172     PERFETTO_DCHECK(cur_.raw_hdr_size == kZipFileHdrSize);
173     PERFETTO_DCHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
174     PERFETTO_DCHECK(cur_.compressed_data_written == cur_.hdr.compressed_size);
175     PERFETTO_DCHECK(cur_.ignore_bytes_after_fname == 0);
176 
177     files_.emplace_back();
178     files_.back().hdr_ = std::move(cur_.hdr);
179     files_.back().compressed_data_ = std::move(cur_.compressed_data);
180     cur_ = FileParseState();  // Reset the parsing state for the next file.
181 
182   }  // while (input < input_end)
183 
184   // At this point we must have consumed all input.
185   PERFETTO_DCHECK(input_avail() == 0);
186   return base::OkStatus();
187 }
188 
Find(const std::string & path)189 ZipFile* ZipReader::Find(const std::string& path) {
190   for (ZipFile& zf : files_) {
191     if (zf.name() == path)
192       return &zf;
193   }
194   return nullptr;
195 }
196 
197 ZipFile::ZipFile() = default;
198 ZipFile::~ZipFile() = default;
199 ZipFile::ZipFile(ZipFile&& other) noexcept = default;
200 ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;
201 
Decompress(std::vector<uint8_t> * out_data) const202 base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
203   out_data->clear();
204 
205   auto res = DoDecompressionChecks();
206   if (!res.ok())
207     return res;
208 
209   if (hdr_.compression == kNoCompression) {
210     const uint8_t* data = compressed_data_.get();
211     out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
212     return base::OkStatus();
213   }
214 
215   if (hdr_.uncompressed_size == 0)
216     return base::OkStatus();
217 
218   PERFETTO_DCHECK(hdr_.compression == kDeflate);
219   GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
220   dec.Feed(compressed_data_.get(), hdr_.compressed_size);
221 
222   out_data->resize(hdr_.uncompressed_size);
223   auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
224   if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
225     return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
226                            static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
227                            hdr_.compressed_size, hdr_.uncompressed_size);
228   }
229   out_data->resize(dec_res.bytes_written);
230 
231 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
232   const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
233   auto crc_len = static_cast<::uInt>(out_data->size());
234   auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
235   if (actual_crc32 != hdr_.checksum) {
236     return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
237                            hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
238   }
239 #endif
240 
241   return base::OkStatus();
242 }
243 
DecompressLines(LinesCallback callback) const244 base::Status ZipFile::DecompressLines(LinesCallback callback) const {
245   using ResultCode = GzipDecompressor::ResultCode;
246 
247   auto res = DoDecompressionChecks();
248   if (!res.ok())
249     return res;
250 
251   StreamingLineReader line_reader(callback);
252 
253   if (hdr_.compression == kNoCompression) {
254     line_reader.Tokenize(
255         base::StringView(reinterpret_cast<const char*>(compressed_data_.get()),
256                          hdr_.compressed_size));
257     return base::OkStatus();
258   }
259 
260   PERFETTO_DCHECK(hdr_.compression == kDeflate);
261   GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
262   dec.Feed(compressed_data_.get(), hdr_.compressed_size);
263 
264   static constexpr size_t kChunkSize = 32768;
265   GzipDecompressor::Result dec_res;
266   do {
267     auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
268     dec_res = dec.ExtractOutput(wptr, kChunkSize);
269     if (dec_res.ret == ResultCode::kError ||
270         dec_res.ret == ResultCode::kNeedsMoreInput)
271       return base::ErrStatus("zlib decompression error on %s (%d)",
272                              name().c_str(), static_cast<int>(dec_res.ret));
273     PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
274     line_reader.EndWrite(dec_res.bytes_written);
275   } while (dec_res.ret == ResultCode::kOk);
276   return base::OkStatus();
277 }
278 
279 // Common logic for both Decompress() and DecompressLines().
DoDecompressionChecks() const280 base::Status ZipFile::DoDecompressionChecks() const {
281   PERFETTO_DCHECK(compressed_data_);
282 
283   if (hdr_.compression == kNoCompression) {
284     PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size);
285     return base::OkStatus();
286   }
287 
288   if (hdr_.compression != kDeflate) {
289     return base::ErrStatus("Zip compression mode not supported (%u)",
290                            hdr_.compression);
291   }
292 
293   if (!IsGzipSupported()) {
294     return base::ErrStatus(
295         "Cannot open zip file. Gzip is not enabled in the current build. "
296         "Rebuild with enable_perfetto_zlib=true");
297   }
298 
299   return base::OkStatus();
300 }
301 
302 // Returns a 64-bit version of time_t, that is, the num seconds since the Epoch.
GetDatetime() const303 int64_t ZipFile::GetDatetime() const {
304   // Date: 7 bits year, 4 bits month, 5 bits day.
305   // Time: 5 bits hour, 6 bits minute, 5 bits second.
306   struct tm mdt {};
307   // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
308   mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;
309 
310   // As per the man page, the month ranges 0 to 11 (Jan = 0).
311   mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;
312 
313   // However, still according to the same man page, the day starts from 1.
314   mdt.tm_mday = hdr_.mdate & 0x1f;
315 
316   mdt.tm_hour = hdr_.mtime >> (16 - 5);
317   mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;
318 
319   // Seconds in the DOS format have only 5 bits, so they lose the last bit of
320   // resolution, hence the * 2.
321   mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
322   return base::TimeGm(&mdt);
323 }
324 
GetDatetimeStr() const325 std::string ZipFile::GetDatetimeStr() const {
326   char buf[32]{};
327   time_t secs = static_cast<time_t>(GetDatetime());
328   strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs));
329   buf[sizeof(buf) - 1] = '\0';
330   return buf;
331 }
332 
333 }  // namespace util
334 }  // namespace trace_processor
335 }  // namespace perfetto
336