1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/zip_reader.h"
18
19 #include <time.h>
20
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/base/logging.h"
23 #include "perfetto/base/time.h"
24 #include "perfetto/ext/base/utils.h"
25 #include "src/trace_processor/util/gzip_utils.h"
26 #include "src/trace_processor/util/streaming_line_reader.h"
27
28 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
29 #include <zlib.h> // For crc32().
30 #endif
31
32 namespace perfetto {
33 namespace trace_processor {
34 namespace util {
35
36 namespace {
37
// Little-endian signatures that open a local file header and a central
// directory record, respectively.
constexpr uint32_t kFileHeaderSig = 0x04034b50;
constexpr uint32_t kCentralDirectorySig = 0x02014b50;

// Values of the header's "compression" field handled by this reader.
constexpr uint16_t kNoCompression = 0;
constexpr uint16_t kDeflate = 8;
45
46 template <typename T>
ReadAndAdvance(const uint8_t ** ptr)47 T ReadAndAdvance(const uint8_t** ptr) {
48 T res{};
49 memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
50 *ptr += sizeof(T);
51 return res;
52 }
53
54 } // namespace
55
// Out-of-line defaulted constructor and destructor.
ZipReader::ZipReader() = default;
ZipReader::~ZipReader() = default;
58
Parse(const void * data,size_t len)59 base::Status ZipReader::Parse(const void* data, size_t len) {
60 const uint8_t* input = static_cast<const uint8_t*>(data);
61 const uint8_t* const input_begin = input;
62 const uint8_t* const input_end = input + len;
63 auto input_avail = [&] { return static_cast<size_t>(input_end - input); };
64
65 // .zip file sequence:
66 // [ File 1 header (30 bytes) ]
67 // [ File 1 name ]
68 // [ File 1 extra fields (optional) ]
69 // [ File 1 compressed payload ]
70 //
71 // [ File 2 header (30 bytes) ]
72 // [ File 2 name ]
73 // [ File 2 extra fields (optional) ]
74 // [ File 2 compressed payload ]
75 //
76 // [ Central directory (ignored) ]
77 while (input < input_end) {
78 // Initial state, we are building up the file header.
79 if (cur_.raw_hdr_size < kZipFileHdrSize) {
80 size_t copy_size =
81 std::min(input_avail(), kZipFileHdrSize - cur_.raw_hdr_size);
82 memcpy(&cur_.raw_hdr[cur_.raw_hdr_size], input, copy_size);
83 cur_.raw_hdr_size += copy_size;
84 input += copy_size;
85
86 // If we got all the kZipFileHdrSize bytes, parse the zip file header now.
87 if (cur_.raw_hdr_size == kZipFileHdrSize) {
88 const uint8_t* hdr_it = &cur_.raw_hdr[0];
89 cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
90 if (cur_.hdr.signature == kCentralDirectorySig) {
91 // We reached the central directory at the end of file.
92 // We don't make any use here of the central directory, so we just
93 // ignore everything else after this point.
94 // Here we abuse the ZipFile class a bit. The Central Directory header
95 // has a different layout. The first 4 bytes (signature) match, the
96 // rest don't but the sizeof(central dir) is >> sizeof(file header) so
97 // we are fine.
98 // We do this rather than retuning because we could have further
99 // Parse() calls (imagine parsing bytes one by one), and we need a way
100 // to keep track of the "keep eating input without doing anything".
101 cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
102 input = input_end;
103 break;
104 }
105 if (cur_.hdr.signature != kFileHeaderSig) {
106 return base::ErrStatus(
107 "Invalid signature found at offset 0x%zx. Actual=%x, expected=%x",
108 static_cast<size_t>(input - input_begin) - kZipFileHdrSize,
109 cur_.hdr.signature, kFileHeaderSig);
110 }
111
112 cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
113 cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
114 cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
115 cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
116 cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
117 cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
118 cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
119 cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
120 cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
121 cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
122 PERFETTO_DCHECK(static_cast<size_t>(hdr_it - cur_.raw_hdr) ==
123 kZipFileHdrSize);
124
125 // We support only up to version 2.0 (20). Higher versions define
126 // more advanced features that we don't support (zip64 extensions,
127 // encryption).
128 // Flag bits 1,2 define the compression strength for deflating (which
129 // zlib supports transparently). Other bits define other compression
130 // methods that we don't support.
131 if ((cur_.hdr.version > 20) || (cur_.hdr.flags & ~3) != 0) {
132 return base::ErrStatus(
133 "Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
134 static_cast<size_t>(input - input_begin) - kZipFileHdrSize,
135 cur_.hdr.version, cur_.hdr.flags);
136 }
137 cur_.compressed_data.reset(new uint8_t[cur_.hdr.compressed_size]);
138 cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
139 }
140 continue;
141 }
142
143 // Build up the file name.
144 if (cur_.hdr.fname.size() < cur_.hdr.fname_len) {
145 size_t name_left = cur_.hdr.fname_len - cur_.hdr.fname.size();
146 size_t copy_size = std::min(name_left, input_avail());
147 cur_.hdr.fname.append(reinterpret_cast<const char*>(input), copy_size);
148 input += copy_size;
149 continue;
150 }
151
152 // Skip any bytes if extra fields were present.
153 if (cur_.ignore_bytes_after_fname > 0) {
154 size_t skip_size = std::min(input_avail(), cur_.ignore_bytes_after_fname);
155 cur_.ignore_bytes_after_fname -= skip_size;
156 input += skip_size;
157 continue;
158 }
159
160 // Build up the compressed payload
161 if (cur_.compressed_data_written < cur_.hdr.compressed_size) {
162 size_t needed = cur_.hdr.compressed_size - cur_.compressed_data_written;
163 size_t copy_size = std::min(needed, input_avail());
164 memcpy(&cur_.compressed_data[cur_.compressed_data_written], input,
165 copy_size);
166 cur_.compressed_data_written += copy_size;
167 input += copy_size;
168 continue;
169 }
170
171 // We have accumulated the whole header, file name and compressed payload.
172 PERFETTO_DCHECK(cur_.raw_hdr_size == kZipFileHdrSize);
173 PERFETTO_DCHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
174 PERFETTO_DCHECK(cur_.compressed_data_written == cur_.hdr.compressed_size);
175 PERFETTO_DCHECK(cur_.ignore_bytes_after_fname == 0);
176
177 files_.emplace_back();
178 files_.back().hdr_ = std::move(cur_.hdr);
179 files_.back().compressed_data_ = std::move(cur_.compressed_data);
180 cur_ = FileParseState(); // Reset the parsing state for the next file.
181
182 } // while (input < input_end)
183
184 // At this point we must have consumed all input.
185 PERFETTO_DCHECK(input_avail() == 0);
186 return base::OkStatus();
187 }
188
Find(const std::string & path)189 ZipFile* ZipReader::Find(const std::string& path) {
190 for (ZipFile& zf : files_) {
191 if (zf.name() == path)
192 return &zf;
193 }
194 return nullptr;
195 }
196
// Out-of-line defaulted constructor, destructor and move operations. Both move
// operations are noexcept.
ZipFile::ZipFile() = default;
ZipFile::~ZipFile() = default;
ZipFile::ZipFile(ZipFile&& other) noexcept = default;
ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;
201
Decompress(std::vector<uint8_t> * out_data) const202 base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
203 out_data->clear();
204
205 auto res = DoDecompressionChecks();
206 if (!res.ok())
207 return res;
208
209 if (hdr_.compression == kNoCompression) {
210 const uint8_t* data = compressed_data_.get();
211 out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
212 return base::OkStatus();
213 }
214
215 if (hdr_.uncompressed_size == 0)
216 return base::OkStatus();
217
218 PERFETTO_DCHECK(hdr_.compression == kDeflate);
219 GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
220 dec.Feed(compressed_data_.get(), hdr_.compressed_size);
221
222 out_data->resize(hdr_.uncompressed_size);
223 auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
224 if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
225 return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
226 static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
227 hdr_.compressed_size, hdr_.uncompressed_size);
228 }
229 out_data->resize(dec_res.bytes_written);
230
231 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
232 const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
233 auto crc_len = static_cast<::uInt>(out_data->size());
234 auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
235 if (actual_crc32 != hdr_.checksum) {
236 return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
237 hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
238 }
239 #endif
240
241 return base::OkStatus();
242 }
243
DecompressLines(LinesCallback callback) const244 base::Status ZipFile::DecompressLines(LinesCallback callback) const {
245 using ResultCode = GzipDecompressor::ResultCode;
246
247 auto res = DoDecompressionChecks();
248 if (!res.ok())
249 return res;
250
251 StreamingLineReader line_reader(callback);
252
253 if (hdr_.compression == kNoCompression) {
254 line_reader.Tokenize(
255 base::StringView(reinterpret_cast<const char*>(compressed_data_.get()),
256 hdr_.compressed_size));
257 return base::OkStatus();
258 }
259
260 PERFETTO_DCHECK(hdr_.compression == kDeflate);
261 GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
262 dec.Feed(compressed_data_.get(), hdr_.compressed_size);
263
264 static constexpr size_t kChunkSize = 32768;
265 GzipDecompressor::Result dec_res;
266 do {
267 auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
268 dec_res = dec.ExtractOutput(wptr, kChunkSize);
269 if (dec_res.ret == ResultCode::kError ||
270 dec_res.ret == ResultCode::kNeedsMoreInput)
271 return base::ErrStatus("zlib decompression error on %s (%d)",
272 name().c_str(), static_cast<int>(dec_res.ret));
273 PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
274 line_reader.EndWrite(dec_res.bytes_written);
275 } while (dec_res.ret == ResultCode::kOk);
276 return base::OkStatus();
277 }
278
279 // Common logic for both Decompress() and DecompressLines().
DoDecompressionChecks() const280 base::Status ZipFile::DoDecompressionChecks() const {
281 PERFETTO_DCHECK(compressed_data_);
282
283 if (hdr_.compression == kNoCompression) {
284 PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size);
285 return base::OkStatus();
286 }
287
288 if (hdr_.compression != kDeflate) {
289 return base::ErrStatus("Zip compression mode not supported (%u)",
290 hdr_.compression);
291 }
292
293 if (!IsGzipSupported()) {
294 return base::ErrStatus(
295 "Cannot open zip file. Gzip is not enabled in the current build. "
296 "Rebuild with enable_perfetto_zlib=true");
297 }
298
299 return base::OkStatus();
300 }
301
302 // Returns a 64-bit version of time_t, that is, the num seconds since the Epoch.
GetDatetime() const303 int64_t ZipFile::GetDatetime() const {
304 // Date: 7 bits year, 4 bits month, 5 bits day.
305 // Time: 5 bits hour, 6 bits minute, 5 bits second.
306 struct tm mdt {};
307 // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
308 mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;
309
310 // As per the man page, the month ranges 0 to 11 (Jan = 0).
311 mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;
312
313 // However, still according to the same man page, the day starts from 1.
314 mdt.tm_mday = hdr_.mdate & 0x1f;
315
316 mdt.tm_hour = hdr_.mtime >> (16 - 5);
317 mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;
318
319 // Seconds in the DOS format have only 5 bits, so they lose the last bit of
320 // resolution, hence the * 2.
321 mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
322 return base::TimeGm(&mdt);
323 }
324
GetDatetimeStr() const325 std::string ZipFile::GetDatetimeStr() const {
326 char buf[32]{};
327 time_t secs = static_cast<time_t>(GetDatetime());
328 strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs));
329 buf[sizeof(buf) - 1] = '\0';
330 return buf;
331 }
332
333 } // namespace util
334 } // namespace trace_processor
335 } // namespace perfetto
336