/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/util/zip_reader.h"

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/status.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/status_or.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/ext/base/utils.h"
#include "perfetto/trace_processor/trace_blob_view.h"
#include "src/trace_processor/util/gzip_utils.h"
#include "src/trace_processor/util/status_macros.h"
#include "src/trace_processor/util/streaming_line_reader.h"

#if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
#include <zconf.h>
#include <zlib.h>
#endif

namespace perfetto::trace_processor::util {

namespace {

// Entry signatures.
constexpr uint32_t kFileHeaderSig = 0x04034b50;
constexpr uint32_t kCentralDirectorySig = 0x02014b50;
constexpr uint32_t kDataDescriptorSig = 0x08074b50;

// 4 bytes each of: 1) signature, 2) crc, 3) compressed size 4) uncompressed
// size.
constexpr uint32_t kDataDescriptorSize = 4 * 4;

// Bits of the "general purpose bit flag" field of a local file header.
// kUnknown is the mask of every bit that is not explicitly named here.
enum GeneralPurposeBitFlag : uint32_t {
  kEncrypted = 1u << 0,
  k8kSlidingDictionary = 1u << 1,
  kShannonFano = 1u << 2,
  kDataDescriptor = 1u << 3,
  kLanguageEncoding = 1u << 11,
  kUnknown = ~(kEncrypted | k8kSlidingDictionary | kShannonFano |
               kDataDescriptor | kLanguageEncoding),
};

// Compression flags.
constexpr uint16_t kNoCompression = 0;
constexpr uint16_t kDeflate = 8;

// Copies sizeof(T) bytes from |*ptr| into a value of type T and advances
// |*ptr| past them. The caller must guarantee that at least sizeof(T) bytes
// are readable at |*ptr|.
template <typename T>
T ReadAndAdvance(const uint8_t** ptr) {
  T res{};
  memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
  *ptr += sizeof(T);
  return res;
}

}  // namespace

ZipReader::ZipReader() = default;
ZipReader::~ZipReader() = default;

// Appends |tbv| to the pending input and runs the per-file state machine
// until it stops making progress (i.e. more input is needed) or a step fails.
// Can be called repeatedly with consecutive chunks of the archive.
base::Status ZipReader::Parse(TraceBlobView tbv) {
  reader_.PushBack(std::move(tbv));

  // .zip file sequence:
  // [ File 1 header (30 bytes)           ]
  // [ File 1 name                        ]
  // [ File 1 extra fields (optional)     ]
  // [ File 1 compressed payload          ]
  // [ File 1 data descriptor (optional)  ]
  //
  // [ File 2 header (30 bytes)           ]
  // [ File 2 name                        ]
  // [ File 2 extra fields (optional)     ]
  // [ File 2 compressed payload          ]
  // [ File 2 data descriptor (optional)  ]
  //
  // [ Central directory (ignored) ]
  for (;;) {
    auto state = cur_.parse_state;
    switch (state) {
      case FileParseState::kHeader:
        RETURN_IF_ERROR(TryParseHeader());
        break;
      case FileParseState::kFilename:
        RETURN_IF_ERROR(TryParseFilename());
        break;
      case FileParseState::kSkipBytes:
        RETURN_IF_ERROR(TrySkipBytes());
        break;
      case FileParseState::kCompressedData:
        RETURN_IF_ERROR(TryParseCompressedData());
        break;
    }
    // A step that leaves the state untouched did not have enough input to
    // proceed: wait for the next Parse() call.
    if (state == cur_.parse_state) {
      return base::OkStatus();
    }
  }
}

// Consumes one local file header (kZipFileHdrSize bytes) if fully available,
// validates it and moves on to kFilename. Returns OK without consuming
// anything when the header is not fully buffered yet.
base::Status ZipReader::TryParseHeader() {
  PERFETTO_CHECK(cur_.hdr.signature == 0);

  // Remember where the header starts: error messages must point at the header
  // itself, not at the bytes that follow it once it has been popped.
  const size_t hdr_offset = reader_.start_offset();
  std::optional<TraceBlobView> hdr =
      reader_.SliceOff(hdr_offset, kZipFileHdrSize);
  if (!hdr) {
    return base::OkStatus();
  }
  PERFETTO_CHECK(reader_.PopFrontBytes(kZipFileHdrSize));

  const uint8_t* hdr_it = hdr->data();
  cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
  if (cur_.hdr.signature == kCentralDirectorySig) {
    // We reached the central directory at the end of file.
    // We don't make any use here of the central directory, so we just
    // ignore everything else after this point.
    // Here we abuse the ZipFile class a bit. The Central Directory header
    // has a different layout. The first 4 bytes (signature) match, the
    // rest don't but the sizeof(central dir) is >> sizeof(file header) so
    // we are fine.
    // We do this rather than returning because we could have further
    // Parse() calls (imagine parsing bytes one by one), and we need a way
    // to keep track of the "keep eating input without doing anything".
    cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
    cur_.parse_state = FileParseState::kSkipBytes;
    return base::OkStatus();
  }
  if (cur_.hdr.signature != kFileHeaderSig) {
    return base::ErrStatus(
        "Invalid signature found at offset 0x%zx. Actual=0x%x, "
        "expected=0x%x",
        hdr_offset, cur_.hdr.signature, kFileHeaderSig);
  }

  cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
  cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
  cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
  cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
  cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
  PERFETTO_DCHECK(static_cast<size_t>(hdr_it - hdr->data()) ==
                  kZipFileHdrSize);

  // We support only up to version 2.0 (20). Higher versions define
  // more advanced features that we don't support (zip64 extensions,
  // encryption).
  // Disallow encryption or any flags we don't know how to handle.
  if ((cur_.hdr.version > 20) || (cur_.hdr.flags & kEncrypted) ||
      (cur_.hdr.flags & kUnknown)) {
    return base::ErrStatus(
        "Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
        hdr_offset, cur_.hdr.version, cur_.hdr.flags);
  }
  if (cur_.hdr.compression != kNoCompression &&
      cur_.hdr.compression != kDeflate) {
    return base::ErrStatus(
        "Unsupported compression type at offset 0x%zx. type=%x. Only "
        "deflate and no compression are supported.",
        hdr_offset, cur_.hdr.compression);
  }
  // With a trailing data descriptor the payload size is not known upfront, so
  // the end of the payload can only be found by running the deflate stream.
  if (cur_.hdr.flags & kDataDescriptor && cur_.hdr.compression != kDeflate) {
    return base::ErrStatus(
        "Unsupported compression type at offset 0x%zx. type=%x. Only "
        "deflate supported for ZIPs compressed in a streaming fashion.",
        hdr_offset, cur_.hdr.compression);
  }
  cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
  cur_.parse_state = FileParseState::kFilename;
  return base::OkStatus();
}

// Consumes the file name (fname_len bytes) if fully available and moves on to
// kSkipBytes. An empty name transitions immediately.
base::Status ZipReader::TryParseFilename() {
  if (cur_.hdr.fname_len == 0) {
    cur_.parse_state = FileParseState::kSkipBytes;
    return base::OkStatus();
  }
  PERFETTO_CHECK(cur_.hdr.fname.empty());

  std::optional<TraceBlobView> fname_tbv =
      reader_.SliceOff(reader_.start_offset(), cur_.hdr.fname_len);
  if (!fname_tbv) {
    return base::OkStatus();
  }
  PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.fname_len));
  cur_.hdr.fname = std::string(
      reinterpret_cast<const char*>(fname_tbv->data()), cur_.hdr.fname_len);
  cur_.parse_state = FileParseState::kSkipBytes;
  return base::OkStatus();
}

// Discards |ignore_bytes_after_fname| bytes (the extra field, or "everything"
// once the central directory has been reached), possibly across several
// Parse() calls, then moves on to kCompressedData.
base::Status ZipReader::TrySkipBytes() {
  if (cur_.ignore_bytes_after_fname == 0) {
    cur_.parse_state = FileParseState::kCompressedData;
    return base::OkStatus();
  }

  size_t avail = reader_.avail();
  if (avail < cur_.ignore_bytes_after_fname) {
    // Not enough buffered: drop what we have and remember the remainder.
    PERFETTO_CHECK(reader_.PopFrontBytes(avail));
    cur_.ignore_bytes_after_fname -= avail;
    return base::OkStatus();
  }

  PERFETTO_CHECK(reader_.PopFrontBytes(cur_.ignore_bytes_after_fname));
  cur_.ignore_bytes_after_fname = 0;
  cur_.parse_state = FileParseState::kCompressedData;
  return base::OkStatus();
}

// Accumulates the compressed payload of the current entry. Once the payload
// (and the data descriptor, when present) is complete, appends a ZipFile to
// |files_| and resets |cur_| for the next entry. Returns OK without finishing
// the entry when more input is required.
base::Status ZipReader::TryParseCompressedData() {
  // Build up the compressed payload
  if (cur_.hdr.flags & kDataDescriptor) {
    if (!cur_.compressed) {
      ASSIGN_OR_RETURN(auto compressed, TryParseUnsizedCompressedData());
      if (!compressed) {
        return base::OkStatus();
      }
      cur_.compressed = std::move(compressed);
    }

    // Capture the descriptor position before popping so that errors point at
    // the descriptor rather than at whatever follows it.
    const size_t desc_offset = reader_.start_offset();
    std::optional<TraceBlobView> data_descriptor =
        reader_.SliceOff(desc_offset, kDataDescriptorSize);
    if (!data_descriptor) {
      return base::OkStatus();
    }
    PERFETTO_CHECK(reader_.PopFrontBytes(kDataDescriptorSize));

    const auto* desc_it = data_descriptor->data();
    auto desc_sig = ReadAndAdvance<uint32_t>(&desc_it);
    if (desc_sig != kDataDescriptorSig) {
      return base::ErrStatus(
          "Invalid signature found at offset 0x%zx. Actual=0x%x, "
          "expected=0x%x",
          desc_offset, desc_sig, kDataDescriptorSig);
    }
    cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&desc_it);
    cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&desc_it);
    cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&desc_it);

    // The descriptor comes from the (untrusted) archive: a size that does not
    // match the deflate stream we just delimited is a malformed file, not a
    // programming error, so report it instead of crashing.
    if (cur_.compressed->size() != cur_.hdr.compressed_size) {
      return base::ErrStatus(
          "Compressed size mismatch in data descriptor at offset 0x%zx. "
          "Actual=%zu, expected=%u",
          desc_offset, cur_.compressed->size(), cur_.hdr.compressed_size);
    }
  } else {
    PERFETTO_CHECK(!cur_.compressed);
    std::optional<TraceBlobView> raw_compressed =
        reader_.SliceOff(reader_.start_offset(), cur_.hdr.compressed_size);
    if (!raw_compressed) {
      return base::OkStatus();
    }
    cur_.compressed = *std::move(raw_compressed);
    PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.compressed_size));
  }

  // We have accumulated the whole header, file name and compressed payload.
  PERFETTO_CHECK(cur_.compressed);
  PERFETTO_CHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
  PERFETTO_CHECK(cur_.compressed->size() == cur_.hdr.compressed_size);
  PERFETTO_CHECK(cur_.ignore_bytes_after_fname == 0);

  files_.emplace_back();
  files_.back().hdr_ = std::move(cur_.hdr);
  files_.back().compressed_data_ = *std::move(cur_.compressed);
  cur_ = FileParseState();  // Reset the parsing state for the next file.
  return base::OkStatus();
}

// Finds the end of a deflate payload whose size is not stored in the header
// by feeding the buffered input to the decompressor and discarding its
// output. Returns:
//  - std::nullopt if the stream has not ended yet (more input needed);
//  - the compressed payload (already popped from |reader_|) once the
//    decompressor reports the end of the stream;
//  - an error status if the decompressor reports an error.
base::StatusOr<std::optional<TraceBlobView>>
ZipReader::TryParseUnsizedCompressedData() {
  PERFETTO_CHECK(cur_.hdr.compression == kDeflate);

  // Only feed the bytes that were not handed to the decompressor by a
  // previous call.
  auto start = reader_.start_offset() + cur_.decompressor_bytes_fed;
  auto end = reader_.end_offset();
  auto slice = reader_.SliceOff(start, end - start);
  PERFETTO_CHECK(slice);
  auto res_code = cur_.decompressor.FeedAndExtract(
      slice->data(), slice->size(), [](const uint8_t*, size_t) {
        // Intentionally do nothing: we are only looking for the bounds of the
        // deflate stream, we are not actually interested in the output.
      });
  switch (res_code) {
    case GzipDecompressor::ResultCode::kNeedsMoreInput:
      cur_.decompressor_bytes_fed += slice->size();
      return {std::nullopt};
    case GzipDecompressor::ResultCode::kError:
      return base::ErrStatus(
          "Failed decompressing stream in ZIP file at offset 0x%zx",
          reader_.start_offset());
    case GzipDecompressor::ResultCode::kOk:
      PERFETTO_FATAL("Unexpected result code");
    case GzipDecompressor::ResultCode::kEof:
      break;
  }

  // Bytes still reported by AvailIn() were not consumed by the stream and
  // belong to whatever follows the payload.
  cur_.decompressor_bytes_fed += slice->size() - cur_.decompressor.AvailIn();
  auto raw_compressed =
      reader_.SliceOff(reader_.start_offset(), cur_.decompressor_bytes_fed);
  PERFETTO_CHECK(raw_compressed);
  PERFETTO_CHECK(reader_.PopFrontBytes(cur_.decompressor_bytes_fed));
  return {std::move(raw_compressed)};
}

// Returns the first parsed entry whose name equals |path|, or nullptr.
ZipFile* ZipReader::Find(const std::string& path) {
  for (ZipFile& zf : files_) {
    if (zf.name() == path)
      return &zf;
  }
  return nullptr;
}

ZipFile::ZipFile() = default;
ZipFile::~ZipFile() = default;
ZipFile::ZipFile(ZipFile&& other) noexcept = default;
ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;

// Replaces the contents of |out_data| with the uncompressed payload of this
// entry. Fails if the entry cannot be handled (see DoDecompressionChecks()),
// if inflating does not reach the end of the stream within
// |uncompressed_size| bytes, or (zlib builds only) if the CRC32 of the
// inflated data does not match the stored checksum.
base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
  out_data->clear();
  RETURN_IF_ERROR(DoDecompressionChecks());

  if (hdr_.compression == kNoCompression) {
    const uint8_t* data = compressed_data_.data();
    out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
    return base::OkStatus();
  }

  if (hdr_.uncompressed_size == 0) {
    return base::OkStatus();
  }

  PERFETTO_DCHECK(hdr_.compression == kDeflate);
  GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
  dec.Feed(compressed_data_.data(), hdr_.compressed_size);

  out_data->resize(hdr_.uncompressed_size);
  auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
  if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
    return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
                           static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
                           hdr_.compressed_size, hdr_.uncompressed_size);
  }
  out_data->resize(dec_res.bytes_written);

#if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
  const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
  auto crc_len = static_cast<::uInt>(out_data->size());
  auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
  if (actual_crc32 != hdr_.checksum) {
    return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
                           hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
  }
#endif

  return base::OkStatus();
}

// Streams the uncompressed payload through a StreamingLineReader that invokes
// |callback|. Deflated entries are inflated in chunks of kChunkSize bytes.
base::Status ZipFile::DecompressLines(LinesCallback callback) const {
  using ResultCode = GzipDecompressor::ResultCode;
  RETURN_IF_ERROR(DoDecompressionChecks());

  StreamingLineReader line_reader(std::move(callback));

  if (hdr_.compression == kNoCompression) {
    line_reader.Tokenize(base::StringView(
        reinterpret_cast<const char*>(compressed_data_.data()),
        hdr_.compressed_size));
    return base::OkStatus();
  }

  PERFETTO_DCHECK(hdr_.compression == kDeflate);
  GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
  dec.Feed(compressed_data_.data(), hdr_.compressed_size);

  static constexpr size_t kChunkSize = 32768;
  GzipDecompressor::Result dec_res;
  do {
    auto* wptr =
        reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
    dec_res = dec.ExtractOutput(wptr, kChunkSize);
    // The whole compressed payload was fed upfront, so asking for more input
    // means the stream is truncated.
    if (dec_res.ret == ResultCode::kError ||
        dec_res.ret == ResultCode::kNeedsMoreInput) {
      return base::ErrStatus("zlib decompression error on %s (%d)",
                             name().c_str(), static_cast<int>(dec_res.ret));
    }
    PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
    line_reader.EndWrite(dec_res.bytes_written);
  } while (dec_res.ret == ResultCode::kOk);
  return base::OkStatus();
}

// Common logic for both Decompress() and DecompressLines().
// Returns an error (rather than crashing) for anything that depends on the
// contents of the archive, since those are untrusted.
base::Status ZipFile::DoDecompressionChecks() const {
  if (hdr_.compression == kNoCompression) {
    // A stored entry must have identical sizes; anything else is a malformed
    // archive.
    if (hdr_.compressed_size != hdr_.uncompressed_size) {
      return base::ErrStatus(
          "Zip size mismatch on stored file %s (c=%u, u=%u)",
          hdr_.fname.c_str(), hdr_.compressed_size, hdr_.uncompressed_size);
    }
    return base::OkStatus();
  }

  if (hdr_.compression != kDeflate) {
    return base::ErrStatus("Zip compression mode not supported (%u)",
                           hdr_.compression);
  }

  if (!IsGzipSupported()) {
    return base::ErrStatus(
        "Cannot open zip file. Gzip is not enabled in the current build. "
        "Rebuild with enable_perfetto_zlib=true");
  }

  return base::OkStatus();
}

// Returns a 64-bit version of time_t, that is, the num seconds since the
// Epoch.
int64_t ZipFile::GetDatetime() const {
  // Date: 7 bits year, 4 bits month, 5 bits day.
  // Time: 5 bits hour, 6 bits minute, 5 bits second.
  struct tm mdt {};
  // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
  mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;

  // As per the man page, the month ranges 0 to 11 (Jan = 0).
  mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;

  // However, still according to the same man page, the day starts from 1.
  mdt.tm_mday = hdr_.mdate & 0x1f;

  mdt.tm_hour = hdr_.mtime >> (16 - 5);
  mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;

  // Seconds in the DOS format have only 5 bits, so they lose the last bit of
  // resolution, hence the * 2.
  mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
  return base::TimeGm(&mdt);
}

// Formats GetDatetime() as "YYYY-MM-DD HH:MM:SS". Returns an empty string if
// the timestamp cannot be converted to a broken-down time.
std::string ZipFile::GetDatetimeStr() const {
  char buf[32]{};
  time_t secs = static_cast<time_t>(GetDatetime());
  // gmtime() returns nullptr when the value cannot be represented; passing
  // that to strftime() would be undefined behavior.
  const struct tm* utc = gmtime(&secs);
  if (utc) {
    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", utc);
  }
  buf[sizeof(buf) - 1] = '\0';
  return buf;
}

}  // namespace perfetto::trace_processor::util