1 /* 2 * Copyright (C) 2020 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <stdint.h> 20 21 #include <map> 22 #include <memory> 23 #include <string_view> 24 #include <utility> 25 26 #include "zip_error.h" 27 28 // This class is the interface of the central directory entries map. The map 29 // helps to locate a particular cd entry based on the filename. 30 class CdEntryMapInterface { 31 public: 32 virtual ~CdEntryMapInterface() = default; 33 // Adds an entry to the map. The |name| should internally points to the 34 // filename field of a cd entry. And |start| points to the beginning of the 35 // central directory. Returns 0 on success. 36 virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0; 37 // For the zip entry |entryName|, finds the offset of its filename field in 38 // the central directory. Returns a pair of [status, offset]. The value of 39 // the status is 0 on success. 40 virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, 41 const uint8_t* cd_start) const = 0; 42 // Resets the iterator to the beginning of the map. 43 virtual void ResetIteration() = 0; 44 // Returns the [name, cd offset] of the current element. Also increments the 45 // iterator to points to the next element. Returns an empty pair we have read 46 // past boundary. 47 virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0; 48 }; 49 50 /** 51 * More space efficient string representation of strings in an mmaped zipped 52 * file than std::string_view. Using std::string_view as an entry in the 53 * ZipArchive hash table wastes space. std::string_view stores a pointer to a 54 * string (on 64 bit, 8 bytes) and the length to read from that pointer, 55 * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting 56 * 6 bytes. 57 * 58 * ZipStringOffset stores a 4 byte offset from a fixed location in the memory 59 * mapped file instead of the entire address, consuming 8 bytes with alignment. 60 */ 61 struct ZipStringOffset { 62 uint32_t name_offset; 63 uint16_t name_length; 64 ToStringViewZipStringOffset65 const std::string_view ToStringView(const uint8_t* start) const { 66 return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length}; 67 } 68 }; 69 70 // This implementation of CdEntryMap uses an array hash table. It uses less 71 // memory than std::map; and it's used as the default implementation for zip 72 // archives without zip64 extension. 73 class CdEntryMapZip32 : public CdEntryMapInterface { 74 public: 75 static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries); 76 77 ZipError AddToMap(std::string_view name, const uint8_t* start) override; 78 std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, 79 const uint8_t* cd_start) const override; 80 void ResetIteration() override; 81 std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override; 82 83 private: 84 explicit CdEntryMapZip32(uint16_t num_entries); 85 86 // We know how many entries are in the Zip archive, so we can have a 87 // fixed-size hash table. We define a load factor of 0.75 and over 88 // allocate so the maximum number entries can never be higher than 89 // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. 90 uint32_t hash_table_size_{0}; 91 std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free}; 92 93 // The position of element for the current iteration. 94 uint32_t current_position_{0}; 95 }; 96 97 // This implementation of CdEntryMap uses a std::map 98 class CdEntryMapZip64 : public CdEntryMapInterface { 99 public: 100 static std::unique_ptr<CdEntryMapInterface> Create(); 101 102 ZipError AddToMap(std::string_view name, const uint8_t* start) override; 103 std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, 104 const uint8_t* cd_start) const override; 105 void ResetIteration() override; 106 std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override; 107 108 private: 109 CdEntryMapZip64() = default; 110 111 std::map<std::string_view, uint64_t> entry_table_; 112 113 std::map<std::string_view, uint64_t>::iterator iterator_; 114 }; 115