1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 19 20 #include <stddef.h> 21 #include <stdint.h> 22 23 #include <limits> 24 #include <vector> 25 26 #include "perfetto/ext/base/flat_hash_map.h" 27 #include "perfetto/ext/base/hash.h" 28 #include "perfetto/ext/base/optional.h" 29 #include "perfetto/ext/base/paged_memory.h" 30 #include "perfetto/protozero/proto_utils.h" 31 #include "src/trace_processor/containers/null_term_string_view.h" 32 33 namespace perfetto { 34 namespace trace_processor { 35 36 // Interns strings in a string pool and hands out compact StringIds which can 37 // be used to retrieve the string in O(1). 38 class StringPool { 39 public: 40 struct Id { 41 Id() = default; 42 43 bool operator==(const Id& other) const { return other.id == id; } 44 bool operator!=(const Id& other) const { return !(other == *this); } 45 bool operator<(const Id& other) const { return id < other.id; } 46 is_nullId47 bool is_null() const { return id == 0u; } 48 is_large_stringId49 bool is_large_string() const { return id & kLargeStringFlagBitMask; } 50 block_offsetId51 uint32_t block_offset() const { return id & kBlockOffsetBitMask; } 52 block_indexId53 uint32_t block_index() const { 54 return (id & kBlockIndexBitMask) >> kNumBlockOffsetBits; 55 } 56 large_string_indexId57 uint32_t large_string_index() const { 58 PERFETTO_DCHECK(is_large_string()); 59 return id & ~kLargeStringFlagBitMask; 60 } 61 raw_idId62 uint32_t raw_id() const { return id; } 63 LargeStringId64 static Id LargeString(size_t index) { 65 PERFETTO_DCHECK(index <= static_cast<uint32_t>(index)); 66 PERFETTO_DCHECK(!(index & kLargeStringFlagBitMask)); 67 return Id(kLargeStringFlagBitMask | static_cast<uint32_t>(index)); 68 } 69 BlockStringId70 static Id BlockString(size_t index, uint32_t offset) { 71 PERFETTO_DCHECK(index < (1u << (kNumBlockIndexBits + 1))); 72 PERFETTO_DCHECK(offset < (1u << (kNumBlockOffsetBits + 1))); 73 return Id(~kLargeStringFlagBitMask & 74 (static_cast<uint32_t>(index << kNumBlockOffsetBits) | 75 (offset & kBlockOffsetBitMask))); 76 } 77 RawId78 static constexpr Id Raw(uint32_t raw) { return Id(raw); } 79 NullId80 static constexpr Id Null() { return Id(0u); } 81 82 private: IdId83 constexpr Id(uint32_t i) : id(i) {} 84 85 uint32_t id; 86 }; 87 88 // Iterator over the strings in the pool. 89 class Iterator { 90 public: 91 Iterator(const StringPool*); 92 93 explicit operator bool() const; 94 Iterator& operator++(); 95 96 NullTermStringView StringView(); 97 Id StringId(); 98 99 private: 100 const StringPool* pool_ = nullptr; 101 uint32_t block_index_ = 0; 102 uint32_t block_offset_ = 0; 103 uint32_t large_strings_index_ = 0; 104 }; 105 106 StringPool(); 107 ~StringPool(); 108 109 // Allow std::move(). 110 StringPool(StringPool&&) noexcept; 111 StringPool& operator=(StringPool&&) noexcept; 112 113 // Disable implicit copy. 114 StringPool(const StringPool&) = delete; 115 StringPool& operator=(const StringPool&) = delete; 116 InternString(base::StringView str)117 Id InternString(base::StringView str) { 118 if (str.data() == nullptr) 119 return Id::Null(); 120 121 auto hash = str.Hash(); 122 123 // Perform a hashtable insertion with a null ID just to check if the string 124 // is already inserted. If it's not, overwrite 0 with the actual Id. 125 auto it_and_inserted = string_index_.Insert(hash, Id()); 126 Id* id = it_and_inserted.first; 127 if (!it_and_inserted.second) { 128 PERFETTO_DCHECK(Get(*id) == str); 129 return *id; 130 } 131 *id = InsertString(str, hash); 132 return *id; 133 } 134 GetId(base::StringView str)135 base::Optional<Id> GetId(base::StringView str) const { 136 if (str.data() == nullptr) 137 return Id::Null(); 138 139 auto hash = str.Hash(); 140 Id* id = string_index_.Find(hash); 141 if (id) { 142 PERFETTO_DCHECK(Get(*id) == str); 143 return *id; 144 } 145 return base::nullopt; 146 } 147 Get(Id id)148 NullTermStringView Get(Id id) const { 149 if (id.is_null()) 150 return NullTermStringView(); 151 if (id.is_large_string()) 152 return GetLargeString(id); 153 return GetFromBlockPtr(IdToPtr(id)); 154 } 155 CreateIterator()156 Iterator CreateIterator() const { return Iterator(this); } 157 size()158 size_t size() const { return string_index_.size(); } 159 160 private: 161 using StringHash = uint64_t; 162 163 struct Block { BlockBlock164 explicit Block(size_t size) 165 : mem_(base::PagedMemory::Allocate(size, 166 base::PagedMemory::kDontCommit)), 167 size_(size) {} 168 ~Block() = default; 169 170 // Allow std::move(). 171 Block(Block&&) noexcept = default; 172 Block& operator=(Block&&) = default; 173 174 // Disable implicit copy. 175 Block(const Block&) = delete; 176 Block& operator=(const Block&) = delete; 177 GetBlock178 uint8_t* Get(uint32_t offset) const { 179 return static_cast<uint8_t*>(mem_.Get()) + offset; 180 } 181 182 std::pair<bool /*success*/, uint32_t /*offset*/> TryInsert( 183 base::StringView str); 184 OffsetOfBlock185 uint32_t OffsetOf(const uint8_t* ptr) const { 186 PERFETTO_DCHECK(Get(0) < ptr && 187 ptr <= Get(static_cast<uint32_t>(size_ - 1))); 188 return static_cast<uint32_t>(ptr - Get(0)); 189 } 190 posBlock191 uint32_t pos() const { return pos_; } 192 193 private: 194 base::PagedMemory mem_; 195 uint32_t pos_ = 0; 196 size_t size_ = 0; 197 }; 198 199 friend class Iterator; 200 friend class StringPoolTest; 201 202 // StringPool IDs are 32-bit. If the MSB is 1, the remaining bits of the ID 203 // are an index into the |large_strings_| vector. Otherwise, the next 6 bits 204 // are the index of the Block in the pool, and the remaining 25 bits the 205 // offset of the encoded string inside the pool. 206 // 207 // [31] [30:25] [24:0] 208 // | | | 209 // | | +---- offset in block (or LSB of large string index). 210 // | +------------ block index (or MSB of large string index). 211 // +------------------- 1: large string, 0: string in a Block. 212 static constexpr size_t kNumBlockIndexBits = 6; 213 static constexpr size_t kNumBlockOffsetBits = 25; 214 215 static constexpr size_t kLargeStringFlagBitMask = 1u << 31; 216 static constexpr size_t kBlockOffsetBitMask = (1u << kNumBlockOffsetBits) - 1; 217 static constexpr size_t kBlockIndexBitMask = 218 0xffffffff & ~kLargeStringFlagBitMask & ~kBlockOffsetBitMask; 219 220 static constexpr size_t kBlockSizeBytes = kBlockOffsetBitMask + 1; // 32 MB 221 222 // If a string doesn't fit into the current block, we can either start a new 223 // block or insert the string into the |large_strings_| vector. To maximize 224 // the used proportion of each block's memory, we only start a new block if 225 // the string isn't very large. 226 static constexpr size_t kMinLargeStringSizeBytes = kBlockSizeBytes / 8; 227 228 // Number of bytes to reserve for size and null terminator. 229 // This is the upper limit on metadata size: 5 bytes for max uint32, 230 // plus 1 byte for null terminator. The actual size may be lower. 231 static constexpr uint8_t kMaxMetadataSize = 6; 232 233 // Inserts the string with the given hash into the pool and return its Id. 234 Id InsertString(base::StringView, uint64_t hash); 235 236 // Insert a large string into the pool and return its Id. 237 Id InsertLargeString(base::StringView, uint64_t hash); 238 239 // The returned pointer points to the start of the string metadata (i.e. the 240 // first byte of the size). IdToPtr(Id id)241 const uint8_t* IdToPtr(Id id) const { 242 // If the MSB is set, the ID represents an index into |large_strings_|, so 243 // shouldn't be converted into a block pointer. 244 PERFETTO_DCHECK(!id.is_large_string()); 245 246 size_t block_index = id.block_index(); 247 uint32_t block_offset = id.block_offset(); 248 249 PERFETTO_DCHECK(block_index < blocks_.size()); 250 PERFETTO_DCHECK(block_offset < blocks_[block_index].pos()); 251 252 return blocks_[block_index].Get(block_offset); 253 } 254 255 // |ptr| should point to the start of the string metadata (i.e. the first byte 256 // of the size). 257 // Returns pointer to the start of the string. ReadSize(const uint8_t * ptr,uint32_t * size)258 static const uint8_t* ReadSize(const uint8_t* ptr, uint32_t* size) { 259 uint64_t value = 0; 260 const uint8_t* str_ptr = protozero::proto_utils::ParseVarInt( 261 ptr, ptr + kMaxMetadataSize, &value); 262 PERFETTO_DCHECK(str_ptr != ptr); 263 PERFETTO_DCHECK(value < std::numeric_limits<uint32_t>::max()); 264 *size = static_cast<uint32_t>(value); 265 return str_ptr; 266 } 267 268 // |ptr| should point to the start of the string metadata (i.e. the first byte 269 // of the size). GetFromBlockPtr(const uint8_t * ptr)270 static NullTermStringView GetFromBlockPtr(const uint8_t* ptr) { 271 uint32_t size = 0; 272 const uint8_t* str_ptr = ReadSize(ptr, &size); 273 return NullTermStringView(reinterpret_cast<const char*>(str_ptr), size); 274 } 275 276 // Lookup a string in the |large_strings_| vector. |id| should have the MSB 277 // set. GetLargeString(Id id)278 NullTermStringView GetLargeString(Id id) const { 279 PERFETTO_DCHECK(id.is_large_string()); 280 size_t index = id.large_string_index(); 281 PERFETTO_DCHECK(index < large_strings_.size()); 282 const std::string* str = large_strings_[index].get(); 283 return NullTermStringView(str->c_str(), str->size()); 284 } 285 286 // The actual memory storing the strings. 287 std::vector<Block> blocks_; 288 289 // Any string that is too large to fit into a Block is stored separately 290 // (inside a unique_ptr to ensure any references to it remain valid even if 291 // |large_strings_| is resized). 292 std::vector<std::unique_ptr<std::string>> large_strings_; 293 294 // Maps hashes of strings to the Id in the string pool. 295 base::FlatHashMap<StringHash, 296 Id, 297 base::AlreadyHashed<StringHash>, 298 base::LinearProbe, 299 /*AppendOnly=*/true> 300 string_index_{/*initial_capacity=*/1024u * 1024u}; 301 }; 302 303 } // namespace trace_processor 304 } // namespace perfetto 305 306 template <> 307 struct std::hash<::perfetto::trace_processor::StringPool::Id> { 308 using argument_type = ::perfetto::trace_processor::StringPool::Id; 309 using result_type = size_t; 310 311 result_type operator()(const argument_type& r) const { 312 return std::hash<uint32_t>{}(r.raw_id()); 313 } 314 }; 315 316 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 317