1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_STRING_POOL_H_ 18 #define SRC_TRACE_PROCESSOR_STRING_POOL_H_ 19 20 #include "perfetto/base/paged_memory.h" 21 #include "src/trace_processor/null_term_string_view.h" 22 23 #include <unordered_map> 24 #include <vector> 25 26 namespace perfetto { 27 namespace trace_processor { 28 29 // Interns strings in a string pool and hands out compact StringIds which can 30 // be used to retrieve the string in O(1). 31 // On 64-bit platforms, the string pool is implemented as a mmaped buffer 32 // of 4GB with the id being equal ot the offset into this buffer of the string. 33 // On 32-bit platforms instead, the implementation allocates 32MB blocks of 34 // mmaped memory with the pointer being directly converted to the id. 35 class StringPool { 36 public: 37 using Id = uint32_t; 38 39 // Iterator over the strings in the pool. 40 class Iterator { 41 public: 42 Iterator(const StringPool*); 43 44 explicit operator bool() const; 45 Iterator& operator++(); 46 47 NullTermStringView StringView(); 48 Id StringId(); 49 50 private: 51 const StringPool* pool_ = nullptr; 52 uint32_t block_id_ = 0; 53 uint32_t block_offset_ = 0; 54 }; 55 56 StringPool(); 57 ~StringPool(); 58 59 // Allow std::move(). 60 StringPool(StringPool&&) noexcept; 61 StringPool& operator=(StringPool&&); 62 63 // Disable implicit copy. 64 StringPool(const StringPool&) = delete; 65 StringPool& operator=(const StringPool&) = delete; 66 InternString(base::StringView str)67 Id InternString(base::StringView str) { 68 if (str.data() == nullptr) 69 return 0; 70 71 auto hash = str.Hash(); 72 auto id_it = string_index_.find(hash); 73 if (id_it != string_index_.end()) { 74 PERFETTO_DCHECK(Get(id_it->second) == str); 75 return id_it->second; 76 } 77 return InsertString(str, hash); 78 } 79 Get(Id id)80 NullTermStringView Get(Id id) const { 81 if (id == 0) 82 return NullTermStringView(); 83 return GetFromPtr(IdToPtr(id)); 84 } 85 CreateIterator()86 Iterator CreateIterator() const { return Iterator(this); } 87 size()88 size_t size() const { return string_index_.size(); } 89 90 private: 91 using StringHash = uint64_t; 92 struct Block { BlockBlock93 Block() : mem_(base::PagedMemory::Allocate(kBlockSize)) {} 94 ~Block() = default; 95 96 // Allow std::move(). 97 Block(Block&&) noexcept = default; 98 Block& operator=(Block&&) = default; 99 100 // Disable implicit copy. 101 Block(const Block&) = delete; 102 Block& operator=(const Block&) = delete; 103 GetBlock104 uint8_t* Get(uint32_t offset) const { 105 return static_cast<uint8_t*>(mem_.Get()) + offset; 106 } 107 108 uint8_t* TryInsert(base::StringView str); 109 OffsetOfBlock110 uint32_t OffsetOf(uint8_t* ptr) const { 111 PERFETTO_DCHECK(Get(0) < ptr && ptr < Get(kBlockSize - 1)); 112 return static_cast<uint32_t>(ptr - Get(0)); 113 } 114 posBlock115 uint32_t pos() const { return pos_; } 116 117 private: 118 static constexpr size_t kBlockSize = 119 sizeof(void*) == 8 120 ? static_cast<size_t>(4ull * 1024ull * 1024ull * 1024ull) /* 4GB */ 121 : 32ull * 1024ull * 1024ull /* 32MB */; 122 123 base::PagedMemory mem_; 124 uint32_t pos_ = 0; 125 }; 126 127 friend class Iterator; 128 129 // Number of bytes to reserve for size and null terminator. 130 static constexpr uint8_t kMetadataSize = 3; 131 132 // Inserts the string with the given hash into the pool 133 Id InsertString(base::StringView, uint64_t hash); 134 135 // |ptr| should point to the start of the string metadata (i.e. the first byte 136 // of the size). PtrToId(uint8_t * ptr)137 Id PtrToId(uint8_t* ptr) const { 138 // For a 64 bit architecture, the id is the offset of the pointer inside 139 // the one and only 4GB block. 140 if (sizeof(void*) == 8) { 141 PERFETTO_DCHECK(blocks_.size() == 1); 142 return blocks_.back().OffsetOf(ptr); 143 } 144 145 // On 32 bit architectures, the size of the pointer is 32-bit so we simply 146 // use the pointer itself as the id. 147 // Double cast needed because, on 64 archs, the compiler complains that we 148 // are losing information. 149 return static_cast<Id>(reinterpret_cast<uintptr_t>(ptr)); 150 } 151 152 // THe returned pointer points to the start of the string metadata (i.e. the 153 // first byte of the size). IdToPtr(Id id)154 uint8_t* IdToPtr(Id id) const { 155 // For a 64 bit architecture, the pointer is simply the found by taking 156 // the base of the 4GB block and adding the offset given by |id|. 157 if (sizeof(void*) == 8) { 158 PERFETTO_DCHECK(blocks_.size() == 1); 159 return blocks_.back().Get(id); 160 } 161 // On a 32 bit architecture, the pointer is the same as the id. 162 return reinterpret_cast<uint8_t*>(id); 163 } 164 165 // |ptr| should point to the start of the string metadata (i.e. the first byte 166 // of the size). GetSize(uint8_t * ptr)167 static uint16_t GetSize(uint8_t* ptr) { 168 // The size is simply memcpyed into the byte buffer when writing. 169 uint16_t size; 170 memcpy(&size, ptr, sizeof(uint16_t)); 171 return size; 172 } 173 174 // |ptr| should point to the start of the string metadata (i.e. the first byte 175 // of the size). GetFromPtr(uint8_t * ptr)176 static NullTermStringView GetFromPtr(uint8_t* ptr) { 177 // With the first two bytes being used for the size, the string starts from 178 // byte 3. 179 return NullTermStringView(reinterpret_cast<char*>(&ptr[2]), GetSize(ptr)); 180 } 181 182 // The actual memory storing the strings. 183 std::vector<Block> blocks_; 184 185 // Maps hashes of strings to the Id in the string pool. 186 // TODO(lalitm): At some point we should benchmark just using a static 187 // hashtable of 1M elements, we can afford paying a fixed 8MB here 188 std::unordered_map<StringHash, Id> string_index_; 189 }; 190 191 } // namespace trace_processor 192 } // namespace perfetto 193 194 #endif // SRC_TRACE_PROCESSOR_STRING_POOL_H_ 195