• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_
19 
20 #include <stddef.h>
21 #include <stdint.h>
22 
23 #include <limits>
24 #include <vector>
25 
26 #include "perfetto/ext/base/flat_hash_map.h"
27 #include "perfetto/ext/base/hash.h"
28 #include "perfetto/ext/base/optional.h"
29 #include "perfetto/ext/base/paged_memory.h"
30 #include "perfetto/protozero/proto_utils.h"
31 #include "src/trace_processor/containers/null_term_string_view.h"
32 
33 namespace perfetto {
34 namespace trace_processor {
35 
36 // Interns strings in a string pool and hands out compact StringIds which can
37 // be used to retrieve the string in O(1).
38 class StringPool {
39  public:
40   struct Id {
41     Id() = default;
42 
43     bool operator==(const Id& other) const { return other.id == id; }
44     bool operator!=(const Id& other) const { return !(other == *this); }
45     bool operator<(const Id& other) const { return id < other.id; }
46 
is_nullId47     bool is_null() const { return id == 0u; }
48 
is_large_stringId49     bool is_large_string() const { return id & kLargeStringFlagBitMask; }
50 
block_offsetId51     uint32_t block_offset() const { return id & kBlockOffsetBitMask; }
52 
block_indexId53     uint32_t block_index() const {
54       return (id & kBlockIndexBitMask) >> kNumBlockOffsetBits;
55     }
56 
large_string_indexId57     uint32_t large_string_index() const {
58       PERFETTO_DCHECK(is_large_string());
59       return id & ~kLargeStringFlagBitMask;
60     }
61 
raw_idId62     uint32_t raw_id() const { return id; }
63 
LargeStringId64     static Id LargeString(size_t index) {
65       PERFETTO_DCHECK(index <= static_cast<uint32_t>(index));
66       PERFETTO_DCHECK(!(index & kLargeStringFlagBitMask));
67       return Id(kLargeStringFlagBitMask | static_cast<uint32_t>(index));
68     }
69 
BlockStringId70     static Id BlockString(size_t index, uint32_t offset) {
71       PERFETTO_DCHECK(index < (1u << (kNumBlockIndexBits + 1)));
72       PERFETTO_DCHECK(offset < (1u << (kNumBlockOffsetBits + 1)));
73       return Id(~kLargeStringFlagBitMask &
74                 (static_cast<uint32_t>(index << kNumBlockOffsetBits) |
75                  (offset & kBlockOffsetBitMask)));
76     }
77 
RawId78     static constexpr Id Raw(uint32_t raw) { return Id(raw); }
79 
NullId80     static constexpr Id Null() { return Id(0u); }
81 
82    private:
IdId83     constexpr Id(uint32_t i) : id(i) {}
84 
85     uint32_t id;
86   };
87 
88   // Iterator over the strings in the pool.
89   class Iterator {
90    public:
91     Iterator(const StringPool*);
92 
93     explicit operator bool() const;
94     Iterator& operator++();
95 
96     NullTermStringView StringView();
97     Id StringId();
98 
99    private:
100     const StringPool* pool_ = nullptr;
101     uint32_t block_index_ = 0;
102     uint32_t block_offset_ = 0;
103     uint32_t large_strings_index_ = 0;
104   };
105 
106   StringPool();
107   ~StringPool();
108 
109   // Allow std::move().
110   StringPool(StringPool&&) noexcept;
111   StringPool& operator=(StringPool&&) noexcept;
112 
113   // Disable implicit copy.
114   StringPool(const StringPool&) = delete;
115   StringPool& operator=(const StringPool&) = delete;
116 
InternString(base::StringView str)117   Id InternString(base::StringView str) {
118     if (str.data() == nullptr)
119       return Id::Null();
120 
121     auto hash = str.Hash();
122 
123     // Perform a hashtable insertion with a null ID just to check if the string
124     // is already inserted. If it's not, overwrite 0 with the actual Id.
125     auto it_and_inserted = string_index_.Insert(hash, Id());
126     Id* id = it_and_inserted.first;
127     if (!it_and_inserted.second) {
128       PERFETTO_DCHECK(Get(*id) == str);
129       return *id;
130     }
131     *id = InsertString(str, hash);
132     return *id;
133   }
134 
GetId(base::StringView str)135   base::Optional<Id> GetId(base::StringView str) const {
136     if (str.data() == nullptr)
137       return Id::Null();
138 
139     auto hash = str.Hash();
140     Id* id = string_index_.Find(hash);
141     if (id) {
142       PERFETTO_DCHECK(Get(*id) == str);
143       return *id;
144     }
145     return base::nullopt;
146   }
147 
Get(Id id)148   NullTermStringView Get(Id id) const {
149     if (id.is_null())
150       return NullTermStringView();
151     if (id.is_large_string())
152       return GetLargeString(id);
153     return GetFromBlockPtr(IdToPtr(id));
154   }
155 
CreateIterator()156   Iterator CreateIterator() const { return Iterator(this); }
157 
size()158   size_t size() const { return string_index_.size(); }
159 
160  private:
161   using StringHash = uint64_t;
162 
163   struct Block {
BlockBlock164     explicit Block(size_t size)
165         : mem_(base::PagedMemory::Allocate(size,
166                                            base::PagedMemory::kDontCommit)),
167           size_(size) {}
168     ~Block() = default;
169 
170     // Allow std::move().
171     Block(Block&&) noexcept = default;
172     Block& operator=(Block&&) = default;
173 
174     // Disable implicit copy.
175     Block(const Block&) = delete;
176     Block& operator=(const Block&) = delete;
177 
GetBlock178     uint8_t* Get(uint32_t offset) const {
179       return static_cast<uint8_t*>(mem_.Get()) + offset;
180     }
181 
182     std::pair<bool /*success*/, uint32_t /*offset*/> TryInsert(
183         base::StringView str);
184 
OffsetOfBlock185     uint32_t OffsetOf(const uint8_t* ptr) const {
186       PERFETTO_DCHECK(Get(0) < ptr &&
187                       ptr <= Get(static_cast<uint32_t>(size_ - 1)));
188       return static_cast<uint32_t>(ptr - Get(0));
189     }
190 
posBlock191     uint32_t pos() const { return pos_; }
192 
193    private:
194     base::PagedMemory mem_;
195     uint32_t pos_ = 0;
196     size_t size_ = 0;
197   };
198 
199   friend class Iterator;
200   friend class StringPoolTest;
201 
202   // StringPool IDs are 32-bit. If the MSB is 1, the remaining bits of the ID
203   // are an index into the |large_strings_| vector. Otherwise, the next 6 bits
204   // are the index of the Block in the pool, and the remaining 25 bits the
205   // offset of the encoded string inside the pool.
206   //
207   // [31] [30:25] [24:0]
208   //  |      |       |
209   //  |      |       +---- offset in block (or LSB of large string index).
210   //  |      +------------ block index (or MSB of large string index).
211   //  +------------------- 1: large string, 0: string in a Block.
212   static constexpr size_t kNumBlockIndexBits = 6;
213   static constexpr size_t kNumBlockOffsetBits = 25;
214 
215   static constexpr size_t kLargeStringFlagBitMask = 1u << 31;
216   static constexpr size_t kBlockOffsetBitMask = (1u << kNumBlockOffsetBits) - 1;
217   static constexpr size_t kBlockIndexBitMask =
218       0xffffffff & ~kLargeStringFlagBitMask & ~kBlockOffsetBitMask;
219 
220   static constexpr size_t kBlockSizeBytes = kBlockOffsetBitMask + 1;  // 32 MB
221 
222   // If a string doesn't fit into the current block, we can either start a new
223   // block or insert the string into the |large_strings_| vector. To maximize
224   // the used proportion of each block's memory, we only start a new block if
225   // the string isn't very large.
226   static constexpr size_t kMinLargeStringSizeBytes = kBlockSizeBytes / 8;
227 
228   // Number of bytes to reserve for size and null terminator.
229   // This is the upper limit on metadata size: 5 bytes for max uint32,
230   // plus 1 byte for null terminator. The actual size may be lower.
231   static constexpr uint8_t kMaxMetadataSize = 6;
232 
233   // Inserts the string with the given hash into the pool and return its Id.
234   Id InsertString(base::StringView, uint64_t hash);
235 
236   // Insert a large string into the pool and return its Id.
237   Id InsertLargeString(base::StringView, uint64_t hash);
238 
239   // The returned pointer points to the start of the string metadata (i.e. the
240   // first byte of the size).
IdToPtr(Id id)241   const uint8_t* IdToPtr(Id id) const {
242     // If the MSB is set, the ID represents an index into |large_strings_|, so
243     // shouldn't be converted into a block pointer.
244     PERFETTO_DCHECK(!id.is_large_string());
245 
246     size_t block_index = id.block_index();
247     uint32_t block_offset = id.block_offset();
248 
249     PERFETTO_DCHECK(block_index < blocks_.size());
250     PERFETTO_DCHECK(block_offset < blocks_[block_index].pos());
251 
252     return blocks_[block_index].Get(block_offset);
253   }
254 
255   // |ptr| should point to the start of the string metadata (i.e. the first byte
256   // of the size).
257   // Returns pointer to the start of the string.
ReadSize(const uint8_t * ptr,uint32_t * size)258   static const uint8_t* ReadSize(const uint8_t* ptr, uint32_t* size) {
259     uint64_t value = 0;
260     const uint8_t* str_ptr = protozero::proto_utils::ParseVarInt(
261         ptr, ptr + kMaxMetadataSize, &value);
262     PERFETTO_DCHECK(str_ptr != ptr);
263     PERFETTO_DCHECK(value < std::numeric_limits<uint32_t>::max());
264     *size = static_cast<uint32_t>(value);
265     return str_ptr;
266   }
267 
268   // |ptr| should point to the start of the string metadata (i.e. the first byte
269   // of the size).
GetFromBlockPtr(const uint8_t * ptr)270   static NullTermStringView GetFromBlockPtr(const uint8_t* ptr) {
271     uint32_t size = 0;
272     const uint8_t* str_ptr = ReadSize(ptr, &size);
273     return NullTermStringView(reinterpret_cast<const char*>(str_ptr), size);
274   }
275 
276   // Lookup a string in the |large_strings_| vector. |id| should have the MSB
277   // set.
GetLargeString(Id id)278   NullTermStringView GetLargeString(Id id) const {
279     PERFETTO_DCHECK(id.is_large_string());
280     size_t index = id.large_string_index();
281     PERFETTO_DCHECK(index < large_strings_.size());
282     const std::string* str = large_strings_[index].get();
283     return NullTermStringView(str->c_str(), str->size());
284   }
285 
286   // The actual memory storing the strings.
287   std::vector<Block> blocks_;
288 
289   // Any string that is too large to fit into a Block is stored separately
290   // (inside a unique_ptr to ensure any references to it remain valid even if
291   // |large_strings_| is resized).
292   std::vector<std::unique_ptr<std::string>> large_strings_;
293 
294   // Maps hashes of strings to the Id in the string pool.
295   base::FlatHashMap<StringHash,
296                     Id,
297                     base::AlreadyHashed<StringHash>,
298                     base::LinearProbe,
299                     /*AppendOnly=*/true>
300       string_index_{/*initial_capacity=*/1024u * 1024u};
301 };
302 
303 }  // namespace trace_processor
304 }  // namespace perfetto
305 
306 template <>
307 struct std::hash<::perfetto::trace_processor::StringPool::Id> {
308   using argument_type = ::perfetto::trace_processor::StringPool::Id;
309   using result_type = size_t;
310 
311   result_type operator()(const argument_type& r) const {
312     return std::hash<uint32_t>{}(r.raw_id());
313   }
314 };
315 
316 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_
317