// Copyright (c) 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "gn/string_atom.h"

#include <array>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <vector>

#include "gn/hash_table_base.h"

namespace {

// Implementation note:
//
// StringAtomSet implements the global shared state, which is:
//
//  - a group of std::string instances with persistent addresses, allocated
//    through a fast slab allocator.
//
//  - a set of string pointers, corresponding to the known strings in the
//    group.
//
//  - a mutex to ensure correct thread-safety.
//
//  - a find() method that takes an std::string_view argument, and uses it
//    to find a matching entry in the string set. If none is available,
//    a new std::string is allocated and its address inserted into the set
//    before being returned.
//
// Because the mutex is a significant bottleneck, each thread keeps its own
// local string pointer cache, and only calls StringAtomSet::find() in case
// of a lookup miss. This is critical for good performance.
//
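// For example (an illustrative sketch, not code that belongs in this file):
// constructing two StringAtom values from equal strings makes them reference
// the exact same std::string instance, since find() returns one unique
// pointer per distinct string content:
//
//   StringAtom a("foo");
//   StringAtom b("foo");
//   // Both now wrap a reference to the same std::string, i.e. the pointer
//   // returned by the thread-local / global find() chain implemented below.
//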

static const std::string kEmptyString;

using KeyType = const std::string*;

// A HashTableBase node type that stores one hash value and one string pointer.
struct KeyNode {
  size_t hash;
  KeyType key;

  // The following methods are required by HashTableBase<>
  bool is_valid() const { return !is_null(); }
  bool is_null() const { return !key; }
  size_t hash_value() const { return hash; }

  // No deletion support means faster lookup code.
  static constexpr bool is_tombstone() { return false; }
};

// This is a trivial hash table of string pointers, using open addressing.
// It is faster in practice than using a standard container or even a
// base::flat_set<>.
//
// Usage is as follows:
//
//  1) Compute the string's hash value.
//
//  2) Call Lookup() with the hash value and the string_view key; this
//     always returns a mutable Node* pointer, say |node|.
//
//  3) If |node->key| is not nullptr, this is the key to use.
//     Otherwise, allocate a new string with a persistent address,
//     and call Insert(), passing the |node|, |hash| and new string
//     address as arguments.
//
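// A sketch of that sequence (for illustration; it mirrors the logic of
// StringAtomSet::find() further below):
//
//   size_t hash = set.Hash(str);
//   KeySet::Node* node = set.Lookup(hash, str);
//   if (node->key)
//     return node->key;             // Hit: reuse the existing key.
//   const std::string* key = ...;   // Miss: allocate a persistent string,
//   set.Insert(node, hash, key);    // then record its address in the set.
//   return key;
//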
struct KeySet : public HashTableBase<KeyNode> {
  using BaseType = HashTableBase<KeyNode>;
  using Node = BaseType::Node;

  // Compute hash for |str|. Replace with faster hash function if available.
  static size_t Hash(std::string_view str) {
    return std::hash<std::string_view>()(str);
  }

  // Look up |str| with a specific |hash| value.
  // Return a Node pointer. If the key was found, |node->key| is its value.
  // Otherwise, the caller should create a new key value, then call Insert()
  // below.
  //
  // NOTE: Even though this method is const (it doesn't modify the state of
  // the KeySet), it returns a *mutable* node pointer, to be passed to
  // Insert() in case of a miss.
  //
  Node* Lookup(size_t hash, std::string_view str) const {
    return BaseType::NodeLookup(hash, [hash, &str](const Node* node) {
      // NOTE: Only is_valid() node pointers are passed to this function,
      // which means |key| won't be null, and there are no tombstone values
      // in this derivation of HashTableBase<>.
      return node->hash == hash && *node->key == str;
    });
  }

  void Insert(Node* node, size_t hash, KeyType key) {
    node->hash = hash;
    node->key = key;
    BaseType::UpdateAfterInsert();
  }
};

class StringAtomSet {
 public:
  StringAtomSet() {
    // Ensure kEmptyString is in our set while not being allocated
    // from a slab. The end result is that find("") should always
    // return this address.
    //
    // This allows the StringAtom() default constructor to use the same
    // address directly, avoiding a table lookup.
    //
    size_t hash = set_.Hash("");
    auto* node = set_.Lookup(hash, "");
    set_.Insert(node, hash, &kEmptyString);
  }

  // Find the unique constant string pointer for |key|.
  const std::string* find(std::string_view key) {
    std::lock_guard<std::mutex> lock(mutex_);
    size_t hash = set_.Hash(key);
    auto* node = set_.Lookup(hash, key);
    if (node->key)
      return node->key;

    // Allocate new string, insert its address in the set.
    if (slab_index_ >= kStringsPerSlab) {
      slabs_.push_back(new Slab());
      slab_index_ = 0;
    }
    std::string* result = slabs_.back()->init(slab_index_++, key);
    set_.Insert(node, hash, result);
    return result;
  }

 private:
  static constexpr unsigned int kStringsPerSlab = 128;

  // Each slab is allocated independently, has a fixed address and stores
  // kStringsPerSlab items of type StringStorage. The latter has the same
  // size and alignment as std::string, but doesn't need default-initialization.
  // This is used to slightly speed up Slab allocation and string
  // initialization. The drawback is that on process exit, allocated strings
  // are leaked (but GN already leaks several hundred MiBs of memory anyway).
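  //
  // For instance (an illustrative sketch, not additional code in this file):
  // allocating a Slab runs no std::string constructors; each string is only
  // constructed in place, on demand, via placement new in Slab::init():
  //
  //   Slab* slab = new Slab();                  // No per-string construction.
  //   std::string* s = slab->init(0, "hello");  // Constructs item 0 in place.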

  // A C++ union that can store an std::string but without default
  // initialization and destruction.
  union StringStorage {
    StringStorage() {}
    ~StringStorage() {}
    char dummy;
    std::string str;
  };

  // A fixed array of StringStorage items. Can be allocated cheaply.
  class Slab {
   public:
    // Init the n-th string in the slab with |str|.
    // Return its location as well.
    std::string* init(size_t index, std::string_view str) {
      std::string* result = &items_[index].str;
      new (result) std::string(str);
      return result;
    }

   private:
    StringStorage items_[kStringsPerSlab];
  };

  std::mutex mutex_;
  KeySet set_;
  std::vector<Slab*> slabs_;
  unsigned int slab_index_ = kStringsPerSlab;
};

StringAtomSet& GetStringAtomSet() {
  static StringAtomSet s_string_atom_set;
  return s_string_atom_set;
}

// Each thread maintains its own ThreadLocalCache to perform fast lookups
// without taking any mutex in most cases.
class ThreadLocalCache {
 public:
  // Find the unique constant string pointer for |key| in this cache,
  // falling back to the global one in case of a miss.
  KeyType find(std::string_view key) {
    size_t hash = local_set_.Hash(key);
    auto* node = local_set_.Lookup(hash, key);
    if (node->key)
      return node->key;

    KeyType result = GetStringAtomSet().find(key);
    local_set_.Insert(node, hash, result);
    return result;
  }

 private:
  KeySet local_set_;
};

#if !defined(OS_ZOS)
thread_local ThreadLocalCache s_local_cache;
#else
// TODO(gabylb) - zos: thread_local not yet supported, use zoslib's impl'n:
static ThreadLocalCache s_tlc;
__tlssim<ThreadLocalCache*> __g_s_local_cache_impl(&s_tlc);
#define s_local_cache (*__g_s_local_cache_impl.access())
#endif

}  // namespace

StringAtom::StringAtom() : value_(kEmptyString) {}

StringAtom::StringAtom(std::string_view str) noexcept
#ifndef OS_ZOS
    : value_(*s_local_cache.find(str)) {}
#else
    : value_(*s_local_cache->find(str)) {}
#endif