• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/string_atom.h"
6 
7 #include <array>
8 #include <memory>
9 #include <mutex>
10 #include <set>
11 #include <string>
12 #include <vector>
13 
14 #include "gn/hash_table_base.h"
15 
16 namespace {
17 
18 // Implementation note:
19 //
20 // StringAtomSet implements the global shared state, which is:
21 //
22 //    - a group of std::string instances with a persistent address, allocated
23 //      through a fast slab allocator.
24 //
25 //    - a set of string pointers, corresponding to the known strings in the
26 //      group.
27 //
28 //    - a mutex to ensure correct thread-safety.
29 //
30 //    - a find() method that takes an std::string_view argument, and uses it
31 //      to find a matching entry in the string tree. If none is available,
32 //      a new std::string is allocated and its address inserted into the tree
33 //      before being returned.
34 //
35 // Because the mutex is a large bottleneck, each thread implements
36 // its own local string pointer cache, and will only call StringAtomSet::find()
37 // in case of a lookup miss. This is critical for good performance.
38 //
39 
40 static const std::string kEmptyString;
41 
42 using KeyType = const std::string*;
43 
44 // A HashTableBase node type that stores one hash value and one string pointer.
45 struct KeyNode {
46   size_t hash;
47   KeyType key;
48 
49   // The following methods are required by HashTableBase<>
is_valid__anoneb4c62090111::KeyNode50   bool is_valid() const { return !is_null(); }
is_null__anoneb4c62090111::KeyNode51   bool is_null() const { return !key; }
hash_value__anoneb4c62090111::KeyNode52   size_t hash_value() const { return hash; }
53 
54   // No deletion support means faster lookup code.
is_tombstone__anoneb4c62090111::KeyNode55   static constexpr bool is_tombstone() { return false; }
56 };
57 
58 // This is a trivial hash table of string pointers, using open addressing.
59 // It is faster in practice than using a standard container or even a
60 // base::flat_set<>.
61 //
62 // Usage is the following:
63 //
64 //     1) Compute string hash value.
65 //
66 //     2) Call Lookup() with the hash value and the string_view key,
67 //        this always returns a mutable Node* pointer, say |node|.
68 //
69 //     3) If |node->key| is not nullptr, this is the key to use.
70 //        Otherwise, allocate a new string with persistent address,
71 //        and call Insert(), passing the |node|, |hash| and new string
72 //        address as arguments.
73 //
74 struct KeySet : public HashTableBase<KeyNode> {
75   using BaseType = HashTableBase<KeyNode>;
76   using Node = BaseType::Node;
77 
78   // Compute hash for |str|. Replace with faster hash function if available.
Hash__anoneb4c62090111::KeySet79   static size_t Hash(std::string_view str) {
80     return std::hash<std::string_view>()(str);
81   }
82 
83   // Lookup for |str| with specific |hash| value.
84   // Return a Node pointer. If the key was found, |node.key| is its value.
85   // Otherwise, the caller should create a new key value, then call Insert()
86   // below.
87   //
88   // NOTE: Even though this method is const, because it doesn't modify the
89   //       state of the KeySet, it returns a *mutable* node pointer, to be
90   //       passed to Insert() in case of a miss.
91   //
Lookup__anoneb4c62090111::KeySet92   Node* Lookup(size_t hash, std::string_view str) const {
93     return BaseType::NodeLookup(hash, [hash, &str](const Node* node) {
94       // NOTE: Only is_valid() node pointers are passed to this function
95       // which means key won't be null, and there are no tombstone values
96       // in this derivation of HashTableBase<>.
97       return node->hash == hash && *node->key == str;
98     });
99   }
100 
Insert__anoneb4c62090111::KeySet101   void Insert(Node* node, size_t hash, KeyType key) {
102     node->hash = hash;
103     node->key = key;
104     BaseType::UpdateAfterInsert();
105   }
106 };
107 
108 class StringAtomSet {
109  public:
StringAtomSet()110   StringAtomSet() {
111     // Ensure kEmptyString is in our set while not being allocated
112     // from a slab. The end result is that find("") should always
113     // return this address.
114     //
115     // This allows the StringAtom() default initializer to use the same
116     // address directly, avoiding a table lookup.
117     //
118     size_t hash = set_.Hash("");
119     auto* node = set_.Lookup(hash, "");
120     set_.Insert(node, hash, &kEmptyString);
121   }
122 
123   // Find the unique constant string pointer for |key|.
find(std::string_view key)124   const std::string* find(std::string_view key) {
125     std::lock_guard<std::mutex> lock(mutex_);
126     size_t hash = set_.Hash(key);
127     auto* node = set_.Lookup(hash, key);
128     if (node->key)
129       return node->key;
130 
131     // Allocate new string, insert its address in the set.
132     if (slab_index_ >= kStringsPerSlab) {
133       slabs_.push_back(new Slab());
134       slab_index_ = 0;
135     }
136     std::string* result = slabs_.back()->init(slab_index_++, key);
137     set_.Insert(node, hash, result);
138     return result;
139   }
140 
141  private:
142   static constexpr unsigned int kStringsPerSlab = 128;
143 
144   // Each slab is allocated independently, has a fixed address and stores
145   // kStringsPerSlab items of type StringStorage. The latter has the same
146   // size and alignment as std::string, but doesn't need default-initialization.
147   // This is used to slightly speed up Slab allocation and string
148   // initialization. The drawback is that on process exit, allocated strings
149   // are leaked (but GN already leaks several hundred MiBs of memory anyway).
150 
151   // A C++ union that can store an std::string but without default
152   // initialization and destruction.
153   union StringStorage {
StringStorage()154     StringStorage() {}
~StringStorage()155     ~StringStorage() {}
156     char dummy;
157     std::string str;
158   };
159 
160   // A fixed array of StringStorage items. Can be allocated cheaply.
161   class Slab {
162    public:
163     // Init the n-th string in the slab with |str|.
164     // Return its location as well.
init(size_t index,std::string_view str)165     std::string* init(size_t index, std::string_view str) {
166       std::string* result = &items_[index].str;
167       new (result) std::string(str);
168       return result;
169     }
170 
171    private:
172     StringStorage items_[kStringsPerSlab];
173   };
174 
175   std::mutex mutex_;
176   KeySet set_;
177   std::vector<Slab*> slabs_;
178   unsigned int slab_index_ = kStringsPerSlab;
179 };
180 
GetStringAtomSet()181 StringAtomSet& GetStringAtomSet() {
182   static StringAtomSet s_string_atom_set;
183   return s_string_atom_set;
184 }
185 
186 // Each thread maintains its own ThreadLocalCache to perform fast lookups
187 // without taking any mutex in most cases.
188 class ThreadLocalCache {
189  public:
190   // Find the unique constant string pointer for |key| in this cache,
191   // and fallback to the global one in case of a miss.
find(std::string_view key)192   KeyType find(std::string_view key) {
193     size_t hash = local_set_.Hash(key);
194     auto* node = local_set_.Lookup(hash, key);
195     if (node->key)
196       return node->key;
197 
198     KeyType result = GetStringAtomSet().find(key);
199     local_set_.Insert(node, hash, result);
200     return result;
201   }
202 
203  private:
204   KeySet local_set_;
205 };
206 
207 #if !defined(OS_ZOS)
208 thread_local ThreadLocalCache s_local_cache;
209 #else
210 // TODO(gabylb) - zos: thread_local not yet supported, use zoslib's impl'n:
211 static ThreadLocalCache s_tlc;
212 __tlssim<ThreadLocalCache*> __g_s_local_cache_impl(&s_tlc);
213 #define s_local_cache (*__g_s_local_cache_impl.access())
214 #endif
215 
216 }  // namespace
217 
StringAtom()218 StringAtom::StringAtom() : value_(kEmptyString) {}
219 
StringAtom(std::string_view str)220 StringAtom::StringAtom(std::string_view str) noexcept
221 #ifndef OS_ZOS
222     : value_(*s_local_cache.find(str)) {}
223 #else
224     : value_(*s_local_cache->find(str)) {}
225 #endif
226