• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/string_atom.h"
6 
7 #include <array>
8 #include <memory>
9 #include <mutex>
10 #include <set>
11 #include <string>
12 #include <unordered_set>
13 #include <vector>
14 
15 #include "base/containers/flat_set.h"
16 
17 namespace {
18 
19 // Implementation note:
20 //
21 // StringAtomSet implements the global shared state, which is:
22 //
23 //    - a group of std::string instances with a persistent address, allocated
24 //      through a fast slab allocator.
25 //
26 //    - a set of string pointers, corresponding to the known strings in the
27 //      group.
28 //
29 //    - a mutex to ensure correct thread-safety.
30 //
31 //    - a find() method that takes an std::string_view argument, and uses it
32 //      to find a matching entry in the string tree. If none is available,
33 //      a new std::string is allocated and its address inserted into the tree
34 //      before being returned.
35 //
36 // Because the mutex is a large bottleneck, each thread implements
37 // its own local string pointer cache, and will only call StringAtomSet::find()
38 // in case of a lookup miss. This is critical for good performance.
39 //
40 
41 static const std::string kEmptyString;
42 
43 using KeyType = const std::string*;
44 
45 // This is a trivial hash table of string pointers, using open addressing.
46 // It is faster in practice than using a standard container or even a
47 // base::flat_set<>.
48 //
49 // Usage is the following:
50 //
51 //     1) Compute string hash value.
52 //
53 //     2) Call Lookup() with the hash value and the string_view key,
54 //        this always returns a mutable Node* pointer, say |node|.
55 //
56 //     3) If |node->key| is not nullptr, this is the key to use.
57 //        Otherwise, allocate a new string with persistent address,
58 //        and call Insert(), passing the |node|, |hash| and new string
59 //        address as arguments.
60 //
61 struct KeySet {
62   struct Node {
63     size_t hash = 0;
64     KeyType key = nullptr;
65   };
66 
67   // Compute hash for |str|. Replace with faster hash function if available.
Hash__anon30f874f30111::KeySet68   static size_t Hash(std::string_view str) {
69     return std::hash<std::string_view>()(str);
70   }
71 
72   // Lookup for |str| with specific |hash| value.
73   // Return a Node pointer. If the key was found, |node.key| is its value.
74   // Otherwise, the caller should create a new key value, then call Insert()
75   // below.
76   //
77   // NOTE: Even though this method is const, because it doesn't modify the
78   //       state of the KeySet, it returns a *mutable* node pointer, to be
79   //       passed to Insert() in case of a miss.
80   //
Lookup__anon30f874f30111::KeySet81   Node* Lookup(size_t hash, std::string_view str) const {
82     size_t index = hash & (size_ - 1);
83     const Node* nodes = &buckets_[0];
84     const Node* nodes_limit = nodes + size_;
85     const Node* node = nodes + index;
86     for (;;) {
87       if (!node->key || (node->hash == hash && *node->key == str))
88         return const_cast<Node*>(node);
89       if (++node == nodes_limit)
90         node = nodes;
91     }
92   }
93 
94   // Insert a new key in this set. |node| must be a value returned by
95   // a previous Lookup() call. |hash| is the hash value for |key|.
Insert__anon30f874f30111::KeySet96   void Insert(Node* node, size_t hash, KeyType key) {
97     node->hash = hash;
98     node->key = key;
99     count_ += 1;
100     if (count_ * 4 >= size_ * 3)  // 75% max load factor
101       GrowBuckets();
102   }
103 
GrowBuckets__anon30f874f30111::KeySet104   void GrowBuckets() {
105     size_t size = buckets_.size();
106     size_t new_size = size * 2;
107     size_t new_mask = new_size - 1;
108 
109     std::vector<Node> new_buckets;
110     new_buckets.resize(new_size);
111     for (const Node& node : buckets_) {
112       size_t index = node.hash & new_mask;
113       for (;;) {
114         Node& node2 = new_buckets[index];
115         if (!node2.key) {
116           node2 = node;
117           break;
118         }
119         index = (index + 1) & new_mask;
120       }
121     }
122     buckets_ = std::move(new_buckets);
123     size_ = new_size;
124   }
125 
126   size_t size_ = 2;
127   size_t count_ = 0;
128   std::vector<Node> buckets_ = {Node{}, Node{}};
129 };
130 
131 class StringAtomSet {
132  public:
StringAtomSet()133   StringAtomSet() {
134     // Ensure kEmptyString is in our set while not being allocated
135     // from a slab. The end result is that find("") should always
136     // return this address.
137     //
138     // This allows the StringAtom() default initializer to use the same
139     // address directly, avoiding a table lookup.
140     //
141     size_t hash = set_.Hash("");
142     auto* node = set_.Lookup(hash, "");
143     set_.Insert(node, hash, &kEmptyString);
144   }
145 
146   // Find the unique constant string pointer for |key|.
find(std::string_view key)147   const std::string* find(std::string_view key) {
148     std::lock_guard<std::mutex> lock(mutex_);
149     size_t hash = set_.Hash(key);
150     auto* node = set_.Lookup(hash, key);
151     if (node->key)
152       return node->key;
153 
154     // Allocate new string, insert its address in the set.
155     if (slab_index_ >= kStringsPerSlab) {
156       slabs_.push_back(new Slab());
157       slab_index_ = 0;
158     }
159     std::string* result = slabs_.back()->init(slab_index_++, key);
160     set_.Insert(node, hash, result);
161     return result;
162   }
163 
164  private:
165   static constexpr unsigned int kStringsPerSlab = 128;
166 
167   // Each slab is allocated independently, has a fixed address and stores
168   // kStringsPerSlab items of type StringStorage. The latter has the same
169   // size and alignment as std::string, but doesn't need default-initialization.
170   // This is used to slightly speed up Slab allocation and string
171   // initialization. The drawback is that on process exit, allocated strings
172   // are leaked (but GN already leaks several hundred MiBs of memory anyway).
173 
174   // A C++ union that can store an std::string but without default
175   // initialization and destruction.
176   union StringStorage {
StringStorage()177     StringStorage() {}
~StringStorage()178     ~StringStorage() {}
179     char dummy;
180     std::string str;
181   };
182 
183   // A fixed array of StringStorage items. Can be allocated cheaply.
184   class Slab {
185    public:
186     // Init the n-th string in the slab with |str|.
187     // Return its location as well.
init(size_t index,const std::string_view & str)188     std::string* init(size_t index, const std::string_view& str) {
189       std::string* result = &items_[index].str;
190       new (result) std::string(str);
191       return result;
192     }
193 
194    private:
195     StringStorage items_[kStringsPerSlab];
196   };
197 
198   std::mutex mutex_;
199   KeySet set_;
200   std::vector<Slab*> slabs_;
201   unsigned int slab_index_ = kStringsPerSlab;
202 };
203 
GetStringAtomSet()204 StringAtomSet& GetStringAtomSet() {
205   static StringAtomSet s_string_atom_set;
206   return s_string_atom_set;
207 }
208 
209 // Each thread maintains its own ThreadLocalCache to perform fast lookups
210 // without taking any mutex in most cases.
211 class ThreadLocalCache {
212  public:
213   // Find the unique constant string pointer for |key| in this cache,
214   // and fallback to the global one in case of a miss.
find(std::string_view key)215   KeyType find(std::string_view key) {
216     size_t hash = local_set_.Hash(key);
217     auto* node = local_set_.Lookup(hash, key);
218     if (node->key)
219       return node->key;
220 
221     KeyType result = GetStringAtomSet().find(key);
222     local_set_.Insert(node, hash, result);
223     return result;
224   }
225 
226  private:
227   KeySet local_set_;
228 };
229 
230 thread_local ThreadLocalCache s_local_cache;
231 
232 }  // namespace
233 
StringAtom()234 StringAtom::StringAtom() : value_(kEmptyString) {}
235 
StringAtom(std::string_view str)236 StringAtom::StringAtom(std::string_view str) noexcept
237     : value_(*s_local_cache.find(str)) {}
238