1 //===-- ConstString.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "lldb/Utility/ConstString.h"
10
11 #include "lldb/Utility/Stream.h"
12
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/DJB.h"
17 #include "llvm/Support/FormatProviders.h"
18 #include "llvm/Support/RWMutex.h"
19 #include "llvm/Support/Threading.h"
20
21 #include <array>
22 #include <utility>
23
24 #include <inttypes.h>
25 #include <stdint.h>
26 #include <string.h>
27
28 using namespace lldb_private;
29
30 class Pool {
31 public:
32 /// The default BumpPtrAllocatorImpl slab size.
33 static const size_t AllocatorSlabSize = 4096;
34 static const size_t SizeThreshold = AllocatorSlabSize;
35 /// Every Pool has its own allocator which receives an equal share of
36 /// the ConstString allocations. This means that when allocating many
37 /// ConstStrings, every allocator sees only its small share of allocations and
38 /// assumes LLDB only allocated a small amount of memory so far. In reality
39 /// LLDB allocated a total memory that is N times as large as what the
40 /// allocator sees (where N is the number of string pools). This causes that
41 /// the BumpPtrAllocator continues a long time to allocate memory in small
42 /// chunks which only makes sense when allocating a small amount of memory
43 /// (which is true from the perspective of a single allocator). On some
44 /// systems doing all these small memory allocations causes LLDB to spend
45 /// a lot of time in malloc, so we need to force all these allocators to
46 /// behave like one allocator in terms of scaling their memory allocations
47 /// with increased demand. To do this we set the growth delay for each single
48 /// allocator to a rate so that our pool of allocators scales their memory
49 /// allocations similar to a single BumpPtrAllocatorImpl.
50 ///
51 /// Currently we have 256 string pools and the normal growth delay of the
52 /// BumpPtrAllocatorImpl is 128 (i.e., the memory allocation size increases
53 /// every 128 full chunks), so by changing the delay to 1 we get a
54 /// total growth delay in our allocator collection of 256/1 = 256. This is
55 /// still only half as fast as a normal allocator but we can't go any faster
56 /// without decreasing the number of string pools.
57 static const size_t AllocatorGrowthDelay = 1;
58 typedef llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, AllocatorSlabSize,
59 SizeThreshold, AllocatorGrowthDelay>
60 Allocator;
61 typedef const char *StringPoolValueType;
62 typedef llvm::StringMap<StringPoolValueType, Allocator> StringPool;
63 typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
64
65 static StringPoolEntryType &
GetStringMapEntryFromKeyData(const char * keyData)66 GetStringMapEntryFromKeyData(const char *keyData) {
67 return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
68 }
69
GetConstCStringLength(const char * ccstr)70 static size_t GetConstCStringLength(const char *ccstr) {
71 if (ccstr != nullptr) {
72 // Since the entry is read only, and we derive the entry entirely from
73 // the pointer, we don't need the lock.
74 const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
75 return entry.getKey().size();
76 }
77 return 0;
78 }
79
GetMangledCounterpart(const char * ccstr) const80 StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
81 if (ccstr != nullptr) {
82 const uint8_t h = hash(llvm::StringRef(ccstr));
83 llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
84 return GetStringMapEntryFromKeyData(ccstr).getValue();
85 }
86 return nullptr;
87 }
88
GetConstCString(const char * cstr)89 const char *GetConstCString(const char *cstr) {
90 if (cstr != nullptr)
91 return GetConstCStringWithLength(cstr, strlen(cstr));
92 return nullptr;
93 }
94
GetConstCStringWithLength(const char * cstr,size_t cstr_len)95 const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
96 if (cstr != nullptr)
97 return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
98 return nullptr;
99 }
100
GetConstCStringWithStringRef(const llvm::StringRef & string_ref)101 const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
102 if (string_ref.data()) {
103 const uint8_t h = hash(string_ref);
104
105 {
106 llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
107 auto it = m_string_pools[h].m_string_map.find(string_ref);
108 if (it != m_string_pools[h].m_string_map.end())
109 return it->getKeyData();
110 }
111
112 llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
113 StringPoolEntryType &entry =
114 *m_string_pools[h]
115 .m_string_map.insert(std::make_pair(string_ref, nullptr))
116 .first;
117 return entry.getKeyData();
118 }
119 return nullptr;
120 }
121
122 const char *
GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,const char * mangled_ccstr)123 GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
124 const char *mangled_ccstr) {
125 const char *demangled_ccstr = nullptr;
126
127 {
128 const uint8_t h = hash(demangled);
129 llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
130
131 // Make or update string pool entry with the mangled counterpart
132 StringPool &map = m_string_pools[h].m_string_map;
133 StringPoolEntryType &entry = *map.try_emplace(demangled).first;
134
135 entry.second = mangled_ccstr;
136
137 // Extract the const version of the demangled_cstr
138 demangled_ccstr = entry.getKeyData();
139 }
140
141 {
142 // Now assign the demangled const string as the counterpart of the
143 // mangled const string...
144 const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
145 llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
146 GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
147 }
148
149 // Return the constant demangled C string
150 return demangled_ccstr;
151 }
152
GetConstTrimmedCStringWithLength(const char * cstr,size_t cstr_len)153 const char *GetConstTrimmedCStringWithLength(const char *cstr,
154 size_t cstr_len) {
155 if (cstr != nullptr) {
156 const size_t trimmed_len = strnlen(cstr, cstr_len);
157 return GetConstCStringWithLength(cstr, trimmed_len);
158 }
159 return nullptr;
160 }
161
162 // Return the size in bytes that this object and any items in its collection
163 // of uniqued strings + data count values takes in memory.
MemorySize() const164 size_t MemorySize() const {
165 size_t mem_size = sizeof(Pool);
166 for (const auto &pool : m_string_pools) {
167 llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
168 for (const auto &entry : pool.m_string_map)
169 mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
170 }
171 return mem_size;
172 }
173
174 protected:
hash(const llvm::StringRef & s) const175 uint8_t hash(const llvm::StringRef &s) const {
176 uint32_t h = llvm::djbHash(s);
177 return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
178 }
179
180 struct PoolEntry {
181 mutable llvm::sys::SmartRWMutex<false> m_mutex;
182 StringPool m_string_map;
183 };
184
185 std::array<PoolEntry, 256> m_string_pools;
186 };
187
188 // Frameworks and dylibs aren't supposed to have global C++ initializers so we
189 // hide the string pool in a static function so that it will get initialized on
190 // the first call to this static function.
191 //
192 // Note, for now we make the string pool a pointer to the pool, because we
193 // can't guarantee that some objects won't get destroyed after the global
194 // destructor chain is run, and trying to make sure no destructors touch
195 // ConstStrings is difficult. So we leak the pool instead.
StringPool()196 static Pool &StringPool() {
197 static llvm::once_flag g_pool_initialization_flag;
198 static Pool *g_string_pool = nullptr;
199
200 llvm::call_once(g_pool_initialization_flag,
201 []() { g_string_pool = new Pool(); });
202
203 return *g_string_pool;
204 }
205
ConstString(const char * cstr)206 ConstString::ConstString(const char *cstr)
207 : m_string(StringPool().GetConstCString(cstr)) {}
208
ConstString(const char * cstr,size_t cstr_len)209 ConstString::ConstString(const char *cstr, size_t cstr_len)
210 : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
211
ConstString(const llvm::StringRef & s)212 ConstString::ConstString(const llvm::StringRef &s)
213 : m_string(StringPool().GetConstCStringWithStringRef(s)) {}
214
operator <(ConstString rhs) const215 bool ConstString::operator<(ConstString rhs) const {
216 if (m_string == rhs.m_string)
217 return false;
218
219 llvm::StringRef lhs_string_ref(GetStringRef());
220 llvm::StringRef rhs_string_ref(rhs.GetStringRef());
221
222 // If both have valid C strings, then return the comparison
223 if (lhs_string_ref.data() && rhs_string_ref.data())
224 return lhs_string_ref < rhs_string_ref;
225
226 // Else one of them was nullptr, so if LHS is nullptr then it is less than
227 return lhs_string_ref.data() == nullptr;
228 }
229
operator <<(Stream & s,ConstString str)230 Stream &lldb_private::operator<<(Stream &s, ConstString str) {
231 const char *cstr = str.GetCString();
232 if (cstr != nullptr)
233 s << cstr;
234
235 return s;
236 }
237
GetLength() const238 size_t ConstString::GetLength() const {
239 return Pool::GetConstCStringLength(m_string);
240 }
241
Equals(ConstString lhs,ConstString rhs,const bool case_sensitive)242 bool ConstString::Equals(ConstString lhs, ConstString rhs,
243 const bool case_sensitive) {
244 if (lhs.m_string == rhs.m_string)
245 return true;
246
247 // Since the pointers weren't equal, and identical ConstStrings always have
248 // identical pointers, the result must be false for case sensitive equality
249 // test.
250 if (case_sensitive)
251 return false;
252
253 // perform case insensitive equality test
254 llvm::StringRef lhs_string_ref(lhs.GetStringRef());
255 llvm::StringRef rhs_string_ref(rhs.GetStringRef());
256 return lhs_string_ref.equals_lower(rhs_string_ref);
257 }
258
Compare(ConstString lhs,ConstString rhs,const bool case_sensitive)259 int ConstString::Compare(ConstString lhs, ConstString rhs,
260 const bool case_sensitive) {
261 // If the iterators are the same, this is the same string
262 const char *lhs_cstr = lhs.m_string;
263 const char *rhs_cstr = rhs.m_string;
264 if (lhs_cstr == rhs_cstr)
265 return 0;
266 if (lhs_cstr && rhs_cstr) {
267 llvm::StringRef lhs_string_ref(lhs.GetStringRef());
268 llvm::StringRef rhs_string_ref(rhs.GetStringRef());
269
270 if (case_sensitive) {
271 return lhs_string_ref.compare(rhs_string_ref);
272 } else {
273 return lhs_string_ref.compare_lower(rhs_string_ref);
274 }
275 }
276
277 if (lhs_cstr)
278 return +1; // LHS isn't nullptr but RHS is
279 else
280 return -1; // LHS is nullptr but RHS isn't
281 }
282
Dump(Stream * s,const char * fail_value) const283 void ConstString::Dump(Stream *s, const char *fail_value) const {
284 if (s != nullptr) {
285 const char *cstr = AsCString(fail_value);
286 if (cstr != nullptr)
287 s->PutCString(cstr);
288 }
289 }
290
DumpDebug(Stream * s) const291 void ConstString::DumpDebug(Stream *s) const {
292 const char *cstr = GetCString();
293 size_t cstr_len = GetLength();
294 // Only print the parens if we have a non-nullptr string
295 const char *parens = cstr ? "\"" : "";
296 s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
297 static_cast<int>(sizeof(void *) * 2),
298 static_cast<const void *>(this), parens, cstr, parens,
299 static_cast<uint64_t>(cstr_len));
300 }
301
SetCString(const char * cstr)302 void ConstString::SetCString(const char *cstr) {
303 m_string = StringPool().GetConstCString(cstr);
304 }
305
SetString(const llvm::StringRef & s)306 void ConstString::SetString(const llvm::StringRef &s) {
307 m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
308 }
309
SetStringWithMangledCounterpart(llvm::StringRef demangled,ConstString mangled)310 void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
311 ConstString mangled) {
312 m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
313 demangled, mangled.m_string);
314 }
315
GetMangledCounterpart(ConstString & counterpart) const316 bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
317 counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
318 return (bool)counterpart;
319 }
320
SetCStringWithLength(const char * cstr,size_t cstr_len)321 void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
322 m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
323 }
324
SetTrimmedCStringWithLength(const char * cstr,size_t cstr_len)325 void ConstString::SetTrimmedCStringWithLength(const char *cstr,
326 size_t cstr_len) {
327 m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
328 }
329
StaticMemorySize()330 size_t ConstString::StaticMemorySize() {
331 // Get the size of the static string pool
332 return StringPool().MemorySize();
333 }
334
format(const ConstString & CS,llvm::raw_ostream & OS,llvm::StringRef Options)335 void llvm::format_provider<ConstString>::format(const ConstString &CS,
336 llvm::raw_ostream &OS,
337 llvm::StringRef Options) {
338 format_provider<StringRef>::format(CS.GetStringRef(), OS, Options);
339 }
340
output(const ConstString & Val,void *,raw_ostream & Out)341 void llvm::yaml::ScalarTraits<ConstString>::output(const ConstString &Val,
342 void *, raw_ostream &Out) {
343 Out << Val.GetStringRef();
344 }
345
346 llvm::StringRef
input(llvm::StringRef Scalar,void *,ConstString & Val)347 llvm::yaml::ScalarTraits<ConstString>::input(llvm::StringRef Scalar, void *,
348 ConstString &Val) {
349 Val = ConstString(Scalar);
350 return {};
351 }
352