1 //===-- Symtab.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include <map>
10 #include <set>
11
12 #include "Plugins/Language/ObjC/ObjCLanguage.h"
13
14 #include "lldb/Core/Module.h"
15 #include "lldb/Core/RichManglingContext.h"
16 #include "lldb/Core/Section.h"
17 #include "lldb/Symbol/ObjectFile.h"
18 #include "lldb/Symbol/Symbol.h"
19 #include "lldb/Symbol/SymbolContext.h"
20 #include "lldb/Symbol/Symtab.h"
21 #include "lldb/Utility/RegularExpression.h"
22 #include "lldb/Utility/Stream.h"
23 #include "lldb/Utility/Timer.h"
24
25 #include "llvm/ADT/StringRef.h"
26
27 using namespace lldb;
28 using namespace lldb_private;
29
Symtab(ObjectFile * objfile)30 Symtab::Symtab(ObjectFile *objfile)
31 : m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this),
32 m_name_to_index(), m_mutex(), m_file_addr_to_index_computed(false),
33 m_name_indexes_computed(false) {}
34
~Symtab()35 Symtab::~Symtab() {}
36
Reserve(size_t count)37 void Symtab::Reserve(size_t count) {
38 // Clients should grab the mutex from this symbol table and lock it manually
39 // when calling this function to avoid performance issues.
40 m_symbols.reserve(count);
41 }
42
Resize(size_t count)43 Symbol *Symtab::Resize(size_t count) {
44 // Clients should grab the mutex from this symbol table and lock it manually
45 // when calling this function to avoid performance issues.
46 m_symbols.resize(count);
47 return m_symbols.empty() ? nullptr : &m_symbols[0];
48 }
49
AddSymbol(const Symbol & symbol)50 uint32_t Symtab::AddSymbol(const Symbol &symbol) {
51 // Clients should grab the mutex from this symbol table and lock it manually
52 // when calling this function to avoid performance issues.
53 uint32_t symbol_idx = m_symbols.size();
54 m_name_to_index.Clear();
55 m_file_addr_to_index.Clear();
56 m_symbols.push_back(symbol);
57 m_file_addr_to_index_computed = false;
58 m_name_indexes_computed = false;
59 return symbol_idx;
60 }
61
GetNumSymbols() const62 size_t Symtab::GetNumSymbols() const {
63 std::lock_guard<std::recursive_mutex> guard(m_mutex);
64 return m_symbols.size();
65 }
66
SectionFileAddressesChanged()67 void Symtab::SectionFileAddressesChanged() {
68 m_name_to_index.Clear();
69 m_file_addr_to_index_computed = false;
70 }
71
Dump(Stream * s,Target * target,SortOrder sort_order,Mangled::NamePreference name_preference)72 void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order,
73 Mangled::NamePreference name_preference) {
74 std::lock_guard<std::recursive_mutex> guard(m_mutex);
75
76 // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
77 s->Indent();
78 const FileSpec &file_spec = m_objfile->GetFileSpec();
79 const char *object_name = nullptr;
80 if (m_objfile->GetModule())
81 object_name = m_objfile->GetModule()->GetObjectName().GetCString();
82
83 if (file_spec)
84 s->Printf("Symtab, file = %s%s%s%s, num_symbols = %" PRIu64,
85 file_spec.GetPath().c_str(), object_name ? "(" : "",
86 object_name ? object_name : "", object_name ? ")" : "",
87 (uint64_t)m_symbols.size());
88 else
89 s->Printf("Symtab, num_symbols = %" PRIu64 "", (uint64_t)m_symbols.size());
90
91 if (!m_symbols.empty()) {
92 switch (sort_order) {
93 case eSortOrderNone: {
94 s->PutCString(":\n");
95 DumpSymbolHeader(s);
96 const_iterator begin = m_symbols.begin();
97 const_iterator end = m_symbols.end();
98 for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
99 s->Indent();
100 pos->Dump(s, target, std::distance(begin, pos), name_preference);
101 }
102 } break;
103
104 case eSortOrderByName: {
105 // Although we maintain a lookup by exact name map, the table isn't
106 // sorted by name. So we must make the ordered symbol list up ourselves.
107 s->PutCString(" (sorted by name):\n");
108 DumpSymbolHeader(s);
109
110 std::multimap<llvm::StringRef, const Symbol *> name_map;
111 for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
112 pos != end; ++pos) {
113 const char *name = pos->GetName().AsCString();
114 if (name && name[0])
115 name_map.insert(std::make_pair(name, &(*pos)));
116 }
117
118 for (const auto &name_to_symbol : name_map) {
119 const Symbol *symbol = name_to_symbol.second;
120 s->Indent();
121 symbol->Dump(s, target, symbol - &m_symbols[0], name_preference);
122 }
123 } break;
124
125 case eSortOrderByAddress:
126 s->PutCString(" (sorted by address):\n");
127 DumpSymbolHeader(s);
128 if (!m_file_addr_to_index_computed)
129 InitAddressIndexes();
130 const size_t num_entries = m_file_addr_to_index.GetSize();
131 for (size_t i = 0; i < num_entries; ++i) {
132 s->Indent();
133 const uint32_t symbol_idx = m_file_addr_to_index.GetEntryRef(i).data;
134 m_symbols[symbol_idx].Dump(s, target, symbol_idx, name_preference);
135 }
136 break;
137 }
138 } else {
139 s->PutCString("\n");
140 }
141 }
142
Dump(Stream * s,Target * target,std::vector<uint32_t> & indexes,Mangled::NamePreference name_preference) const143 void Symtab::Dump(Stream *s, Target *target, std::vector<uint32_t> &indexes,
144 Mangled::NamePreference name_preference) const {
145 std::lock_guard<std::recursive_mutex> guard(m_mutex);
146
147 const size_t num_symbols = GetNumSymbols();
148 // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
149 s->Indent();
150 s->Printf("Symtab %" PRIu64 " symbol indexes (%" PRIu64 " symbols total):\n",
151 (uint64_t)indexes.size(), (uint64_t)m_symbols.size());
152 s->IndentMore();
153
154 if (!indexes.empty()) {
155 std::vector<uint32_t>::const_iterator pos;
156 std::vector<uint32_t>::const_iterator end = indexes.end();
157 DumpSymbolHeader(s);
158 for (pos = indexes.begin(); pos != end; ++pos) {
159 size_t idx = *pos;
160 if (idx < num_symbols) {
161 s->Indent();
162 m_symbols[idx].Dump(s, target, idx, name_preference);
163 }
164 }
165 }
166 s->IndentLess();
167 }
168
DumpSymbolHeader(Stream * s)169 void Symtab::DumpSymbolHeader(Stream *s) {
170 s->Indent(" Debug symbol\n");
171 s->Indent(" |Synthetic symbol\n");
172 s->Indent(" ||Externally Visible\n");
173 s->Indent(" |||\n");
174 s->Indent("Index UserID DSX Type File Address/Value Load "
175 "Address Size Flags Name\n");
176 s->Indent("------- ------ --- --------------- ------------------ "
177 "------------------ ------------------ ---------- "
178 "----------------------------------\n");
179 }
180
CompareSymbolID(const void * key,const void * p)181 static int CompareSymbolID(const void *key, const void *p) {
182 const user_id_t match_uid = *(const user_id_t *)key;
183 const user_id_t symbol_uid = ((const Symbol *)p)->GetID();
184 if (match_uid < symbol_uid)
185 return -1;
186 if (match_uid > symbol_uid)
187 return 1;
188 return 0;
189 }
190
FindSymbolByID(lldb::user_id_t symbol_uid) const191 Symbol *Symtab::FindSymbolByID(lldb::user_id_t symbol_uid) const {
192 std::lock_guard<std::recursive_mutex> guard(m_mutex);
193
194 Symbol *symbol =
195 (Symbol *)::bsearch(&symbol_uid, &m_symbols[0], m_symbols.size(),
196 sizeof(m_symbols[0]), CompareSymbolID);
197 return symbol;
198 }
199
SymbolAtIndex(size_t idx)200 Symbol *Symtab::SymbolAtIndex(size_t idx) {
201 // Clients should grab the mutex from this symbol table and lock it manually
202 // when calling this function to avoid performance issues.
203 if (idx < m_symbols.size())
204 return &m_symbols[idx];
205 return nullptr;
206 }
207
SymbolAtIndex(size_t idx) const208 const Symbol *Symtab::SymbolAtIndex(size_t idx) const {
209 // Clients should grab the mutex from this symbol table and lock it manually
210 // when calling this function to avoid performance issues.
211 if (idx < m_symbols.size())
212 return &m_symbols[idx];
213 return nullptr;
214 }
215
lldb_skip_name(llvm::StringRef mangled,Mangled::ManglingScheme scheme)216 static bool lldb_skip_name(llvm::StringRef mangled,
217 Mangled::ManglingScheme scheme) {
218 switch (scheme) {
219 case Mangled::eManglingSchemeItanium: {
220 if (mangled.size() < 3 || !mangled.startswith("_Z"))
221 return true;
222
223 // Avoid the following types of symbols in the index.
224 switch (mangled[2]) {
225 case 'G': // guard variables
226 case 'T': // virtual tables, VTT structures, typeinfo structures + names
227 case 'Z': // named local entities (if we eventually handle
228 // eSymbolTypeData, we will want this back)
229 return true;
230
231 default:
232 break;
233 }
234
235 // Include this name in the index.
236 return false;
237 }
238
239 // No filters for this scheme yet. Include all names in indexing.
240 case Mangled::eManglingSchemeMSVC:
241 return false;
242
243 // Don't try and demangle things we can't categorize.
244 case Mangled::eManglingSchemeNone:
245 return true;
246 }
247 llvm_unreachable("unknown scheme!");
248 }
249
InitNameIndexes()250 void Symtab::InitNameIndexes() {
251 // Protected function, no need to lock mutex...
252 if (!m_name_indexes_computed) {
253 m_name_indexes_computed = true;
254 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
255 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
256 // Create the name index vector to be able to quickly search by name
257 const size_t num_symbols = m_symbols.size();
258 m_name_to_index.Reserve(num_symbols);
259
260 // The "const char *" in "class_contexts" and backlog::value_type::second
261 // must come from a ConstString::GetCString()
262 std::set<const char *> class_contexts;
263 std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
264 backlog.reserve(num_symbols / 2);
265
266 // Instantiation of the demangler is expensive, so better use a single one
267 // for all entries during batch processing.
268 RichManglingContext rmc;
269 for (uint32_t value = 0; value < num_symbols; ++value) {
270 Symbol *symbol = &m_symbols[value];
271
272 // Don't let trampolines get into the lookup by name map If we ever need
273 // the trampoline symbols to be searchable by name we can remove this and
274 // then possibly add a new bool to any of the Symtab functions that
275 // lookup symbols by name to indicate if they want trampolines.
276 if (symbol->IsTrampoline())
277 continue;
278
279 // If the symbol's name string matched a Mangled::ManglingScheme, it is
280 // stored in the mangled field.
281 Mangled &mangled = symbol->GetMangled();
282 if (ConstString name = mangled.GetMangledName()) {
283 m_name_to_index.Append(name, value);
284
285 if (symbol->ContainsLinkerAnnotations()) {
286 // If the symbol has linker annotations, also add the version without
287 // the annotations.
288 ConstString stripped = ConstString(
289 m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
290 m_name_to_index.Append(stripped, value);
291 }
292
293 const SymbolType type = symbol->GetType();
294 if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
295 if (mangled.DemangleWithRichManglingInfo(rmc, lldb_skip_name))
296 RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
297 }
298 }
299
300 // Symbol name strings that didn't match a Mangled::ManglingScheme, are
301 // stored in the demangled field.
302 if (ConstString name = mangled.GetDemangledName()) {
303 m_name_to_index.Append(name, value);
304
305 if (symbol->ContainsLinkerAnnotations()) {
306 // If the symbol has linker annotations, also add the version without
307 // the annotations.
308 name = ConstString(
309 m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
310 m_name_to_index.Append(name, value);
311 }
312
313 // If the demangled name turns out to be an ObjC name, and is a category
314 // name, add the version without categories to the index too.
315 ObjCLanguage::MethodName objc_method(name.GetStringRef(), true);
316 if (objc_method.IsValid(true)) {
317 m_selector_to_index.Append(objc_method.GetSelector(), value);
318
319 if (ConstString objc_method_no_category =
320 objc_method.GetFullNameWithoutCategory(true))
321 m_name_to_index.Append(objc_method_no_category, value);
322 }
323 }
324 }
325
326 for (const auto &record : backlog) {
327 RegisterBacklogEntry(record.first, record.second, class_contexts);
328 }
329
330 m_name_to_index.Sort();
331 m_name_to_index.SizeToFit();
332 m_selector_to_index.Sort();
333 m_selector_to_index.SizeToFit();
334 m_basename_to_index.Sort();
335 m_basename_to_index.SizeToFit();
336 m_method_to_index.Sort();
337 m_method_to_index.SizeToFit();
338 }
339 }
340
RegisterMangledNameEntry(uint32_t value,std::set<const char * > & class_contexts,std::vector<std::pair<NameToIndexMap::Entry,const char * >> & backlog,RichManglingContext & rmc)341 void Symtab::RegisterMangledNameEntry(
342 uint32_t value, std::set<const char *> &class_contexts,
343 std::vector<std::pair<NameToIndexMap::Entry, const char *>> &backlog,
344 RichManglingContext &rmc) {
345 // Only register functions that have a base name.
346 rmc.ParseFunctionBaseName();
347 llvm::StringRef base_name = rmc.GetBufferRef();
348 if (base_name.empty())
349 return;
350
351 // The base name will be our entry's name.
352 NameToIndexMap::Entry entry(ConstString(base_name), value);
353
354 rmc.ParseFunctionDeclContextName();
355 llvm::StringRef decl_context = rmc.GetBufferRef();
356
357 // Register functions with no context.
358 if (decl_context.empty()) {
359 // This has to be a basename
360 m_basename_to_index.Append(entry);
361 // If there is no context (no namespaces or class scopes that come before
362 // the function name) then this also could be a fullname.
363 m_name_to_index.Append(entry);
364 return;
365 }
366
367 // Make sure we have a pool-string pointer and see if we already know the
368 // context name.
369 const char *decl_context_ccstr = ConstString(decl_context).GetCString();
370 auto it = class_contexts.find(decl_context_ccstr);
371
372 // Register constructors and destructors. They are methods and create
373 // declaration contexts.
374 if (rmc.IsCtorOrDtor()) {
375 m_method_to_index.Append(entry);
376 if (it == class_contexts.end())
377 class_contexts.insert(it, decl_context_ccstr);
378 return;
379 }
380
381 // Register regular methods with a known declaration context.
382 if (it != class_contexts.end()) {
383 m_method_to_index.Append(entry);
384 return;
385 }
386
387 // Regular methods in unknown declaration contexts are put to the backlog. We
388 // will revisit them once we processed all remaining symbols.
389 backlog.push_back(std::make_pair(entry, decl_context_ccstr));
390 }
391
RegisterBacklogEntry(const NameToIndexMap::Entry & entry,const char * decl_context,const std::set<const char * > & class_contexts)392 void Symtab::RegisterBacklogEntry(
393 const NameToIndexMap::Entry &entry, const char *decl_context,
394 const std::set<const char *> &class_contexts) {
395 auto it = class_contexts.find(decl_context);
396 if (it != class_contexts.end()) {
397 m_method_to_index.Append(entry);
398 } else {
399 // If we got here, we have something that had a context (was inside
400 // a namespace or class) yet we don't know the entry
401 m_method_to_index.Append(entry);
402 m_basename_to_index.Append(entry);
403 }
404 }
405
PreloadSymbols()406 void Symtab::PreloadSymbols() {
407 std::lock_guard<std::recursive_mutex> guard(m_mutex);
408 InitNameIndexes();
409 }
410
AppendSymbolNamesToMap(const IndexCollection & indexes,bool add_demangled,bool add_mangled,NameToIndexMap & name_to_index_map) const411 void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes,
412 bool add_demangled, bool add_mangled,
413 NameToIndexMap &name_to_index_map) const {
414 if (add_demangled || add_mangled) {
415 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
416 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
417 std::lock_guard<std::recursive_mutex> guard(m_mutex);
418
419 // Create the name index vector to be able to quickly search by name
420 const size_t num_indexes = indexes.size();
421 for (size_t i = 0; i < num_indexes; ++i) {
422 uint32_t value = indexes[i];
423 assert(i < m_symbols.size());
424 const Symbol *symbol = &m_symbols[value];
425
426 const Mangled &mangled = symbol->GetMangled();
427 if (add_demangled) {
428 if (ConstString name = mangled.GetDemangledName())
429 name_to_index_map.Append(name, value);
430 }
431
432 if (add_mangled) {
433 if (ConstString name = mangled.GetMangledName())
434 name_to_index_map.Append(name, value);
435 }
436 }
437 }
438 }
439
AppendSymbolIndexesWithType(SymbolType symbol_type,std::vector<uint32_t> & indexes,uint32_t start_idx,uint32_t end_index) const440 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
441 std::vector<uint32_t> &indexes,
442 uint32_t start_idx,
443 uint32_t end_index) const {
444 std::lock_guard<std::recursive_mutex> guard(m_mutex);
445
446 uint32_t prev_size = indexes.size();
447
448 const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
449
450 for (uint32_t i = start_idx; i < count; ++i) {
451 if (symbol_type == eSymbolTypeAny || m_symbols[i].GetType() == symbol_type)
452 indexes.push_back(i);
453 }
454
455 return indexes.size() - prev_size;
456 }
457
AppendSymbolIndexesWithTypeAndFlagsValue(SymbolType symbol_type,uint32_t flags_value,std::vector<uint32_t> & indexes,uint32_t start_idx,uint32_t end_index) const458 uint32_t Symtab::AppendSymbolIndexesWithTypeAndFlagsValue(
459 SymbolType symbol_type, uint32_t flags_value,
460 std::vector<uint32_t> &indexes, uint32_t start_idx,
461 uint32_t end_index) const {
462 std::lock_guard<std::recursive_mutex> guard(m_mutex);
463
464 uint32_t prev_size = indexes.size();
465
466 const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
467
468 for (uint32_t i = start_idx; i < count; ++i) {
469 if ((symbol_type == eSymbolTypeAny ||
470 m_symbols[i].GetType() == symbol_type) &&
471 m_symbols[i].GetFlags() == flags_value)
472 indexes.push_back(i);
473 }
474
475 return indexes.size() - prev_size;
476 }
477
AppendSymbolIndexesWithType(SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & indexes,uint32_t start_idx,uint32_t end_index) const478 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
479 Debug symbol_debug_type,
480 Visibility symbol_visibility,
481 std::vector<uint32_t> &indexes,
482 uint32_t start_idx,
483 uint32_t end_index) const {
484 std::lock_guard<std::recursive_mutex> guard(m_mutex);
485
486 uint32_t prev_size = indexes.size();
487
488 const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
489
490 for (uint32_t i = start_idx; i < count; ++i) {
491 if (symbol_type == eSymbolTypeAny ||
492 m_symbols[i].GetType() == symbol_type) {
493 if (CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
494 indexes.push_back(i);
495 }
496 }
497
498 return indexes.size() - prev_size;
499 }
500
GetIndexForSymbol(const Symbol * symbol) const501 uint32_t Symtab::GetIndexForSymbol(const Symbol *symbol) const {
502 if (!m_symbols.empty()) {
503 const Symbol *first_symbol = &m_symbols[0];
504 if (symbol >= first_symbol && symbol < first_symbol + m_symbols.size())
505 return symbol - first_symbol;
506 }
507 return UINT32_MAX;
508 }
509
510 struct SymbolSortInfo {
511 const bool sort_by_load_addr;
512 const Symbol *symbols;
513 };
514
515 namespace {
516 struct SymbolIndexComparator {
517 const std::vector<Symbol> &symbols;
518 std::vector<lldb::addr_t> &addr_cache;
519
520 // Getting from the symbol to the Address to the File Address involves some
521 // work. Since there are potentially many symbols here, and we're using this
522 // for sorting so we're going to be computing the address many times, cache
523 // that in addr_cache. The array passed in has to be the same size as the
524 // symbols array passed into the member variable symbols, and should be
525 // initialized with LLDB_INVALID_ADDRESS.
526 // NOTE: You have to make addr_cache externally and pass it in because
527 // std::stable_sort
528 // makes copies of the comparator it is initially passed in, and you end up
529 // spending huge amounts of time copying this array...
530
SymbolIndexComparator__anon045043190111::SymbolIndexComparator531 SymbolIndexComparator(const std::vector<Symbol> &s,
532 std::vector<lldb::addr_t> &a)
533 : symbols(s), addr_cache(a) {
534 assert(symbols.size() == addr_cache.size());
535 }
operator ()__anon045043190111::SymbolIndexComparator536 bool operator()(uint32_t index_a, uint32_t index_b) {
537 addr_t value_a = addr_cache[index_a];
538 if (value_a == LLDB_INVALID_ADDRESS) {
539 value_a = symbols[index_a].GetAddressRef().GetFileAddress();
540 addr_cache[index_a] = value_a;
541 }
542
543 addr_t value_b = addr_cache[index_b];
544 if (value_b == LLDB_INVALID_ADDRESS) {
545 value_b = symbols[index_b].GetAddressRef().GetFileAddress();
546 addr_cache[index_b] = value_b;
547 }
548
549 if (value_a == value_b) {
550 // The if the values are equal, use the original symbol user ID
551 lldb::user_id_t uid_a = symbols[index_a].GetID();
552 lldb::user_id_t uid_b = symbols[index_b].GetID();
553 if (uid_a < uid_b)
554 return true;
555 if (uid_a > uid_b)
556 return false;
557 return false;
558 } else if (value_a < value_b)
559 return true;
560
561 return false;
562 }
563 };
564 }
565
SortSymbolIndexesByValue(std::vector<uint32_t> & indexes,bool remove_duplicates) const566 void Symtab::SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
567 bool remove_duplicates) const {
568 std::lock_guard<std::recursive_mutex> guard(m_mutex);
569
570 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
571 Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION);
572 // No need to sort if we have zero or one items...
573 if (indexes.size() <= 1)
574 return;
575
576 // Sort the indexes in place using std::stable_sort.
577 // NOTE: The use of std::stable_sort instead of llvm::sort here is strictly
578 // for performance, not correctness. The indexes vector tends to be "close"
579 // to sorted, which the stable sort handles better.
580
581 std::vector<lldb::addr_t> addr_cache(m_symbols.size(), LLDB_INVALID_ADDRESS);
582
583 SymbolIndexComparator comparator(m_symbols, addr_cache);
584 std::stable_sort(indexes.begin(), indexes.end(), comparator);
585
586 // Remove any duplicates if requested
587 if (remove_duplicates) {
588 auto last = std::unique(indexes.begin(), indexes.end());
589 indexes.erase(last, indexes.end());
590 }
591 }
592
AppendSymbolIndexesWithName(ConstString symbol_name,std::vector<uint32_t> & indexes)593 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
594 std::vector<uint32_t> &indexes) {
595 std::lock_guard<std::recursive_mutex> guard(m_mutex);
596
597 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
598 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
599 if (symbol_name) {
600 if (!m_name_indexes_computed)
601 InitNameIndexes();
602
603 return m_name_to_index.GetValues(symbol_name, indexes);
604 }
605 return 0;
606 }
607
AppendSymbolIndexesWithName(ConstString symbol_name,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & indexes)608 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
609 Debug symbol_debug_type,
610 Visibility symbol_visibility,
611 std::vector<uint32_t> &indexes) {
612 std::lock_guard<std::recursive_mutex> guard(m_mutex);
613
614 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
615 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
616 if (symbol_name) {
617 const size_t old_size = indexes.size();
618 if (!m_name_indexes_computed)
619 InitNameIndexes();
620
621 std::vector<uint32_t> all_name_indexes;
622 const size_t name_match_count =
623 m_name_to_index.GetValues(symbol_name, all_name_indexes);
624 for (size_t i = 0; i < name_match_count; ++i) {
625 if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
626 symbol_visibility))
627 indexes.push_back(all_name_indexes[i]);
628 }
629 return indexes.size() - old_size;
630 }
631 return 0;
632 }
633
634 uint32_t
AppendSymbolIndexesWithNameAndType(ConstString symbol_name,SymbolType symbol_type,std::vector<uint32_t> & indexes)635 Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name,
636 SymbolType symbol_type,
637 std::vector<uint32_t> &indexes) {
638 std::lock_guard<std::recursive_mutex> guard(m_mutex);
639
640 if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) {
641 std::vector<uint32_t>::iterator pos = indexes.begin();
642 while (pos != indexes.end()) {
643 if (symbol_type == eSymbolTypeAny ||
644 m_symbols[*pos].GetType() == symbol_type)
645 ++pos;
646 else
647 pos = indexes.erase(pos);
648 }
649 }
650 return indexes.size();
651 }
652
AppendSymbolIndexesWithNameAndType(ConstString symbol_name,SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & indexes)653 uint32_t Symtab::AppendSymbolIndexesWithNameAndType(
654 ConstString symbol_name, SymbolType symbol_type,
655 Debug symbol_debug_type, Visibility symbol_visibility,
656 std::vector<uint32_t> &indexes) {
657 std::lock_guard<std::recursive_mutex> guard(m_mutex);
658
659 if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type,
660 symbol_visibility, indexes) > 0) {
661 std::vector<uint32_t>::iterator pos = indexes.begin();
662 while (pos != indexes.end()) {
663 if (symbol_type == eSymbolTypeAny ||
664 m_symbols[*pos].GetType() == symbol_type)
665 ++pos;
666 else
667 pos = indexes.erase(pos);
668 }
669 }
670 return indexes.size();
671 }
672
AppendSymbolIndexesMatchingRegExAndType(const RegularExpression & regexp,SymbolType symbol_type,std::vector<uint32_t> & indexes)673 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
674 const RegularExpression ®exp, SymbolType symbol_type,
675 std::vector<uint32_t> &indexes) {
676 std::lock_guard<std::recursive_mutex> guard(m_mutex);
677
678 uint32_t prev_size = indexes.size();
679 uint32_t sym_end = m_symbols.size();
680
681 for (uint32_t i = 0; i < sym_end; i++) {
682 if (symbol_type == eSymbolTypeAny ||
683 m_symbols[i].GetType() == symbol_type) {
684 const char *name = m_symbols[i].GetName().AsCString();
685 if (name) {
686 if (regexp.Execute(name))
687 indexes.push_back(i);
688 }
689 }
690 }
691 return indexes.size() - prev_size;
692 }
693
AppendSymbolIndexesMatchingRegExAndType(const RegularExpression & regexp,SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & indexes)694 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
695 const RegularExpression ®exp, SymbolType symbol_type,
696 Debug symbol_debug_type, Visibility symbol_visibility,
697 std::vector<uint32_t> &indexes) {
698 std::lock_guard<std::recursive_mutex> guard(m_mutex);
699
700 uint32_t prev_size = indexes.size();
701 uint32_t sym_end = m_symbols.size();
702
703 for (uint32_t i = 0; i < sym_end; i++) {
704 if (symbol_type == eSymbolTypeAny ||
705 m_symbols[i].GetType() == symbol_type) {
706 if (!CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
707 continue;
708
709 const char *name = m_symbols[i].GetName().AsCString();
710 if (name) {
711 if (regexp.Execute(name))
712 indexes.push_back(i);
713 }
714 }
715 }
716 return indexes.size() - prev_size;
717 }
718
FindSymbolWithType(SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,uint32_t & start_idx)719 Symbol *Symtab::FindSymbolWithType(SymbolType symbol_type,
720 Debug symbol_debug_type,
721 Visibility symbol_visibility,
722 uint32_t &start_idx) {
723 std::lock_guard<std::recursive_mutex> guard(m_mutex);
724
725 const size_t count = m_symbols.size();
726 for (size_t idx = start_idx; idx < count; ++idx) {
727 if (symbol_type == eSymbolTypeAny ||
728 m_symbols[idx].GetType() == symbol_type) {
729 if (CheckSymbolAtIndex(idx, symbol_debug_type, symbol_visibility)) {
730 start_idx = idx;
731 return &m_symbols[idx];
732 }
733 }
734 }
735 return nullptr;
736 }
737
738 void
FindAllSymbolsWithNameAndType(ConstString name,SymbolType symbol_type,std::vector<uint32_t> & symbol_indexes)739 Symtab::FindAllSymbolsWithNameAndType(ConstString name,
740 SymbolType symbol_type,
741 std::vector<uint32_t> &symbol_indexes) {
742 std::lock_guard<std::recursive_mutex> guard(m_mutex);
743
744 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
745 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
746 // Initialize all of the lookup by name indexes before converting NAME to a
747 // uniqued string NAME_STR below.
748 if (!m_name_indexes_computed)
749 InitNameIndexes();
750
751 if (name) {
752 // The string table did have a string that matched, but we need to check
753 // the symbols and match the symbol_type if any was given.
754 AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_indexes);
755 }
756 }
757
FindAllSymbolsWithNameAndType(ConstString name,SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & symbol_indexes)758 void Symtab::FindAllSymbolsWithNameAndType(
759 ConstString name, SymbolType symbol_type, Debug symbol_debug_type,
760 Visibility symbol_visibility, std::vector<uint32_t> &symbol_indexes) {
761 std::lock_guard<std::recursive_mutex> guard(m_mutex);
762
763 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
764 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
765 // Initialize all of the lookup by name indexes before converting NAME to a
766 // uniqued string NAME_STR below.
767 if (!m_name_indexes_computed)
768 InitNameIndexes();
769
770 if (name) {
771 // The string table did have a string that matched, but we need to check
772 // the symbols and match the symbol_type if any was given.
773 AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
774 symbol_visibility, symbol_indexes);
775 }
776 }
777
FindAllSymbolsMatchingRexExAndType(const RegularExpression & regex,SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility,std::vector<uint32_t> & symbol_indexes)778 void Symtab::FindAllSymbolsMatchingRexExAndType(
779 const RegularExpression ®ex, SymbolType symbol_type,
780 Debug symbol_debug_type, Visibility symbol_visibility,
781 std::vector<uint32_t> &symbol_indexes) {
782 std::lock_guard<std::recursive_mutex> guard(m_mutex);
783
784 AppendSymbolIndexesMatchingRegExAndType(regex, symbol_type, symbol_debug_type,
785 symbol_visibility, symbol_indexes);
786 }
787
FindFirstSymbolWithNameAndType(ConstString name,SymbolType symbol_type,Debug symbol_debug_type,Visibility symbol_visibility)788 Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
789 SymbolType symbol_type,
790 Debug symbol_debug_type,
791 Visibility symbol_visibility) {
792 std::lock_guard<std::recursive_mutex> guard(m_mutex);
793
794 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
795 Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
796 if (!m_name_indexes_computed)
797 InitNameIndexes();
798
799 if (name) {
800 std::vector<uint32_t> matching_indexes;
801 // The string table did have a string that matched, but we need to check
802 // the symbols and match the symbol_type if any was given.
803 if (AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
804 symbol_visibility,
805 matching_indexes)) {
806 std::vector<uint32_t>::const_iterator pos, end = matching_indexes.end();
807 for (pos = matching_indexes.begin(); pos != end; ++pos) {
808 Symbol *symbol = SymbolAtIndex(*pos);
809
810 if (symbol->Compare(name, symbol_type))
811 return symbol;
812 }
813 }
814 }
815 return nullptr;
816 }
817
818 typedef struct {
819 const Symtab *symtab;
820 const addr_t file_addr;
821 Symbol *match_symbol;
822 const uint32_t *match_index_ptr;
823 addr_t match_offset;
824 } SymbolSearchInfo;
825
826 // Add all the section file start address & size to the RangeVector, recusively
827 // adding any children sections.
AddSectionsToRangeMap(SectionList * sectlist,RangeVector<addr_t,addr_t> & section_ranges)828 static void AddSectionsToRangeMap(SectionList *sectlist,
829 RangeVector<addr_t, addr_t> §ion_ranges) {
830 const int num_sections = sectlist->GetNumSections(0);
831 for (int i = 0; i < num_sections; i++) {
832 SectionSP sect_sp = sectlist->GetSectionAtIndex(i);
833 if (sect_sp) {
834 SectionList &child_sectlist = sect_sp->GetChildren();
835
836 // If this section has children, add the children to the RangeVector.
837 // Else add this section to the RangeVector.
838 if (child_sectlist.GetNumSections(0) > 0) {
839 AddSectionsToRangeMap(&child_sectlist, section_ranges);
840 } else {
841 size_t size = sect_sp->GetByteSize();
842 if (size > 0) {
843 addr_t base_addr = sect_sp->GetFileAddress();
844 RangeVector<addr_t, addr_t>::Entry entry;
845 entry.SetRangeBase(base_addr);
846 entry.SetByteSize(size);
847 section_ranges.Append(entry);
848 }
849 }
850 }
851 }
852 }
853
InitAddressIndexes()854 void Symtab::InitAddressIndexes() {
855 // Protected function, no need to lock mutex...
856 if (!m_file_addr_to_index_computed && !m_symbols.empty()) {
857 m_file_addr_to_index_computed = true;
858
859 FileRangeToIndexMap::Entry entry;
860 const_iterator begin = m_symbols.begin();
861 const_iterator end = m_symbols.end();
862 for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
863 if (pos->ValueIsAddress()) {
864 entry.SetRangeBase(pos->GetAddressRef().GetFileAddress());
865 entry.SetByteSize(pos->GetByteSize());
866 entry.data = std::distance(begin, pos);
867 m_file_addr_to_index.Append(entry);
868 }
869 }
870 const size_t num_entries = m_file_addr_to_index.GetSize();
871 if (num_entries > 0) {
872 m_file_addr_to_index.Sort();
873
874 // Create a RangeVector with the start & size of all the sections for
875 // this objfile. We'll need to check this for any FileRangeToIndexMap
876 // entries with an uninitialized size, which could potentially be a large
877 // number so reconstituting the weak pointer is busywork when it is
878 // invariant information.
879 SectionList *sectlist = m_objfile->GetSectionList();
880 RangeVector<addr_t, addr_t> section_ranges;
881 if (sectlist) {
882 AddSectionsToRangeMap(sectlist, section_ranges);
883 section_ranges.Sort();
884 }
885
886 // Iterate through the FileRangeToIndexMap and fill in the size for any
887 // entries that didn't already have a size from the Symbol (e.g. if we
888 // have a plain linker symbol with an address only, instead of debug info
889 // where we get an address and a size and a type, etc.)
890 for (size_t i = 0; i < num_entries; i++) {
891 FileRangeToIndexMap::Entry *entry =
892 m_file_addr_to_index.GetMutableEntryAtIndex(i);
893 if (entry->GetByteSize() == 0) {
894 addr_t curr_base_addr = entry->GetRangeBase();
895 const RangeVector<addr_t, addr_t>::Entry *containing_section =
896 section_ranges.FindEntryThatContains(curr_base_addr);
897
898 // Use the end of the section as the default max size of the symbol
899 addr_t sym_size = 0;
900 if (containing_section) {
901 sym_size =
902 containing_section->GetByteSize() -
903 (entry->GetRangeBase() - containing_section->GetRangeBase());
904 }
905
906 for (size_t j = i; j < num_entries; j++) {
907 FileRangeToIndexMap::Entry *next_entry =
908 m_file_addr_to_index.GetMutableEntryAtIndex(j);
909 addr_t next_base_addr = next_entry->GetRangeBase();
910 if (next_base_addr > curr_base_addr) {
911 addr_t size_to_next_symbol = next_base_addr - curr_base_addr;
912
913 // Take the difference between this symbol and the next one as
914 // its size, if it is less than the size of the section.
915 if (sym_size == 0 || size_to_next_symbol < sym_size) {
916 sym_size = size_to_next_symbol;
917 }
918 break;
919 }
920 }
921
922 if (sym_size > 0) {
923 entry->SetByteSize(sym_size);
924 Symbol &symbol = m_symbols[entry->data];
925 symbol.SetByteSize(sym_size);
926 symbol.SetSizeIsSynthesized(true);
927 }
928 }
929 }
930
931 // Sort again in case the range size changes the ordering
932 m_file_addr_to_index.Sort();
933 }
934 }
935 }
936
CalculateSymbolSizes()937 void Symtab::CalculateSymbolSizes() {
938 std::lock_guard<std::recursive_mutex> guard(m_mutex);
939 // Size computation happens inside InitAddressIndexes.
940 InitAddressIndexes();
941 }
942
FindSymbolAtFileAddress(addr_t file_addr)943 Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
944 std::lock_guard<std::recursive_mutex> guard(m_mutex);
945 if (!m_file_addr_to_index_computed)
946 InitAddressIndexes();
947
948 const FileRangeToIndexMap::Entry *entry =
949 m_file_addr_to_index.FindEntryStartsAt(file_addr);
950 if (entry) {
951 Symbol *symbol = SymbolAtIndex(entry->data);
952 if (symbol->GetFileAddress() == file_addr)
953 return symbol;
954 }
955 return nullptr;
956 }
957
FindSymbolContainingFileAddress(addr_t file_addr)958 Symbol *Symtab::FindSymbolContainingFileAddress(addr_t file_addr) {
959 std::lock_guard<std::recursive_mutex> guard(m_mutex);
960
961 if (!m_file_addr_to_index_computed)
962 InitAddressIndexes();
963
964 const FileRangeToIndexMap::Entry *entry =
965 m_file_addr_to_index.FindEntryThatContains(file_addr);
966 if (entry) {
967 Symbol *symbol = SymbolAtIndex(entry->data);
968 if (symbol->ContainsFileAddress(file_addr))
969 return symbol;
970 }
971 return nullptr;
972 }
973
ForEachSymbolContainingFileAddress(addr_t file_addr,std::function<bool (Symbol *)> const & callback)974 void Symtab::ForEachSymbolContainingFileAddress(
975 addr_t file_addr, std::function<bool(Symbol *)> const &callback) {
976 std::lock_guard<std::recursive_mutex> guard(m_mutex);
977
978 if (!m_file_addr_to_index_computed)
979 InitAddressIndexes();
980
981 std::vector<uint32_t> all_addr_indexes;
982
983 // Get all symbols with file_addr
984 const size_t addr_match_count =
985 m_file_addr_to_index.FindEntryIndexesThatContain(file_addr,
986 all_addr_indexes);
987
988 for (size_t i = 0; i < addr_match_count; ++i) {
989 Symbol *symbol = SymbolAtIndex(all_addr_indexes[i]);
990 if (symbol->ContainsFileAddress(file_addr)) {
991 if (!callback(symbol))
992 break;
993 }
994 }
995 }
996
SymbolIndicesToSymbolContextList(std::vector<uint32_t> & symbol_indexes,SymbolContextList & sc_list)997 void Symtab::SymbolIndicesToSymbolContextList(
998 std::vector<uint32_t> &symbol_indexes, SymbolContextList &sc_list) {
999 // No need to protect this call using m_mutex all other method calls are
1000 // already thread safe.
1001
1002 const bool merge_symbol_into_function = true;
1003 size_t num_indices = symbol_indexes.size();
1004 if (num_indices > 0) {
1005 SymbolContext sc;
1006 sc.module_sp = m_objfile->GetModule();
1007 for (size_t i = 0; i < num_indices; i++) {
1008 sc.symbol = SymbolAtIndex(symbol_indexes[i]);
1009 if (sc.symbol)
1010 sc_list.AppendIfUnique(sc, merge_symbol_into_function);
1011 }
1012 }
1013 }
1014
FindFunctionSymbols(ConstString name,uint32_t name_type_mask,SymbolContextList & sc_list)1015 void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
1016 SymbolContextList &sc_list) {
1017 std::vector<uint32_t> symbol_indexes;
1018
1019 // eFunctionNameTypeAuto should be pre-resolved by a call to
1020 // Module::LookupInfo::LookupInfo()
1021 assert((name_type_mask & eFunctionNameTypeAuto) == 0);
1022
1023 if (name_type_mask & (eFunctionNameTypeBase | eFunctionNameTypeFull)) {
1024 std::vector<uint32_t> temp_symbol_indexes;
1025 FindAllSymbolsWithNameAndType(name, eSymbolTypeAny, temp_symbol_indexes);
1026
1027 unsigned temp_symbol_indexes_size = temp_symbol_indexes.size();
1028 if (temp_symbol_indexes_size > 0) {
1029 std::lock_guard<std::recursive_mutex> guard(m_mutex);
1030 for (unsigned i = 0; i < temp_symbol_indexes_size; i++) {
1031 SymbolContext sym_ctx;
1032 sym_ctx.symbol = SymbolAtIndex(temp_symbol_indexes[i]);
1033 if (sym_ctx.symbol) {
1034 switch (sym_ctx.symbol->GetType()) {
1035 case eSymbolTypeCode:
1036 case eSymbolTypeResolver:
1037 case eSymbolTypeReExported:
1038 symbol_indexes.push_back(temp_symbol_indexes[i]);
1039 break;
1040 default:
1041 break;
1042 }
1043 }
1044 }
1045 }
1046 }
1047
1048 if (name_type_mask & eFunctionNameTypeBase) {
1049 // From mangled names we can't tell what is a basename and what is a method
1050 // name, so we just treat them the same
1051 if (!m_name_indexes_computed)
1052 InitNameIndexes();
1053
1054 if (!m_basename_to_index.IsEmpty()) {
1055 const UniqueCStringMap<uint32_t>::Entry *match;
1056 for (match = m_basename_to_index.FindFirstValueForName(name);
1057 match != nullptr;
1058 match = m_basename_to_index.FindNextValueForName(match)) {
1059 symbol_indexes.push_back(match->value);
1060 }
1061 }
1062 }
1063
1064 if (name_type_mask & eFunctionNameTypeMethod) {
1065 if (!m_name_indexes_computed)
1066 InitNameIndexes();
1067
1068 if (!m_method_to_index.IsEmpty()) {
1069 const UniqueCStringMap<uint32_t>::Entry *match;
1070 for (match = m_method_to_index.FindFirstValueForName(name);
1071 match != nullptr;
1072 match = m_method_to_index.FindNextValueForName(match)) {
1073 symbol_indexes.push_back(match->value);
1074 }
1075 }
1076 }
1077
1078 if (name_type_mask & eFunctionNameTypeSelector) {
1079 if (!m_name_indexes_computed)
1080 InitNameIndexes();
1081
1082 if (!m_selector_to_index.IsEmpty()) {
1083 const UniqueCStringMap<uint32_t>::Entry *match;
1084 for (match = m_selector_to_index.FindFirstValueForName(name);
1085 match != nullptr;
1086 match = m_selector_to_index.FindNextValueForName(match)) {
1087 symbol_indexes.push_back(match->value);
1088 }
1089 }
1090 }
1091
1092 if (!symbol_indexes.empty()) {
1093 llvm::sort(symbol_indexes.begin(), symbol_indexes.end());
1094 symbol_indexes.erase(
1095 std::unique(symbol_indexes.begin(), symbol_indexes.end()),
1096 symbol_indexes.end());
1097 SymbolIndicesToSymbolContextList(symbol_indexes, sc_list);
1098 }
1099 }
1100
GetParent(Symbol * child_symbol) const1101 const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
1102 uint32_t child_idx = GetIndexForSymbol(child_symbol);
1103 if (child_idx != UINT32_MAX && child_idx > 0) {
1104 for (uint32_t idx = child_idx - 1; idx != UINT32_MAX; --idx) {
1105 const Symbol *symbol = SymbolAtIndex(idx);
1106 const uint32_t sibling_idx = symbol->GetSiblingIndex();
1107 if (sibling_idx != UINT32_MAX && sibling_idx > child_idx)
1108 return symbol;
1109 }
1110 }
1111 return nullptr;
1112 }
1113