1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 // Copyright (C) 2020 Google, Inc.
6 //
7 // Author: Matthias Maennich
8
9 /// @file
10 ///
11 /// This contains the definition of the symtab reader
12
13 #include <algorithm>
14 #include <iostream>
15 #include <unordered_map>
16 #include <unordered_set>
17
18 #include "abg-elf-helpers.h"
19 #include "abg-fwd.h"
20 #include "abg-internal.h"
21 #include "abg-tools-utils.h"
22
23 // Though this is an internal header, we need to export the symbols to be able
24 // to test this code. TODO: find a way to export symbols just for unit tests.
25 ABG_BEGIN_EXPORT_DECLARATIONS
26 #include "abg-symtab-reader.h"
27 ABG_END_EXPORT_DECLARATIONS
28
29 namespace abigail
30 {
31
32 namespace symtab_reader
33 {
34
35 /// symtab_filter implementations
36
37 /// Determine whether a symbol is matching the filter criteria of this filter
38 /// object. In terms of a filter functionality, you would _not_ filter out
39 /// this symbol if it passes this (i.e. returns true).
40 ///
41 /// @param symbol The Elf symbol under test.
42 ///
43 /// @return whether the symbol matches all relevant / required criteria
44 bool
matches(const elf_symbol & symbol) const45 symtab_filter::matches(const elf_symbol& symbol) const
46 {
47 if (functions_ && *functions_ != symbol.is_function())
48 return false;
49 if (variables_ && *variables_ != symbol.is_variable())
50 return false;
51 if (public_symbols_ && *public_symbols_ != symbol.is_public())
52 return false;
53 if (undefined_symbols_ && *undefined_symbols_ == symbol.is_defined())
54 return false;
55 if (kernel_symbols_ && *kernel_symbols_ != symbol.is_in_ksymtab())
56 return false;
57
58 return true;
59 }
60
61 /// symtab implementations
62
63 /// Obtain a suitable default filter for iterating this symtab object.
64 ///
65 /// The symtab_filter obtained is populated with some sensible default
66 /// settings, such as public_symbols(true) and kernel_symbols(true) if the
67 /// binary has been identified as Linux Kernel binary.
68 ///
69 /// @return a symtab_filter with sensible populated defaults
70 symtab_filter
make_filter() const71 symtab::make_filter() const
72 {
73 symtab_filter filter;
74 filter.set_public_symbols();
75 if (is_kernel_binary_)
76 filter.set_kernel_symbols();
77 return filter;
78 }
79
80 /// Get a vector of symbols that are associated with a certain name
81 ///
82 /// @param name the name the symbols need to match
83 ///
84 /// @return a vector of symbols, empty if no matching symbols have been found
85 const elf_symbols&
lookup_symbol(const std::string & name) const86 symtab::lookup_symbol(const std::string& name) const
87 {
88 static const elf_symbols empty_result;
89 const auto it = name_symbol_map_.find(name);
90 if (it != name_symbol_map_.end())
91 return it->second;
92 return empty_result;
93 }
94
95 /// Lookup a symbol by its address
96 ///
97 /// @param symbol_addr the starting address of the symbol
98 ///
99 /// @return a symbol if found, else an empty sptr
100 const elf_symbol_sptr&
lookup_symbol(GElf_Addr symbol_addr) const101 symtab::lookup_symbol(GElf_Addr symbol_addr) const
102 {
103 static const elf_symbol_sptr empty_result;
104 const auto addr_it = addr_symbol_map_.find(symbol_addr);
105 if (addr_it != addr_symbol_map_.end())
106 return addr_it->second;
107 else
108 {
109 // check for a potential entry address mapping instead,
110 // relevant for ppc ELFv1 binaries
111 const auto entry_it = entry_addr_symbol_map_.find(symbol_addr);
112 if (entry_it != entry_addr_symbol_map_.end())
113 return entry_it->second;
114 }
115 return empty_result;
116 }
117
118 /// A symbol sorting functor.
119 static struct
120 {
121 bool
operator ()abigail::symtab_reader::__anon00697f170108122 operator()(const elf_symbol_sptr& left, const elf_symbol_sptr& right)
123 {return left->get_id_string() < right->get_id_string();}
124 } symbol_sort;
125
126 /// Construct a symtab object and instantiate it from an ELF
127 /// handle. Also pass in the ir::environment we are living in. If
128 /// specified, the symbol_predicate will be respected when creating
129 /// the full vector of symbols.
130 ///
131 /// @param elf_handle the elf handle to load the symbol table from
132 ///
133 /// @param env the environment we are operating in
134 ///
135 /// @param is_suppressed a predicate function to determine if a symbol should
136 /// be suppressed
137 ///
138 /// @return a smart pointer handle to symtab, set to nullptr if the load was
139 /// not completed
140 symtab_ptr
load(Elf * elf_handle,ir::environment * env,symbol_predicate is_suppressed)141 symtab::load(Elf* elf_handle,
142 ir::environment* env,
143 symbol_predicate is_suppressed)
144 {
145 ABG_ASSERT(elf_handle);
146 ABG_ASSERT(env);
147
148 symtab_ptr result(new symtab);
149 if (!result->load_(elf_handle, env, is_suppressed))
150 return {};
151
152 return result;
153 }
154
155 /// Construct a symtab object from existing name->symbol lookup maps.
156 /// They were possibly read from a different representation (XML maybe).
157 ///
158 /// @param function_symbol_map a map from ELF function name to elf_symbol
159 ///
160 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
161 ///
162 /// @return a smart pointer handle to symtab, set to nullptr if the load was
163 /// not completed
164 symtab_ptr
load(string_elf_symbols_map_sptr function_symbol_map,string_elf_symbols_map_sptr variables_symbol_map)165 symtab::load(string_elf_symbols_map_sptr function_symbol_map,
166 string_elf_symbols_map_sptr variables_symbol_map)
167 {
168 symtab_ptr result(new symtab);
169 if (!result->load_(function_symbol_map, variables_symbol_map))
170 return {};
171
172 return result;
173 }
174
175 /// Default constructor of the @ref symtab type.
symtab()176 symtab::symtab()
177 : is_kernel_binary_(false), has_ksymtab_entries_(false)
178 {}
179
180 /// Load the symtab representation from an Elf binary presented to us by an
181 /// Elf* handle.
182 ///
183 /// This method iterates over the entries of .symtab and collects all
184 /// interesting symbols (functions and variables).
185 ///
186 /// In case of a Linux Kernel binary, it also collects information about the
187 /// symbols exported via EXPORT_SYMBOL in the Kernel that would then end up
188 /// having a corresponding __ksymtab entry.
189 ///
190 /// Symbols that are suppressed will be omitted from the symbols_ vector, but
191 /// still be discoverable through the name->symbol and addr->symbol lookup
192 /// maps.
193 ///
194 /// @param elf_handle the elf handle to load the symbol table from
195 ///
196 /// @param env the environment we are operating in
197 ///
198 /// @param is_suppressed a predicate function to determine if a symbol should
199 /// be suppressed
200 ///
201 /// @return true if the load succeeded
202 bool
load_(Elf * elf_handle,ir::environment * env,symbol_predicate is_suppressed)203 symtab::load_(Elf* elf_handle,
204 ir::environment* env,
205 symbol_predicate is_suppressed)
206 {
207 GElf_Ehdr ehdr_mem;
208 GElf_Ehdr* header = gelf_getehdr(elf_handle, &ehdr_mem);
209 if (!header)
210 {
211 std::cerr << "Could not get ELF header: Skipping symtab load.\n";
212 return false;
213 }
214
215 Elf_Scn* symtab_section = elf_helpers::find_symbol_table_section(elf_handle);
216 if (!symtab_section)
217 {
218 std::cerr << "No symbol table found: Skipping symtab load.\n";
219 return false;
220 }
221
222 GElf_Shdr symtab_sheader;
223 gelf_getshdr(symtab_section, &symtab_sheader);
224
225 // check for bogus section header
226 if (symtab_sheader.sh_entsize == 0)
227 {
228 std::cerr << "Invalid symtab header found: Skipping symtab load.\n";
229 return false;
230 }
231
232 const size_t number_syms =
233 symtab_sheader.sh_size / symtab_sheader.sh_entsize;
234
235 Elf_Data* symtab = elf_getdata(symtab_section, 0);
236 if (!symtab)
237 {
238 std::cerr << "Could not load elf symtab: Skipping symtab load.\n";
239 return false;
240 }
241
242 // The __kstrtab_strings sections is basically an ELF strtab but does not
243 // support elf_strptr lookups. A single call to elf_getdata gives a handle to
244 // washed section data.
245 //
246 // The value of a __kstrtabns_FOO (or other similar) symbol is an address
247 // within the __kstrtab_strings section. To look up the string value, we need
248 // to translate from vmlinux load address to section offset by subtracting the
249 // base address of the section. This adjustment is not needed for loadable
250 // modules which are relocatable and so identifiable by ELF type ET_REL.
251 Elf_Scn* strings_section = elf_helpers::find_ksymtab_strings_section(elf_handle);
252 size_t strings_offset = 0;
253 const char* strings_data = nullptr;
254 size_t strings_size = 0;
255 if (strings_section)
256 {
257 GElf_Shdr strings_sheader;
258 gelf_getshdr(strings_section, &strings_sheader);
259 strings_offset = header->e_type == ET_REL ? 0 : strings_sheader.sh_addr;
260 Elf_Data* data = elf_getdata(strings_section, nullptr);
261 ABG_ASSERT(data->d_off == 0);
262 strings_data = reinterpret_cast<const char *>(data->d_buf);
263 strings_size = data->d_size;
264 }
265
266 const bool is_kernel = elf_helpers::is_linux_kernel(elf_handle);
267 std::unordered_set<std::string> exported_kernel_symbols;
268 std::unordered_map<std::string, uint32_t> crc_values;
269 std::unordered_map<std::string, std::string> namespaces;
270
271 for (size_t i = 0; i < number_syms; ++i)
272 {
273 GElf_Sym *sym, sym_mem;
274 sym = gelf_getsym(symtab, i, &sym_mem);
275 if (!sym)
276 {
277 std::cerr << "Could not load symbol with index " << i
278 << ": Skipping symtab load.\n";
279 return false;
280 }
281
282 const char* const name_str =
283 elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
284
285 // no name, no game
286 if (!name_str)
287 continue;
288
289 const std::string name = name_str;
290 if (name.empty())
291 continue;
292
293 // Handle ksymtab entries. Every symbol entry that starts with __ksymtab_
294 // indicates that the symbol in question is exported through ksymtab. We
295 // do not know whether this is ksymtab_gpl or ksymtab, but that is good
296 // enough for now.
297 //
298 // We could follow up with this entry:
299 //
300 // symbol_value -> ksymtab_entry in either ksymtab_gpl or ksymtab
301 // -> addr/name/namespace (in case of PREL32: offset)
302 //
303 // That way we could also detect ksymtab<>ksymtab_gpl changes or changes
304 // of the symbol namespace.
305 //
306 // As of now this lookup is fragile, as occasionally ksymtabs are empty
307 // (seen so far for kernel modules and LTO builds). Hence we stick to the
308 // fairly safe assumption that ksymtab exported entries are having an
309 // appearence as __ksymtab_<symbol> in the symtab.
310 if (is_kernel && name.rfind("__ksymtab_", 0) == 0)
311 {
312 ABG_ASSERT(exported_kernel_symbols.insert(name.substr(10)).second);
313 continue;
314 }
315 if (is_kernel && name.rfind("__crc_", 0) == 0)
316 {
317 uint32_t crc_value;
318 ABG_ASSERT(elf_helpers::get_crc_for_symbol(elf_handle,
319 sym, crc_value));
320 ABG_ASSERT(crc_values.emplace(name.substr(6), crc_value).second);
321 continue;
322 }
323 if (strings_section && is_kernel && name.rfind("__kstrtabns_", 0) == 0)
324 {
325 // This symbol lives in the __ksymtab_strings section but st_value may
326 // be a vmlinux load address so we need to subtract the offset before
327 // looking it up in that section.
328 const size_t value = sym->st_value;
329 const size_t offset = value - strings_offset;
330 // check offset
331 ABG_ASSERT(offset < strings_size);
332 // find the terminating NULL
333 const char* first = strings_data + offset;
334 const char* last = strings_data + strings_size;
335 const char* limit = std::find(first, last, 0);
336 // check NULL found
337 ABG_ASSERT(limit < last);
338 // interpret the empty namespace name as no namespace name
339 if (first < limit)
340 ABG_ASSERT(namespaces.emplace(
341 name.substr(12), std::string(first, limit - first)).second);
342 continue;
343 }
344
345 // filter out uninteresting entries and only keep functions/variables for
346 // now. The rest might be interesting in the future though.
347 const int sym_type = GELF_ST_TYPE(sym->st_info);
348 if (!(sym_type == STT_FUNC
349 || sym_type == STT_GNU_IFUNC
350 // If the symbol is for an OBJECT, the index of the
351 // section it refers to cannot be absolute.
352 // Otherwise that OBJECT is not a variable.
353 || (sym_type == STT_OBJECT && sym->st_shndx != SHN_ABS)
354 || sym_type == STT_TLS))
355 continue;
356
357 const bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
358 // this occurs in relocatable files.
359 const bool sym_is_common = sym->st_shndx == SHN_COMMON;
360
361 elf_symbol::version ver;
362 elf_helpers::get_version_for_symbol(elf_handle, i, sym_is_defined, ver);
363
364 const elf_symbol_sptr& symbol_sptr =
365 elf_symbol::create
366 (env, i, sym->st_size, name,
367 elf_helpers::stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info)),
368 elf_helpers::stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info)),
369 sym_is_defined, sym_is_common, ver,
370 elf_helpers::stv_to_elf_symbol_visibility
371 (GELF_ST_VISIBILITY(sym->st_other)));
372
373 // We do not take suppressed symbols into our symbol vector to avoid
374 // accidental leakage. But we ensure supressed symbols are otherwise set
375 // up for lookup.
376 if (!(is_suppressed && is_suppressed(symbol_sptr)))
377 // add to the symbol vector
378 symbols_.push_back(symbol_sptr);
379 else
380 symbol_sptr->set_is_suppressed(true);
381
382 // add to the name->symbol lookup
383 name_symbol_map_[name].push_back(symbol_sptr);
384
385 // add to the addr->symbol lookup
386 if (symbol_sptr->is_common_symbol())
387 {
388 const auto it = name_symbol_map_.find(name);
389 ABG_ASSERT(it != name_symbol_map_.end());
390 const elf_symbols& common_sym_instances = it->second;
391 ABG_ASSERT(!common_sym_instances.empty());
392 if (common_sym_instances.size() > 1)
393 {
394 elf_symbol_sptr main_common_sym = common_sym_instances[0];
395 ABG_ASSERT(main_common_sym->get_name() == name);
396 ABG_ASSERT(main_common_sym->is_common_symbol());
397 ABG_ASSERT(symbol_sptr.get() != main_common_sym.get());
398 main_common_sym->add_common_instance(symbol_sptr);
399 }
400 }
401 else if (symbol_sptr->is_defined())
402 setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
403 }
404
405 add_alternative_address_lookups(elf_handle);
406
407 is_kernel_binary_ = elf_helpers::is_linux_kernel(elf_handle);
408
409 // Now apply the ksymtab_exported attribute to the symbols we collected.
410 for (const auto& symbol : exported_kernel_symbols)
411 {
412 const auto r = name_symbol_map_.find(symbol);
413 if (r == name_symbol_map_.end())
414 continue;
415
416 for (const auto& elf_symbol : r->second)
417 if (elf_symbol->is_public())
418 elf_symbol->set_is_in_ksymtab(true);
419 has_ksymtab_entries_ = true;
420 }
421
422 // Now add the CRC values
423 for (const auto& crc_entry : crc_values)
424 {
425 const auto r = name_symbol_map_.find(crc_entry.first);
426 if (r == name_symbol_map_.end())
427 continue;
428
429 for (const auto& symbol : r->second)
430 symbol->set_crc(crc_entry.second);
431 }
432
433 // Now add the namespaces
434 for (const auto& namespace_entry : namespaces)
435 {
436 const auto r = name_symbol_map_.find(namespace_entry.first);
437 if (r == name_symbol_map_.end())
438 continue;
439
440 for (const auto& symbol : r->second)
441 symbol->set_namespace(namespace_entry.second);
442 }
443
444 // sort the symbols for deterministic output
445 std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
446
447 return true;
448 }
449
450 /// Load the symtab representation from a function/variable lookup map pair.
451 ///
452 /// This method assumes the lookup maps are correct and sets up the data
453 /// vector as well as the name->symbol lookup map. The addr->symbol lookup
454 /// map cannot be set up in this case.
455 ///
456 /// @param function_symbol_map a map from ELF function name to elf_symbol
457 ///
458 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
459 ///
460 /// @return true if the load succeeded
461 bool
load_(string_elf_symbols_map_sptr function_symbol_map,string_elf_symbols_map_sptr variables_symbol_map)462 symtab::load_(string_elf_symbols_map_sptr function_symbol_map,
463 string_elf_symbols_map_sptr variables_symbol_map)
464
465 {
466 if (function_symbol_map)
467 for (const auto& symbol_map_entry : *function_symbol_map)
468 {
469 for (const auto& symbol : symbol_map_entry.second)
470 {
471 if (!symbol->is_suppressed())
472 symbols_.push_back(symbol);
473 }
474 ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
475 }
476
477 if (variables_symbol_map)
478 for (const auto& symbol_map_entry : *variables_symbol_map)
479 {
480 for (const auto& symbol : symbol_map_entry.second)
481 {
482 if (!symbol->is_suppressed())
483 symbols_.push_back(symbol);
484 }
485 ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
486 }
487
488 // sort the symbols for deterministic output
489 std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
490
491 return true;
492 }
493
494 /// Notify the symtab about the name of the main symbol at a given address.
495 ///
496 /// From just alone the symtab we can't guess the main symbol of a bunch of
497 /// aliased symbols that all point to the same address. During processing of
498 /// additional information (such as DWARF), this information becomes apparent
499 /// and we can adjust the addr->symbol lookup map as well as the alias
500 /// reference of the symbol objects.
501 ///
502 /// @param addr the addr that we are updating the main symbol for
503 /// @param name the name of the main symbol
504 void
update_main_symbol(GElf_Addr addr,const std::string & name)505 symtab::update_main_symbol(GElf_Addr addr, const std::string& name)
506 {
507 // get one symbol (i.e. the current main symbol)
508 elf_symbol_sptr symbol = lookup_symbol(addr);
509
510 // The caller might not know whether the addr is associated to an ELF symbol
511 // that we care about. E.g. the addr could be associated to an ELF symbol,
512 // but not one in .dynsym when looking at a DSO. Hence, early exit if the
513 // lookup failed.
514 if (!symbol)
515 return;
516
517 // determine the new main symbol by attempting an update
518 elf_symbol_sptr new_main = symbol->update_main_symbol(name);
519
520 // also update the default symbol we return when looked up by address
521 if (new_main)
522 addr_symbol_map_[addr] = new_main;
523 }
524
525 /// Various adjustments and bookkeeping may be needed to provide a correct
526 /// interpretation (one that matches DWARF addresses) of raw symbol values.
527 ///
528 /// This is a sub-routine for symtab::load_and
529 /// symtab::add_alternative_address_lookups and must be called only
530 /// once (per symbol) during the execution of the former.
531 ///
532 /// @param elf_handle the ELF handle
533 ///
534 /// @param elf_symbol the ELF symbol
535 ///
536 /// @param symbol_sptr the libabigail symbol
537 ///
538 /// @return a possibly-adjusted symbol value
539 GElf_Addr
setup_symbol_lookup_tables(Elf * elf_handle,GElf_Sym * elf_symbol,const elf_symbol_sptr & symbol_sptr)540 symtab::setup_symbol_lookup_tables(Elf* elf_handle,
541 GElf_Sym* elf_symbol,
542 const elf_symbol_sptr& symbol_sptr)
543 {
544 const bool is_arm32 = elf_helpers::architecture_is_arm32(elf_handle);
545 const bool is_arm64 = elf_helpers::architecture_is_arm64(elf_handle);
546 const bool is_ppc64 = elf_helpers::architecture_is_ppc64(elf_handle);
547 const bool is_ppc32 = elf_helpers::architecture_is_ppc32(elf_handle);
548
549 GElf_Addr symbol_value =
550 elf_helpers::maybe_adjust_et_rel_sym_addr_to_abs_addr(elf_handle,
551 elf_symbol);
552
553 if (is_arm32 && symbol_sptr->is_function())
554 // Clear bit zero of ARM32 addresses as per "ELF for the Arm
555 // Architecture" section 5.5.3.
556 // https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
557 symbol_value &= ~1;
558
559 if (is_arm64)
560 // Copy bit 55 over bits 56 to 63 which may be tag information.
561 symbol_value = symbol_value & (1ULL<<55)
562 ? symbol_value | (0xffULL<<56)
563 : symbol_value &~ (0xffULL<<56);
564
565 if (symbol_sptr->is_defined())
566 {
567 const auto result =
568 addr_symbol_map_.emplace(symbol_value, symbol_sptr);
569 if (!result.second)
570 // A symbol with the same address already exists. This
571 // means this symbol is an alias of the main symbol with
572 // that address. So let's register this new alias as such.
573 result.first->second->get_main_symbol()->add_alias(symbol_sptr);
574 }
575
576 // Please note that update_function_entry_address_symbol_map depends
577 // on the symbol aliases been setup. This is why, the
578 // elf_symbol::add_alias call is done above BEFORE this point.
579 if ((is_ppc64 || is_ppc32) && symbol_sptr->is_function())
580 update_function_entry_address_symbol_map(elf_handle, elf_symbol,
581 symbol_sptr);
582
583 return symbol_value;
584 }
585
586 /// Update the function entry symbol map to later allow lookups of this symbol
587 /// by entry address as well. This is relevant for ppc64 ELFv1 binaries.
588 ///
589 /// For ppc64 ELFv1 binaries, we need to build a function entry point address
590 /// -> function symbol map. This is in addition to the function pointer ->
591 /// symbol map. This is because on ppc64 ELFv1, a function pointer is
592 /// different from a function entry point address.
593 ///
594 /// On ppc64 ELFv1, the DWARF DIE of a function references the address of the
595 /// entry point of the function symbol; whereas the value of the function
596 /// symbol is the function pointer. As these addresses are different, if I we
597 /// want to get to the symbol of a function from its entry point address (as
598 /// referenced by DWARF function DIEs) we must have the two maps I mentionned
599 /// right above.
600 ///
601 /// In other words, we need a map that associates a function entry point
602 /// address with the symbol of that function, to be able to get the function
603 /// symbol that corresponds to a given function DIE, on ppc64.
604 ///
605 /// The value of the function pointer (the value of the symbol) usually refers
606 /// to the offset of a table in the .opd section. But sometimes, for a symbol
607 /// named "foo", the corresponding symbol named ".foo" (note the dot before
608 /// foo) which value is the entry point address of the function; that entry
609 /// point address refers to a region in the .text section.
610 ///
611 /// So we are only interested in values of the symbol that are in the .opd
612 /// section.
613 ///
614 /// @param elf_handle the ELF handle to operate on
615 ///
616 /// @param native_symbol the native Elf symbol to update the entry for
617 ///
618 /// @param symbol_sptr the internal symbol to associte the entry address with
619 void
update_function_entry_address_symbol_map(Elf * elf_handle,GElf_Sym * native_symbol,const elf_symbol_sptr & symbol_sptr)620 symtab::update_function_entry_address_symbol_map(
621 Elf* elf_handle, GElf_Sym* native_symbol, const elf_symbol_sptr& symbol_sptr)
622 {
623 const GElf_Addr fn_desc_addr = native_symbol->st_value;
624 const GElf_Addr fn_entry_point_addr =
625 elf_helpers::lookup_ppc64_elf_fn_entry_point_address(elf_handle,
626 fn_desc_addr);
627
628 const std::pair<addr_symbol_map_type::const_iterator, bool>& result =
629 entry_addr_symbol_map_.emplace(fn_entry_point_addr, symbol_sptr);
630
631 const addr_symbol_map_type::const_iterator it = result.first;
632 const bool was_inserted = result.second;
633 if (!was_inserted
634 && elf_helpers::address_is_in_opd_section(elf_handle, fn_desc_addr))
635 {
636 // Either
637 //
638 // 'symbol' must have been registered as an alias for
639 // it->second->get_main_symbol()
640 //
641 // Or
642 //
643 // if the name of 'symbol' is foo, then the name of it2->second is
644 // ".foo". That is, foo is the name of the symbol when it refers to the
645 // function descriptor in the .opd section and ".foo" is an internal name
646 // for the address of the entry point of foo.
647 //
648 // In the latter case, we just want to keep a reference to "foo" as .foo
649 // is an internal name.
650
651 const bool two_symbols_alias =
652 it->second->get_main_symbol()->does_alias(*symbol_sptr);
653 const bool symbol_is_foo_and_prev_symbol_is_dot_foo =
654 (it->second->get_name() == std::string(".") + symbol_sptr->get_name());
655
656 ABG_ASSERT(two_symbols_alias
657 || symbol_is_foo_and_prev_symbol_is_dot_foo);
658
659 if (symbol_is_foo_and_prev_symbol_is_dot_foo)
660 // Let's just keep a reference of the symbol that the user sees in the
661 // source code (the one named foo). The symbol which name is prefixed
662 // with a "dot" is an artificial one.
663 entry_addr_symbol_map_[fn_entry_point_addr] = symbol_sptr;
664 }
665 }
666
667 /// Fill up the lookup maps with alternative keys
668 ///
669 /// Due to special features like Control-Flow-Integrity (CFI), the symbol
670 /// lookup could be done indirectly. E.g. enabling CFI causes clang to
671 /// associate the DWARF information with the actual CFI protected function
672 /// (suffix .cfi) instead of with the entry symbol in the symtab.
673 ///
674 /// This function adds additional lookup keys to compensate for that.
675 ///
676 /// So far, this only implements CFI support, by adding addr->symbol pairs
677 /// where
678 /// addr : symbol value of the <foo>.cfi value
679 /// symbol : symbol_sptr looked up via "<foo>"
680 ///
681 /// @param elf_handle the ELF handle to operate on
682 void
add_alternative_address_lookups(Elf * elf_handle)683 symtab::add_alternative_address_lookups(Elf* elf_handle)
684 {
685 Elf_Scn* symtab_section = elf_helpers::find_symtab_section(elf_handle);
686 if (!symtab_section)
687 return;
688 GElf_Shdr symtab_sheader;
689 gelf_getshdr(symtab_section, &symtab_sheader);
690
691 const size_t number_syms =
692 symtab_sheader.sh_size / symtab_sheader.sh_entsize;
693
694 Elf_Data* symtab = elf_getdata(symtab_section, 0);
695
696 for (size_t i = 0; i < number_syms; ++i)
697 {
698 GElf_Sym *sym, sym_mem;
699 sym = gelf_getsym(symtab, i, &sym_mem);
700 if (!sym)
701 {
702 std::cerr << "Could not load symbol with index " << i
703 << ": Skipping alternative symbol load.\n";
704 continue;
705 }
706
707 const char* const name_str =
708 elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
709
710 // no name, no game
711 if (!name_str)
712 continue;
713
714 const std::string name = name_str;
715 if (name.empty())
716 continue;
717
718 // Add alternative lookup addresses for CFI symbols
719 static const std::string cfi = ".cfi";
720 if (name.size() > cfi.size()
721 && name.compare(name.size() - cfi.size(), cfi.size(), cfi) == 0)
722 // ... name.ends_with(".cfi")
723 {
724 const auto candidate_name = name.substr(0, name.size() - cfi.size());
725
726 auto symbols = lookup_symbol(candidate_name);
727 // lookup_symbol returns a vector of symbols. For this case we handle
728 // only the case that there has been exactly one match. Otherwise we
729 // can't reasonably handle it and need to bail out.
730 ABG_ASSERT(symbols.size() <= 1);
731 if (symbols.size() == 1)
732 {
733 const auto& symbol_sptr = symbols[0];
734 setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
735 }
736 }
737 }
738 }
739
740 } // end namespace symtab_reader
741 } // end namespace abigail
742