// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2020-2022 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License.  You may obtain a copy of the License at
//
//     https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Maria Teguiani
// Author: Giuliano Procida
// Author: Aleksei Vetrov
21 
22 #include "elf_loader.h"
23 
24 #include <elf.h>
25 #include <gelf.h>
26 #include <libelf.h>
27 
28 #include <cstddef>
29 #include <cstring>
30 #include <functional>
31 #include <limits>
32 #include <ostream>
33 #include <string>
34 #include <string_view>
35 #include <utility>
36 #include <vector>
37 
38 #include "error.h"
39 #include "graph.h"
40 
41 namespace stg {
42 namespace elf {
43 
44 namespace {
45 
ParseSymbolType(unsigned char symbol_type)46 SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) {
47   switch (symbol_type) {
48     case STT_NOTYPE:
49       return SymbolTableEntry::SymbolType::NOTYPE;
50     case STT_OBJECT:
51       return SymbolTableEntry::SymbolType::OBJECT;
52     case STT_FUNC:
53       return SymbolTableEntry::SymbolType::FUNCTION;
54     case STT_SECTION:
55       return SymbolTableEntry::SymbolType::SECTION;
56     case STT_FILE:
57       return SymbolTableEntry::SymbolType::FILE;
58     case STT_COMMON:
59       return SymbolTableEntry::SymbolType::COMMON;
60     case STT_TLS:
61       return SymbolTableEntry::SymbolType::TLS;
62     case STT_GNU_IFUNC:
63       return SymbolTableEntry::SymbolType::GNU_IFUNC;
64     default:
65       Die() << "Unknown ELF symbol type: " << symbol_type;
66   }
67 }
68 
ParseSymbolBinding(unsigned char binding)69 SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) {
70   switch (binding) {
71     case STB_LOCAL:
72       return SymbolTableEntry::Binding::LOCAL;
73     case STB_GLOBAL:
74       return SymbolTableEntry::Binding::GLOBAL;
75     case STB_WEAK:
76       return SymbolTableEntry::Binding::WEAK;
77     case STB_GNU_UNIQUE:
78       return SymbolTableEntry::Binding::GNU_UNIQUE;
79     default:
80       Die() << "Unknown ELF symbol binding: " << binding;
81   }
82 }
83 
ParseSymbolVisibility(unsigned char visibility)84 SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) {
85   switch (visibility) {
86     case STV_DEFAULT:
87       return SymbolTableEntry::Visibility::DEFAULT;
88     case STV_INTERNAL:
89       return SymbolTableEntry::Visibility::INTERNAL;
90     case STV_HIDDEN:
91       return SymbolTableEntry::Visibility::HIDDEN;
92     case STV_PROTECTED:
93       return SymbolTableEntry::Visibility::PROTECTED;
94     default:
95       Die() << "Unknown ELF symbol visibility: " << visibility;
96   }
97 }
98 
ParseSymbolValueType(Elf64_Section section_index)99 SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) {
100   switch (section_index) {
101     case SHN_UNDEF:
102       return SymbolTableEntry::ValueType::UNDEFINED;
103     case SHN_ABS:
104       return SymbolTableEntry::ValueType::ABSOLUTE;
105     case SHN_COMMON:
106       return SymbolTableEntry::ValueType::COMMON;
107     default:
108       return SymbolTableEntry::ValueType::RELATIVE_TO_SECTION;
109   }
110 }
111 
// Returns a human-readable name for an ELF header e_type value.
//
// The parameter is a full 16-bit Elf64_Half: e_type is a half-word field, and
// taking it as unsigned char would drop the high byte, so that e.g. 0x0100
// would be misreported as "none". Unrecognised values are rendered as
// "unknown (type = N)" with N in decimal.
std::string ElfHeaderTypeToString(Elf64_Half elf_header_type) {
  switch (elf_header_type) {
    case ET_NONE:
      return "none";
    case ET_REL:
      return "relocatable";
    case ET_EXEC:
      return "executable";
    case ET_DYN:
      return "shared object";
    case ET_CORE:
      return "coredump";
    default:
      return "unknown (type = " + std::to_string(elf_header_type) + ')';
  }
}
128 
// Returns a human-readable name for the section types this file deals with
// (symbol tables and GNU symbol versioning sections). Any other sh_type value
// is rendered as "unknown (type = N)" with N in decimal.
std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
  if (elf_section_type == SHT_SYMTAB) {
    return "symtab";
  }
  if (elf_section_type == SHT_DYNSYM) {
    return "dynsym";
  }
  if (elf_section_type == SHT_GNU_verdef) {
    return "GNU_verdef";
  }
  if (elf_section_type == SHT_GNU_verneed) {
    return "GNU_verneed";
  }
  if (elf_section_type == SHT_GNU_versym) {
    return "GNU_versym";
  }
  std::string description = "unknown (type = ";
  description += std::to_string(elf_section_type);
  description += ')';
  return description;
}
145 
GetMachine(Elf * elf)146 GElf_Half GetMachine(Elf* elf) {
147   GElf_Ehdr header;
148   Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header";
149   return header.e_machine;
150 }
151 
AdjustAddress(GElf_Half machine,SymbolTableEntry & entry)152 void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
153   if (machine == EM_ARM) {
154     if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION
155         || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) {
156       // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture"
157       // section 5.5.3.  https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
158       entry.value &= ~1;
159     }
160   } else if (machine == EM_AARCH64) {
161     // Copy bit 55 over bits 56 to 63 which may be tag information.
162     entry.value = entry.value & (1ULL << 55)
163                   ? entry.value | (0xffULL << 56)
164                   : entry.value & ~(0xffULL << 56);
165   }
166 }
167 
GetSectionsIf(Elf * elf,const std::function<bool (const GElf_Shdr &)> & predicate)168 std::vector<Elf_Scn*> GetSectionsIf(
169     Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
170   std::vector<Elf_Scn*> result;
171   Elf_Scn* section = nullptr;
172   GElf_Shdr header;
173   while ((section = elf_nextscn(elf, section)) != nullptr) {
174     Check(gelf_getshdr(section, &header) != nullptr)
175         << "could not get ELF section header";
176     if (predicate(header)) {
177       result.push_back(section);
178     }
179   }
180   return result;
181 }
182 
GetSectionsByName(Elf * elf,const std::string & name)183 std::vector<Elf_Scn*> GetSectionsByName(Elf* elf, const std::string& name) {
184   size_t shdr_strtab_index;
185   Check(elf_getshdrstrndx(elf, &shdr_strtab_index) == 0)
186       << "could not get ELF section header string table index";
187   return GetSectionsIf(elf, [&](const GElf_Shdr& header) {
188     const auto* section_name =
189         elf_strptr(elf, shdr_strtab_index, header.sh_name);
190     return section_name != nullptr && section_name == name;
191   });
192 }
193 
MaybeGetSectionByName(Elf * elf,const std::string & name)194 Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) {
195   const auto sections = GetSectionsByName(elf, name);
196   if (sections.empty()) {
197     return nullptr;
198   }
199   Check(sections.size() == 1)
200       << "multiple sections found with name '" << name << "'";
201   return sections[0];
202 }
203 
GetSectionByName(Elf * elf,const std::string & name)204 Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) {
205   Elf_Scn* section = MaybeGetSectionByName(elf, name);
206   Check(section != nullptr) << "no section found with name '" << name << "'";
207   return section;
208 }
209 
MaybeGetSectionByType(Elf * elf,Elf64_Word type)210 Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) {
211   auto sections = GetSectionsIf(
212       elf, [&](const GElf_Shdr& header) { return header.sh_type == type; });
213   if (sections.empty()) {
214     return nullptr;
215   }
216   Check(sections.size() == 1) << "multiple sections found with type " << type;
217   return sections[0];
218 }
219 
GetSectionByIndex(Elf * elf,size_t index)220 Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) {
221   Elf_Scn* section = elf_getscn(elf, index);
222   Check(section != nullptr) << "no section found with index " << index;
223   return section;
224 }
225 
226 struct SectionInfo {
227   GElf_Shdr header;
228   Elf_Data* data;
229 };
230 
GetSectionInfo(Elf_Scn * section)231 SectionInfo GetSectionInfo(Elf_Scn* section) {
232   const size_t index = elf_ndxscn(section);
233   GElf_Shdr section_header;
234   Check(gelf_getshdr(section, &section_header) != nullptr)
235       << "failed to read section (index = " << index << ") header";
236   Elf_Data* data = elf_getdata(section, nullptr);
237   Check(data != nullptr) << "section (index = " << index << ") data is invalid";
238   return {section_header, data};
239 }
240 
GetNumberOfEntries(const GElf_Shdr & section_header)241 size_t GetNumberOfEntries(const GElf_Shdr& section_header) {
242   Check(section_header.sh_entsize != 0)
243       << "zero table entity size is unexpected for section "
244       << ElfSectionTypeToString(section_header.sh_type);
245   return section_header.sh_size / section_header.sh_entsize;
246 }
247 
GetString(Elf * elf,uint32_t section,size_t offset)248 std::string_view GetString(Elf* elf, uint32_t section, size_t offset) {
249   const auto name = elf_strptr(elf, section, offset);
250 
251   Check(name != nullptr) << "string was not found (section: " << section
252                          << ", offset: " << offset << ")";
253   return name;
254 }
255 
GetSymbolTableSection(Elf * elf,bool is_linux_kernel_binary)256 Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) {
257   GElf_Ehdr elf_header;
258   Check(gelf_getehdr(elf, &elf_header) != nullptr)
259       << "could not get ELF header";
260 
261   Elf_Scn* symtab = MaybeGetSectionByType(elf, SHT_SYMTAB);
262   Elf_Scn* dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM);
263   if (symtab != nullptr && dynsym != nullptr) {
264     // Relocatable ELF binaries, Linux kernel and modules have their
265     // exported symbols in .symtab, all other ELF types have their
266     // exported symbols in .dynsym.
267     if (elf_header.e_type == ET_REL || is_linux_kernel_binary) {
268       return symtab;
269     }
270     if (elf_header.e_type == ET_DYN || elf_header.e_type == ET_EXEC) {
271       return dynsym;
272     }
273     Die() << "unsupported ELF type: '"
274           << ElfHeaderTypeToString(elf_header.e_type) << "'";
275   } else if (symtab != nullptr) {
276     return symtab;
277   } else if (dynsym != nullptr) {
278     return dynsym;
279   } else {
280     Die() << "no ELF symbol table found";
281   }
282 }
283 
284 
// Suffix that marks a symbol name as a CFI symbol.
constexpr std::string_view kCFISuffix = ".cfi";

// Returns true iff `name` ends with kCFISuffix (the suffix alone counts).
// TODO: use std::string_view::ends_with
bool IsCFISymbolName(std::string_view name) {
  if (name.size() < kCFISuffix.size()) {
    return false;
  }
  const size_t suffix_start = name.size() - kCFISuffix.size();
  return name.compare(suffix_start, std::string_view::npos, kCFISuffix) == 0;
}
293 
294 }  // namespace
295 
UnwrapCFISymbolName(std::string_view cfi_name)296 std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
297   Check(IsCFISymbolName(cfi_name))
298       << "CFI symbol " << cfi_name << " doesn't end with .cfi";
299   return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size());
300 }
301 
302 namespace {
303 
GetSymbols(Elf * elf,Elf_Scn * symbol_table_section,bool cfi)304 std::vector<SymbolTableEntry> GetSymbols(
305     Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
306   const auto machine = GetMachine(elf);
307   const auto [symbol_table_header, symbol_table_data] =
308       GetSectionInfo(symbol_table_section);
309   const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
310 
311   std::vector<SymbolTableEntry> result;
312   result.reserve(number_of_symbols);
313 
314   // GElf uses int for indexes in symbol table, prevent int overflow.
315   Check(number_of_symbols <= std::numeric_limits<int>::max())
316       << "number of symbols exceeds INT_MAX";
317   for (size_t i = 0; i < number_of_symbols; ++i) {
318     GElf_Sym symbol;
319     Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
320           nullptr)
321         << "symbol (i = " << i << ") was not found";
322 
323     const auto name =
324         GetString(elf, symbol_table_header.sh_link, symbol.st_name);
325     if (cfi != IsCFISymbolName(name)) {
326       continue;
327     }
328     SymbolTableEntry entry{
329         .name = name,
330         .value = symbol.st_value,
331         .size = symbol.st_size,
332         .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
333         .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
334         .visibility =
335             ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
336         .section_index = symbol.st_shndx,
337         .value_type = ParseSymbolValueType(symbol.st_shndx),
338     };
339     AdjustAddress(machine, entry);
340     result.push_back(entry);
341   }
342 
343   return result;
344 }
345 
IsLinuxKernelBinary(Elf * elf)346 bool IsLinuxKernelBinary(Elf* elf) {
347   // The Linux kernel itself has many specific sections that are sufficient to
348   // classify a binary as kernel binary if present, `__ksymtab_strings` is one
349   // of them. It is present if a kernel binary (vmlinux or a module) exports
350   // symbols via the EXPORT_SYMBOL_* macros and it contains symbol names and
351   // namespaces which form part of the ABI.
352   //
353   // Kernel modules might not present a `__ksymtab_strings` section if they do
354   // not export symbols themselves via the ksymtab. Yet they can be identified
355   // by the presence of the `.modinfo` section. Since that is somewhat a generic
356   // name, also check for the presence of `.gnu.linkonce.this_module` to get
357   // solid signal as both of those sections are present in kernel modules.
358   return MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr ||
359          (MaybeGetSectionByName(elf, ".modinfo") != nullptr &&
360           MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr);
361 }
362 
IsRelocatable(Elf * elf)363 bool IsRelocatable(Elf* elf) {
364   GElf_Ehdr elf_header;
365   Check(gelf_getehdr(elf, &elf_header) != nullptr)
366       << "could not get ELF header";
367 
368   return elf_header.e_type == ET_REL;
369 }
370 
IsLittleEndianBinary(Elf * elf)371 bool IsLittleEndianBinary(Elf* elf) {
372   GElf_Ehdr elf_header;
373   Check(gelf_getehdr(elf, &elf_header) != nullptr)
374       << "could not get ELF header";
375 
376   switch (auto endianness = elf_header.e_ident[EI_DATA]) {
377     case ELFDATA2LSB:
378       return true;
379     case ELFDATA2MSB:
380       return false;
381     default:
382       Die() << "Unsupported ELF endianness: " << endianness;
383   }
384 }
385 
386 }  // namespace
387 
operator <<(std::ostream & os,SymbolTableEntry::SymbolType type)388 std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
389   using SymbolType = SymbolTableEntry::SymbolType;
390   switch (type) {
391     case SymbolType::NOTYPE:
392       return os << "notype";
393     case SymbolType::OBJECT:
394       return os << "object";
395     case SymbolType::FUNCTION:
396       return os << "function";
397     case SymbolType::SECTION:
398       return os << "section";
399     case SymbolType::FILE:
400       return os << "file";
401     case SymbolType::COMMON:
402       return os << "common";
403     case SymbolType::TLS:
404       return os << "TLS";
405     case SymbolType::GNU_IFUNC:
406       return os << "indirect (ifunc) function";
407   }
408 }
409 
operator <<(std::ostream & os,const SymbolTableEntry::ValueType type)410 std::ostream& operator<<(std::ostream& os,
411                          const SymbolTableEntry::ValueType type) {
412   using ValueType = SymbolTableEntry::ValueType;
413   switch (type) {
414     case ValueType::UNDEFINED:
415       return os << "undefined";
416     case ValueType::ABSOLUTE:
417       return os << "absolute";
418     case ValueType::COMMON:
419       return os << "common";
420     case ValueType::RELATIVE_TO_SECTION:
421       return os << "relative";
422   }
423 }
424 
ElfLoader(Elf * elf)425 ElfLoader::ElfLoader(Elf* elf)
426     : elf_(elf) {
427   Check(elf_ != nullptr) << "No ELF was provided";
428   InitializeElfInformation();
429 }
430 
InitializeElfInformation()431 void ElfLoader::InitializeElfInformation() {
432   is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_);
433   is_relocatable_ = elf::IsRelocatable(elf_);
434   is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_);
435 }
436 
GetBtfRawData() const437 std::string_view ElfLoader::GetBtfRawData() const {
438   Elf_Scn* btf_section = GetSectionByName(elf_, ".BTF");
439   Check(btf_section != nullptr) << ".BTF section is invalid";
440   Elf_Data* elf_data = elf_rawdata(btf_section, nullptr);
441   Check(elf_data != nullptr) << ".BTF section data is invalid";
442   const char* btf_start = static_cast<char*>(elf_data->d_buf);
443   const size_t btf_size = elf_data->d_size;
444   return std::string_view(btf_start, btf_size);
445 }
446 
GetElfSymbols() const447 std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
448   Elf_Scn* symbol_table_section =
449       GetSymbolTableSection(elf_, is_linux_kernel_binary_);
450   Check(symbol_table_section != nullptr)
451       << "failed to find symbol table section";
452 
453   return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
454 }
455 
GetCFISymbols() const456 std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
457   // CFI symbols may be only in .symtab
458   Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
459   if (symbol_table_section == nullptr) {
460     // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
461     // because it was trimmed away. We can't determine whether there were CFI
462     // symbols in the first place, so the best we can do is returning an empty
463     // list.
464     return {};
465   }
466   return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
467 }
468 
GetElfSymbolCRC(const SymbolTableEntry & symbol) const469 ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
470     const SymbolTableEntry& symbol) const {
471   Check(is_little_endian_binary_)
472       << "CRC is not supported in big-endian binaries";
473   const auto address = GetAbsoluteAddress(symbol);
474   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
475     return ElfSymbol::CRC{static_cast<uint32_t>(address)};
476   }
477   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
478       << "CRC symbol is expected to be absolute or relative to a section";
479 
480   const auto section = GetSectionByIndex(elf_, symbol.section_index);
481   const auto [header, data] = GetSectionInfo(section);
482   Check(data->d_buf != nullptr) << "Section has no data buffer";
483 
484   Check(address >= header.sh_addr)
485       << "CRC symbol address is below CRC section start";
486 
487   const size_t offset = address - header.sh_addr;
488   const size_t offset_end = offset + sizeof(uint32_t);
489   Check(offset_end <= data->d_size && offset_end <= header.sh_size)
490       << "CRC symbol address is above CRC section end";
491 
492   return ElfSymbol::CRC{*reinterpret_cast<uint32_t*>(
493       reinterpret_cast<char*>(data->d_buf) + offset)};
494 }
495 
GetElfSymbolNamespace(const SymbolTableEntry & symbol) const496 std::string_view ElfLoader::GetElfSymbolNamespace(
497     const SymbolTableEntry& symbol) const {
498   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
499       << "Namespace symbol is expected to be relative to a section";
500 
501   const auto section = GetSectionByIndex(elf_, symbol.section_index);
502   const auto [header, data] = GetSectionInfo(section);
503   Check(data->d_buf != nullptr) << "Section has no data buffer";
504 
505   const auto address = GetAbsoluteAddress(symbol);
506   Check(address >= header.sh_addr)
507       << "Namespace symbol address is below namespace section start";
508 
509   const size_t offset = address - header.sh_addr;
510   Check(offset < data->d_size && offset < header.sh_size)
511       << "Namespace symbol address is above namespace section end";
512 
513   const char* begin = reinterpret_cast<const char*>(data->d_buf) + offset;
514   const size_t length = strnlen(begin, data->d_size - offset);
515   Check(offset + length < data->d_size)
516       << "Namespace string should be null-terminated";
517 
518   return std::string_view(begin, length);
519 }
520 
GetAbsoluteAddress(const SymbolTableEntry & symbol) const521 size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const {
522   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
523     return symbol.value;
524   }
525   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
526       << "Only absolute and relative to sections symbols are supported";
527   // In relocatable files, st_value holds a section offset for a defined symbol.
528   if (is_relocatable_) {
529     const auto section = GetSectionByIndex(elf_, symbol.section_index);
530     GElf_Shdr header;
531     Check(gelf_getshdr(section, &header) != nullptr)
532         << "failed to get symbol section header";
533     Check(symbol.value + symbol.size <= header.sh_size)
534         << "Symbol should be inside the section";
535     return symbol.value + header.sh_addr;
536   }
537   // In executable and shared object files, st_value holds a virtual address.
538   return symbol.value;
539 }
540 
IsLinuxKernelBinary() const541 bool ElfLoader::IsLinuxKernelBinary() const {
542   return is_linux_kernel_binary_;
543 }
544 
IsLittleEndianBinary() const545 bool ElfLoader::IsLittleEndianBinary() const {
546   return is_little_endian_binary_;
547 }
548 
549 }  // namespace elf
550 }  // namespace stg
551