1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Aleksei Vetrov
21
22 #include "elf_loader.h"
23
24 #include <elf.h>
25 #include <gelf.h>
26 #include <libelf.h>
27
28 #include <cstddef>
29 #include <cstring>
30 #include <functional>
31 #include <limits>
32 #include <ostream>
33 #include <string>
34 #include <string_view>
35 #include <utility>
36 #include <vector>
37
38 #include "error.h"
39 #include "graph.h"
40
41 namespace stg {
42 namespace elf {
43
44 namespace {
45
ParseSymbolType(unsigned char symbol_type)46 SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) {
47 switch (symbol_type) {
48 case STT_NOTYPE:
49 return SymbolTableEntry::SymbolType::NOTYPE;
50 case STT_OBJECT:
51 return SymbolTableEntry::SymbolType::OBJECT;
52 case STT_FUNC:
53 return SymbolTableEntry::SymbolType::FUNCTION;
54 case STT_SECTION:
55 return SymbolTableEntry::SymbolType::SECTION;
56 case STT_FILE:
57 return SymbolTableEntry::SymbolType::FILE;
58 case STT_COMMON:
59 return SymbolTableEntry::SymbolType::COMMON;
60 case STT_TLS:
61 return SymbolTableEntry::SymbolType::TLS;
62 case STT_GNU_IFUNC:
63 return SymbolTableEntry::SymbolType::GNU_IFUNC;
64 default:
65 Die() << "Unknown ELF symbol type: " << symbol_type;
66 }
67 }
68
ParseSymbolBinding(unsigned char binding)69 SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) {
70 switch (binding) {
71 case STB_LOCAL:
72 return SymbolTableEntry::Binding::LOCAL;
73 case STB_GLOBAL:
74 return SymbolTableEntry::Binding::GLOBAL;
75 case STB_WEAK:
76 return SymbolTableEntry::Binding::WEAK;
77 case STB_GNU_UNIQUE:
78 return SymbolTableEntry::Binding::GNU_UNIQUE;
79 default:
80 Die() << "Unknown ELF symbol binding: " << binding;
81 }
82 }
83
ParseSymbolVisibility(unsigned char visibility)84 SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) {
85 switch (visibility) {
86 case STV_DEFAULT:
87 return SymbolTableEntry::Visibility::DEFAULT;
88 case STV_INTERNAL:
89 return SymbolTableEntry::Visibility::INTERNAL;
90 case STV_HIDDEN:
91 return SymbolTableEntry::Visibility::HIDDEN;
92 case STV_PROTECTED:
93 return SymbolTableEntry::Visibility::PROTECTED;
94 default:
95 Die() << "Unknown ELF symbol visibility: " << visibility;
96 }
97 }
98
ParseSymbolValueType(Elf64_Section section_index)99 SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) {
100 switch (section_index) {
101 case SHN_UNDEF:
102 return SymbolTableEntry::ValueType::UNDEFINED;
103 case SHN_ABS:
104 return SymbolTableEntry::ValueType::ABSOLUTE;
105 case SHN_COMMON:
106 return SymbolTableEntry::ValueType::COMMON;
107 default:
108 return SymbolTableEntry::ValueType::RELATIVE_TO_SECTION;
109 }
110 }
111
// Returns a human-readable name for an ELF header type (e_type).
//
// The parameter is a full 16-bit Elf64_Half: e_type values above 0xff (for
// example the processor-specific range) must not be truncated, otherwise a
// value such as 0xff00 would be misreported as "none".
std::string ElfHeaderTypeToString(Elf64_Half elf_header_type) {
  switch (elf_header_type) {
    case ET_NONE:
      return "none";
    case ET_REL:
      return "relocatable";
    case ET_EXEC:
      return "executable";
    case ET_DYN:
      return "shared object";
    case ET_CORE:
      return "coredump";
    default:
      return "unknown (type = " + std::to_string(elf_header_type) + ')';
  }
}
128
// Returns a human-readable name for the section types this file cares about
// (symbol tables and GNU versioning sections); any other type is rendered as
// "unknown (type = N)" with N in decimal.
std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
  if (elf_section_type == SHT_SYMTAB) {
    return "symtab";
  }
  if (elf_section_type == SHT_DYNSYM) {
    return "dynsym";
  }
  if (elf_section_type == SHT_GNU_verdef) {
    return "GNU_verdef";
  }
  if (elf_section_type == SHT_GNU_verneed) {
    return "GNU_verneed";
  }
  if (elf_section_type == SHT_GNU_versym) {
    return "GNU_versym";
  }
  std::string description = "unknown (type = ";
  description += std::to_string(elf_section_type);
  description += ')';
  return description;
}
145
GetMachine(Elf * elf)146 GElf_Half GetMachine(Elf* elf) {
147 GElf_Ehdr header;
148 Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header";
149 return header.e_machine;
150 }
151
AdjustAddress(GElf_Half machine,SymbolTableEntry & entry)152 void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
153 if (machine == EM_ARM) {
154 if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION
155 || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) {
156 // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture"
157 // section 5.5.3. https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
158 entry.value &= ~1;
159 }
160 } else if (machine == EM_AARCH64) {
161 // Copy bit 55 over bits 56 to 63 which may be tag information.
162 entry.value = entry.value & (1ULL << 55)
163 ? entry.value | (0xffULL << 56)
164 : entry.value & ~(0xffULL << 56);
165 }
166 }
167
GetSectionsIf(Elf * elf,const std::function<bool (const GElf_Shdr &)> & predicate)168 std::vector<Elf_Scn*> GetSectionsIf(
169 Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
170 std::vector<Elf_Scn*> result;
171 Elf_Scn* section = nullptr;
172 GElf_Shdr header;
173 while ((section = elf_nextscn(elf, section)) != nullptr) {
174 Check(gelf_getshdr(section, &header) != nullptr)
175 << "could not get ELF section header";
176 if (predicate(header)) {
177 result.push_back(section);
178 }
179 }
180 return result;
181 }
182
GetSectionsByName(Elf * elf,const std::string & name)183 std::vector<Elf_Scn*> GetSectionsByName(Elf* elf, const std::string& name) {
184 size_t shdr_strtab_index;
185 Check(elf_getshdrstrndx(elf, &shdr_strtab_index) == 0)
186 << "could not get ELF section header string table index";
187 return GetSectionsIf(elf, [&](const GElf_Shdr& header) {
188 const auto* section_name =
189 elf_strptr(elf, shdr_strtab_index, header.sh_name);
190 return section_name != nullptr && section_name == name;
191 });
192 }
193
MaybeGetSectionByName(Elf * elf,const std::string & name)194 Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) {
195 const auto sections = GetSectionsByName(elf, name);
196 if (sections.empty()) {
197 return nullptr;
198 }
199 Check(sections.size() == 1)
200 << "multiple sections found with name '" << name << "'";
201 return sections[0];
202 }
203
GetSectionByName(Elf * elf,const std::string & name)204 Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) {
205 Elf_Scn* section = MaybeGetSectionByName(elf, name);
206 Check(section != nullptr) << "no section found with name '" << name << "'";
207 return section;
208 }
209
MaybeGetSectionByType(Elf * elf,Elf64_Word type)210 Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) {
211 auto sections = GetSectionsIf(
212 elf, [&](const GElf_Shdr& header) { return header.sh_type == type; });
213 if (sections.empty()) {
214 return nullptr;
215 }
216 Check(sections.size() == 1) << "multiple sections found with type " << type;
217 return sections[0];
218 }
219
GetSectionByIndex(Elf * elf,size_t index)220 Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) {
221 Elf_Scn* section = elf_getscn(elf, index);
222 Check(section != nullptr) << "no section found with index " << index;
223 return section;
224 }
225
226 struct SectionInfo {
227 GElf_Shdr header;
228 Elf_Data* data;
229 };
230
GetSectionInfo(Elf_Scn * section)231 SectionInfo GetSectionInfo(Elf_Scn* section) {
232 const size_t index = elf_ndxscn(section);
233 GElf_Shdr section_header;
234 Check(gelf_getshdr(section, §ion_header) != nullptr)
235 << "failed to read section (index = " << index << ") header";
236 Elf_Data* data = elf_getdata(section, nullptr);
237 Check(data != nullptr) << "section (index = " << index << ") data is invalid";
238 return {section_header, data};
239 }
240
GetNumberOfEntries(const GElf_Shdr & section_header)241 size_t GetNumberOfEntries(const GElf_Shdr& section_header) {
242 Check(section_header.sh_entsize != 0)
243 << "zero table entity size is unexpected for section "
244 << ElfSectionTypeToString(section_header.sh_type);
245 return section_header.sh_size / section_header.sh_entsize;
246 }
247
GetString(Elf * elf,uint32_t section,size_t offset)248 std::string_view GetString(Elf* elf, uint32_t section, size_t offset) {
249 const auto name = elf_strptr(elf, section, offset);
250
251 Check(name != nullptr) << "string was not found (section: " << section
252 << ", offset: " << offset << ")";
253 return name;
254 }
255
GetSymbolTableSection(Elf * elf,bool is_linux_kernel_binary)256 Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) {
257 GElf_Ehdr elf_header;
258 Check(gelf_getehdr(elf, &elf_header) != nullptr)
259 << "could not get ELF header";
260
261 Elf_Scn* symtab = MaybeGetSectionByType(elf, SHT_SYMTAB);
262 Elf_Scn* dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM);
263 if (symtab != nullptr && dynsym != nullptr) {
264 // Relocatable ELF binaries, Linux kernel and modules have their
265 // exported symbols in .symtab, all other ELF types have their
266 // exported symbols in .dynsym.
267 if (elf_header.e_type == ET_REL || is_linux_kernel_binary) {
268 return symtab;
269 }
270 if (elf_header.e_type == ET_DYN || elf_header.e_type == ET_EXEC) {
271 return dynsym;
272 }
273 Die() << "unsupported ELF type: '"
274 << ElfHeaderTypeToString(elf_header.e_type) << "'";
275 } else if (symtab != nullptr) {
276 return symtab;
277 } else if (dynsym != nullptr) {
278 return dynsym;
279 } else {
280 Die() << "no ELF symbol table found";
281 }
282 }
283
284
// Suffix that IsCFISymbolName looks for at the end of a symbol name.
constexpr std::string_view kCFISuffix = ".cfi";

// Reports whether `name` ends with ".cfi".
// TODO: use std::string_view::ends_with
bool IsCFISymbolName(std::string_view name) {
  if (name.size() < kCFISuffix.size()) {
    return false;
  }
  const auto tail_start = name.size() - kCFISuffix.size();
  return name.compare(tail_start, std::string_view::npos, kCFISuffix) == 0;
}
293
294 } // namespace
295
UnwrapCFISymbolName(std::string_view cfi_name)296 std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
297 Check(IsCFISymbolName(cfi_name))
298 << "CFI symbol " << cfi_name << " doesn't end with .cfi";
299 return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size());
300 }
301
302 namespace {
303
GetSymbols(Elf * elf,Elf_Scn * symbol_table_section,bool cfi)304 std::vector<SymbolTableEntry> GetSymbols(
305 Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
306 const auto machine = GetMachine(elf);
307 const auto [symbol_table_header, symbol_table_data] =
308 GetSectionInfo(symbol_table_section);
309 const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
310
311 std::vector<SymbolTableEntry> result;
312 result.reserve(number_of_symbols);
313
314 // GElf uses int for indexes in symbol table, prevent int overflow.
315 Check(number_of_symbols <= std::numeric_limits<int>::max())
316 << "number of symbols exceeds INT_MAX";
317 for (size_t i = 0; i < number_of_symbols; ++i) {
318 GElf_Sym symbol;
319 Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
320 nullptr)
321 << "symbol (i = " << i << ") was not found";
322
323 const auto name =
324 GetString(elf, symbol_table_header.sh_link, symbol.st_name);
325 if (cfi != IsCFISymbolName(name)) {
326 continue;
327 }
328 SymbolTableEntry entry{
329 .name = name,
330 .value = symbol.st_value,
331 .size = symbol.st_size,
332 .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
333 .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
334 .visibility =
335 ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
336 .section_index = symbol.st_shndx,
337 .value_type = ParseSymbolValueType(symbol.st_shndx),
338 };
339 AdjustAddress(machine, entry);
340 result.push_back(entry);
341 }
342
343 return result;
344 }
345
IsLinuxKernelBinary(Elf * elf)346 bool IsLinuxKernelBinary(Elf* elf) {
347 // The Linux kernel itself has many specific sections that are sufficient to
348 // classify a binary as kernel binary if present, `__ksymtab_strings` is one
349 // of them. It is present if a kernel binary (vmlinux or a module) exports
350 // symbols via the EXPORT_SYMBOL_* macros and it contains symbol names and
351 // namespaces which form part of the ABI.
352 //
353 // Kernel modules might not present a `__ksymtab_strings` section if they do
354 // not export symbols themselves via the ksymtab. Yet they can be identified
355 // by the presence of the `.modinfo` section. Since that is somewhat a generic
356 // name, also check for the presence of `.gnu.linkonce.this_module` to get
357 // solid signal as both of those sections are present in kernel modules.
358 return MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr ||
359 (MaybeGetSectionByName(elf, ".modinfo") != nullptr &&
360 MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr);
361 }
362
IsRelocatable(Elf * elf)363 bool IsRelocatable(Elf* elf) {
364 GElf_Ehdr elf_header;
365 Check(gelf_getehdr(elf, &elf_header) != nullptr)
366 << "could not get ELF header";
367
368 return elf_header.e_type == ET_REL;
369 }
370
IsLittleEndianBinary(Elf * elf)371 bool IsLittleEndianBinary(Elf* elf) {
372 GElf_Ehdr elf_header;
373 Check(gelf_getehdr(elf, &elf_header) != nullptr)
374 << "could not get ELF header";
375
376 switch (auto endianness = elf_header.e_ident[EI_DATA]) {
377 case ELFDATA2LSB:
378 return true;
379 case ELFDATA2MSB:
380 return false;
381 default:
382 Die() << "Unsupported ELF endianness: " << endianness;
383 }
384 }
385
386 } // namespace
387
operator <<(std::ostream & os,SymbolTableEntry::SymbolType type)388 std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
389 using SymbolType = SymbolTableEntry::SymbolType;
390 switch (type) {
391 case SymbolType::NOTYPE:
392 return os << "notype";
393 case SymbolType::OBJECT:
394 return os << "object";
395 case SymbolType::FUNCTION:
396 return os << "function";
397 case SymbolType::SECTION:
398 return os << "section";
399 case SymbolType::FILE:
400 return os << "file";
401 case SymbolType::COMMON:
402 return os << "common";
403 case SymbolType::TLS:
404 return os << "TLS";
405 case SymbolType::GNU_IFUNC:
406 return os << "indirect (ifunc) function";
407 }
408 }
409
operator <<(std::ostream & os,const SymbolTableEntry::ValueType type)410 std::ostream& operator<<(std::ostream& os,
411 const SymbolTableEntry::ValueType type) {
412 using ValueType = SymbolTableEntry::ValueType;
413 switch (type) {
414 case ValueType::UNDEFINED:
415 return os << "undefined";
416 case ValueType::ABSOLUTE:
417 return os << "absolute";
418 case ValueType::COMMON:
419 return os << "common";
420 case ValueType::RELATIVE_TO_SECTION:
421 return os << "relative";
422 }
423 }
424
ElfLoader(Elf * elf)425 ElfLoader::ElfLoader(Elf* elf)
426 : elf_(elf) {
427 Check(elf_ != nullptr) << "No ELF was provided";
428 InitializeElfInformation();
429 }
430
InitializeElfInformation()431 void ElfLoader::InitializeElfInformation() {
432 is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_);
433 is_relocatable_ = elf::IsRelocatable(elf_);
434 is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_);
435 }
436
GetBtfRawData() const437 std::string_view ElfLoader::GetBtfRawData() const {
438 Elf_Scn* btf_section = GetSectionByName(elf_, ".BTF");
439 Check(btf_section != nullptr) << ".BTF section is invalid";
440 Elf_Data* elf_data = elf_rawdata(btf_section, nullptr);
441 Check(elf_data != nullptr) << ".BTF section data is invalid";
442 const char* btf_start = static_cast<char*>(elf_data->d_buf);
443 const size_t btf_size = elf_data->d_size;
444 return std::string_view(btf_start, btf_size);
445 }
446
GetElfSymbols() const447 std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
448 Elf_Scn* symbol_table_section =
449 GetSymbolTableSection(elf_, is_linux_kernel_binary_);
450 Check(symbol_table_section != nullptr)
451 << "failed to find symbol table section";
452
453 return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
454 }
455
GetCFISymbols() const456 std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
457 // CFI symbols may be only in .symtab
458 Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
459 if (symbol_table_section == nullptr) {
460 // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
461 // because it was trimmed away. We can't determine whether there were CFI
462 // symbols in the first place, so the best we can do is returning an empty
463 // list.
464 return {};
465 }
466 return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
467 }
468
GetElfSymbolCRC(const SymbolTableEntry & symbol) const469 ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
470 const SymbolTableEntry& symbol) const {
471 Check(is_little_endian_binary_)
472 << "CRC is not supported in big-endian binaries";
473 const auto address = GetAbsoluteAddress(symbol);
474 if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
475 return ElfSymbol::CRC{static_cast<uint32_t>(address)};
476 }
477 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
478 << "CRC symbol is expected to be absolute or relative to a section";
479
480 const auto section = GetSectionByIndex(elf_, symbol.section_index);
481 const auto [header, data] = GetSectionInfo(section);
482 Check(data->d_buf != nullptr) << "Section has no data buffer";
483
484 Check(address >= header.sh_addr)
485 << "CRC symbol address is below CRC section start";
486
487 const size_t offset = address - header.sh_addr;
488 const size_t offset_end = offset + sizeof(uint32_t);
489 Check(offset_end <= data->d_size && offset_end <= header.sh_size)
490 << "CRC symbol address is above CRC section end";
491
492 return ElfSymbol::CRC{*reinterpret_cast<uint32_t*>(
493 reinterpret_cast<char*>(data->d_buf) + offset)};
494 }
495
GetElfSymbolNamespace(const SymbolTableEntry & symbol) const496 std::string_view ElfLoader::GetElfSymbolNamespace(
497 const SymbolTableEntry& symbol) const {
498 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
499 << "Namespace symbol is expected to be relative to a section";
500
501 const auto section = GetSectionByIndex(elf_, symbol.section_index);
502 const auto [header, data] = GetSectionInfo(section);
503 Check(data->d_buf != nullptr) << "Section has no data buffer";
504
505 const auto address = GetAbsoluteAddress(symbol);
506 Check(address >= header.sh_addr)
507 << "Namespace symbol address is below namespace section start";
508
509 const size_t offset = address - header.sh_addr;
510 Check(offset < data->d_size && offset < header.sh_size)
511 << "Namespace symbol address is above namespace section end";
512
513 const char* begin = reinterpret_cast<const char*>(data->d_buf) + offset;
514 const size_t length = strnlen(begin, data->d_size - offset);
515 Check(offset + length < data->d_size)
516 << "Namespace string should be null-terminated";
517
518 return std::string_view(begin, length);
519 }
520
GetAbsoluteAddress(const SymbolTableEntry & symbol) const521 size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const {
522 if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
523 return symbol.value;
524 }
525 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
526 << "Only absolute and relative to sections symbols are supported";
527 // In relocatable files, st_value holds a section offset for a defined symbol.
528 if (is_relocatable_) {
529 const auto section = GetSectionByIndex(elf_, symbol.section_index);
530 GElf_Shdr header;
531 Check(gelf_getshdr(section, &header) != nullptr)
532 << "failed to get symbol section header";
533 Check(symbol.value + symbol.size <= header.sh_size)
534 << "Symbol should be inside the section";
535 return symbol.value + header.sh_addr;
536 }
537 // In executable and shared object files, st_value holds a virtual address.
538 return symbol.value;
539 }
540
IsLinuxKernelBinary() const541 bool ElfLoader::IsLinuxKernelBinary() const {
542 return is_linux_kernel_binary_;
543 }
544
IsLittleEndianBinary() const545 bool ElfLoader::IsLittleEndianBinary() const {
546 return is_little_endian_binary_;
547 }
548
549 } // namespace elf
550 } // namespace stg
551