• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "elf_reader.h"
21 
22 #include <cstddef>
23 #include <map>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30 
31 #include "dwarf_processor.h"
32 #include "dwarf_wrappers.h"
33 #include "elf_dwarf_handle.h"
34 #include "elf_loader.h"
35 #include "error.h"
36 #include "filter.h"
37 #include "graph.h"
38 #include "hex.h"
39 #include "reader_options.h"
40 #include "runtime.h"
41 #include "type_normalisation.h"
42 #include "type_resolution.h"
43 #include "unification.h"
44 
45 namespace stg {
46 namespace elf {
47 namespace internal {
48 
49 namespace {
50 
51 template <typename M, typename K>
MaybeGet(const M & map,const K & key)52 std::optional<typename M::mapped_type> MaybeGet(const M& map, const K& key) {
53   const auto it = map.find(key);
54   if (it == map.end()) {
55     return {};
56   }
57   return {it->second};
58 }
59 
60 }  // namespace
61 
ConvertSymbolType(SymbolTableEntry::SymbolType symbol_type)62 ElfSymbol::SymbolType ConvertSymbolType(
63     SymbolTableEntry::SymbolType symbol_type) {
64   switch (symbol_type) {
65     case SymbolTableEntry::SymbolType::NOTYPE:
66       return ElfSymbol::SymbolType::NOTYPE;
67     case SymbolTableEntry::SymbolType::OBJECT:
68       return ElfSymbol::SymbolType::OBJECT;
69     case SymbolTableEntry::SymbolType::FUNCTION:
70       return ElfSymbol::SymbolType::FUNCTION;
71     case SymbolTableEntry::SymbolType::COMMON:
72       return ElfSymbol::SymbolType::COMMON;
73     case SymbolTableEntry::SymbolType::TLS:
74       return ElfSymbol::SymbolType::TLS;
75     case SymbolTableEntry::SymbolType::GNU_IFUNC:
76       return ElfSymbol::SymbolType::GNU_IFUNC;
77     default:
78       Die() << "Unsupported ELF symbol type: " << symbol_type;
79   }
80 }
81 
GetKsymtabSymbols(const SymbolTable & symbols)82 SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols) {
83   constexpr std::string_view kKsymtabPrefix = "__ksymtab_";
84   SymbolNameList result;
85   result.reserve(symbols.size() / 2);
86   for (const auto& symbol : symbols) {
87     if (symbol.name.substr(0, kKsymtabPrefix.size()) == kKsymtabPrefix) {
88       result.emplace(symbol.name.substr(kKsymtabPrefix.size()));
89     }
90   }
91   return result;
92 }
93 
GetCRCValuesMap(const SymbolTable & symbols,const ElfLoader & elf)94 CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf) {
95   constexpr std::string_view kCRCPrefix = "__crc_";
96 
97   CRCValuesMap crc_values;
98 
99   for (const auto& symbol : symbols) {
100     const std::string_view name = symbol.name;
101     if (name.substr(0, kCRCPrefix.size()) == kCRCPrefix) {
102       const std::string_view name_suffix = name.substr(kCRCPrefix.size());
103       if (!crc_values.emplace(name_suffix, elf.GetElfSymbolCRC(symbol))
104                .second) {
105         Die() << "Multiple CRC values for symbol '" << name_suffix << '\'';
106       }
107     }
108   }
109 
110   return crc_values;
111 }
112 
GetNamespacesMap(const SymbolTable & symbols,const ElfLoader & elf)113 NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
114                                const ElfLoader& elf) {
115   constexpr std::string_view kNSPrefix = "__kstrtabns_";
116 
117   NamespacesMap namespaces;
118 
119   for (const auto& symbol : symbols) {
120     const std::string_view name = symbol.name;
121     if (name.substr(0, kNSPrefix.size()) == kNSPrefix) {
122       const std::string_view name_suffix = name.substr(kNSPrefix.size());
123       const std::string_view ns = elf.GetElfSymbolNamespace(symbol);
124       if (ns.empty()) {
125         // The global namespace is explicitly represented as the empty string,
126         // but the common interpretation is that such symbols lack an export
127         // namespace.
128         continue;
129       }
130       if (!namespaces.emplace(name_suffix, ns).second) {
131         Die() << "Multiple namespaces for symbol '" << name_suffix << '\'';
132       }
133     }
134   }
135 
136   return namespaces;
137 }
138 
GetCFIAddressMap(const SymbolTable & symbols,const ElfLoader & elf)139 AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) {
140   AddressMap name_to_address;
141   for (const auto& symbol : symbols) {
142     const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name);
143     const size_t address = elf.GetAbsoluteAddress(symbol);
144     if (!name_to_address.emplace(name_prefix, address).second) {
145       Die() << "Multiple CFI symbols referring to symbol '" << name_prefix
146             << '\'';
147     }
148   }
149   return name_to_address;
150 }
151 
IsPublicFunctionOrVariable(const SymbolTableEntry & symbol)152 bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
153   const auto symbol_type = symbol.symbol_type;
154   // Reject symbols that are not functions or variables.
155   if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION &&
156       symbol_type != SymbolTableEntry::SymbolType::OBJECT &&
157       symbol_type != SymbolTableEntry::SymbolType::TLS &&
158       symbol_type != SymbolTableEntry::SymbolType::GNU_IFUNC) {
159     return false;
160   }
161 
162   // Function or variable of ValueType::ABSOLUTE is not expected in any binary,
163   // but GNU `ld` adds object of such type for every version name defined in
164   // file. Such symbol should be rejected, because in fact it is not variable.
165   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
166     Check(symbol_type == SymbolTableEntry::SymbolType::OBJECT)
167         << "Unexpected function or variable with ABSOLUTE value type";
168     return false;
169   }
170 
171   // Undefined symbol is dependency of the binary but is not part of ABI
172   // provided by binary and should be rejected.
173   if (symbol.value_type == SymbolTableEntry::ValueType::UNDEFINED) {
174     return false;
175   }
176 
177   // Common symbols can only be seen in .o files emitted by old compilers.
178   if (symbol.value_type == SymbolTableEntry::ValueType::COMMON) {
179     Die() << "unexpected COMMON symbol: '" << symbol.name << '\'';
180   }
181 
182   // Local symbol is not visible outside the binary, so it is not public
183   // and should be rejected.
184   if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
185     return false;
186   }
187 
188   // "Hidden" and "internal" visibility values mean that symbol is not public
189   // and should be rejected.
190   if (symbol.visibility == SymbolTableEntry::Visibility::HIDDEN ||
191       symbol.visibility == SymbolTableEntry::Visibility::INTERNAL) {
192     return false;
193   }
194 
195   return true;
196 }
197 
IsLinuxKernelFunctionOrVariable(const SymbolNameList & ksymtab,const SymbolTableEntry & symbol)198 bool IsLinuxKernelFunctionOrVariable(const SymbolNameList& ksymtab,
199                                      const SymbolTableEntry& symbol) {
200   // We use symbol name extracted from __ksymtab_ symbols as a proxy for the
201   // real symbol in the ksymtab. Such names can still be duplicated by LOCAL
202   // symbols so drop them to avoid false matches.
203   if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
204     return false;
205   }
206 
207   // TODO: handle undefined ksymtab symbols
208   if (symbol.value_type == SymbolTableEntry::ValueType::UNDEFINED) {
209     return false;
210   }
211 
212   // Common symbols can only be seen in .o files emitted by old compilers.
213   if (symbol.value_type == SymbolTableEntry::ValueType::COMMON) {
214     Die() << "unexpected COMMON symbol: '" << symbol.name << '\'';
215   }
216 
217   // Symbol linkage is determined by the ksymtab.
218   if (!ksymtab.contains(symbol.name)) {
219     return false;
220   }
221 
222   const auto symbol_type = symbol.symbol_type;
223   // Keep function and object symbols, but not GNU indirect function or TLS ones
224   // as the module loader does not expect them.
225   if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION
226       && symbol_type != SymbolTableEntry::SymbolType::OBJECT) {
227     // TODO: upgrade to Die after more testing / fixing
228     Warn() << "ignoring Linux kernel symbol '" << symbol.name << "' in section "
229            << Hex(symbol.section_index) << " of type " << symbol_type;
230     return false;
231   }
232 
233   return true;
234 }
235 
236 namespace {
237 
238 class Reader {
239  public:
Reader(Runtime & runtime,Graph & graph,ElfDwarfHandle & elf_dwarf_handle,ReadOptions options,const std::unique_ptr<Filter> & file_filter)240   Reader(Runtime& runtime, Graph& graph, ElfDwarfHandle& elf_dwarf_handle,
241          ReadOptions options, const std::unique_ptr<Filter>& file_filter)
242       : graph_(graph),
243         elf_dwarf_handle_(elf_dwarf_handle),
244         elf_(elf_dwarf_handle_.GetElf()),
245         options_(options),
246         file_filter_(file_filter),
247         runtime_(runtime) {}
248 
249   Id Read();
250 
251  private:
252   using SymbolIndex =
253       std::map<std::pair<dwarf::Location, std::string>, std::vector<size_t>>;
254 
255   void GetLinuxKernelSymbols(
256       const std::vector<SymbolTableEntry>& all_symbols,
257       std::vector<std::pair<ElfSymbol, dwarf::Location>>& symbols) const;
258   void GetUserspaceSymbols(
259       const std::vector<SymbolTableEntry>& all_symbols,
260       std::vector<std::pair<ElfSymbol, dwarf::Location>>& symbols) const;
261 
BuildRoot(const std::vector<std::pair<ElfSymbol,dwarf::Location>> & symbols)262   Id BuildRoot(
263       const std::vector<std::pair<ElfSymbol, dwarf::Location>>& symbols) {
264     // On destruction, the unification object will remove or rewrite each graph
265     // node for which it has a mapping.
266     //
267     // Graph rewriting is expensive so an important optimisation is to restrict
268     // the nodes in consideration to the ones allocated by the DWARF processor
269     // here and any symbol or type roots that follow. This is done by setting
270     // the starting node ID to be the current graph limit.
271     const Id start = graph_.Limit();
272 
273     const dwarf::Types types =
274         dwarf::Process(elf_dwarf_handle_.GetDwarf(),
275                        elf_.IsLittleEndianBinary(), file_filter_, graph_);
276 
277     // A less important optimisation is avoiding copying the mapping array as it
278     // is populated. This is done by reserving space to the new graph limit.
279     Unification unification(runtime_, graph_, start, graph_.Limit());
280 
281     // fill location to id
282     //
283     // In general, we want to handle as many of the following cases as possible.
284     // In practice, determining the correct ELF-DWARF match may be impossible.
285     //
286     // * compiler-driven aliasing - multiple symbols with same address
287     // * zero-size symbol false aliasing - multiple symbols and types with same
288     //   address
289     // * weak/strong linkage symbols - multiple symbols and types with same
290     //   address
291     // * assembly symbols - multiple declarations but no definition and no
292     //   address in DWARF.
293     SymbolIndex location_and_name_to_index;
294     for (size_t i = 0; i < types.symbols.size(); ++i) {
295       const auto& s = types.symbols[i];
296       location_and_name_to_index[{s.location, s.linkage_name}].push_back(i);
297     }
298 
299     std::map<std::string, Id> symbols_map;
300     for (auto [symbol, address] : symbols) {
301       // TODO: add VersionInfoToString to SymbolKey name
302       // TODO: check for uniqueness of SymbolKey in map after
303       // support for version info
304       MaybeAddTypeInfo(location_and_name_to_index, types.symbols, address,
305                        symbol, unification);
306       symbols_map.emplace(VersionedSymbolName(symbol),
307                           graph_.Add<ElfSymbol>(symbol));
308     }
309 
310     std::map<std::string, Id> types_map;
311     if (options_.Test(ReadOptions::TYPE_ROOTS)) {
312       const InterfaceKey get_key(graph_);
313       for (const auto id : types.named_type_ids) {
314         const auto [it, inserted] = types_map.emplace(get_key(id), id);
315         if (!inserted && !unification.Unify(id, it->second)) {
316           Die() << "found conflicting interface type: " << it->first;
317         }
318       }
319     }
320 
321     const Id root = graph_.Add<Interface>(
322         std::move(symbols_map), std::move(types_map));
323 
324     // Use all named types and DWARF declarations as roots for type resolution.
325     std::vector<Id> roots;
326     roots.reserve(types.named_type_ids.size() + types.symbols.size() + 1);
327     for (const auto& symbol : types.symbols) {
328       roots.push_back(symbol.type_id);
329     }
330     for (const auto id : types.named_type_ids) {
331       roots.push_back(id);
332     }
333     roots.push_back(root);
334 
335     stg::ResolveTypes(runtime_, graph_, unification, {roots});
336 
337     return unification.Find(root);
338   }
339 
IsEqual(Unification & unification,const dwarf::Types::Symbol & lhs,const dwarf::Types::Symbol & rhs)340   static bool IsEqual(Unification& unification,
341                       const dwarf::Types::Symbol& lhs,
342                       const dwarf::Types::Symbol& rhs) {
343     return lhs.scoped_name == rhs.scoped_name
344         && lhs.linkage_name == rhs.linkage_name
345         && lhs.location == rhs.location
346         && unification.Unify(lhs.type_id, rhs.type_id);
347   }
348 
SymbolTableEntryToElfSymbol(const CRCValuesMap & crc_values,const NamespacesMap & namespaces,const SymbolTableEntry & symbol)349   static ElfSymbol SymbolTableEntryToElfSymbol(
350       const CRCValuesMap& crc_values, const NamespacesMap& namespaces,
351       const SymbolTableEntry& symbol) {
352     return {
353         /* symbol_name = */ std::string(symbol.name),
354         /* version_info = */ std::nullopt,
355         /* is_defined = */
356         symbol.value_type != SymbolTableEntry::ValueType::UNDEFINED,
357         /* symbol_type = */ ConvertSymbolType(symbol.symbol_type),
358         /* binding = */ symbol.binding,
359         /* visibility = */ symbol.visibility,
360         /* crc = */ MaybeGet(crc_values, std::string(symbol.name)),
361         /* ns = */ MaybeGet(namespaces, std::string(symbol.name)),
362         /* type_id = */ std::nullopt,
363         /* full_name = */ std::nullopt};
364   }
365 
MaybeAddTypeInfo(const SymbolIndex & location_and_name_to_index,const std::vector<dwarf::Types::Symbol> & dwarf_symbols,dwarf::Location location,ElfSymbol & node,Unification & unification)366   static void MaybeAddTypeInfo(
367       const SymbolIndex& location_and_name_to_index,
368       const std::vector<dwarf::Types::Symbol>& dwarf_symbols,
369       dwarf::Location location, ElfSymbol& node, Unification& unification) {
370     // try to find the first symbol with given location
371     const auto start_it = location_and_name_to_index.lower_bound(
372         std::make_pair(location, std::string()));
373     auto best_symbols_it = location_and_name_to_index.end();
374     bool matched_by_name = false;
375     size_t candidates = 0;
376     for (auto it = start_it;
377          it != location_and_name_to_index.end() && it->first.first == location;
378          ++it) {
379       ++candidates;
380       // We have at least matching locations.
381       if (it->first.second == node.symbol_name) {
382         // If we have also matching names we can stop looking further.
383         matched_by_name = true;
384         best_symbols_it = it;
385         break;
386       }
387       if (best_symbols_it == location_and_name_to_index.end()) {
388         // Otherwise keep the first match.
389         best_symbols_it = it;
390       }
391     }
392     if (best_symbols_it != location_and_name_to_index.end()) {
393       const auto& best_symbols = best_symbols_it->second;
394       Check(!best_symbols.empty()) << "best_symbols.empty()";
395       const auto& best_symbol = dwarf_symbols[best_symbols[0]];
396       for (size_t i = 1; i < best_symbols.size(); ++i) {
397         const auto& other = dwarf_symbols[best_symbols[i]];
398         // TODO: allow "compatible" duplicates, for example
399         // "void foo(int bar)" vs "void foo(const int bar)"
400         if (!IsEqual(unification, best_symbol, other)) {
401           Die() << "Duplicate DWARF symbol: location="
402                 << best_symbols_it->first.first
403                 << ", name=" << best_symbols_it->first.second;
404         }
405       }
406       if (best_symbol.scoped_name.empty()) {
407         Die() << "Anonymous DWARF symbol: location="
408               << best_symbols_it->first.first
409               << ", name=" << best_symbols_it->first.second;
410       }
411       // There may be multiple DWARF symbols with same address (zero-length
412       // arrays), or ELF symbol has different name from DWARF symbol (aliases).
413       // But if we have both situations at once, we can't match ELF to DWARF and
414       // it should be fixed in analysed binary source code.
415       Check(matched_by_name || candidates == 1)
416           << "Multiple candidate symbols without matching name: location="
417           << best_symbols_it->first.first
418           << ", name=" << best_symbols_it->first.second;
419       node.type_id = best_symbol.type_id;
420       node.full_name = best_symbol.scoped_name;
421     }
422   }
423 
424   Graph& graph_;
425   ElfDwarfHandle& elf_dwarf_handle_;
426   ElfLoader elf_;
427   ReadOptions options_;
428   const std::unique_ptr<Filter>& file_filter_;
429   Runtime& runtime_;
430 };
431 
GetLinuxKernelSymbols(const std::vector<SymbolTableEntry> & all_symbols,std::vector<std::pair<ElfSymbol,dwarf::Location>> & symbols) const432 void Reader::GetLinuxKernelSymbols(
433     const std::vector<SymbolTableEntry>& all_symbols,
434     std::vector<std::pair<ElfSymbol, dwarf::Location>>& symbols) const {
435   const auto crcs = GetCRCValuesMap(all_symbols, elf_);
436   const auto namespaces = GetNamespacesMap(all_symbols, elf_);
437   const auto ksymtab_symbols = GetKsymtabSymbols(all_symbols);
438   for (const auto& symbol : all_symbols) {
439     if (IsLinuxKernelFunctionOrVariable(ksymtab_symbols, symbol)) {
440       const size_t address = elf_.GetAbsoluteAddress(symbol);
441       symbols.emplace_back(
442           SymbolTableEntryToElfSymbol(crcs, namespaces, symbol),
443           dwarf::Location{dwarf::Location::Kind::ADDRESS, address});
444     }
445   }
446 }
447 
GetUserspaceSymbols(const std::vector<SymbolTableEntry> & all_symbols,std::vector<std::pair<ElfSymbol,dwarf::Location>> & symbols) const448 void Reader::GetUserspaceSymbols(
449     const std::vector<SymbolTableEntry>& all_symbols,
450     std::vector<std::pair<ElfSymbol, dwarf::Location>>& symbols) const {
451   const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_);
452   for (const auto& symbol : all_symbols) {
453     if (IsPublicFunctionOrVariable(symbol)) {
454       if (symbol.symbol_type == SymbolTableEntry::SymbolType::TLS) {
455         // TLS symbols offsets may be incorrect because of unsupported
456         // relocations. Resetting it to zero the same way as it is done in
457         // dwarf::Entry::GetLocationFromExpression.
458         // TODO: match TLS variables by offset
459         symbols.emplace_back(SymbolTableEntryToElfSymbol({}, {}, symbol),
460                              dwarf::Location{dwarf::Location::Kind::TLS, 0});
461       } else {
462         const auto cfi_it = cfi_address_map.find(std::string(symbol.name));
463         const size_t absolute = cfi_it != cfi_address_map.end()
464                                     ? cfi_it->second
465                                     : elf_.GetAbsoluteAddress(symbol);
466         symbols.emplace_back(
467             SymbolTableEntryToElfSymbol({}, {}, symbol),
468             dwarf::Location{dwarf::Location::Kind::ADDRESS, absolute});
469       }
470     }
471   }
472 }
473 
Read()474 Id Reader::Read() {
475   const auto all_symbols = elf_.GetElfSymbols();
476   const auto get_symbols = elf_.IsLinuxKernelBinary()
477                            ? &Reader::GetLinuxKernelSymbols
478                            : &Reader::GetUserspaceSymbols;
479   std::vector<std::pair<ElfSymbol, dwarf::Location>> symbols;
480   symbols.reserve(all_symbols.size());
481   (this->*get_symbols)(all_symbols, symbols);
482   symbols.shrink_to_fit();
483 
484   const Id root = BuildRoot(symbols);
485 
486   // Types produced by ELF/DWARF readers may require removing useless
487   // qualifiers.
488   return RemoveUselessQualifiers(graph_, root);
489 }
490 
491 }  // namespace
492 }  // namespace internal
493 
Read(Runtime & runtime,Graph & graph,ElfDwarfHandle & elf_dwarf_handle,ReadOptions options,const std::unique_ptr<Filter> & file_filter)494 Id Read(Runtime& runtime, Graph& graph, ElfDwarfHandle& elf_dwarf_handle,
495         ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
496   return internal::Reader(runtime, graph, elf_dwarf_handle, options,
497                           file_filter)
498       .Read();
499 }
500 
501 }  // namespace elf
502 }  // namespace stg
503