• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "elf_reader.h"
21 
22 #include <cstddef>
23 #include <functional>
24 #include <map>
25 #include <memory>
26 #include <optional>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 #include <vector>
31 
32 #include "dwarf_processor.h"
33 #include "dwarf_wrappers.h"
34 #include "elf_loader.h"
35 #include "error.h"
36 #include "filter.h"
37 #include "graph.h"
38 #include "reader_options.h"
39 #include "runtime.h"
40 #include "type_normalisation.h"
41 #include "type_resolution.h"
42 #include "unification.h"
43 
44 namespace stg {
45 namespace elf {
46 namespace internal {
47 
48 namespace {
49 
50 template <typename M, typename K>
MaybeGet(const M & map,const K & key)51 std::optional<typename M::mapped_type> MaybeGet(const M& map, const K& key) {
52   const auto it = map.find(key);
53   if (it == map.end()) {
54     return {};
55   }
56   return {it->second};
57 }
58 
59 }  // namespace
60 
ConvertSymbolType(SymbolTableEntry::SymbolType symbol_type)61 ElfSymbol::SymbolType ConvertSymbolType(
62     SymbolTableEntry::SymbolType symbol_type) {
63   switch (symbol_type) {
64     case SymbolTableEntry::SymbolType::OBJECT:
65       return ElfSymbol::SymbolType::OBJECT;
66     case SymbolTableEntry::SymbolType::FUNCTION:
67       return ElfSymbol::SymbolType::FUNCTION;
68     case SymbolTableEntry::SymbolType::COMMON:
69       return ElfSymbol::SymbolType::COMMON;
70     case SymbolTableEntry::SymbolType::TLS:
71       return ElfSymbol::SymbolType::TLS;
72     case SymbolTableEntry::SymbolType::GNU_IFUNC:
73       return ElfSymbol::SymbolType::GNU_IFUNC;
74     default:
75       Die() << "Unsupported ELF symbol type: " << symbol_type;
76   }
77 }
78 
GetKsymtabSymbols(const SymbolTable & symbols)79 SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols) {
80   constexpr std::string_view kKsymtabPrefix = "__ksymtab_";
81   SymbolNameList result;
82   result.reserve(symbols.size() / 2);
83   for (const auto& symbol : symbols) {
84     if (symbol.name.substr(0, kKsymtabPrefix.size()) == kKsymtabPrefix) {
85       result.emplace(symbol.name.substr(kKsymtabPrefix.size()));
86     }
87   }
88   return result;
89 }
90 
GetCRCValuesMap(const SymbolTable & symbols,const ElfLoader & elf)91 CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf) {
92   constexpr std::string_view kCRCPrefix = "__crc_";
93 
94   CRCValuesMap crc_values;
95 
96   for (const auto& symbol : symbols) {
97     const std::string_view name = symbol.name;
98     if (name.substr(0, kCRCPrefix.size()) == kCRCPrefix) {
99       std::string_view name_suffix = name.substr(kCRCPrefix.size());
100       if (!crc_values.emplace(name_suffix, elf.GetElfSymbolCRC(symbol))
101                .second) {
102         Die() << "Multiple CRC values for symbol '" << name_suffix << '\'';
103       }
104     }
105   }
106 
107   return crc_values;
108 }
109 
GetNamespacesMap(const SymbolTable & symbols,const ElfLoader & elf)110 NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
111                                const ElfLoader& elf) {
112   constexpr std::string_view kNSPrefix = "__kstrtabns_";
113 
114   NamespacesMap namespaces;
115 
116   for (const auto& symbol : symbols) {
117     const std::string_view name = symbol.name;
118     if (name.substr(0, kNSPrefix.size()) == kNSPrefix) {
119       const std::string_view name_suffix = name.substr(kNSPrefix.size());
120       const std::string_view ns = elf.GetElfSymbolNamespace(symbol);
121       if (ns.empty()) {
122         // The global namespace is explicitly represented as the empty string,
123         // but the common interpretation is that such symbols lack an export
124         // namespace.
125         continue;
126       }
127       if (!namespaces.emplace(name_suffix, ns).second) {
128         Die() << "Multiple namespaces for symbol '" << name_suffix << '\'';
129       }
130     }
131   }
132 
133   return namespaces;
134 }
135 
GetCFIAddressMap(const SymbolTable & symbols,const ElfLoader & elf)136 AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) {
137   AddressMap name_to_address;
138   for (const auto& symbol : symbols) {
139     const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name);
140     const size_t address = elf.GetAbsoluteAddress(symbol);
141     if (!name_to_address.emplace(name_prefix, address).second) {
142       Die() << "Multiple CFI symbols referring to symbol '" << name_prefix
143             << '\'';
144     }
145   }
146   return name_to_address;
147 }
148 
IsPublicFunctionOrVariable(const SymbolTableEntry & symbol)149 bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
150   const auto symbol_type = symbol.symbol_type;
151   // Reject symbols that are not functions or variables.
152   if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION &&
153       symbol_type != SymbolTableEntry::SymbolType::OBJECT &&
154       symbol_type != SymbolTableEntry::SymbolType::TLS &&
155       symbol_type != SymbolTableEntry::SymbolType::GNU_IFUNC) {
156     return false;
157   }
158 
159   // Function or variable of ValueType::ABSOLUTE is not expected in any binary,
160   // but GNU `ld` adds object of such type for every version name defined in
161   // file. Such symbol should be rejected, because in fact it is not variable.
162   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
163     Check(symbol_type == SymbolTableEntry::SymbolType::OBJECT)
164         << "Unexpected function or variable with ABSOLUTE value type";
165     return false;
166   }
167 
168   // Undefined symbol is dependency of the binary but is not part of ABI
169   // provided by binary and should be rejected.
170   if (symbol.value_type == SymbolTableEntry::ValueType::UNDEFINED) {
171     return false;
172   }
173 
174   // Local symbol is not visible outside the binary, so it is not public
175   // and should be rejected.
176   if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
177     return false;
178   }
179 
180   // "Hidden" and "internal" visibility values mean that symbol is not public
181   // and should be rejected.
182   if (symbol.visibility == SymbolTableEntry::Visibility::HIDDEN ||
183       symbol.visibility == SymbolTableEntry::Visibility::INTERNAL) {
184     return false;
185   }
186 
187   return true;
188 }
189 
190 namespace {
191 
192 class Reader {
193  public:
Reader(Runtime & runtime,Graph & graph,const std::string & path,ReadOptions options,const std::unique_ptr<Filter> & file_filter)194   Reader(Runtime& runtime, Graph& graph, const std::string& path,
195          ReadOptions options, const std::unique_ptr<Filter>& file_filter)
196       : graph_(graph),
197         dwarf_(path),
198         elf_(dwarf_.GetElf()),
199         options_(options),
200         file_filter_(file_filter),
201         runtime_(runtime) {}
202 
Reader(Runtime & runtime,Graph & graph,char * data,size_t size,ReadOptions options,const std::unique_ptr<Filter> & file_filter)203   Reader(Runtime& runtime, Graph& graph, char* data, size_t size,
204          ReadOptions options, const std::unique_ptr<Filter>& file_filter)
205       : graph_(graph),
206         dwarf_(data, size),
207         elf_(dwarf_.GetElf()),
208         options_(options),
209         file_filter_(file_filter),
210         runtime_(runtime) {}
211 
212   Id Read();
213 
214  private:
215   using SymbolIndex =
216       std::map<std::pair<dwarf::Address, std::string>, std::vector<size_t>>;
217 
BuildRoot(const std::vector<std::pair<ElfSymbol,size_t>> & symbols)218   Id BuildRoot(const std::vector<std::pair<ElfSymbol, size_t>>& symbols) {
219     // On destruction, the unification object will remove or rewrite each graph
220     // node for which it has a mapping.
221     //
222     // Graph rewriting is expensive so an important optimisation is to restrict
223     // the nodes in consideration to the ones allocated by the DWARF processor
224     // here and any symbol or type roots that follow. This is done by setting
225     // the starting node ID to be the current graph limit.
226     Unification unification(runtime_, graph_, graph_.Limit());
227 
228     const dwarf::Types types = dwarf::Process(
229         dwarf_, elf_.IsLittleEndianBinary(), file_filter_, graph_);
230 
231     // A less important optimisation is avoiding copying the mapping array as it
232     // is populated. This is done by reserving space to the new graph limit.
233     unification.Reserve(graph_.Limit());
234 
235     // fill address to id
236     //
237     // In general, we want to handle as many of the following cases as possible.
238     // In practice, determining the correct ELF-DWARF match may be impossible.
239     //
240     // * compiler-driven aliasing - multiple symbols with same address
241     // * zero-size symbol false aliasing - multiple symbols and types with same
242     //   address
243     // * weak/strong linkage symbols - multiple symbols and types with same
244     //   address
245     // * assembly symbols - multiple declarations but no definition and no
246     //   address in DWARF.
247     SymbolIndex address_name_to_index;
248     for (size_t i = 0; i < types.symbols.size(); ++i) {
249       const auto& symbol = types.symbols[i];
250 
251       const auto& name =
252           symbol.linkage_name.has_value() ? *symbol.linkage_name : symbol.name;
253       address_name_to_index[std::make_pair(symbol.address, name)].push_back(i);
254     }
255 
256     std::map<std::string, Id> symbols_map;
257     for (auto [symbol, address] : symbols) {
258       // TODO: add VersionInfoToString to SymbolKey name
259       // TODO: check for uniqueness of SymbolKey in map after
260       // support for version info
261       MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol,
262                        unification);
263       symbols_map.emplace(VersionedSymbolName(symbol),
264                           graph_.Add<ElfSymbol>(symbol));
265     }
266 
267     std::map<std::string, Id> types_map;
268     if (options_.Test(ReadOptions::TYPE_ROOTS)) {
269       const InterfaceKey get_key(graph_);
270       for (const auto id : types.named_type_ids) {
271         const auto [it, inserted] = types_map.emplace(get_key(id), id);
272         if (!inserted && !unification.Unify(id, it->second)) {
273           Die() << "found conflicting interface type: " << it->first;
274         }
275       }
276     }
277 
278     Id root = graph_.Add<Interface>(
279         std::move(symbols_map), std::move(types_map));
280 
281     // Use all named types and DWARF declarations as roots for type resolution.
282     std::vector<Id> roots;
283     roots.reserve(types.named_type_ids.size() + types.symbols.size() + 1);
284     for (const auto& symbol : types.symbols) {
285       roots.push_back(symbol.id);
286     }
287     for (const auto id : types.named_type_ids) {
288       roots.push_back(id);
289     }
290     roots.push_back(root);
291 
292     stg::ResolveTypes(runtime_, graph_, unification, {roots});
293 
294     unification.Update(root);
295     return root;
296   }
297 
IsEqual(Unification & unification,const dwarf::Types::Symbol & lhs,const dwarf::Types::Symbol & rhs)298   static bool IsEqual(Unification& unification,
299                       const dwarf::Types::Symbol& lhs,
300                       const dwarf::Types::Symbol& rhs) {
301     return lhs.name == rhs.name && lhs.linkage_name == rhs.linkage_name
302         && lhs.address == rhs.address && unification.Unify(lhs.id, rhs.id);
303   }
304 
SymbolTableEntryToElfSymbol(const CRCValuesMap & crc_values,const NamespacesMap & namespaces,const SymbolTableEntry & symbol)305   static ElfSymbol SymbolTableEntryToElfSymbol(
306       const CRCValuesMap& crc_values, const NamespacesMap& namespaces,
307       const SymbolTableEntry& symbol) {
308     return ElfSymbol(
309         /* symbol_name = */ std::string(symbol.name),
310         /* version_info = */ std::nullopt,
311         /* is_defined = */
312         symbol.value_type != SymbolTableEntry::ValueType::UNDEFINED,
313         /* symbol_type = */ ConvertSymbolType(symbol.symbol_type),
314         /* binding = */ symbol.binding,
315         /* visibility = */ symbol.visibility,
316         /* crc = */ MaybeGet(crc_values, std::string(symbol.name)),
317         /* ns = */ MaybeGet(namespaces, std::string(symbol.name)),
318         /* type_id = */ std::nullopt,
319         /* full_name = */ std::nullopt);
320   }
321 
MaybeAddTypeInfo(const SymbolIndex & address_name_to_index,const std::vector<dwarf::Types::Symbol> & dwarf_symbols,size_t address_value,ElfSymbol & node,Unification & unification)322   static void MaybeAddTypeInfo(
323       const SymbolIndex& address_name_to_index,
324       const std::vector<dwarf::Types::Symbol>& dwarf_symbols,
325       size_t address_value, ElfSymbol& node, Unification& unification) {
326     const bool is_tls = node.symbol_type == ElfSymbol::SymbolType::TLS;
327     if (is_tls) {
328       // TLS symbols address may be incorrect because of unsupported
329       // relocations. Resetting it to zero the same way as it is done in
330       // dwarf::Entry::GetAddressFromLocation.
331       // TODO: match TLS variables by address
332       address_value = 0;
333     }
334     const dwarf::Address address{.value = address_value, .is_tls = is_tls};
335     // try to find the first symbol with given address
336     const auto start_it = address_name_to_index.lower_bound(
337         std::make_pair(address, std::string()));
338     auto best_symbols_it = address_name_to_index.end();
339     bool matched_by_name = false;
340     size_t candidates = 0;
341     for (auto it = start_it;
342          it != address_name_to_index.end() && it->first.first == address;
343          ++it) {
344       ++candidates;
345       // We have at least matching addresses.
346       if (it->first.second == node.symbol_name) {
347         // If we have also matching names we can stop looking further.
348         matched_by_name = true;
349         best_symbols_it = it;
350         break;
351       }
352       if (best_symbols_it == address_name_to_index.end()) {
353         // Otherwise keep the first match.
354         best_symbols_it = it;
355       }
356     }
357     if (best_symbols_it != address_name_to_index.end()) {
358       const auto& best_symbols = best_symbols_it->second;
359       Check(!best_symbols.empty()) << "best_symbols.empty()";
360       const auto& best_symbol = dwarf_symbols[best_symbols[0]];
361       for (size_t i = 1; i < best_symbols.size(); ++i) {
362         const auto& other = dwarf_symbols[best_symbols[i]];
363         // TODO: allow "compatible" duplicates, for example
364         // "void foo(int bar)" vs "void foo(const int bar)"
365         if (!IsEqual(unification, best_symbol, other)) {
366           Die() << "Duplicate DWARF symbol: address="
367                 << best_symbol.address << ", name=" << best_symbol.name;
368         }
369       }
370       if (best_symbol.name.empty()) {
371         Die() << "DWARF symbol (address = " << best_symbol.address
372               << ", linkage_name = "
373               << best_symbol.linkage_name.value_or("{missing}")
374               << " should have a name";
375       }
376       // There may be multiple DWARF symbols with same address (zero-length
377       // arrays), or ELF symbol has different name from DWARF symbol (aliases).
378       // But if we have both situations at once, we can't match ELF to DWARF and
379       // it should be fixed in analysed binary source code.
380       Check(matched_by_name || candidates == 1)
381           << "multiple candidates without matching names, best_symbol.name="
382           << best_symbol.name;
383       node.type_id = best_symbol.id;
384       node.full_name = best_symbol.name;
385     }
386   }
387 
388   Graph& graph_;
389   // The order of the following two fields is important because ElfLoader uses
390   // an Elf* from dwarf::Handler without owning it.
391   dwarf::Handler dwarf_;
392   elf::ElfLoader elf_;
393   ReadOptions options_;
394   const std::unique_ptr<Filter>& file_filter_;
395   Runtime& runtime_;
396 };
397 
Read()398 Id Reader::Read() {
399   const auto all_symbols = elf_.GetElfSymbols();
400   const bool is_linux_kernel = elf_.IsLinuxKernelBinary();
401   const SymbolNameList ksymtab_symbols =
402       is_linux_kernel ? GetKsymtabSymbols(all_symbols) : SymbolNameList();
403 
404   CRCValuesMap crc_values;
405   NamespacesMap namespaces;
406   if (is_linux_kernel) {
407     crc_values = GetCRCValuesMap(all_symbols, elf_);
408     namespaces = GetNamespacesMap(all_symbols, elf_);
409   }
410 
411   const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_);
412 
413   std::vector<std::pair<ElfSymbol, size_t>> symbols;
414   symbols.reserve(all_symbols.size());
415   for (const auto& symbol : all_symbols) {
416     if (IsPublicFunctionOrVariable(symbol) &&
417         (!is_linux_kernel || ksymtab_symbols.count(symbol.name))) {
418       const auto cfi_it = cfi_address_map.find(std::string(symbol.name));
419       const size_t address = cfi_it != cfi_address_map.end()
420                                  ? cfi_it->second
421                                  : elf_.GetAbsoluteAddress(symbol);
422       symbols.emplace_back(
423           SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol), address);
424     }
425   }
426   symbols.shrink_to_fit();
427 
428   Id root = BuildRoot(symbols);
429 
430   // Types produced by ELF/DWARF readers may require removing useless
431   // qualifiers.
432   RemoveUselessQualifiers(graph_, root);
433 
434   return root;
435 }
436 
437 }  // namespace
438 }  // namespace internal
439 
Read(Runtime & runtime,Graph & graph,const std::string & path,ReadOptions options,const std::unique_ptr<Filter> & file_filter)440 Id Read(Runtime& runtime, Graph& graph, const std::string& path,
441         ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
442   return internal::Reader(runtime, graph, path, options, file_filter).Read();
443 }
444 
Read(Runtime & runtime,Graph & graph,char * data,size_t size,ReadOptions options,const std::unique_ptr<Filter> & file_filter)445 Id Read(Runtime& runtime, Graph& graph, char* data, size_t size,
446         ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
447   return internal::Reader(runtime, graph, data, size, options, file_filter)
448       .Read();
449 }
450 
451 }  // namespace elf
452 }  // namespace stg
453