1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19
20 #include "elf_reader.h"
21
22 #include <cstddef>
23 #include <functional>
24 #include <map>
25 #include <memory>
26 #include <optional>
27 #include <string>
28 #include <string_view>
29 #include <utility>
30 #include <vector>
31
32 #include "dwarf_processor.h"
33 #include "dwarf_wrappers.h"
34 #include "elf_loader.h"
35 #include "error.h"
36 #include "filter.h"
37 #include "graph.h"
38 #include "reader_options.h"
39 #include "runtime.h"
40 #include "type_normalisation.h"
41 #include "type_resolution.h"
42 #include "unification.h"
43
44 namespace stg {
45 namespace elf {
46 namespace internal {
47
48 namespace {
49
50 template <typename M, typename K>
MaybeGet(const M & map,const K & key)51 std::optional<typename M::mapped_type> MaybeGet(const M& map, const K& key) {
52 const auto it = map.find(key);
53 if (it == map.end()) {
54 return {};
55 }
56 return {it->second};
57 }
58
59 } // namespace
60
ConvertSymbolType(SymbolTableEntry::SymbolType symbol_type)61 ElfSymbol::SymbolType ConvertSymbolType(
62 SymbolTableEntry::SymbolType symbol_type) {
63 switch (symbol_type) {
64 case SymbolTableEntry::SymbolType::OBJECT:
65 return ElfSymbol::SymbolType::OBJECT;
66 case SymbolTableEntry::SymbolType::FUNCTION:
67 return ElfSymbol::SymbolType::FUNCTION;
68 case SymbolTableEntry::SymbolType::COMMON:
69 return ElfSymbol::SymbolType::COMMON;
70 case SymbolTableEntry::SymbolType::TLS:
71 return ElfSymbol::SymbolType::TLS;
72 case SymbolTableEntry::SymbolType::GNU_IFUNC:
73 return ElfSymbol::SymbolType::GNU_IFUNC;
74 default:
75 Die() << "Unsupported ELF symbol type: " << symbol_type;
76 }
77 }
78
GetKsymtabSymbols(const SymbolTable & symbols)79 SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols) {
80 constexpr std::string_view kKsymtabPrefix = "__ksymtab_";
81 SymbolNameList result;
82 result.reserve(symbols.size() / 2);
83 for (const auto& symbol : symbols) {
84 if (symbol.name.substr(0, kKsymtabPrefix.size()) == kKsymtabPrefix) {
85 result.emplace(symbol.name.substr(kKsymtabPrefix.size()));
86 }
87 }
88 return result;
89 }
90
GetCRCValuesMap(const SymbolTable & symbols,const ElfLoader & elf)91 CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf) {
92 constexpr std::string_view kCRCPrefix = "__crc_";
93
94 CRCValuesMap crc_values;
95
96 for (const auto& symbol : symbols) {
97 const std::string_view name = symbol.name;
98 if (name.substr(0, kCRCPrefix.size()) == kCRCPrefix) {
99 std::string_view name_suffix = name.substr(kCRCPrefix.size());
100 if (!crc_values.emplace(name_suffix, elf.GetElfSymbolCRC(symbol))
101 .second) {
102 Die() << "Multiple CRC values for symbol '" << name_suffix << '\'';
103 }
104 }
105 }
106
107 return crc_values;
108 }
109
GetNamespacesMap(const SymbolTable & symbols,const ElfLoader & elf)110 NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
111 const ElfLoader& elf) {
112 constexpr std::string_view kNSPrefix = "__kstrtabns_";
113
114 NamespacesMap namespaces;
115
116 for (const auto& symbol : symbols) {
117 const std::string_view name = symbol.name;
118 if (name.substr(0, kNSPrefix.size()) == kNSPrefix) {
119 const std::string_view name_suffix = name.substr(kNSPrefix.size());
120 const std::string_view ns = elf.GetElfSymbolNamespace(symbol);
121 if (ns.empty()) {
122 // The global namespace is explicitly represented as the empty string,
123 // but the common interpretation is that such symbols lack an export
124 // namespace.
125 continue;
126 }
127 if (!namespaces.emplace(name_suffix, ns).second) {
128 Die() << "Multiple namespaces for symbol '" << name_suffix << '\'';
129 }
130 }
131 }
132
133 return namespaces;
134 }
135
GetCFIAddressMap(const SymbolTable & symbols,const ElfLoader & elf)136 AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) {
137 AddressMap name_to_address;
138 for (const auto& symbol : symbols) {
139 const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name);
140 const size_t address = elf.GetAbsoluteAddress(symbol);
141 if (!name_to_address.emplace(name_prefix, address).second) {
142 Die() << "Multiple CFI symbols referring to symbol '" << name_prefix
143 << '\'';
144 }
145 }
146 return name_to_address;
147 }
148
IsPublicFunctionOrVariable(const SymbolTableEntry & symbol)149 bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
150 const auto symbol_type = symbol.symbol_type;
151 // Reject symbols that are not functions or variables.
152 if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION &&
153 symbol_type != SymbolTableEntry::SymbolType::OBJECT &&
154 symbol_type != SymbolTableEntry::SymbolType::TLS &&
155 symbol_type != SymbolTableEntry::SymbolType::GNU_IFUNC) {
156 return false;
157 }
158
159 // Function or variable of ValueType::ABSOLUTE is not expected in any binary,
160 // but GNU `ld` adds object of such type for every version name defined in
161 // file. Such symbol should be rejected, because in fact it is not variable.
162 if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
163 Check(symbol_type == SymbolTableEntry::SymbolType::OBJECT)
164 << "Unexpected function or variable with ABSOLUTE value type";
165 return false;
166 }
167
168 // Undefined symbol is dependency of the binary but is not part of ABI
169 // provided by binary and should be rejected.
170 if (symbol.value_type == SymbolTableEntry::ValueType::UNDEFINED) {
171 return false;
172 }
173
174 // Local symbol is not visible outside the binary, so it is not public
175 // and should be rejected.
176 if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
177 return false;
178 }
179
180 // "Hidden" and "internal" visibility values mean that symbol is not public
181 // and should be rejected.
182 if (symbol.visibility == SymbolTableEntry::Visibility::HIDDEN ||
183 symbol.visibility == SymbolTableEntry::Visibility::INTERNAL) {
184 return false;
185 }
186
187 return true;
188 }
189
190 namespace {
191
192 class Reader {
193 public:
Reader(Runtime & runtime,Graph & graph,const std::string & path,ReadOptions options,const std::unique_ptr<Filter> & file_filter)194 Reader(Runtime& runtime, Graph& graph, const std::string& path,
195 ReadOptions options, const std::unique_ptr<Filter>& file_filter)
196 : graph_(graph),
197 dwarf_(path),
198 elf_(dwarf_.GetElf()),
199 options_(options),
200 file_filter_(file_filter),
201 runtime_(runtime) {}
202
Reader(Runtime & runtime,Graph & graph,char * data,size_t size,ReadOptions options,const std::unique_ptr<Filter> & file_filter)203 Reader(Runtime& runtime, Graph& graph, char* data, size_t size,
204 ReadOptions options, const std::unique_ptr<Filter>& file_filter)
205 : graph_(graph),
206 dwarf_(data, size),
207 elf_(dwarf_.GetElf()),
208 options_(options),
209 file_filter_(file_filter),
210 runtime_(runtime) {}
211
212 Id Read();
213
214 private:
215 using SymbolIndex =
216 std::map<std::pair<dwarf::Address, std::string>, std::vector<size_t>>;
217
BuildRoot(const std::vector<std::pair<ElfSymbol,size_t>> & symbols)218 Id BuildRoot(const std::vector<std::pair<ElfSymbol, size_t>>& symbols) {
219 // On destruction, the unification object will remove or rewrite each graph
220 // node for which it has a mapping.
221 //
222 // Graph rewriting is expensive so an important optimisation is to restrict
223 // the nodes in consideration to the ones allocated by the DWARF processor
224 // here and any symbol or type roots that follow. This is done by setting
225 // the starting node ID to be the current graph limit.
226 Unification unification(runtime_, graph_, graph_.Limit());
227
228 const dwarf::Types types = dwarf::Process(
229 dwarf_, elf_.IsLittleEndianBinary(), file_filter_, graph_);
230
231 // A less important optimisation is avoiding copying the mapping array as it
232 // is populated. This is done by reserving space to the new graph limit.
233 unification.Reserve(graph_.Limit());
234
235 // fill address to id
236 //
237 // In general, we want to handle as many of the following cases as possible.
238 // In practice, determining the correct ELF-DWARF match may be impossible.
239 //
240 // * compiler-driven aliasing - multiple symbols with same address
241 // * zero-size symbol false aliasing - multiple symbols and types with same
242 // address
243 // * weak/strong linkage symbols - multiple symbols and types with same
244 // address
245 // * assembly symbols - multiple declarations but no definition and no
246 // address in DWARF.
247 SymbolIndex address_name_to_index;
248 for (size_t i = 0; i < types.symbols.size(); ++i) {
249 const auto& symbol = types.symbols[i];
250
251 const auto& name =
252 symbol.linkage_name.has_value() ? *symbol.linkage_name : symbol.name;
253 address_name_to_index[std::make_pair(symbol.address, name)].push_back(i);
254 }
255
256 std::map<std::string, Id> symbols_map;
257 for (auto [symbol, address] : symbols) {
258 // TODO: add VersionInfoToString to SymbolKey name
259 // TODO: check for uniqueness of SymbolKey in map after
260 // support for version info
261 MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol,
262 unification);
263 symbols_map.emplace(VersionedSymbolName(symbol),
264 graph_.Add<ElfSymbol>(symbol));
265 }
266
267 std::map<std::string, Id> types_map;
268 if (options_.Test(ReadOptions::TYPE_ROOTS)) {
269 const InterfaceKey get_key(graph_);
270 for (const auto id : types.named_type_ids) {
271 const auto [it, inserted] = types_map.emplace(get_key(id), id);
272 if (!inserted && !unification.Unify(id, it->second)) {
273 Die() << "found conflicting interface type: " << it->first;
274 }
275 }
276 }
277
278 Id root = graph_.Add<Interface>(
279 std::move(symbols_map), std::move(types_map));
280
281 // Use all named types and DWARF declarations as roots for type resolution.
282 std::vector<Id> roots;
283 roots.reserve(types.named_type_ids.size() + types.symbols.size() + 1);
284 for (const auto& symbol : types.symbols) {
285 roots.push_back(symbol.id);
286 }
287 for (const auto id : types.named_type_ids) {
288 roots.push_back(id);
289 }
290 roots.push_back(root);
291
292 stg::ResolveTypes(runtime_, graph_, unification, {roots});
293
294 unification.Update(root);
295 return root;
296 }
297
IsEqual(Unification & unification,const dwarf::Types::Symbol & lhs,const dwarf::Types::Symbol & rhs)298 static bool IsEqual(Unification& unification,
299 const dwarf::Types::Symbol& lhs,
300 const dwarf::Types::Symbol& rhs) {
301 return lhs.name == rhs.name && lhs.linkage_name == rhs.linkage_name
302 && lhs.address == rhs.address && unification.Unify(lhs.id, rhs.id);
303 }
304
SymbolTableEntryToElfSymbol(const CRCValuesMap & crc_values,const NamespacesMap & namespaces,const SymbolTableEntry & symbol)305 static ElfSymbol SymbolTableEntryToElfSymbol(
306 const CRCValuesMap& crc_values, const NamespacesMap& namespaces,
307 const SymbolTableEntry& symbol) {
308 return ElfSymbol(
309 /* symbol_name = */ std::string(symbol.name),
310 /* version_info = */ std::nullopt,
311 /* is_defined = */
312 symbol.value_type != SymbolTableEntry::ValueType::UNDEFINED,
313 /* symbol_type = */ ConvertSymbolType(symbol.symbol_type),
314 /* binding = */ symbol.binding,
315 /* visibility = */ symbol.visibility,
316 /* crc = */ MaybeGet(crc_values, std::string(symbol.name)),
317 /* ns = */ MaybeGet(namespaces, std::string(symbol.name)),
318 /* type_id = */ std::nullopt,
319 /* full_name = */ std::nullopt);
320 }
321
MaybeAddTypeInfo(const SymbolIndex & address_name_to_index,const std::vector<dwarf::Types::Symbol> & dwarf_symbols,size_t address_value,ElfSymbol & node,Unification & unification)322 static void MaybeAddTypeInfo(
323 const SymbolIndex& address_name_to_index,
324 const std::vector<dwarf::Types::Symbol>& dwarf_symbols,
325 size_t address_value, ElfSymbol& node, Unification& unification) {
326 const bool is_tls = node.symbol_type == ElfSymbol::SymbolType::TLS;
327 if (is_tls) {
328 // TLS symbols address may be incorrect because of unsupported
329 // relocations. Resetting it to zero the same way as it is done in
330 // dwarf::Entry::GetAddressFromLocation.
331 // TODO: match TLS variables by address
332 address_value = 0;
333 }
334 const dwarf::Address address{.value = address_value, .is_tls = is_tls};
335 // try to find the first symbol with given address
336 const auto start_it = address_name_to_index.lower_bound(
337 std::make_pair(address, std::string()));
338 auto best_symbols_it = address_name_to_index.end();
339 bool matched_by_name = false;
340 size_t candidates = 0;
341 for (auto it = start_it;
342 it != address_name_to_index.end() && it->first.first == address;
343 ++it) {
344 ++candidates;
345 // We have at least matching addresses.
346 if (it->first.second == node.symbol_name) {
347 // If we have also matching names we can stop looking further.
348 matched_by_name = true;
349 best_symbols_it = it;
350 break;
351 }
352 if (best_symbols_it == address_name_to_index.end()) {
353 // Otherwise keep the first match.
354 best_symbols_it = it;
355 }
356 }
357 if (best_symbols_it != address_name_to_index.end()) {
358 const auto& best_symbols = best_symbols_it->second;
359 Check(!best_symbols.empty()) << "best_symbols.empty()";
360 const auto& best_symbol = dwarf_symbols[best_symbols[0]];
361 for (size_t i = 1; i < best_symbols.size(); ++i) {
362 const auto& other = dwarf_symbols[best_symbols[i]];
363 // TODO: allow "compatible" duplicates, for example
364 // "void foo(int bar)" vs "void foo(const int bar)"
365 if (!IsEqual(unification, best_symbol, other)) {
366 Die() << "Duplicate DWARF symbol: address="
367 << best_symbol.address << ", name=" << best_symbol.name;
368 }
369 }
370 if (best_symbol.name.empty()) {
371 Die() << "DWARF symbol (address = " << best_symbol.address
372 << ", linkage_name = "
373 << best_symbol.linkage_name.value_or("{missing}")
374 << " should have a name";
375 }
376 // There may be multiple DWARF symbols with same address (zero-length
377 // arrays), or ELF symbol has different name from DWARF symbol (aliases).
378 // But if we have both situations at once, we can't match ELF to DWARF and
379 // it should be fixed in analysed binary source code.
380 Check(matched_by_name || candidates == 1)
381 << "multiple candidates without matching names, best_symbol.name="
382 << best_symbol.name;
383 node.type_id = best_symbol.id;
384 node.full_name = best_symbol.name;
385 }
386 }
387
388 Graph& graph_;
389 // The order of the following two fields is important because ElfLoader uses
390 // an Elf* from dwarf::Handler without owning it.
391 dwarf::Handler dwarf_;
392 elf::ElfLoader elf_;
393 ReadOptions options_;
394 const std::unique_ptr<Filter>& file_filter_;
395 Runtime& runtime_;
396 };
397
Read()398 Id Reader::Read() {
399 const auto all_symbols = elf_.GetElfSymbols();
400 const bool is_linux_kernel = elf_.IsLinuxKernelBinary();
401 const SymbolNameList ksymtab_symbols =
402 is_linux_kernel ? GetKsymtabSymbols(all_symbols) : SymbolNameList();
403
404 CRCValuesMap crc_values;
405 NamespacesMap namespaces;
406 if (is_linux_kernel) {
407 crc_values = GetCRCValuesMap(all_symbols, elf_);
408 namespaces = GetNamespacesMap(all_symbols, elf_);
409 }
410
411 const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_);
412
413 std::vector<std::pair<ElfSymbol, size_t>> symbols;
414 symbols.reserve(all_symbols.size());
415 for (const auto& symbol : all_symbols) {
416 if (IsPublicFunctionOrVariable(symbol) &&
417 (!is_linux_kernel || ksymtab_symbols.count(symbol.name))) {
418 const auto cfi_it = cfi_address_map.find(std::string(symbol.name));
419 const size_t address = cfi_it != cfi_address_map.end()
420 ? cfi_it->second
421 : elf_.GetAbsoluteAddress(symbol);
422 symbols.emplace_back(
423 SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol), address);
424 }
425 }
426 symbols.shrink_to_fit();
427
428 Id root = BuildRoot(symbols);
429
430 // Types produced by ELF/DWARF readers may require removing useless
431 // qualifiers.
432 RemoveUselessQualifiers(graph_, root);
433
434 return root;
435 }
436
437 } // namespace
438 } // namespace internal
439
Read(Runtime & runtime,Graph & graph,const std::string & path,ReadOptions options,const std::unique_ptr<Filter> & file_filter)440 Id Read(Runtime& runtime, Graph& graph, const std::string& path,
441 ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
442 return internal::Reader(runtime, graph, path, options, file_filter).Read();
443 }
444
Read(Runtime & runtime,Graph & graph,char * data,size_t size,ReadOptions options,const std::unique_ptr<Filter> & file_filter)445 Id Read(Runtime& runtime, Graph& graph, char* data, size_t size,
446 ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
447 return internal::Reader(runtime, graph, data, size, options, file_filter)
448 .Read();
449 }
450
451 } // namespace elf
452 } // namespace stg
453