1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19
20 #include "dwarf_wrappers.h"
21
22 #include <dwarf.h>
23 #include <elf.h>
24 #include <elfutils/libdw.h>
25 #include <elfutils/libdwfl.h>
26 #include <fcntl.h>
27
28 #include <cstddef>
29 #include <cstdint>
30 #include <ios>
31 #include <memory>
32 #include <optional>
33 #include <ostream>
34 #include <string>
35 #include <utility>
36 #include <vector>
37
38 #include "error.h"
39
40 namespace stg {
41 namespace dwarf {
42
operator <<(std::ostream & os,const Address & address)43 std::ostream& operator<<(std::ostream& os, const Address& address) {
44 return os << Hex(address.value) << (address.is_tls ? " (TLS)" : "");
45 }
46
47 namespace {
48
49 static const Dwfl_Callbacks kDwflCallbacks = {
50 .find_elf = nullptr,
51 .find_debuginfo = dwfl_standard_find_debuginfo,
52 .section_address = dwfl_offline_section_address,
53 .debuginfo_path = nullptr};
54
// Integer status codes compared against the results of the libdw/libdwfl
// calls in this file: kReturnOk marks success, kReturnNoEntry marks the end of
// an iteration (no further unit, child or sibling).
constexpr int kReturnOk{0};
constexpr int kReturnNoEntry{1};
57
GetAttribute(Dwarf_Die * die,uint32_t attribute)58 std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
59 uint32_t attribute) {
60 // Create an optional with default-initialized value already inside
61 std::optional<Dwarf_Attribute> result(std::in_place);
62 // "integrate" automatically resolves DW_AT_abstract_origin and
63 // DW_AT_specification references, fetching the attribute from the linked DIE.
64 //
65 // libdw has infinite loop protection, as it stops after 16 dereferences.
66 // TODO: don't use dwarf_attr_integrate by default
67 if (!dwarf_attr_integrate(die, attribute, &result.value())) {
68 result.reset();
69 }
70 return result;
71 }
72
73 // Get the attribute directly from DIE without following DW_AT_specification and
74 // DW_AT_abstract_origin references.
GetDirectAttribute(Dwarf_Die * die,uint32_t attribute)75 std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
76 uint32_t attribute) {
77 // Create an optional with default-initialized value already inside
78 std::optional<Dwarf_Attribute> result(std::in_place);
79 if (!dwarf_attr(die, attribute, &result.value())) {
80 result.reset();
81 }
82 return result;
83 }
84
CheckOrDwflError(bool condition,const char * caller)85 void CheckOrDwflError(bool condition, const char* caller) {
86 if (!condition) {
87 int dwfl_error = dwfl_errno();
88 const char* errmsg = dwfl_errmsg(dwfl_error);
89 if (errmsg == nullptr) {
90 // There are some cases when DWFL fails to produce an error message.
91 Die() << caller << " returned error code " << Hex(dwfl_error);
92 }
93 Die() << caller << " returned error: " << errmsg;
94 }
95 }
96
MaybeGetUnsignedOperand(const Dwarf_Op & operand)97 std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
98 switch (operand.atom) {
99 case DW_OP_addr:
100 case DW_OP_const1u:
101 case DW_OP_const2u:
102 case DW_OP_const4u:
103 case DW_OP_const8u:
104 case DW_OP_constu:
105 return operand.number;
106 case DW_OP_const1s:
107 case DW_OP_const2s:
108 case DW_OP_const4s:
109 case DW_OP_const8s:
110 case DW_OP_consts:
111 if (static_cast<int64_t>(operand.number) < 0) {
112 // Atom is not an unsigned constant
113 return std::nullopt;
114 }
115 return operand.number;
116 case DW_OP_lit0...DW_OP_lit31:
117 return operand.atom - DW_OP_lit0;
118 default:
119 return std::nullopt;
120 }
121 }
122
123 struct Expression {
operator []stg::dwarf::__anon288118dd0111::Expression124 const Dwarf_Op& operator[](size_t i) const {
125 return atoms[i];
126 }
127
128 Dwarf_Op* atoms = nullptr;
129 size_t length = 0;
130 };
131
MaybeGetExpression(Dwarf_Attribute & attribute)132 std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) {
133 Expression result;
134
135 Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) ==
136 kReturnOk) << "dwarf_getlocation returned error";
137 // If no location attribute is present or has an empty location description,
138 // the variable is present in the source but not in the object code.
139 // So zero length expression is equivalent of no location attribute.
140 if (result.length == 0) {
141 return std::nullopt;
142 }
143 Check(result.atoms != nullptr)
144 << "dwarf_getlocation returned non-empty expression with NULL atoms";
145 return result;
146 }
147
148 } // namespace
149
Handler(const std::string & path)150 Handler::Handler(const std::string& path) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
151 CheckOrDwflError(dwfl_.get(), "dwfl_begin");
152 // Add data to process to dwfl
153 dwfl_module_ =
154 dwfl_report_offline(dwfl_.get(), path.c_str(), path.c_str(), -1);
155 InitialiseDwarf();
156 }
157
Handler(char * data,size_t size)158 Handler::Handler(char* data, size_t size) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
159 CheckOrDwflError(dwfl_.get(), "dwfl_begin");
160
161 // Check if ELF can be opened from input data, because DWFL couldn't handle
162 // memory, that is not ELF.
163 // TODO: remove this workaround
164 Elf* elf = elf_memory(data, size);
165 Check(elf != nullptr) << "Input data is not ELF";
166 elf_end(elf);
167
168 // Add data to process to dwfl
169 dwfl_module_ = dwfl_report_offline_memory(dwfl_.get(), "<memory>", "<memory>",
170 data, size);
171 InitialiseDwarf();
172 }
173
InitialiseDwarf()174 void Handler::InitialiseDwarf() {
175 CheckOrDwflError(dwfl_.get(), "dwfl_report_offline");
176 // Finish adding files to dwfl and process them
177 CheckOrDwflError(dwfl_report_end(dwfl_.get(), nullptr, nullptr) == kReturnOk,
178 "dwfl_report_end");
179 GElf_Addr loadbase = 0; // output argument for dwfl, unused by us
180 dwarf_ = dwfl_module_getdwarf(dwfl_module_, &loadbase);
181 CheckOrDwflError(dwarf_, "dwfl_module_getdwarf");
182 }
183
GetElf()184 Elf* Handler::GetElf() {
185 GElf_Addr loadbase = 0; // output argument for dwfl, unused by us
186 Elf* elf = dwfl_module_getelf(dwfl_module_, &loadbase);
187 CheckOrDwflError(elf, "dwfl_module_getelf");
188 return elf;
189 }
190
GetCompilationUnits()191 std::vector<CompilationUnit> Handler::GetCompilationUnits() {
192 std::vector<CompilationUnit> result;
193 Dwarf_Off offset = 0;
194 while (true) {
195 Dwarf_Off next_offset;
196 size_t header_size = 0;
197 Dwarf_Half version = 0;
198 int return_code =
199 dwarf_next_unit(dwarf_, offset, &next_offset, &header_size, &version,
200 nullptr, nullptr, nullptr, nullptr, nullptr);
201 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
202 << "dwarf_next_unit returned error";
203 if (return_code == kReturnNoEntry) {
204 break;
205 }
206 result.push_back({version, {}});
207 Check(dwarf_offdie(dwarf_, offset + header_size, &result.back().entry.die))
208 << "dwarf_offdie returned error";
209
210 offset = next_offset;
211 }
212 return result;
213 }
214
GetChildren()215 std::vector<Entry> Entry::GetChildren() {
216 Entry child;
217 int return_code = dwarf_child(&die, &child.die);
218 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
219 << "dwarf_child returned error";
220 std::vector<Entry> result;
221 while (return_code == kReturnOk) {
222 result.push_back(child);
223 return_code = dwarf_siblingof(&child.die, &child.die);
224 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
225 << "dwarf_siblingof returned error";
226 }
227 return result;
228 }
229
GetTag()230 int Entry::GetTag() {
231 return dwarf_tag(&die);
232 }
233
GetOffset()234 Dwarf_Off Entry::GetOffset() {
235 return dwarf_dieoffset(&die);
236 }
237
MaybeGetString(uint32_t attribute)238 std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
239 std::optional<std::string> result;
240 auto dwarf_attribute = GetAttribute(&die, attribute);
241 if (!dwarf_attribute) {
242 return result;
243 }
244
245 const char* value = dwarf_formstring(&dwarf_attribute.value());
246 Check(value != nullptr) << "dwarf_formstring returned error";
247 result.emplace(value);
248 return result;
249 }
250
MaybeGetDirectString(uint32_t attribute)251 std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
252 std::optional<std::string> result;
253 auto dwarf_attribute = GetDirectAttribute(&die, attribute);
254 if (!dwarf_attribute) {
255 return result;
256 }
257
258 const char* value = dwarf_formstring(&dwarf_attribute.value());
259 Check(value != nullptr) << "dwarf_formstring returned error";
260 result.emplace(value);
261 return result;
262 }
263
MaybeGetUnsignedConstant(uint32_t attribute)264 std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
265 auto dwarf_attribute = GetAttribute(&die, attribute);
266 if (!dwarf_attribute) {
267 return {};
268 }
269
270 uint64_t value;
271 if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
272 Die() << "dwarf_formudata returned error";
273 }
274 return value;
275 }
276
MustGetUnsignedConstant(uint32_t attribute)277 uint64_t Entry::MustGetUnsignedConstant(uint32_t attribute) {
278 auto maybe_constant = MaybeGetUnsignedConstant(attribute);
279 if (!maybe_constant) {
280 Die() << "DWARF entry <" << Hex(GetOffset()) << "> with tag " << GetTag()
281 << " is missing attribute " << Hex(attribute);
282 }
283 return maybe_constant.value();
284 }
285
GetFlag(uint32_t attribute)286 bool Entry::GetFlag(uint32_t attribute) {
287 bool result = false;
288 auto dwarf_attribute = (attribute == DW_AT_declaration)
289 ? GetDirectAttribute(&die, attribute)
290 : GetAttribute(&die, attribute);
291 if (!dwarf_attribute) {
292 return result;
293 }
294
295 Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk)
296 << "dwarf_formflag returned error";
297 return result;
298 }
299
MaybeGetReference(uint32_t attribute)300 std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
301 std::optional<Entry> result;
302 auto dwarf_attribute = GetAttribute(&die, attribute);
303 if (!dwarf_attribute) {
304 return result;
305 }
306
307 result.emplace();
308 Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die))
309 << "dwarf_formref_die returned error";
310 return result;
311 }
312
313 namespace {
314
GetAddressFromLocation(Dwarf_Attribute & attribute)315 std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) {
316 const auto expression_opt = MaybeGetExpression(attribute);
317 if (!expression_opt) {
318 return {};
319 }
320 const Expression& expression = *expression_opt;
321
322 Dwarf_Attribute result_attribute;
323 if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) ==
324 kReturnOk) {
325 uint64_t address;
326 Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk)
327 << "dwarf_formaddr returned error";
328 return Address{.value = address, .is_tls = false};
329 }
330 if (expression.length == 1 && expression[0].atom == DW_OP_addr) {
331 // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
332 // manually extract the address from expression.
333 return Address{.value = expression[0].number, .is_tls = false};
334 }
335 // TLS operation has different encodings in Clang and GCC:
336 // * Clang 14 uses DW_OP_GNU_push_tls_address
337 // * GCC 12 uses DW_OP_form_tls_address
338 if (expression.length == 2 &&
339 (expression[1].atom == DW_OP_GNU_push_tls_address ||
340 expression[1].atom == DW_OP_form_tls_address)) {
341 // TLS symbols address may be incorrect because of unsupported
342 // relocations. Resetting it to zero the same way as it is done in
343 // elf::Reader::MaybeAddTypeInfo.
344 // TODO: match TLS variables by address
345 return Address{.value = 0, .is_tls = true};
346 }
347
348 Die() << "Unsupported data location expression";
349 }
350
351 } // namespace
352
MaybeGetAddress(uint32_t attribute)353 std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) {
354 auto dwarf_attribute = GetAttribute(&die, attribute);
355 if (!dwarf_attribute) {
356 return {};
357 }
358 if (attribute == DW_AT_location) {
359 return GetAddressFromLocation(*dwarf_attribute);
360 }
361
362 Address address;
363 Check(dwarf_formaddr(&dwarf_attribute.value(), &address.value) == kReturnOk)
364 << "dwarf_formaddr returned error";
365 address.is_tls = false;
366 return address;
367 }
368
MaybeGetMemberByteOffset()369 std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
370 auto attribute = GetAttribute(&die, DW_AT_data_member_location);
371 if (!attribute) {
372 return {};
373 }
374
375 uint64_t offset;
376 // Try to interpret attribute as an unsigned integer constant
377 if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
378 return offset;
379 }
380
381 // Parse location expression
382 const auto expression_opt = MaybeGetExpression(attribute.value());
383 if (!expression_opt) {
384 return {};
385 }
386 const Expression& expression = *expression_opt;
387
388 // Parse virtual base classes offset, which looks like this:
389 // [0] = DW_OP_dup
390 // [1] = DW_OP_deref
391 // [2] = constant operand
392 // [3] = DW_OP_minus
393 // [4] = DW_OP_deref
394 // [5] = DW_OP_plus
395 // This form is not in the standard, but hardcoded in compilers:
396 // * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
397 // * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
398 if (expression.length == 6 &&
399 expression[0].atom == DW_OP_dup &&
400 expression[1].atom == DW_OP_deref &&
401 expression[3].atom == DW_OP_minus &&
402 expression[4].atom == DW_OP_deref &&
403 expression[5].atom == DW_OP_plus) {
404 const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
405 if (byte_offset) {
406 return byte_offset;
407 }
408 }
409
410 Die() << "Unsupported member offset expression, " << Hex(GetOffset());
411 }
412
MaybeGetVtableOffset()413 std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
414 auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
415 if (!attribute) {
416 return {};
417 }
418
419 // Parse location expression
420 const auto expression_opt = MaybeGetExpression(attribute.value());
421 if (!expression_opt) {
422 return {};
423 }
424 const Expression& expression = *expression_opt;
425
426 // We expect compilers to produce expression with one constant operand
427 if (expression.length == 1) {
428 const auto offset = MaybeGetUnsignedOperand(expression[0]);
429 if (offset) {
430 return offset;
431 }
432 }
433
434 Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
435 }
436
MaybeGetCount()437 std::optional<uint64_t> Entry::MaybeGetCount() {
438 auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound);
439 if (lower_bound_attribute && *lower_bound_attribute != 0) {
440 Die() << "Non-zero DW_AT_lower_bound is not supported";
441 }
442 auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound);
443 auto count_attribute = GetAttribute(&die, DW_AT_count);
444 if (!upper_bound_attribute && !count_attribute) {
445 return {};
446 }
447 if (upper_bound_attribute && count_attribute) {
448 Die() << "Both DW_AT_upper_bound and DW_AT_count given";
449 }
450 Dwarf_Attribute dwarf_attribute;
451 uint64_t addend;
452 if (upper_bound_attribute) {
453 dwarf_attribute = *upper_bound_attribute;
454 addend = 1;
455 } else {
456 dwarf_attribute = *count_attribute;
457 addend = 0;
458 }
459
460 uint64_t value;
461 if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) {
462 return value + addend;
463 }
464
465 // Don't fail if attribute is not a constant and treat this as no count
466 // provided. This can happen if array has variable length.
467 // TODO: implement clean solution for separating "not a
468 // constant" errors from other errors.
469 return {};
470 }
471
Files(Entry & compilation_unit)472 Files::Files(Entry& compilation_unit) {
473 if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) !=
474 kReturnOk) {
475 Die() << "No source file information in DWARF";
476 }
477 }
478
MaybeGetFile(Entry & entry,uint32_t attribute) const479 std::optional<std::string> Files::MaybeGetFile(Entry& entry,
480 uint32_t attribute) const {
481 auto file_index = entry.MaybeGetUnsignedConstant(attribute);
482 if (!file_index) {
483 return std::nullopt;
484 }
485 Check(files_ != nullptr) << "dwarf::Files was not initialised";
486 if (*file_index >= files_count_) {
487 Die() << "File index is greater than or equal files count (" << *file_index
488 << " >= " << files_count_ << ")";
489 }
490 const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr);
491 Check(result != nullptr) << "dwarf_filesrc returned error";
492 return result;
493 }
494
495 } // namespace dwarf
496 } // namespace stg
497