• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "dwarf_wrappers.h"
21 
22 #include <dwarf.h>
23 #include <elf.h>
24 #include <elfutils/libdw.h>
25 #include <elfutils/libdwfl.h>
26 #include <fcntl.h>
27 
28 #include <cstddef>
29 #include <cstdint>
30 #include <ios>
31 #include <memory>
32 #include <optional>
33 #include <ostream>
34 #include <string>
35 #include <utility>
36 #include <vector>
37 
38 #include "error.h"
39 
40 namespace stg {
41 namespace dwarf {
42 
operator <<(std::ostream & os,const Address & address)43 std::ostream& operator<<(std::ostream& os, const Address& address) {
44   return os << Hex(address.value) << (address.is_tls ? " (TLS)" : "");
45 }
46 
47 namespace {
48 
49 static const Dwfl_Callbacks kDwflCallbacks = {
50     .find_elf = nullptr,
51     .find_debuginfo = dwfl_standard_find_debuginfo,
52     .section_address = dwfl_offline_section_address,
53     .debuginfo_path = nullptr};
54 
55 constexpr int kReturnOk = 0;
56 constexpr int kReturnNoEntry = 1;
57 
GetAttribute(Dwarf_Die * die,uint32_t attribute)58 std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
59                                             uint32_t attribute) {
60   // Create an optional with default-initialized value already inside
61   std::optional<Dwarf_Attribute> result(std::in_place);
62   // "integrate" automatically resolves DW_AT_abstract_origin and
63   // DW_AT_specification references, fetching the attribute from the linked DIE.
64   //
65   // libdw has infinite loop protection, as it stops after 16 dereferences.
66   // TODO: don't use dwarf_attr_integrate by default
67   if (!dwarf_attr_integrate(die, attribute, &result.value())) {
68     result.reset();
69   }
70   return result;
71 }
72 
73 // Get the attribute directly from DIE without following DW_AT_specification and
74 // DW_AT_abstract_origin references.
GetDirectAttribute(Dwarf_Die * die,uint32_t attribute)75 std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
76                                                   uint32_t attribute) {
77   // Create an optional with default-initialized value already inside
78   std::optional<Dwarf_Attribute> result(std::in_place);
79   if (!dwarf_attr(die, attribute, &result.value())) {
80     result.reset();
81   }
82   return result;
83 }
84 
CheckOrDwflError(bool condition,const char * caller)85 void CheckOrDwflError(bool condition, const char* caller) {
86   if (!condition) {
87     int dwfl_error = dwfl_errno();
88     const char* errmsg = dwfl_errmsg(dwfl_error);
89     if (errmsg == nullptr) {
90       // There are some cases when DWFL fails to produce an error message.
91       Die() << caller << " returned error code " << Hex(dwfl_error);
92     }
93     Die() << caller << " returned error: " << errmsg;
94   }
95 }
96 
MaybeGetUnsignedOperand(const Dwarf_Op & operand)97 std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
98   switch (operand.atom) {
99     case DW_OP_addr:
100     case DW_OP_const1u:
101     case DW_OP_const2u:
102     case DW_OP_const4u:
103     case DW_OP_const8u:
104     case DW_OP_constu:
105       return operand.number;
106     case DW_OP_const1s:
107     case DW_OP_const2s:
108     case DW_OP_const4s:
109     case DW_OP_const8s:
110     case DW_OP_consts:
111       if (static_cast<int64_t>(operand.number) < 0) {
112         // Atom is not an unsigned constant
113         return std::nullopt;
114       }
115       return operand.number;
116     case DW_OP_lit0...DW_OP_lit31:
117       return operand.atom - DW_OP_lit0;
118     default:
119       return std::nullopt;
120   }
121 }
122 
123 struct Expression {
operator []stg::dwarf::__anon288118dd0111::Expression124   const Dwarf_Op& operator[](size_t i) const {
125     return atoms[i];
126   }
127 
128   Dwarf_Op* atoms = nullptr;
129   size_t length = 0;
130 };
131 
MaybeGetExpression(Dwarf_Attribute & attribute)132 std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) {
133   Expression result;
134 
135   Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) ==
136         kReturnOk) << "dwarf_getlocation returned error";
137   // If no location attribute is present or has an empty location description,
138   // the variable is present in the source but not in the object code.
139   // So zero length expression is equivalent of no location attribute.
140   if (result.length == 0) {
141     return std::nullopt;
142   }
143   Check(result.atoms != nullptr)
144       << "dwarf_getlocation returned non-empty expression with NULL atoms";
145   return result;
146 }
147 
148 }  // namespace
149 
Handler(const std::string & path)150 Handler::Handler(const std::string& path) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
151   CheckOrDwflError(dwfl_.get(), "dwfl_begin");
152   // Add data to process to dwfl
153   dwfl_module_ =
154       dwfl_report_offline(dwfl_.get(), path.c_str(), path.c_str(), -1);
155   InitialiseDwarf();
156 }
157 
Handler(char * data,size_t size)158 Handler::Handler(char* data, size_t size) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
159   CheckOrDwflError(dwfl_.get(), "dwfl_begin");
160 
161   // Check if ELF can be opened from input data, because DWFL couldn't handle
162   // memory, that is not ELF.
163   // TODO: remove this workaround
164   Elf* elf = elf_memory(data, size);
165   Check(elf != nullptr) << "Input data is not ELF";
166   elf_end(elf);
167 
168   // Add data to process to dwfl
169   dwfl_module_ = dwfl_report_offline_memory(dwfl_.get(), "<memory>", "<memory>",
170                                             data, size);
171   InitialiseDwarf();
172 }
173 
InitialiseDwarf()174 void Handler::InitialiseDwarf() {
175   CheckOrDwflError(dwfl_.get(), "dwfl_report_offline");
176   // Finish adding files to dwfl and process them
177   CheckOrDwflError(dwfl_report_end(dwfl_.get(), nullptr, nullptr) == kReturnOk,
178                    "dwfl_report_end");
179   GElf_Addr loadbase = 0;  // output argument for dwfl, unused by us
180   dwarf_ = dwfl_module_getdwarf(dwfl_module_, &loadbase);
181   CheckOrDwflError(dwarf_, "dwfl_module_getdwarf");
182 }
183 
GetElf()184 Elf* Handler::GetElf() {
185   GElf_Addr loadbase = 0;  // output argument for dwfl, unused by us
186   Elf* elf = dwfl_module_getelf(dwfl_module_, &loadbase);
187   CheckOrDwflError(elf, "dwfl_module_getelf");
188   return elf;
189 }
190 
GetCompilationUnits()191 std::vector<CompilationUnit> Handler::GetCompilationUnits() {
192   std::vector<CompilationUnit> result;
193   Dwarf_Off offset = 0;
194   while (true) {
195     Dwarf_Off next_offset;
196     size_t header_size = 0;
197     Dwarf_Half version = 0;
198     int return_code =
199         dwarf_next_unit(dwarf_, offset, &next_offset, &header_size, &version,
200                         nullptr, nullptr, nullptr, nullptr, nullptr);
201     Check(return_code == kReturnOk || return_code == kReturnNoEntry)
202         << "dwarf_next_unit returned error";
203     if (return_code == kReturnNoEntry) {
204       break;
205     }
206     result.push_back({version, {}});
207     Check(dwarf_offdie(dwarf_, offset + header_size, &result.back().entry.die))
208         << "dwarf_offdie returned error";
209 
210     offset = next_offset;
211   }
212   return result;
213 }
214 
GetChildren()215 std::vector<Entry> Entry::GetChildren() {
216   Entry child;
217   int return_code = dwarf_child(&die, &child.die);
218   Check(return_code == kReturnOk || return_code == kReturnNoEntry)
219       << "dwarf_child returned error";
220   std::vector<Entry> result;
221   while (return_code == kReturnOk) {
222     result.push_back(child);
223     return_code = dwarf_siblingof(&child.die, &child.die);
224     Check(return_code == kReturnOk || return_code == kReturnNoEntry)
225         << "dwarf_siblingof returned error";
226   }
227   return result;
228 }
229 
GetTag()230 int Entry::GetTag() {
231   return dwarf_tag(&die);
232 }
233 
GetOffset()234 Dwarf_Off Entry::GetOffset() {
235   return dwarf_dieoffset(&die);
236 }
237 
MaybeGetString(uint32_t attribute)238 std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
239   std::optional<std::string> result;
240   auto dwarf_attribute = GetAttribute(&die, attribute);
241   if (!dwarf_attribute) {
242     return result;
243   }
244 
245   const char* value = dwarf_formstring(&dwarf_attribute.value());
246   Check(value != nullptr) << "dwarf_formstring returned error";
247   result.emplace(value);
248   return result;
249 }
250 
MaybeGetDirectString(uint32_t attribute)251 std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
252   std::optional<std::string> result;
253   auto dwarf_attribute = GetDirectAttribute(&die, attribute);
254   if (!dwarf_attribute) {
255     return result;
256   }
257 
258   const char* value = dwarf_formstring(&dwarf_attribute.value());
259   Check(value != nullptr) << "dwarf_formstring returned error";
260   result.emplace(value);
261   return result;
262 }
263 
MaybeGetUnsignedConstant(uint32_t attribute)264 std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
265   auto dwarf_attribute = GetAttribute(&die, attribute);
266   if (!dwarf_attribute) {
267     return {};
268   }
269 
270   uint64_t value;
271   if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
272     Die() << "dwarf_formudata returned error";
273   }
274   return value;
275 }
276 
MustGetUnsignedConstant(uint32_t attribute)277 uint64_t Entry::MustGetUnsignedConstant(uint32_t attribute) {
278   auto maybe_constant = MaybeGetUnsignedConstant(attribute);
279   if (!maybe_constant) {
280     Die() << "DWARF entry <" << Hex(GetOffset()) << "> with tag " << GetTag()
281           << " is missing attribute " << Hex(attribute);
282   }
283   return maybe_constant.value();
284 }
285 
GetFlag(uint32_t attribute)286 bool Entry::GetFlag(uint32_t attribute) {
287   bool result = false;
288   auto dwarf_attribute = (attribute == DW_AT_declaration)
289                              ? GetDirectAttribute(&die, attribute)
290                              : GetAttribute(&die, attribute);
291   if (!dwarf_attribute) {
292     return result;
293   }
294 
295   Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk)
296       << "dwarf_formflag returned error";
297   return result;
298 }
299 
MaybeGetReference(uint32_t attribute)300 std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
301   std::optional<Entry> result;
302   auto dwarf_attribute = GetAttribute(&die, attribute);
303   if (!dwarf_attribute) {
304     return result;
305   }
306 
307   result.emplace();
308   Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die))
309       << "dwarf_formref_die returned error";
310   return result;
311 }
312 
313 namespace {
314 
GetAddressFromLocation(Dwarf_Attribute & attribute)315 std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) {
316   const auto expression_opt = MaybeGetExpression(attribute);
317   if (!expression_opt) {
318     return {};
319   }
320   const Expression& expression = *expression_opt;
321 
322   Dwarf_Attribute result_attribute;
323   if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) ==
324       kReturnOk) {
325     uint64_t address;
326     Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk)
327         << "dwarf_formaddr returned error";
328     return Address{.value = address, .is_tls = false};
329   }
330   if (expression.length == 1 && expression[0].atom == DW_OP_addr) {
331     // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
332     // manually extract the address from expression.
333     return Address{.value = expression[0].number, .is_tls = false};
334   }
335   // TLS operation has different encodings in Clang and GCC:
336   // * Clang 14 uses DW_OP_GNU_push_tls_address
337   // * GCC 12 uses DW_OP_form_tls_address
338   if (expression.length == 2 &&
339       (expression[1].atom == DW_OP_GNU_push_tls_address ||
340        expression[1].atom == DW_OP_form_tls_address)) {
341     // TLS symbols address may be incorrect because of unsupported
342     // relocations. Resetting it to zero the same way as it is done in
343     // elf::Reader::MaybeAddTypeInfo.
344     // TODO: match TLS variables by address
345     return Address{.value = 0, .is_tls = true};
346   }
347 
348   Die() << "Unsupported data location expression";
349 }
350 
351 }  // namespace
352 
MaybeGetAddress(uint32_t attribute)353 std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) {
354   auto dwarf_attribute = GetAttribute(&die, attribute);
355   if (!dwarf_attribute) {
356     return {};
357   }
358   if (attribute == DW_AT_location) {
359     return GetAddressFromLocation(*dwarf_attribute);
360   }
361 
362   Address address;
363   Check(dwarf_formaddr(&dwarf_attribute.value(), &address.value) == kReturnOk)
364       << "dwarf_formaddr returned error";
365   address.is_tls = false;
366   return address;
367 }
368 
MaybeGetMemberByteOffset()369 std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
370   auto attribute = GetAttribute(&die, DW_AT_data_member_location);
371   if (!attribute) {
372     return {};
373   }
374 
375   uint64_t offset;
376   // Try to interpret attribute as an unsigned integer constant
377   if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
378     return offset;
379   }
380 
381   // Parse location expression
382   const auto expression_opt = MaybeGetExpression(attribute.value());
383   if (!expression_opt) {
384     return {};
385   }
386   const Expression& expression = *expression_opt;
387 
388   // Parse virtual base classes offset, which looks like this:
389   //   [0] = DW_OP_dup
390   //   [1] = DW_OP_deref
391   //   [2] = constant operand
392   //   [3] = DW_OP_minus
393   //   [4] = DW_OP_deref
394   //   [5] = DW_OP_plus
395   // This form is not in the standard, but hardcoded in compilers:
396   //   * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
397   //   * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
398   if (expression.length == 6 &&
399       expression[0].atom == DW_OP_dup &&
400       expression[1].atom == DW_OP_deref &&
401       expression[3].atom == DW_OP_minus &&
402       expression[4].atom == DW_OP_deref &&
403       expression[5].atom == DW_OP_plus) {
404     const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
405     if (byte_offset) {
406       return byte_offset;
407     }
408   }
409 
410   Die() << "Unsupported member offset expression, " << Hex(GetOffset());
411 }
412 
MaybeGetVtableOffset()413 std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
414   auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
415   if (!attribute) {
416     return {};
417   }
418 
419   // Parse location expression
420   const auto expression_opt = MaybeGetExpression(attribute.value());
421   if (!expression_opt) {
422     return {};
423   }
424   const Expression& expression = *expression_opt;
425 
426   // We expect compilers to produce expression with one constant operand
427   if (expression.length == 1) {
428     const auto offset = MaybeGetUnsignedOperand(expression[0]);
429     if (offset) {
430       return offset;
431     }
432   }
433 
434   Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
435 }
436 
MaybeGetCount()437 std::optional<uint64_t> Entry::MaybeGetCount() {
438   auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound);
439   if (lower_bound_attribute && *lower_bound_attribute != 0) {
440     Die() << "Non-zero DW_AT_lower_bound is not supported";
441   }
442   auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound);
443   auto count_attribute = GetAttribute(&die, DW_AT_count);
444   if (!upper_bound_attribute && !count_attribute) {
445     return {};
446   }
447   if (upper_bound_attribute && count_attribute) {
448     Die() << "Both DW_AT_upper_bound and DW_AT_count given";
449   }
450   Dwarf_Attribute dwarf_attribute;
451   uint64_t addend;
452   if (upper_bound_attribute) {
453     dwarf_attribute = *upper_bound_attribute;
454     addend = 1;
455   } else {
456     dwarf_attribute = *count_attribute;
457     addend = 0;
458   }
459 
460   uint64_t value;
461   if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) {
462     return value + addend;
463   }
464 
465   // Don't fail if attribute is not a constant and treat this as no count
466   // provided. This can happen if array has variable length.
467   // TODO: implement clean solution for separating "not a
468   // constant" errors from other errors.
469   return {};
470 }
471 
Files(Entry & compilation_unit)472 Files::Files(Entry& compilation_unit) {
473   if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) !=
474       kReturnOk) {
475     Die() << "No source file information in DWARF";
476   }
477 }
478 
MaybeGetFile(Entry & entry,uint32_t attribute) const479 std::optional<std::string> Files::MaybeGetFile(Entry& entry,
480                                                uint32_t attribute) const {
481   auto file_index = entry.MaybeGetUnsignedConstant(attribute);
482   if (!file_index) {
483     return std::nullopt;
484   }
485   Check(files_ != nullptr) << "dwarf::Files was not initialised";
486   if (*file_index >= files_count_) {
487     Die() << "File index is greater than or equal files count (" << *file_index
488           << " >= " << files_count_ << ")";
489   }
490   const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr);
491   Check(result != nullptr) << "dwarf_filesrc returned error";
492   return result;
493 }
494 
495 }  // namespace dwarf
496 }  // namespace stg
497