• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "dwarf_processor.h"
21 
22 #include <dwarf.h>
23 #include <elfutils/libdw.h>
24 
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <memory>
29 #include <optional>
30 #include <sstream>
31 #include <string>
32 #include <string_view>
33 #include <unordered_map>
34 #include <utility>
35 #include <vector>
36 
37 #include "dwarf_wrappers.h"
38 #include "error.h"
39 #include "filter.h"
40 #include "graph.h"
41 #include "scope.h"
42 
43 namespace stg {
44 namespace dwarf {
45 
46 namespace {
47 
HasIncompleteTypes(uint64_t language)48 bool HasIncompleteTypes(uint64_t language) {
49   return language != DW_LANG_Rust;
50 }
51 
EntryToString(Entry & entry)52 std::string EntryToString(Entry& entry) {
53   std::ostringstream os;
54   os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
55   return os.str();
56 }
57 
MaybeGetName(Entry & entry)58 std::optional<std::string> MaybeGetName(Entry& entry) {
59   return entry.MaybeGetString(DW_AT_name);
60 }
61 
GetName(Entry & entry)62 std::string GetName(Entry& entry) {
63   auto result = MaybeGetName(entry);
64   if (!result.has_value()) {
65     Die() << "Name was not found for " << EntryToString(entry);
66   }
67   return std::move(*result);
68 }
69 
GetNameOrEmpty(Entry & entry)70 std::string GetNameOrEmpty(Entry& entry) {
71   auto result = MaybeGetName(entry);
72   if (!result.has_value()) {
73     return std::string();
74   }
75   return std::move(*result);
76 }
77 
MaybeGetLinkageName(int version,Entry & entry)78 std::optional<std::string> MaybeGetLinkageName(int version, Entry& entry) {
79   return entry.MaybeGetString(
80       version < 4 ? DW_AT_MIPS_linkage_name : DW_AT_linkage_name);
81 }
82 
GetBitSize(Entry & entry)83 size_t GetBitSize(Entry& entry) {
84   if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
85     return *byte_size * 8;
86   } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
87     return *bit_size;
88   }
89   Die() << "Bit size was not found for " << EntryToString(entry);
90 }
91 
GetByteSize(Entry & entry)92 size_t GetByteSize(Entry& entry) {
93   if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
94     return *byte_size;
95   } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
96     // Round up bit_size / 8 to get minimal needed storage size in bytes.
97     return (*bit_size + 7) / 8;
98   }
99   Die() << "Byte size was not found for " << EntryToString(entry);
100 }
101 
GetEncoding(Entry & entry)102 Primitive::Encoding GetEncoding(Entry& entry) {
103   const auto dwarf_encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
104   if (!dwarf_encoding) {
105     Die() << "Encoding was not found for " << EntryToString(entry);
106   }
107   switch (*dwarf_encoding) {
108     case DW_ATE_boolean:
109       return Primitive::Encoding::BOOLEAN;
110     case DW_ATE_complex_float:
111       return Primitive::Encoding::COMPLEX_NUMBER;
112     case DW_ATE_float:
113       return Primitive::Encoding::REAL_NUMBER;
114     case DW_ATE_signed:
115       return Primitive::Encoding::SIGNED_INTEGER;
116     case DW_ATE_signed_char:
117       return Primitive::Encoding::SIGNED_CHARACTER;
118     case DW_ATE_unsigned:
119       return Primitive::Encoding::UNSIGNED_INTEGER;
120     case DW_ATE_unsigned_char:
121       return Primitive::Encoding::UNSIGNED_CHARACTER;
122     case DW_ATE_UTF:
123       return Primitive::Encoding::UTF;
124     default:
125       Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
126             << EntryToString(entry);
127   }
128 }
129 
MaybeGetReferredType(Entry & entry)130 std::optional<Entry> MaybeGetReferredType(Entry& entry) {
131   return entry.MaybeGetReference(DW_AT_type);
132 }
133 
GetReferredType(Entry & entry)134 Entry GetReferredType(Entry& entry) {
135   auto result = MaybeGetReferredType(entry);
136   if (!result.has_value()) {
137     Die() << "Type reference was not found in " << EntryToString(entry);
138   }
139   return *result;
140 }
141 
GetNumberOfElements(Entry & entry)142 size_t GetNumberOfElements(Entry& entry) {
143   // DWARF standard says, that array dimensions could be an entry with
144   // either DW_TAG_subrange_type or DW_TAG_enumeration_type. However, this
145   // code supports only the DW_TAG_subrange_type.
146   Check(entry.GetTag() == DW_TAG_subrange_type)
147       << "Array's dimensions should be an entry of DW_TAG_subrange_type";
148   std::optional<size_t> number_of_elements = entry.MaybeGetCount();
149   if (number_of_elements) {
150     return *number_of_elements;
151   }
152   // If a subrange has no DW_AT_count and no DW_AT_upper_bound attribute, its
153   // size is unknown.
154   return 0;
155 }
156 
157 // Calculate number of bits from the "beginning" of the containing entity to
158 // the "beginning" of the data member using DW_AT_bit_offset.
159 //
160 // "Number of bits from the beginning", depends on the definition of the
161 // "beginning", which is different for big- and little-endian architectures.
162 // However, DW_AT_bit_offset is defined from the high order bit of the storage
163 // unit to the high order bit of a field and is the same for both architectures.
164 
165 // So this function converts DW_AT_bit_offset to the "number of bits from the
166 // beginning".
CalculateBitfieldAdjustment(Entry & entry,size_t bit_size,bool is_little_endian_binary)167 size_t CalculateBitfieldAdjustment(Entry& entry, size_t bit_size,
168                              bool is_little_endian_binary) {
169   if (bit_size == 0) {
170     // bit_size == 0 marks that it is not a bit field. No adjustment needed.
171     return 0;
172   }
173   auto container_byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
174   auto bit_offset = entry.MaybeGetUnsignedConstant(DW_AT_bit_offset);
175   Check(container_byte_size.has_value() && bit_offset.has_value())
176       << "If member offset is defined as DW_AT_data_member_location, bit field "
177          "should have DW_AT_byte_size and DW_AT_bit_offset";
178   // The following structure will be used as an example in the explanations:
179   // struct foo {
180   //   uint16_t rest_of_the_struct;
181   //   uint16_t x : 5;
182   //   uint16_t y : 6;
183   //   uint16_t z : 5;
184   // };
185   if (is_little_endian_binary) {
186     // Compiler usualy packs bit fields starting with the least significant
187     // bits, but DW_AT_bit_offset is counted from high to low bits:
188     //
189     // rest of the struct|<    container   >
190     //    Container bits: 01234|56789A|BCDEF
191     //  Bit-fields' bits: 01234|012345|01234
192     //        bit_offset: <<<<B<<<<<<5<<<<<0
193     //   bits from start: 0>>>>>5>>>>>>B>>>>
194     //                    <x:5>|< y:6>|<z:5>
195     //
196     //   x.bit_offset: 11 (0xB) bits
197     //   y.bit_offset: 5 bits
198     //   z.bit_offset: 0 bits
199     //
200     // So we need to subtract bit_offset from the container bit size
201     // (container_byte_size * 8) to inverse direction. Also we need to convert
202     // from high- to low-order bit, because the field "begins" with low-order
203     // bit. To do so we need to subtract field's bit size. Resulting formula is:
204     //
205     //   container_byte_size * 8 - bit_offset - bit_size
206     //
207     // If we try it on example, we get correct values:
208     //   x: 2 * 8 - 11 - 5 = 0
209     //   y: 2 * 8 - 5 - 6 = 5
210     //   z: 2 * 8 - 0 - 5 = 11 (0xB)
211     return *container_byte_size * 8 - *bit_offset - bit_size;
212   }
213   // Big-endian orders begins with high-order bit and the bit_offset is from the
214   // high order bit:
215   //
216   // rest of the struct|<    container   >
217   //    Container bits: FEDCB|A98765|43210
218   //  Bit-fields' bits: 43210|543210|43210
219   //        bit_offset: 0>>>>>5>>>>>>B>>>>
220   //   bits from start: 0>>>>>5>>>>>>B>>>>
221   //                    <x:5>|< y:6>|<z:5>
222   //
223   // So we just return bit_offset.
224   return *bit_offset;
225 }
226 
227 // Calculate the number of bits from the beginning of the structure to the
228 // beginning of the data member.
GetDataBitOffset(Entry & entry,size_t bit_size,bool is_little_endian_binary)229 size_t GetDataBitOffset(Entry& entry, size_t bit_size,
230                         bool is_little_endian_binary) {
231   // Offset may be represented either by DW_AT_data_bit_offset (in bits) or by
232   // DW_AT_data_member_location (in bytes).
233   if (auto data_bit_offset =
234           entry.MaybeGetUnsignedConstant(DW_AT_data_bit_offset)) {
235     // DW_AT_data_bit_offset contains what this function needs for any type
236     // of member (bitfield or not) on architecture of any endianness.
237     return *data_bit_offset;
238   } else if (auto byte_offset = entry.MaybeGetMemberByteOffset()) {
239     // DW_AT_data_member_location contains offset in bytes.
240     const size_t bit_offset = *byte_offset * 8;
241     // But there can be offset part, coming from DW_AT_bit_offset. DWARF 5
242     // standard requires to use DW_AT_data_bit_offset in this case, but a lot
243     // of binaries still use combination of DW_AT_data_member_location and
244     // DW_AT_bit_offset.
245     const size_t bitfield_adjusment =
246         CalculateBitfieldAdjustment(entry, bit_size, is_little_endian_binary);
247     return bit_offset + bitfield_adjusment;
248   } else {
249     // If the beginning of the data member is the same as the beginning of the
250     // containing entity then neither attribute is required.
251     return 0;
252   }
253 }
254 
255 }  // namespace
256 
257 // Transforms DWARF entries to STG.
258 class Processor {
259  public:
Processor(Graph & graph,Id void_id,Id variadic_id,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Types & result)260   Processor(Graph& graph, Id void_id, Id variadic_id,
261             bool is_little_endian_binary,
262             const std::unique_ptr<Filter>& file_filter, Types& result)
263       : graph_(graph),
264         void_id_(void_id),
265         variadic_id_(variadic_id),
266         is_little_endian_binary_(is_little_endian_binary),
267         file_filter_(file_filter),
268         result_(result) {}
269 
ProcessCompilationUnit(CompilationUnit & compilation_unit)270   void ProcessCompilationUnit(CompilationUnit& compilation_unit) {
271     version_ = compilation_unit.version;
272     if (file_filter_ != nullptr) {
273       files_ = dwarf::Files(compilation_unit.entry);
274     }
275     Process(compilation_unit.entry);
276   }
277 
CheckUnresolvedIds() const278   void CheckUnresolvedIds() const {
279     for (const auto& [offset, id] : id_map_) {
280       if (!graph_.Is(id)) {
281         Die() << "unresolved id " << id << ", DWARF offset " << Hex(offset);
282       }
283     }
284   }
285 
ResolveSymbolSpecifications()286   void ResolveSymbolSpecifications() {
287     std::sort(unresolved_symbol_specifications_.begin(),
288               unresolved_symbol_specifications_.end());
289     std::sort(scoped_names_.begin(), scoped_names_.end());
290     auto symbols_it = unresolved_symbol_specifications_.begin();
291     auto names_it = scoped_names_.begin();
292     while (symbols_it != unresolved_symbol_specifications_.end()) {
293       while (names_it != scoped_names_.end() &&
294              names_it->first < symbols_it->first) {
295         ++names_it;
296       }
297       if (names_it == scoped_names_.end() ||
298           names_it->first != symbols_it->first) {
299         Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
300       }
301       result_.symbols[symbols_it->second].name = names_it->second;
302       ++symbols_it;
303     }
304   }
305 
306  private:
Process(Entry & entry)307   void Process(Entry& entry) {
308     try {
309       return ProcessInternal(entry);
310     } catch (Exception& e) {
311       std::ostringstream os;
312       os << "processing DIE " << Hex(entry.GetOffset());
313       e.Add(os.str());
314       throw;
315     }
316   }
317 
ProcessInternal(Entry & entry)318   void ProcessInternal(Entry& entry) {
319     ++result_.processed_entries;
320     const auto tag = entry.GetTag();
321     switch (tag) {
322       case DW_TAG_array_type:
323         ProcessArray(entry);
324         break;
325       case DW_TAG_enumeration_type:
326         ProcessEnum(entry);
327         break;
328       case DW_TAG_class_type:
329         ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
330         break;
331       case DW_TAG_structure_type:
332         ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
333         break;
334       case DW_TAG_union_type:
335         ProcessStructUnion(entry, StructUnion::Kind::UNION);
336         break;
337       case DW_TAG_member:
338         Die() << "DW_TAG_member outside of struct/class/union";
339         break;
340       case DW_TAG_pointer_type:
341         ProcessReference<PointerReference>(
342             entry, PointerReference::Kind::POINTER);
343         break;
344       case DW_TAG_reference_type:
345         ProcessReference<PointerReference>(
346             entry, PointerReference::Kind::LVALUE_REFERENCE);
347         break;
348       case DW_TAG_rvalue_reference_type:
349         ProcessReference<PointerReference>(
350             entry, PointerReference::Kind::RVALUE_REFERENCE);
351         break;
352       case DW_TAG_ptr_to_member_type:
353         ProcessPointerToMember(entry);
354         break;
355       case DW_TAG_unspecified_type:
356         ProcessUnspecifiedType(entry);
357         break;
358       case DW_TAG_compile_unit:
359         language_ = entry.MustGetUnsignedConstant(DW_AT_language);
360         ProcessAllChildren(entry);
361         break;
362       case DW_TAG_typedef:
363         ProcessTypedef(entry);
364         break;
365       case DW_TAG_base_type:
366         ProcessBaseType(entry);
367         break;
368       case DW_TAG_const_type:
369         ProcessReference<Qualified>(entry, Qualifier::CONST);
370         break;
371       case DW_TAG_volatile_type:
372         ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
373         break;
374       case DW_TAG_restrict_type:
375         ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
376         break;
377       case DW_TAG_atomic_type:
378         // TODO: test pending BTF / test suite support
379         ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
380         break;
381       case DW_TAG_variable:
382         // Process only variables visible externally
383         if (entry.GetFlag(DW_AT_external)) {
384           ProcessVariable(entry);
385         }
386         break;
387       case DW_TAG_subroutine_type:
388         // Standalone function type, for example, used in function pointers.
389         ProcessFunction(entry);
390         break;
391       case DW_TAG_subprogram:
392         // DWARF equivalent of ELF function symbol.
393         ProcessFunction(entry);
394         break;
395       case DW_TAG_namespace:
396         ProcessNamespace(entry);
397         break;
398       case DW_TAG_lexical_block:
399         ProcessAllChildren(entry);
400         break;
401 
402       default:
403         // TODO: die on unexpected tag, when this switch contains
404         // all expected tags
405         break;
406     }
407   }
408 
ProcessAllChildren(Entry & entry)409   void ProcessAllChildren(Entry& entry) {
410     for (auto& child : entry.GetChildren()) {
411       Process(child);
412     }
413   }
414 
CheckNoChildren(Entry & entry)415   void CheckNoChildren(Entry& entry) {
416     if (!entry.GetChildren().empty()) {
417       Die() << "Entry expected to have no children";
418     }
419   }
420 
ProcessNamespace(Entry & entry)421   void ProcessNamespace(Entry& entry) {
422     const auto name = GetNameOrEmpty(entry);
423     const PushScopeName push_scope_name(scope_, "namespace", name);
424     ProcessAllChildren(entry);
425   }
426 
ProcessBaseType(Entry & entry)427   void ProcessBaseType(Entry& entry) {
428     CheckNoChildren(entry);
429     const auto type_name = GetName(entry);
430     const size_t bit_size = GetBitSize(entry);
431     if (bit_size % 8) {
432       Die() << "type '" << type_name << "' size is not a multiple of 8";
433     }
434     const size_t byte_size = bit_size / 8;
435     AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry),
436                                 byte_size);
437   }
438 
ProcessTypedef(Entry & entry)439   void ProcessTypedef(Entry& entry) {
440     const auto type_name = GetName(entry);
441     const auto full_name = scope_ + type_name;
442     const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
443     const Id id = AddProcessedNode<Typedef>(entry, full_name, referred_type_id);
444     if (!ShouldKeepDefinition(entry, type_name)) {
445       // We always model (and keep) typedef definitions. But we should exclude
446       // filtered out types from being type roots.
447       return;
448     }
449     AddNamedTypeNode(id);
450   }
451 
452   template<typename Node, typename KindType>
ProcessReference(Entry & entry,KindType kind)453   void ProcessReference(Entry& entry, KindType kind) {
454     const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
455     AddProcessedNode<Node>(entry, kind, referred_type_id);
456   }
457 
ProcessPointerToMember(Entry & entry)458   void ProcessPointerToMember(Entry& entry) {
459     const Id containing_type_id =
460         GetReferredTypeId(entry.MaybeGetReference(DW_AT_containing_type));
461     const Id pointee_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
462     AddProcessedNode<PointerToMember>(entry, containing_type_id,
463                                       pointee_type_id);
464   }
465 
ProcessUnspecifiedType(Entry & entry)466   void ProcessUnspecifiedType(Entry& entry) {
467     const std::string type_name =  GetName(entry);
468     Check(type_name == "decltype(nullptr)")
469         << "Unsupported DW_TAG_unspecified_type: " << type_name;
470     AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
471   }
472 
ShouldKeepDefinition(Entry & entry,const std::string & name) const473   bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
474     if (!HasIncompleteTypes(language_) || file_filter_ == nullptr) {
475       return true;
476     }
477     const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
478     if (!file) {
479       // Built in types that do not have DW_AT_decl_file should be preserved.
480       static constexpr std::string_view kBuiltinPrefix = "__";
481       // TODO: use std::string_view::starts_with
482       if (name.substr(0, kBuiltinPrefix.size()) == kBuiltinPrefix) {
483         return true;
484       }
485       Die() << "File filter is provided, but " << name << " ("
486             << EntryToString(entry) << ") doesn't have DW_AT_decl_file";
487     }
488     return (*file_filter_)(*file);
489   }
490 
ProcessStructUnion(Entry & entry,StructUnion::Kind kind)491   void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
492     const auto type_name = GetNameOrEmpty(entry);
493     const auto full_name = type_name.empty() ? type_name : scope_ + type_name;
494     const PushScopeName push_scope_name(scope_, kind, type_name);
495 
496     std::vector<Id> base_classes;
497     std::vector<Id> members;
498     std::vector<Id> methods;
499     std::optional<VariantAndMembers> variant_and_members = std::nullopt;
500 
501     for (auto& child : entry.GetChildren()) {
502       auto child_tag = child.GetTag();
503       // All possible children of struct/class/union
504       switch (child_tag) {
505         case DW_TAG_member:
506           if (child.GetFlag(DW_AT_external)) {
507             // static members are interpreted as variables and not included in
508             // members.
509             ProcessVariable(child);
510           } else {
511             members.push_back(GetIdForEntry(child));
512             ProcessMember(child);
513           }
514           break;
515         case DW_TAG_subprogram:
516           ProcessMethod(methods, child);
517           break;
518         case DW_TAG_inheritance:
519           base_classes.push_back(GetIdForEntry(child));
520           ProcessBaseClass(child);
521           break;
522         case DW_TAG_structure_type:
523         case DW_TAG_class_type:
524         case DW_TAG_union_type:
525         case DW_TAG_enumeration_type:
526         case DW_TAG_typedef:
527         case DW_TAG_const_type:
528         case DW_TAG_volatile_type:
529         case DW_TAG_restrict_type:
530         case DW_TAG_atomic_type:
531         case DW_TAG_array_type:
532         case DW_TAG_pointer_type:
533         case DW_TAG_reference_type:
534         case DW_TAG_rvalue_reference_type:
535         case DW_TAG_ptr_to_member_type:
536         case DW_TAG_unspecified_type:
537         case DW_TAG_variable:
538           Process(child);
539           break;
540         case DW_TAG_imported_declaration:
541         case DW_TAG_imported_module:
542           // For now information there is useless for ABI monitoring, but we
543           // need to check that there is no missing information in descendants.
544           CheckNoChildren(child);
545           break;
546         case DW_TAG_template_type_parameter:
547         case DW_TAG_template_value_parameter:
548         case DW_TAG_GNU_template_template_param:
549         case DW_TAG_GNU_template_parameter_pack:
550           // We just skip these as neither GCC nor Clang seem to use them
551           // properly (resulting in no references to such DIEs).
552           break;
553         case DW_TAG_variant_part:
554           if (full_name.empty()) {
555             Die() << "Variant name should not be empty: "
556                   << EntryToString(entry);
557           }
558           variant_and_members = GetVariantAndMembers(child);
559           break;
560         default:
561           Die() << "Unexpected tag for child of struct/class/union: "
562                 << Hex(child_tag) << ", " << EntryToString(child);
563       }
564     }
565 
566     if (variant_and_members.has_value()) {
567       // Add a Variant node since this entry represents a variant rather than a
568       // struct or union.
569       const Id id =
570           AddProcessedNode<Variant>(entry, full_name, GetByteSize(entry),
571                                     variant_and_members->discriminant,
572                                     std::move(variant_and_members->members));
573       AddNamedTypeNode(id);
574       return;
575     }
576 
577     if (entry.GetFlag(DW_AT_declaration) ||
578         !ShouldKeepDefinition(entry, type_name)) {
579       // Declaration may have partial information about members or method.
580       // We only need to parse children for information that will be needed in
581       // complete definition, but don't need to store them in incomplete node.
582       AddProcessedNode<StructUnion>(entry, kind, full_name);
583       return;
584     }
585 
586     const auto byte_size = GetByteSize(entry);
587 
588     const Id id = AddProcessedNode<StructUnion>(
589         entry, kind, full_name, byte_size, std::move(base_classes),
590         std::move(methods), std::move(members));
591     if (!full_name.empty()) {
592       AddNamedTypeNode(id);
593     }
594   }
595 
ProcessVariantMember(Entry & entry)596   void ProcessVariantMember(Entry& entry) {
597     // TODO: Process signed discriminant values.
598     auto dw_discriminant_value =
599         entry.MaybeGetUnsignedConstant(DW_AT_discr_value);
600     auto discriminant_value =
601         dw_discriminant_value
602             ? std::optional(static_cast<int64_t>(*dw_discriminant_value))
603             : std::nullopt;
604 
605     auto children = entry.GetChildren();
606     if (children.size() != 1) {
607       Die() << "Unexpected number of children for variant member: "
608             << EntryToString(entry);
609     }
610 
611     auto child = children[0];
612     if (child.GetTag() != DW_TAG_member) {
613       Die() << "Unexpected tag for variant member child: "
614             << Hex(child.GetTag()) << ", " << EntryToString(child);
615     }
616     if (GetDataBitOffset(child, 0, is_little_endian_binary_) != 0) {
617       Die() << "Unexpected data member location for variant member: "
618             << EntryToString(child);
619     }
620 
621     const std::string name = GetNameOrEmpty(child);
622     auto referred_type_id = GetReferredTypeId(GetReferredType(child));
623     AddProcessedNode<VariantMember>(entry, name, discriminant_value,
624                                     referred_type_id);
625   }
626 
ProcessMember(Entry & entry)627   void ProcessMember(Entry& entry) {
628     const auto name = GetNameOrEmpty(entry);
629     auto referred_type = GetReferredType(entry);
630     const Id referred_type_id = GetIdForEntry(referred_type);
631     auto optional_bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
632     // Member has DW_AT_bit_size if and only if it is bit field.
633     // STG uses bit_size == 0 to mark that the member is not a bit field.
634     Check(!optional_bit_size || *optional_bit_size > 0)
635         << "DW_AT_bit_size should be a positive number";
636     auto bit_size = optional_bit_size ? *optional_bit_size : 0;
637     AddProcessedNode<Member>(
638         entry, std::move(name), referred_type_id,
639         GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
640   }
641 
ProcessMethod(std::vector<Id> & methods,Entry & entry)642   void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
643     Subprogram subprogram = GetSubprogram(entry);
644     auto id = graph_.Add<Function>(std::move(subprogram.node));
645     if (subprogram.external && subprogram.address) {
646       // Only external functions with address are useful for ABI monitoring
647       // TODO: cover virtual methods
648       const auto new_symbol_idx = result_.symbols.size();
649       result_.symbols.push_back(Types::Symbol{
650           .name = GetScopedNameForSymbol(
651               new_symbol_idx, subprogram.name_with_context),
652           .linkage_name = subprogram.linkage_name,
653           .address = *subprogram.address,
654           .id = id});
655     }
656     const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
657                                  .value_or(DW_VIRTUALITY_none);
658     if (virtuality == DW_VIRTUALITY_virtual ||
659         virtuality == DW_VIRTUALITY_pure_virtual) {
660       if (!subprogram.name_with_context.unscoped_name) {
661         Die() << "Method " << EntryToString(entry) << " should have name";
662       }
663       if (subprogram.name_with_context.specification) {
664         Die() << "Method " << EntryToString(entry)
665               << " shouldn't have specification";
666       }
667       const auto vtable_offset = entry.MaybeGetVtableOffset().value_or(0);
668       // TODO: proper handling of missing linkage name
669       methods.push_back(AddProcessedNode<Method>(
670           entry, subprogram.linkage_name.value_or("{missing}"),
671           *subprogram.name_with_context.unscoped_name, vtable_offset, id));
672     }
673   }
674 
ProcessBaseClass(Entry & entry)675   void ProcessBaseClass(Entry& entry) {
676     const Id type_id = GetReferredTypeId(GetReferredType(entry));
677     const auto byte_offset = entry.MaybeGetMemberByteOffset();
678     if (!byte_offset) {
679       Die() << "No offset found for base class " << EntryToString(entry);
680     }
681     const auto bit_offset = *byte_offset * 8;
682     const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
683                                  .value_or(DW_VIRTUALITY_none);
684     BaseClass::Inheritance inheritance;
685     if (virtuality == DW_VIRTUALITY_none) {
686       inheritance = BaseClass::Inheritance::NON_VIRTUAL;
687     } else if (virtuality == DW_VIRTUALITY_virtual) {
688       inheritance = BaseClass::Inheritance::VIRTUAL;
689     } else {
690       Die() << "Unexpected base class virtuality: " << virtuality;
691     }
692     AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
693   }
694 
ProcessArray(Entry & entry)695   void ProcessArray(Entry& entry) {
696     auto referred_type = GetReferredType(entry);
697     Id referred_type_id = GetIdForEntry(referred_type);
698     auto children = entry.GetChildren();
699     // Multiple children in array describe multiple dimensions of this array.
700     // For example, int[M][N] contains two children, M located in the first
701     // child, N located in the second child. But in STG multidimensional arrays
702     // are represented as chain of arrays: int[M][N] is array[M] of array[N] of
703     // int.
704     //
705     // We need to chain children as types together in reversed order.
706     // "referred_type_id" is updated every time to contain the top element in
707     // the chain. Rightmost chldren refers to the original "referred_type_id".
708     for (auto it = children.rbegin(); it != children.rend(); ++it) {
709       auto& child = *it;
710       // All subarrays except the first (last in the reversed order) are
711       // attached to the corresponding child. First subarray (last in the
712       // reversed order) is attached to the original entry itself.
713       auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
714       // Update referred_type_id so next array in chain points there.
715       referred_type_id = AddProcessedNode<Array>(
716           entry_to_attach, GetNumberOfElements(child), referred_type_id);
717     }
718   }
719 
ProcessEnum(Entry & entry)720   void ProcessEnum(Entry& entry) {
721     const auto type_name = GetNameOrEmpty(entry);
722     const auto full_name = type_name.empty() ? type_name : scope_ + type_name;
723 
724     if (entry.GetFlag(DW_AT_declaration)) {
725       // It is expected to have only name and no children in declaration.
726       // However, it is not guaranteed and we should do something if we find an
727       // example.
728       CheckNoChildren(entry);
729       AddProcessedNode<Enumeration>(entry, full_name);
730       return;
731     }
732     const Id underlying_type_id =
733         GetReferredTypeId(MaybeGetReferredType(entry));
734     auto children = entry.GetChildren();
735     Enumeration::Enumerators enumerators;
736     enumerators.reserve(children.size());
737     for (auto& child : children) {
738       auto child_tag = child.GetTag();
739       switch (child_tag) {
740         case DW_TAG_enumerator: {
741           const std::string enumerator_name = GetName(child);
742           // TODO: detect signedness of underlying type and call
743           // an appropriate method.
744           std::optional<size_t> value_optional =
745               child.MaybeGetUnsignedConstant(DW_AT_const_value);
746           Check(value_optional.has_value()) << "Enumerator should have value";
747           // TODO: support both uint64_t and int64_t, depending on
748           // signedness of underlying type.
749           enumerators.emplace_back(enumerator_name,
750                                    static_cast<int64_t>(*value_optional));
751           break;
752         }
753         case DW_TAG_subprogram:
754           // STG does not support virtual methods for enums.
755           Check(child.MaybeGetUnsignedConstant(DW_AT_virtuality)
756                     .value_or(DW_VIRTUALITY_none) == DW_VIRTUALITY_none)
757               << "Enums can not have virtual methods: " << EntryToString(child);
758           ProcessFunction(child);
759           break;
760         default:
761           Die() << "Unexpected tag for child of enum: " << Hex(child_tag)
762                 << ", " << EntryToString(child);
763       }
764     }
765     if (!ShouldKeepDefinition(entry, type_name)) {
766       AddProcessedNode<Enumeration>(entry, full_name);
767       return;
768     }
769     const Id id = AddProcessedNode<Enumeration>(
770         entry, full_name, underlying_type_id, std::move(enumerators));
771     if (!full_name.empty()) {
772       AddNamedTypeNode(id);
773     }
774   }
775 
776   struct VariantAndMembers {
777     std::optional<Id> discriminant;
778     std::vector<Id> members;
779   };
780 
GetVariantAndMembers(Entry & entry)781   VariantAndMembers GetVariantAndMembers(Entry& entry) {
782     std::vector<Id> members;
783     std::optional<Id> discriminant = std::nullopt;
784     auto discriminant_entry = entry.MaybeGetReference(DW_AT_discr);
785     if (discriminant_entry.has_value()) {
786       discriminant = GetIdForEntry(*discriminant_entry);
787       ProcessMember(*discriminant_entry);
788     }
789 
790     for (auto& child : entry.GetChildren()) {
791       auto child_tag = child.GetTag();
792       switch (child_tag) {
793         case DW_TAG_member: {
794           if (child.GetOffset() != discriminant_entry->GetOffset()) {
795             Die() << "Encountered rogue member for variant: "
796                   << EntryToString(entry);
797           }
798           if (!child.GetFlag(DW_AT_artificial)) {
799             Die() << "Variant discriminant must be an artificial member: "
800                   << EntryToString(child);
801           }
802           break;
803         }
804         case DW_TAG_variant:
805           members.push_back(GetIdForEntry(child));
806           ProcessVariantMember(child);
807           break;
808         default:
809           Die() << "Unexpected tag for child of variant: " << Hex(child_tag)
810                 << ", " << EntryToString(child);
811       }
812     }
813     return VariantAndMembers{.discriminant = discriminant,
814                              .members = std::move(members)};
815   }
816 
817   struct NameWithContext {
818     std::optional<Dwarf_Off> specification;
819     std::optional<std::string> unscoped_name;
820     std::optional<std::string> scoped_name;
821   };
822 
GetNameWithContext(Entry & entry)823   NameWithContext GetNameWithContext(Entry& entry) {
824     NameWithContext result;
825     // Leaf of specification tree is usually a declaration (of a function or a
826     // method). Then goes definition, which references declaration by
827     // DW_AT_specification. And on top we have instantiation, which references
828     // definition by DW_AT_abstract_origin. In the worst case we have:
829     // * instantiation
830     //     >-DW_AT_abstract_origin-> definition
831     //         >-DW_AT_specification-> declaration
832     //
833     // By using attribute integration we fold all information from definition to
834     // instantiation, flattening hierarchy:
835     // * instantiation + definition
836     //     >-DW_AT_specification-> declaration
837     // NB: DW_AT_abstract_origin attribute is also visible, but it should be
838     // ignored, since we already used it during integration.
839     //
840     // We also need to support this case, when we don't have separate
841     // declaration:
842     // * instantiation +
843     //     >-DW_AT_abstract_origin -> definition
844     //
845     // So the final algorithm is to get final DW_AT_specification through the
846     // whole chain, or use DW_AT_abstract_origin if there is no
847     // DW_AT_specification.
848     if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
849       result.specification = specification->GetOffset();
850     } else if (auto abstract_origin =
851                    entry.MaybeGetReference(DW_AT_abstract_origin)) {
852       result.specification = abstract_origin->GetOffset();
853     }
854     result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
855     if (!result.unscoped_name && !result.specification) {
856       // If there is no name and specification, then this entry is anonymous.
857       // Anonymous entries are modelled as the empty string and not nullopt.
858       // This allows us to fill and register scoped_name (also empty string) to
859       // be used in references.
860       result.unscoped_name = std::string();
861     }
862     if (result.unscoped_name) {
863       result.scoped_name = scope_ + *result.unscoped_name;
864       scoped_names_.emplace_back(
865           entry.GetOffset(), *result.scoped_name);
866     }
867     return result;
868   }
869 
GetScopedNameForSymbol(size_t symbol_idx,const NameWithContext & name)870   std::string GetScopedNameForSymbol(size_t symbol_idx,
871                                      const NameWithContext& name) {
872     // This method is designed to resolve this topology:
873     //   A: specification=B
874     //   B: name="foo"
875     // Any other topologies are rejected:
876     //   * Name and specification in one DIE: checked right below.
877     //   * Chain of specifications will result in symbol referencing another
878     //     specification, which will not be in scoped_names_, because "name and
879     //     specification in one DIE" is rejected.
880     if (name.scoped_name) {
881       if (name.specification) {
882         Die() << "Entry has name " << *name.scoped_name
883               << " and specification " << Hex(*name.specification);
884       }
885       return *name.scoped_name;
886     }
887     if (name.specification) {
888       unresolved_symbol_specifications_.emplace_back(*name.specification,
889                                                      symbol_idx);
890       // Name will be filled in ResolveSymbolSpecifications
891       return {};
892     }
893     Die() << "Entry should have either name or specification";
894   }
895 
ProcessVariable(Entry & entry)896   void ProcessVariable(Entry& entry) {
897     auto name_with_context = GetNameWithContext(entry);
898 
899     auto referred_type = GetReferredType(entry);
900     const Id referred_type_id = GetIdForEntry(referred_type);
901 
902     if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
903       // Only external variables with address are useful for ABI monitoring
904       const auto new_symbol_idx = result_.symbols.size();
905       result_.symbols.push_back(Types::Symbol{
906           .name = GetScopedNameForSymbol(new_symbol_idx, name_with_context),
907           .linkage_name = MaybeGetLinkageName(version_, entry),
908           .address = *address,
909           .id = referred_type_id});
910     }
911   }
912 
ProcessFunction(Entry & entry)913   void ProcessFunction(Entry& entry) {
914     Subprogram subprogram = GetSubprogram(entry);
915     const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
916     if (subprogram.external && subprogram.address) {
917       // Only external functions with address are useful for ABI monitoring
918       const auto new_symbol_idx = result_.symbols.size();
919       result_.symbols.push_back(Types::Symbol{
920           .name = GetScopedNameForSymbol(
921               new_symbol_idx, subprogram.name_with_context),
922           .linkage_name = std::move(subprogram.linkage_name),
923           .address = *subprogram.address,
924           .id = id});
925     }
926   }
927 
928   struct Subprogram {
929     Function node;
930     NameWithContext name_with_context;
931     std::optional<std::string> linkage_name;
932     std::optional<Address> address;
933     bool external;
934   };
935 
GetSubprogram(Entry & entry)936   Subprogram GetSubprogram(Entry& entry) {
937     const Id return_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
938 
939     std::vector<Id> parameters;
940     for (auto& child : entry.GetChildren()) {
941       auto child_tag = child.GetTag();
942       switch (child_tag) {
943         case DW_TAG_formal_parameter:
944           parameters.push_back(GetReferredTypeId(GetReferredType(child)));
945           break;
946         case DW_TAG_unspecified_parameters:
947           // Note: C++ allows a single ... argument specification but C does
948           // not. However, "extern int foo();" (note lack of "void" in
949           // parameters) in C will produce the same DWARF as "extern int
950           // foo(...);" in C++.
951           CheckNoChildren(child);
952           parameters.push_back(variadic_id_);
953           break;
954         case DW_TAG_enumeration_type:
955         case DW_TAG_label:
956         case DW_TAG_lexical_block:
957         case DW_TAG_structure_type:
958         case DW_TAG_class_type:
959         case DW_TAG_union_type:
960         case DW_TAG_typedef:
961         case DW_TAG_const_type:
962         case DW_TAG_volatile_type:
963         case DW_TAG_restrict_type:
964         case DW_TAG_atomic_type:
965         case DW_TAG_array_type:
966         case DW_TAG_pointer_type:
967         case DW_TAG_reference_type:
968         case DW_TAG_rvalue_reference_type:
969         case DW_TAG_ptr_to_member_type:
970         case DW_TAG_unspecified_type:
971         case DW_TAG_inlined_subroutine:
972         case DW_TAG_subprogram:
973         case DW_TAG_variable:
974         case DW_TAG_call_site:
975         case DW_TAG_GNU_call_site:
976           // TODO: Do not leak local types outside this scope.
977           // TODO: It would be better to not process any
978           // information that is function local but there is a dangling
979           // reference Clang bug.
980           Process(child);
981           break;
982         case DW_TAG_imported_declaration:
983         case DW_TAG_imported_module:
984           // For now information there is useless for ABI monitoring, but we
985           // need to check that there is no missing information in descendants.
986           CheckNoChildren(child);
987           break;
988         case DW_TAG_template_type_parameter:
989         case DW_TAG_template_value_parameter:
990         case DW_TAG_GNU_template_template_param:
991         case DW_TAG_GNU_template_parameter_pack:
992           // We just skip these as neither GCC nor Clang seem to use them
993           // properly (resulting in no references to such DIEs).
994           break;
995         case DW_TAG_GNU_formal_parameter_pack:
996           // https://wiki.dwarfstd.org/C++0x_Variadic_templates.md
997           //
998           // As per this (rejected) proposal, GCC includes parameters as
999           // children of this DIE.
1000           for (auto& child2 : child.GetChildren()) {
1001             if (child2.GetTag() == DW_TAG_formal_parameter) {
1002               parameters.push_back(GetReferredTypeId(GetReferredType(child2)));
1003             }
1004           }
1005           break;
1006         default:
1007           Die() << "Unexpected tag for child of function: " << Hex(child_tag)
1008                 << ", " << EntryToString(child);
1009       }
1010     }
1011 
1012     return Subprogram{.node = Function(return_type_id, parameters),
1013                       .name_with_context = GetNameWithContext(entry),
1014                       .linkage_name = MaybeGetLinkageName(version_, entry),
1015                       .address = entry.MaybeGetAddress(DW_AT_low_pc),
1016                       .external = entry.GetFlag(DW_AT_external)};
1017   }
1018 
1019   // Allocate or get already allocated STG Id for Entry.
GetIdForEntry(Entry & entry)1020   Id GetIdForEntry(Entry& entry) {
1021     const auto offset = entry.GetOffset();
1022     const auto [it, emplaced] = id_map_.emplace(offset, Id(-1));
1023     if (emplaced) {
1024       it->second = graph_.Allocate();
1025     }
1026     return it->second;
1027   }
1028 
1029   // Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
1030   // because it is normal for DWARF (5.2 Unspecified Type Entries).
GetReferredTypeId(std::optional<Entry> referred_type)1031   Id GetReferredTypeId(std::optional<Entry> referred_type) {
1032     return referred_type ? GetIdForEntry(*referred_type) : void_id_;
1033   }
1034 
1035   // Wrapper for GetIdForEntry to allow lvalues.
GetReferredTypeId(Entry referred_type)1036   Id GetReferredTypeId(Entry referred_type) {
1037     return GetIdForEntry(referred_type);
1038   }
1039 
1040   // Populate Id from method above with processed Node.
1041   template <typename Node, typename... Args>
AddProcessedNode(Entry & entry,Args &&...args)1042   Id AddProcessedNode(Entry& entry, Args&&... args) {
1043     const Id id = GetIdForEntry(entry);
1044     graph_.Set<Node>(id, std::forward<Args>(args)...);
1045     return id;
1046   }
1047 
AddNamedTypeNode(Id id)1048   void AddNamedTypeNode(Id id) {
1049     result_.named_type_ids.push_back(id);
1050   }
1051 
1052   Graph& graph_;
1053   Id void_id_;
1054   Id variadic_id_;
1055   bool is_little_endian_binary_;
1056   const std::unique_ptr<Filter>& file_filter_;
1057   Types& result_;
1058   std::unordered_map<Dwarf_Off, Id> id_map_;
1059   std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
1060   std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;
1061 
1062   // Current scope.
1063   Scope scope_;
1064   int version_;
1065   dwarf::Files files_;
1066   uint64_t language_;
1067 };
1068 
Process(Handler & dwarf,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Graph & graph)1069 Types Process(Handler& dwarf, bool is_little_endian_binary,
1070               const std::unique_ptr<Filter>& file_filter, Graph& graph) {
1071   Types result;
1072   const Id void_id = graph.Add<Special>(Special::Kind::VOID);
1073   const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
1074   // TODO: Scope Processor to compilation units?
1075   Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
1076                       file_filter, result);
1077   for (auto& compilation_unit : dwarf.GetCompilationUnits()) {
1078     // Could fetch top-level attributes like compiler here.
1079     processor.ProcessCompilationUnit(compilation_unit);
1080   }
1081   processor.CheckUnresolvedIds();
1082   processor.ResolveSymbolSpecifications();
1083 
1084   return result;
1085 }
1086 
1087 }  // namespace dwarf
1088 }  // namespace stg
1089