1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19
20 #include "dwarf_processor.h"
21
22 #include <dwarf.h>
23 #include <elfutils/libdw.h>
24
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <memory>
29 #include <optional>
30 #include <sstream>
31 #include <string>
32 #include <string_view>
33 #include <unordered_map>
34 #include <utility>
35 #include <vector>
36
37 #include "dwarf_wrappers.h"
38 #include "error.h"
39 #include "filter.h"
40 #include "graph.h"
41 #include "scope.h"
42
43 namespace stg {
44 namespace dwarf {
45
46 namespace {
47
HasIncompleteTypes(uint64_t language)48 bool HasIncompleteTypes(uint64_t language) {
49 return language != DW_LANG_Rust;
50 }
51
EntryToString(Entry & entry)52 std::string EntryToString(Entry& entry) {
53 std::ostringstream os;
54 os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
55 return os.str();
56 }
57
MaybeGetName(Entry & entry)58 std::optional<std::string> MaybeGetName(Entry& entry) {
59 return entry.MaybeGetString(DW_AT_name);
60 }
61
GetName(Entry & entry)62 std::string GetName(Entry& entry) {
63 auto result = MaybeGetName(entry);
64 if (!result.has_value()) {
65 Die() << "Name was not found for " << EntryToString(entry);
66 }
67 return std::move(*result);
68 }
69
GetNameOrEmpty(Entry & entry)70 std::string GetNameOrEmpty(Entry& entry) {
71 auto result = MaybeGetName(entry);
72 if (!result.has_value()) {
73 return std::string();
74 }
75 return std::move(*result);
76 }
77
MaybeGetLinkageName(int version,Entry & entry)78 std::optional<std::string> MaybeGetLinkageName(int version, Entry& entry) {
79 return entry.MaybeGetString(
80 version < 4 ? DW_AT_MIPS_linkage_name : DW_AT_linkage_name);
81 }
82
GetBitSize(Entry & entry)83 size_t GetBitSize(Entry& entry) {
84 if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
85 return *byte_size * 8;
86 } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
87 return *bit_size;
88 }
89 Die() << "Bit size was not found for " << EntryToString(entry);
90 }
91
GetByteSize(Entry & entry)92 size_t GetByteSize(Entry& entry) {
93 if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
94 return *byte_size;
95 } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
96 // Round up bit_size / 8 to get minimal needed storage size in bytes.
97 return (*bit_size + 7) / 8;
98 }
99 Die() << "Byte size was not found for " << EntryToString(entry);
100 }
101
GetEncoding(Entry & entry)102 Primitive::Encoding GetEncoding(Entry& entry) {
103 const auto dwarf_encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
104 if (!dwarf_encoding) {
105 Die() << "Encoding was not found for " << EntryToString(entry);
106 }
107 switch (*dwarf_encoding) {
108 case DW_ATE_boolean:
109 return Primitive::Encoding::BOOLEAN;
110 case DW_ATE_complex_float:
111 return Primitive::Encoding::COMPLEX_NUMBER;
112 case DW_ATE_float:
113 return Primitive::Encoding::REAL_NUMBER;
114 case DW_ATE_signed:
115 return Primitive::Encoding::SIGNED_INTEGER;
116 case DW_ATE_signed_char:
117 return Primitive::Encoding::SIGNED_CHARACTER;
118 case DW_ATE_unsigned:
119 return Primitive::Encoding::UNSIGNED_INTEGER;
120 case DW_ATE_unsigned_char:
121 return Primitive::Encoding::UNSIGNED_CHARACTER;
122 case DW_ATE_UTF:
123 return Primitive::Encoding::UTF;
124 default:
125 Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
126 << EntryToString(entry);
127 }
128 }
129
MaybeGetReferredType(Entry & entry)130 std::optional<Entry> MaybeGetReferredType(Entry& entry) {
131 return entry.MaybeGetReference(DW_AT_type);
132 }
133
GetReferredType(Entry & entry)134 Entry GetReferredType(Entry& entry) {
135 auto result = MaybeGetReferredType(entry);
136 if (!result.has_value()) {
137 Die() << "Type reference was not found in " << EntryToString(entry);
138 }
139 return *result;
140 }
141
GetNumberOfElements(Entry & entry)142 size_t GetNumberOfElements(Entry& entry) {
143 // DWARF standard says, that array dimensions could be an entry with
144 // either DW_TAG_subrange_type or DW_TAG_enumeration_type. However, this
145 // code supports only the DW_TAG_subrange_type.
146 Check(entry.GetTag() == DW_TAG_subrange_type)
147 << "Array's dimensions should be an entry of DW_TAG_subrange_type";
148 std::optional<size_t> number_of_elements = entry.MaybeGetCount();
149 if (number_of_elements) {
150 return *number_of_elements;
151 }
152 // If a subrange has no DW_AT_count and no DW_AT_upper_bound attribute, its
153 // size is unknown.
154 return 0;
155 }
156
157 // Calculate number of bits from the "beginning" of the containing entity to
158 // the "beginning" of the data member using DW_AT_bit_offset.
159 //
160 // "Number of bits from the beginning", depends on the definition of the
161 // "beginning", which is different for big- and little-endian architectures.
162 // However, DW_AT_bit_offset is defined from the high order bit of the storage
163 // unit to the high order bit of a field and is the same for both architectures.
164
165 // So this function converts DW_AT_bit_offset to the "number of bits from the
166 // beginning".
CalculateBitfieldAdjustment(Entry & entry,size_t bit_size,bool is_little_endian_binary)167 size_t CalculateBitfieldAdjustment(Entry& entry, size_t bit_size,
168 bool is_little_endian_binary) {
169 if (bit_size == 0) {
170 // bit_size == 0 marks that it is not a bit field. No adjustment needed.
171 return 0;
172 }
173 auto container_byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
174 auto bit_offset = entry.MaybeGetUnsignedConstant(DW_AT_bit_offset);
175 Check(container_byte_size.has_value() && bit_offset.has_value())
176 << "If member offset is defined as DW_AT_data_member_location, bit field "
177 "should have DW_AT_byte_size and DW_AT_bit_offset";
178 // The following structure will be used as an example in the explanations:
179 // struct foo {
180 // uint16_t rest_of_the_struct;
181 // uint16_t x : 5;
182 // uint16_t y : 6;
183 // uint16_t z : 5;
184 // };
185 if (is_little_endian_binary) {
186 // Compiler usualy packs bit fields starting with the least significant
187 // bits, but DW_AT_bit_offset is counted from high to low bits:
188 //
189 // rest of the struct|< container >
190 // Container bits: 01234|56789A|BCDEF
191 // Bit-fields' bits: 01234|012345|01234
192 // bit_offset: <<<<B<<<<<<5<<<<<0
193 // bits from start: 0>>>>>5>>>>>>B>>>>
194 // <x:5>|< y:6>|<z:5>
195 //
196 // x.bit_offset: 11 (0xB) bits
197 // y.bit_offset: 5 bits
198 // z.bit_offset: 0 bits
199 //
200 // So we need to subtract bit_offset from the container bit size
201 // (container_byte_size * 8) to inverse direction. Also we need to convert
202 // from high- to low-order bit, because the field "begins" with low-order
203 // bit. To do so we need to subtract field's bit size. Resulting formula is:
204 //
205 // container_byte_size * 8 - bit_offset - bit_size
206 //
207 // If we try it on example, we get correct values:
208 // x: 2 * 8 - 11 - 5 = 0
209 // y: 2 * 8 - 5 - 6 = 5
210 // z: 2 * 8 - 0 - 5 = 11 (0xB)
211 return *container_byte_size * 8 - *bit_offset - bit_size;
212 }
213 // Big-endian orders begins with high-order bit and the bit_offset is from the
214 // high order bit:
215 //
216 // rest of the struct|< container >
217 // Container bits: FEDCB|A98765|43210
218 // Bit-fields' bits: 43210|543210|43210
219 // bit_offset: 0>>>>>5>>>>>>B>>>>
220 // bits from start: 0>>>>>5>>>>>>B>>>>
221 // <x:5>|< y:6>|<z:5>
222 //
223 // So we just return bit_offset.
224 return *bit_offset;
225 }
226
227 // Calculate the number of bits from the beginning of the structure to the
228 // beginning of the data member.
GetDataBitOffset(Entry & entry,size_t bit_size,bool is_little_endian_binary)229 size_t GetDataBitOffset(Entry& entry, size_t bit_size,
230 bool is_little_endian_binary) {
231 // Offset may be represented either by DW_AT_data_bit_offset (in bits) or by
232 // DW_AT_data_member_location (in bytes).
233 if (auto data_bit_offset =
234 entry.MaybeGetUnsignedConstant(DW_AT_data_bit_offset)) {
235 // DW_AT_data_bit_offset contains what this function needs for any type
236 // of member (bitfield or not) on architecture of any endianness.
237 return *data_bit_offset;
238 } else if (auto byte_offset = entry.MaybeGetMemberByteOffset()) {
239 // DW_AT_data_member_location contains offset in bytes.
240 const size_t bit_offset = *byte_offset * 8;
241 // But there can be offset part, coming from DW_AT_bit_offset. DWARF 5
242 // standard requires to use DW_AT_data_bit_offset in this case, but a lot
243 // of binaries still use combination of DW_AT_data_member_location and
244 // DW_AT_bit_offset.
245 const size_t bitfield_adjusment =
246 CalculateBitfieldAdjustment(entry, bit_size, is_little_endian_binary);
247 return bit_offset + bitfield_adjusment;
248 } else {
249 // If the beginning of the data member is the same as the beginning of the
250 // containing entity then neither attribute is required.
251 return 0;
252 }
253 }
254
255 } // namespace
256
257 // Transforms DWARF entries to STG.
258 class Processor {
259 public:
Processor(Graph & graph,Id void_id,Id variadic_id,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Types & result)260 Processor(Graph& graph, Id void_id, Id variadic_id,
261 bool is_little_endian_binary,
262 const std::unique_ptr<Filter>& file_filter, Types& result)
263 : graph_(graph),
264 void_id_(void_id),
265 variadic_id_(variadic_id),
266 is_little_endian_binary_(is_little_endian_binary),
267 file_filter_(file_filter),
268 result_(result) {}
269
ProcessCompilationUnit(CompilationUnit & compilation_unit)270 void ProcessCompilationUnit(CompilationUnit& compilation_unit) {
271 version_ = compilation_unit.version;
272 if (file_filter_ != nullptr) {
273 files_ = dwarf::Files(compilation_unit.entry);
274 }
275 Process(compilation_unit.entry);
276 }
277
CheckUnresolvedIds() const278 void CheckUnresolvedIds() const {
279 for (const auto& [offset, id] : id_map_) {
280 if (!graph_.Is(id)) {
281 Die() << "unresolved id " << id << ", DWARF offset " << Hex(offset);
282 }
283 }
284 }
285
ResolveSymbolSpecifications()286 void ResolveSymbolSpecifications() {
287 std::sort(unresolved_symbol_specifications_.begin(),
288 unresolved_symbol_specifications_.end());
289 std::sort(scoped_names_.begin(), scoped_names_.end());
290 auto symbols_it = unresolved_symbol_specifications_.begin();
291 auto names_it = scoped_names_.begin();
292 while (symbols_it != unresolved_symbol_specifications_.end()) {
293 while (names_it != scoped_names_.end() &&
294 names_it->first < symbols_it->first) {
295 ++names_it;
296 }
297 if (names_it == scoped_names_.end() ||
298 names_it->first != symbols_it->first) {
299 Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
300 }
301 result_.symbols[symbols_it->second].name = names_it->second;
302 ++symbols_it;
303 }
304 }
305
306 private:
Process(Entry & entry)307 void Process(Entry& entry) {
308 try {
309 return ProcessInternal(entry);
310 } catch (Exception& e) {
311 std::ostringstream os;
312 os << "processing DIE " << Hex(entry.GetOffset());
313 e.Add(os.str());
314 throw;
315 }
316 }
317
ProcessInternal(Entry & entry)318 void ProcessInternal(Entry& entry) {
319 ++result_.processed_entries;
320 const auto tag = entry.GetTag();
321 switch (tag) {
322 case DW_TAG_array_type:
323 ProcessArray(entry);
324 break;
325 case DW_TAG_enumeration_type:
326 ProcessEnum(entry);
327 break;
328 case DW_TAG_class_type:
329 ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
330 break;
331 case DW_TAG_structure_type:
332 ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
333 break;
334 case DW_TAG_union_type:
335 ProcessStructUnion(entry, StructUnion::Kind::UNION);
336 break;
337 case DW_TAG_member:
338 Die() << "DW_TAG_member outside of struct/class/union";
339 break;
340 case DW_TAG_pointer_type:
341 ProcessReference<PointerReference>(
342 entry, PointerReference::Kind::POINTER);
343 break;
344 case DW_TAG_reference_type:
345 ProcessReference<PointerReference>(
346 entry, PointerReference::Kind::LVALUE_REFERENCE);
347 break;
348 case DW_TAG_rvalue_reference_type:
349 ProcessReference<PointerReference>(
350 entry, PointerReference::Kind::RVALUE_REFERENCE);
351 break;
352 case DW_TAG_ptr_to_member_type:
353 ProcessPointerToMember(entry);
354 break;
355 case DW_TAG_unspecified_type:
356 ProcessUnspecifiedType(entry);
357 break;
358 case DW_TAG_compile_unit:
359 language_ = entry.MustGetUnsignedConstant(DW_AT_language);
360 ProcessAllChildren(entry);
361 break;
362 case DW_TAG_typedef:
363 ProcessTypedef(entry);
364 break;
365 case DW_TAG_base_type:
366 ProcessBaseType(entry);
367 break;
368 case DW_TAG_const_type:
369 ProcessReference<Qualified>(entry, Qualifier::CONST);
370 break;
371 case DW_TAG_volatile_type:
372 ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
373 break;
374 case DW_TAG_restrict_type:
375 ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
376 break;
377 case DW_TAG_atomic_type:
378 // TODO: test pending BTF / test suite support
379 ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
380 break;
381 case DW_TAG_variable:
382 // Process only variables visible externally
383 if (entry.GetFlag(DW_AT_external)) {
384 ProcessVariable(entry);
385 }
386 break;
387 case DW_TAG_subroutine_type:
388 // Standalone function type, for example, used in function pointers.
389 ProcessFunction(entry);
390 break;
391 case DW_TAG_subprogram:
392 // DWARF equivalent of ELF function symbol.
393 ProcessFunction(entry);
394 break;
395 case DW_TAG_namespace:
396 ProcessNamespace(entry);
397 break;
398 case DW_TAG_lexical_block:
399 ProcessAllChildren(entry);
400 break;
401
402 default:
403 // TODO: die on unexpected tag, when this switch contains
404 // all expected tags
405 break;
406 }
407 }
408
ProcessAllChildren(Entry & entry)409 void ProcessAllChildren(Entry& entry) {
410 for (auto& child : entry.GetChildren()) {
411 Process(child);
412 }
413 }
414
CheckNoChildren(Entry & entry)415 void CheckNoChildren(Entry& entry) {
416 if (!entry.GetChildren().empty()) {
417 Die() << "Entry expected to have no children";
418 }
419 }
420
ProcessNamespace(Entry & entry)421 void ProcessNamespace(Entry& entry) {
422 const auto name = GetNameOrEmpty(entry);
423 const PushScopeName push_scope_name(scope_, "namespace", name);
424 ProcessAllChildren(entry);
425 }
426
ProcessBaseType(Entry & entry)427 void ProcessBaseType(Entry& entry) {
428 CheckNoChildren(entry);
429 const auto type_name = GetName(entry);
430 const size_t bit_size = GetBitSize(entry);
431 if (bit_size % 8) {
432 Die() << "type '" << type_name << "' size is not a multiple of 8";
433 }
434 const size_t byte_size = bit_size / 8;
435 AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry),
436 byte_size);
437 }
438
ProcessTypedef(Entry & entry)439 void ProcessTypedef(Entry& entry) {
440 const auto type_name = GetName(entry);
441 const auto full_name = scope_ + type_name;
442 const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
443 const Id id = AddProcessedNode<Typedef>(entry, full_name, referred_type_id);
444 if (!ShouldKeepDefinition(entry, type_name)) {
445 // We always model (and keep) typedef definitions. But we should exclude
446 // filtered out types from being type roots.
447 return;
448 }
449 AddNamedTypeNode(id);
450 }
451
452 template<typename Node, typename KindType>
ProcessReference(Entry & entry,KindType kind)453 void ProcessReference(Entry& entry, KindType kind) {
454 const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
455 AddProcessedNode<Node>(entry, kind, referred_type_id);
456 }
457
ProcessPointerToMember(Entry & entry)458 void ProcessPointerToMember(Entry& entry) {
459 const Id containing_type_id =
460 GetReferredTypeId(entry.MaybeGetReference(DW_AT_containing_type));
461 const Id pointee_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
462 AddProcessedNode<PointerToMember>(entry, containing_type_id,
463 pointee_type_id);
464 }
465
ProcessUnspecifiedType(Entry & entry)466 void ProcessUnspecifiedType(Entry& entry) {
467 const std::string type_name = GetName(entry);
468 Check(type_name == "decltype(nullptr)")
469 << "Unsupported DW_TAG_unspecified_type: " << type_name;
470 AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
471 }
472
ShouldKeepDefinition(Entry & entry,const std::string & name) const473 bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
474 if (!HasIncompleteTypes(language_) || file_filter_ == nullptr) {
475 return true;
476 }
477 const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
478 if (!file) {
479 // Built in types that do not have DW_AT_decl_file should be preserved.
480 static constexpr std::string_view kBuiltinPrefix = "__";
481 // TODO: use std::string_view::starts_with
482 if (name.substr(0, kBuiltinPrefix.size()) == kBuiltinPrefix) {
483 return true;
484 }
485 Die() << "File filter is provided, but " << name << " ("
486 << EntryToString(entry) << ") doesn't have DW_AT_decl_file";
487 }
488 return (*file_filter_)(*file);
489 }
490
ProcessStructUnion(Entry & entry,StructUnion::Kind kind)491 void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
492 const auto type_name = GetNameOrEmpty(entry);
493 const auto full_name = type_name.empty() ? type_name : scope_ + type_name;
494 const PushScopeName push_scope_name(scope_, kind, type_name);
495
496 std::vector<Id> base_classes;
497 std::vector<Id> members;
498 std::vector<Id> methods;
499 std::optional<VariantAndMembers> variant_and_members = std::nullopt;
500
501 for (auto& child : entry.GetChildren()) {
502 auto child_tag = child.GetTag();
503 // All possible children of struct/class/union
504 switch (child_tag) {
505 case DW_TAG_member:
506 if (child.GetFlag(DW_AT_external)) {
507 // static members are interpreted as variables and not included in
508 // members.
509 ProcessVariable(child);
510 } else {
511 members.push_back(GetIdForEntry(child));
512 ProcessMember(child);
513 }
514 break;
515 case DW_TAG_subprogram:
516 ProcessMethod(methods, child);
517 break;
518 case DW_TAG_inheritance:
519 base_classes.push_back(GetIdForEntry(child));
520 ProcessBaseClass(child);
521 break;
522 case DW_TAG_structure_type:
523 case DW_TAG_class_type:
524 case DW_TAG_union_type:
525 case DW_TAG_enumeration_type:
526 case DW_TAG_typedef:
527 case DW_TAG_const_type:
528 case DW_TAG_volatile_type:
529 case DW_TAG_restrict_type:
530 case DW_TAG_atomic_type:
531 case DW_TAG_array_type:
532 case DW_TAG_pointer_type:
533 case DW_TAG_reference_type:
534 case DW_TAG_rvalue_reference_type:
535 case DW_TAG_ptr_to_member_type:
536 case DW_TAG_unspecified_type:
537 case DW_TAG_variable:
538 Process(child);
539 break;
540 case DW_TAG_imported_declaration:
541 case DW_TAG_imported_module:
542 // For now information there is useless for ABI monitoring, but we
543 // need to check that there is no missing information in descendants.
544 CheckNoChildren(child);
545 break;
546 case DW_TAG_template_type_parameter:
547 case DW_TAG_template_value_parameter:
548 case DW_TAG_GNU_template_template_param:
549 case DW_TAG_GNU_template_parameter_pack:
550 // We just skip these as neither GCC nor Clang seem to use them
551 // properly (resulting in no references to such DIEs).
552 break;
553 case DW_TAG_variant_part:
554 if (full_name.empty()) {
555 Die() << "Variant name should not be empty: "
556 << EntryToString(entry);
557 }
558 variant_and_members = GetVariantAndMembers(child);
559 break;
560 default:
561 Die() << "Unexpected tag for child of struct/class/union: "
562 << Hex(child_tag) << ", " << EntryToString(child);
563 }
564 }
565
566 if (variant_and_members.has_value()) {
567 // Add a Variant node since this entry represents a variant rather than a
568 // struct or union.
569 const Id id =
570 AddProcessedNode<Variant>(entry, full_name, GetByteSize(entry),
571 variant_and_members->discriminant,
572 std::move(variant_and_members->members));
573 AddNamedTypeNode(id);
574 return;
575 }
576
577 if (entry.GetFlag(DW_AT_declaration) ||
578 !ShouldKeepDefinition(entry, type_name)) {
579 // Declaration may have partial information about members or method.
580 // We only need to parse children for information that will be needed in
581 // complete definition, but don't need to store them in incomplete node.
582 AddProcessedNode<StructUnion>(entry, kind, full_name);
583 return;
584 }
585
586 const auto byte_size = GetByteSize(entry);
587
588 const Id id = AddProcessedNode<StructUnion>(
589 entry, kind, full_name, byte_size, std::move(base_classes),
590 std::move(methods), std::move(members));
591 if (!full_name.empty()) {
592 AddNamedTypeNode(id);
593 }
594 }
595
ProcessVariantMember(Entry & entry)596 void ProcessVariantMember(Entry& entry) {
597 // TODO: Process signed discriminant values.
598 auto dw_discriminant_value =
599 entry.MaybeGetUnsignedConstant(DW_AT_discr_value);
600 auto discriminant_value =
601 dw_discriminant_value
602 ? std::optional(static_cast<int64_t>(*dw_discriminant_value))
603 : std::nullopt;
604
605 auto children = entry.GetChildren();
606 if (children.size() != 1) {
607 Die() << "Unexpected number of children for variant member: "
608 << EntryToString(entry);
609 }
610
611 auto child = children[0];
612 if (child.GetTag() != DW_TAG_member) {
613 Die() << "Unexpected tag for variant member child: "
614 << Hex(child.GetTag()) << ", " << EntryToString(child);
615 }
616 if (GetDataBitOffset(child, 0, is_little_endian_binary_) != 0) {
617 Die() << "Unexpected data member location for variant member: "
618 << EntryToString(child);
619 }
620
621 const std::string name = GetNameOrEmpty(child);
622 auto referred_type_id = GetReferredTypeId(GetReferredType(child));
623 AddProcessedNode<VariantMember>(entry, name, discriminant_value,
624 referred_type_id);
625 }
626
ProcessMember(Entry & entry)627 void ProcessMember(Entry& entry) {
628 const auto name = GetNameOrEmpty(entry);
629 auto referred_type = GetReferredType(entry);
630 const Id referred_type_id = GetIdForEntry(referred_type);
631 auto optional_bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
632 // Member has DW_AT_bit_size if and only if it is bit field.
633 // STG uses bit_size == 0 to mark that the member is not a bit field.
634 Check(!optional_bit_size || *optional_bit_size > 0)
635 << "DW_AT_bit_size should be a positive number";
636 auto bit_size = optional_bit_size ? *optional_bit_size : 0;
637 AddProcessedNode<Member>(
638 entry, std::move(name), referred_type_id,
639 GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
640 }
641
ProcessMethod(std::vector<Id> & methods,Entry & entry)642 void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
643 Subprogram subprogram = GetSubprogram(entry);
644 auto id = graph_.Add<Function>(std::move(subprogram.node));
645 if (subprogram.external && subprogram.address) {
646 // Only external functions with address are useful for ABI monitoring
647 // TODO: cover virtual methods
648 const auto new_symbol_idx = result_.symbols.size();
649 result_.symbols.push_back(Types::Symbol{
650 .name = GetScopedNameForSymbol(
651 new_symbol_idx, subprogram.name_with_context),
652 .linkage_name = subprogram.linkage_name,
653 .address = *subprogram.address,
654 .id = id});
655 }
656 const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
657 .value_or(DW_VIRTUALITY_none);
658 if (virtuality == DW_VIRTUALITY_virtual ||
659 virtuality == DW_VIRTUALITY_pure_virtual) {
660 if (!subprogram.name_with_context.unscoped_name) {
661 Die() << "Method " << EntryToString(entry) << " should have name";
662 }
663 if (subprogram.name_with_context.specification) {
664 Die() << "Method " << EntryToString(entry)
665 << " shouldn't have specification";
666 }
667 const auto vtable_offset = entry.MaybeGetVtableOffset().value_or(0);
668 // TODO: proper handling of missing linkage name
669 methods.push_back(AddProcessedNode<Method>(
670 entry, subprogram.linkage_name.value_or("{missing}"),
671 *subprogram.name_with_context.unscoped_name, vtable_offset, id));
672 }
673 }
674
ProcessBaseClass(Entry & entry)675 void ProcessBaseClass(Entry& entry) {
676 const Id type_id = GetReferredTypeId(GetReferredType(entry));
677 const auto byte_offset = entry.MaybeGetMemberByteOffset();
678 if (!byte_offset) {
679 Die() << "No offset found for base class " << EntryToString(entry);
680 }
681 const auto bit_offset = *byte_offset * 8;
682 const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
683 .value_or(DW_VIRTUALITY_none);
684 BaseClass::Inheritance inheritance;
685 if (virtuality == DW_VIRTUALITY_none) {
686 inheritance = BaseClass::Inheritance::NON_VIRTUAL;
687 } else if (virtuality == DW_VIRTUALITY_virtual) {
688 inheritance = BaseClass::Inheritance::VIRTUAL;
689 } else {
690 Die() << "Unexpected base class virtuality: " << virtuality;
691 }
692 AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
693 }
694
ProcessArray(Entry & entry)695 void ProcessArray(Entry& entry) {
696 auto referred_type = GetReferredType(entry);
697 Id referred_type_id = GetIdForEntry(referred_type);
698 auto children = entry.GetChildren();
699 // Multiple children in array describe multiple dimensions of this array.
700 // For example, int[M][N] contains two children, M located in the first
701 // child, N located in the second child. But in STG multidimensional arrays
702 // are represented as chain of arrays: int[M][N] is array[M] of array[N] of
703 // int.
704 //
705 // We need to chain children as types together in reversed order.
706 // "referred_type_id" is updated every time to contain the top element in
707 // the chain. Rightmost chldren refers to the original "referred_type_id".
708 for (auto it = children.rbegin(); it != children.rend(); ++it) {
709 auto& child = *it;
710 // All subarrays except the first (last in the reversed order) are
711 // attached to the corresponding child. First subarray (last in the
712 // reversed order) is attached to the original entry itself.
713 auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
714 // Update referred_type_id so next array in chain points there.
715 referred_type_id = AddProcessedNode<Array>(
716 entry_to_attach, GetNumberOfElements(child), referred_type_id);
717 }
718 }
719
ProcessEnum(Entry & entry)720 void ProcessEnum(Entry& entry) {
721 const auto type_name = GetNameOrEmpty(entry);
722 const auto full_name = type_name.empty() ? type_name : scope_ + type_name;
723
724 if (entry.GetFlag(DW_AT_declaration)) {
725 // It is expected to have only name and no children in declaration.
726 // However, it is not guaranteed and we should do something if we find an
727 // example.
728 CheckNoChildren(entry);
729 AddProcessedNode<Enumeration>(entry, full_name);
730 return;
731 }
732 const Id underlying_type_id =
733 GetReferredTypeId(MaybeGetReferredType(entry));
734 auto children = entry.GetChildren();
735 Enumeration::Enumerators enumerators;
736 enumerators.reserve(children.size());
737 for (auto& child : children) {
738 auto child_tag = child.GetTag();
739 switch (child_tag) {
740 case DW_TAG_enumerator: {
741 const std::string enumerator_name = GetName(child);
742 // TODO: detect signedness of underlying type and call
743 // an appropriate method.
744 std::optional<size_t> value_optional =
745 child.MaybeGetUnsignedConstant(DW_AT_const_value);
746 Check(value_optional.has_value()) << "Enumerator should have value";
747 // TODO: support both uint64_t and int64_t, depending on
748 // signedness of underlying type.
749 enumerators.emplace_back(enumerator_name,
750 static_cast<int64_t>(*value_optional));
751 break;
752 }
753 case DW_TAG_subprogram:
754 // STG does not support virtual methods for enums.
755 Check(child.MaybeGetUnsignedConstant(DW_AT_virtuality)
756 .value_or(DW_VIRTUALITY_none) == DW_VIRTUALITY_none)
757 << "Enums can not have virtual methods: " << EntryToString(child);
758 ProcessFunction(child);
759 break;
760 default:
761 Die() << "Unexpected tag for child of enum: " << Hex(child_tag)
762 << ", " << EntryToString(child);
763 }
764 }
765 if (!ShouldKeepDefinition(entry, type_name)) {
766 AddProcessedNode<Enumeration>(entry, full_name);
767 return;
768 }
769 const Id id = AddProcessedNode<Enumeration>(
770 entry, full_name, underlying_type_id, std::move(enumerators));
771 if (!full_name.empty()) {
772 AddNamedTypeNode(id);
773 }
774 }
775
776 struct VariantAndMembers {
777 std::optional<Id> discriminant;
778 std::vector<Id> members;
779 };
780
GetVariantAndMembers(Entry & entry)781 VariantAndMembers GetVariantAndMembers(Entry& entry) {
782 std::vector<Id> members;
783 std::optional<Id> discriminant = std::nullopt;
784 auto discriminant_entry = entry.MaybeGetReference(DW_AT_discr);
785 if (discriminant_entry.has_value()) {
786 discriminant = GetIdForEntry(*discriminant_entry);
787 ProcessMember(*discriminant_entry);
788 }
789
790 for (auto& child : entry.GetChildren()) {
791 auto child_tag = child.GetTag();
792 switch (child_tag) {
793 case DW_TAG_member: {
794 if (child.GetOffset() != discriminant_entry->GetOffset()) {
795 Die() << "Encountered rogue member for variant: "
796 << EntryToString(entry);
797 }
798 if (!child.GetFlag(DW_AT_artificial)) {
799 Die() << "Variant discriminant must be an artificial member: "
800 << EntryToString(child);
801 }
802 break;
803 }
804 case DW_TAG_variant:
805 members.push_back(GetIdForEntry(child));
806 ProcessVariantMember(child);
807 break;
808 default:
809 Die() << "Unexpected tag for child of variant: " << Hex(child_tag)
810 << ", " << EntryToString(child);
811 }
812 }
813 return VariantAndMembers{.discriminant = discriminant,
814 .members = std::move(members)};
815 }
816
817 struct NameWithContext {
818 std::optional<Dwarf_Off> specification;
819 std::optional<std::string> unscoped_name;
820 std::optional<std::string> scoped_name;
821 };
822
GetNameWithContext(Entry & entry)823 NameWithContext GetNameWithContext(Entry& entry) {
824 NameWithContext result;
825 // Leaf of specification tree is usually a declaration (of a function or a
826 // method). Then goes definition, which references declaration by
827 // DW_AT_specification. And on top we have instantiation, which references
828 // definition by DW_AT_abstract_origin. In the worst case we have:
829 // * instantiation
830 // >-DW_AT_abstract_origin-> definition
831 // >-DW_AT_specification-> declaration
832 //
833 // By using attribute integration we fold all information from definition to
834 // instantiation, flattening hierarchy:
835 // * instantiation + definition
836 // >-DW_AT_specification-> declaration
837 // NB: DW_AT_abstract_origin attribute is also visible, but it should be
838 // ignored, since we already used it during integration.
839 //
840 // We also need to support this case, when we don't have separate
841 // declaration:
842 // * instantiation +
843 // >-DW_AT_abstract_origin -> definition
844 //
845 // So the final algorithm is to get final DW_AT_specification through the
846 // whole chain, or use DW_AT_abstract_origin if there is no
847 // DW_AT_specification.
848 if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
849 result.specification = specification->GetOffset();
850 } else if (auto abstract_origin =
851 entry.MaybeGetReference(DW_AT_abstract_origin)) {
852 result.specification = abstract_origin->GetOffset();
853 }
854 result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
855 if (!result.unscoped_name && !result.specification) {
856 // If there is no name and specification, then this entry is anonymous.
857 // Anonymous entries are modelled as the empty string and not nullopt.
858 // This allows us to fill and register scoped_name (also empty string) to
859 // be used in references.
860 result.unscoped_name = std::string();
861 }
862 if (result.unscoped_name) {
863 result.scoped_name = scope_ + *result.unscoped_name;
864 scoped_names_.emplace_back(
865 entry.GetOffset(), *result.scoped_name);
866 }
867 return result;
868 }
869
GetScopedNameForSymbol(size_t symbol_idx,const NameWithContext & name)870 std::string GetScopedNameForSymbol(size_t symbol_idx,
871 const NameWithContext& name) {
872 // This method is designed to resolve this topology:
873 // A: specification=B
874 // B: name="foo"
875 // Any other topologies are rejected:
876 // * Name and specification in one DIE: checked right below.
877 // * Chain of specifications will result in symbol referencing another
878 // specification, which will not be in scoped_names_, because "name and
879 // specification in one DIE" is rejected.
880 if (name.scoped_name) {
881 if (name.specification) {
882 Die() << "Entry has name " << *name.scoped_name
883 << " and specification " << Hex(*name.specification);
884 }
885 return *name.scoped_name;
886 }
887 if (name.specification) {
888 unresolved_symbol_specifications_.emplace_back(*name.specification,
889 symbol_idx);
890 // Name will be filled in ResolveSymbolSpecifications
891 return {};
892 }
893 Die() << "Entry should have either name or specification";
894 }
895
ProcessVariable(Entry & entry)896 void ProcessVariable(Entry& entry) {
897 auto name_with_context = GetNameWithContext(entry);
898
899 auto referred_type = GetReferredType(entry);
900 const Id referred_type_id = GetIdForEntry(referred_type);
901
902 if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
903 // Only external variables with address are useful for ABI monitoring
904 const auto new_symbol_idx = result_.symbols.size();
905 result_.symbols.push_back(Types::Symbol{
906 .name = GetScopedNameForSymbol(new_symbol_idx, name_with_context),
907 .linkage_name = MaybeGetLinkageName(version_, entry),
908 .address = *address,
909 .id = referred_type_id});
910 }
911 }
912
ProcessFunction(Entry & entry)913 void ProcessFunction(Entry& entry) {
914 Subprogram subprogram = GetSubprogram(entry);
915 const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
916 if (subprogram.external && subprogram.address) {
917 // Only external functions with address are useful for ABI monitoring
918 const auto new_symbol_idx = result_.symbols.size();
919 result_.symbols.push_back(Types::Symbol{
920 .name = GetScopedNameForSymbol(
921 new_symbol_idx, subprogram.name_with_context),
922 .linkage_name = std::move(subprogram.linkage_name),
923 .address = *subprogram.address,
924 .id = id});
925 }
926 }
927
928 struct Subprogram {
929 Function node;
930 NameWithContext name_with_context;
931 std::optional<std::string> linkage_name;
932 std::optional<Address> address;
933 bool external;
934 };
935
GetSubprogram(Entry & entry)936 Subprogram GetSubprogram(Entry& entry) {
937 const Id return_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
938
939 std::vector<Id> parameters;
940 for (auto& child : entry.GetChildren()) {
941 auto child_tag = child.GetTag();
942 switch (child_tag) {
943 case DW_TAG_formal_parameter:
944 parameters.push_back(GetReferredTypeId(GetReferredType(child)));
945 break;
946 case DW_TAG_unspecified_parameters:
947 // Note: C++ allows a single ... argument specification but C does
948 // not. However, "extern int foo();" (note lack of "void" in
949 // parameters) in C will produce the same DWARF as "extern int
950 // foo(...);" in C++.
951 CheckNoChildren(child);
952 parameters.push_back(variadic_id_);
953 break;
954 case DW_TAG_enumeration_type:
955 case DW_TAG_label:
956 case DW_TAG_lexical_block:
957 case DW_TAG_structure_type:
958 case DW_TAG_class_type:
959 case DW_TAG_union_type:
960 case DW_TAG_typedef:
961 case DW_TAG_const_type:
962 case DW_TAG_volatile_type:
963 case DW_TAG_restrict_type:
964 case DW_TAG_atomic_type:
965 case DW_TAG_array_type:
966 case DW_TAG_pointer_type:
967 case DW_TAG_reference_type:
968 case DW_TAG_rvalue_reference_type:
969 case DW_TAG_ptr_to_member_type:
970 case DW_TAG_unspecified_type:
971 case DW_TAG_inlined_subroutine:
972 case DW_TAG_subprogram:
973 case DW_TAG_variable:
974 case DW_TAG_call_site:
975 case DW_TAG_GNU_call_site:
976 // TODO: Do not leak local types outside this scope.
977 // TODO: It would be better to not process any
978 // information that is function local but there is a dangling
979 // reference Clang bug.
980 Process(child);
981 break;
982 case DW_TAG_imported_declaration:
983 case DW_TAG_imported_module:
984 // For now information there is useless for ABI monitoring, but we
985 // need to check that there is no missing information in descendants.
986 CheckNoChildren(child);
987 break;
988 case DW_TAG_template_type_parameter:
989 case DW_TAG_template_value_parameter:
990 case DW_TAG_GNU_template_template_param:
991 case DW_TAG_GNU_template_parameter_pack:
992 // We just skip these as neither GCC nor Clang seem to use them
993 // properly (resulting in no references to such DIEs).
994 break;
995 case DW_TAG_GNU_formal_parameter_pack:
996 // https://wiki.dwarfstd.org/C++0x_Variadic_templates.md
997 //
998 // As per this (rejected) proposal, GCC includes parameters as
999 // children of this DIE.
1000 for (auto& child2 : child.GetChildren()) {
1001 if (child2.GetTag() == DW_TAG_formal_parameter) {
1002 parameters.push_back(GetReferredTypeId(GetReferredType(child2)));
1003 }
1004 }
1005 break;
1006 default:
1007 Die() << "Unexpected tag for child of function: " << Hex(child_tag)
1008 << ", " << EntryToString(child);
1009 }
1010 }
1011
1012 return Subprogram{.node = Function(return_type_id, parameters),
1013 .name_with_context = GetNameWithContext(entry),
1014 .linkage_name = MaybeGetLinkageName(version_, entry),
1015 .address = entry.MaybeGetAddress(DW_AT_low_pc),
1016 .external = entry.GetFlag(DW_AT_external)};
1017 }
1018
1019 // Allocate or get already allocated STG Id for Entry.
GetIdForEntry(Entry & entry)1020 Id GetIdForEntry(Entry& entry) {
1021 const auto offset = entry.GetOffset();
1022 const auto [it, emplaced] = id_map_.emplace(offset, Id(-1));
1023 if (emplaced) {
1024 it->second = graph_.Allocate();
1025 }
1026 return it->second;
1027 }
1028
1029 // Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
1030 // because it is normal for DWARF (5.2 Unspecified Type Entries).
GetReferredTypeId(std::optional<Entry> referred_type)1031 Id GetReferredTypeId(std::optional<Entry> referred_type) {
1032 return referred_type ? GetIdForEntry(*referred_type) : void_id_;
1033 }
1034
1035 // Wrapper for GetIdForEntry to allow lvalues.
GetReferredTypeId(Entry referred_type)1036 Id GetReferredTypeId(Entry referred_type) {
1037 return GetIdForEntry(referred_type);
1038 }
1039
1040 // Populate Id from method above with processed Node.
1041 template <typename Node, typename... Args>
AddProcessedNode(Entry & entry,Args &&...args)1042 Id AddProcessedNode(Entry& entry, Args&&... args) {
1043 const Id id = GetIdForEntry(entry);
1044 graph_.Set<Node>(id, std::forward<Args>(args)...);
1045 return id;
1046 }
1047
AddNamedTypeNode(Id id)1048 void AddNamedTypeNode(Id id) {
1049 result_.named_type_ids.push_back(id);
1050 }
1051
1052 Graph& graph_;
1053 Id void_id_;
1054 Id variadic_id_;
1055 bool is_little_endian_binary_;
1056 const std::unique_ptr<Filter>& file_filter_;
1057 Types& result_;
1058 std::unordered_map<Dwarf_Off, Id> id_map_;
1059 std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
1060 std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;
1061
1062 // Current scope.
1063 Scope scope_;
1064 int version_;
1065 dwarf::Files files_;
1066 uint64_t language_;
1067 };
1068
Process(Handler & dwarf,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Graph & graph)1069 Types Process(Handler& dwarf, bool is_little_endian_binary,
1070 const std::unique_ptr<Filter>& file_filter, Graph& graph) {
1071 Types result;
1072 const Id void_id = graph.Add<Special>(Special::Kind::VOID);
1073 const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
1074 // TODO: Scope Processor to compilation units?
1075 Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
1076 file_filter, result);
1077 for (auto& compilation_unit : dwarf.GetCompilationUnits()) {
1078 // Could fetch top-level attributes like compiler here.
1079 processor.ProcessCompilationUnit(compilation_unit);
1080 }
1081 processor.CheckUnresolvedIds();
1082 processor.ResolveSymbolSpecifications();
1083
1084 return result;
1085 }
1086
1087 } // namespace dwarf
1088 } // namespace stg
1089