1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <elf.h>

#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <optional>
#include <queue>
#include <string>

#include <sys/param.h>

#include "dwarf_constants.h"
#include "dwarf_info.h"
#include "elf_reader.h"

#include <berberis/base/algorithm.h>
#include <berberis/base/stringprintf.h>
#include <json/json.h>
35
36 namespace {
37
38 using berberis::StringPrintf;
39
// Kind tags for the non-base types. A tag is handed to the TypeInfo constructor
// and ends up in the "kind" field of the JSON object describing the type.
constexpr auto kKindArray = "array";
constexpr auto kKindAtomic = "atomic";
constexpr auto kKindConst = "const";
constexpr auto kKindClass = "class";
constexpr auto kKindFunction = "function";
constexpr auto kKindIncomplete = "incomplete";
constexpr auto kKindRestrict = "restrict";
constexpr auto kKindStruct = "struct";
constexpr auto kKindUnion = "union";
constexpr auto kKindVolatile = "volatile";
50
51 class JsonNameValue {
52 public:
JsonNameValue(const std::string name,const Json::Value & value)53 JsonNameValue(const std::string name, const Json::Value& value) : name_(name), value_(value) {}
name() const54 const std::string& name() const { return name_; }
55
value() const56 const Json::Value& value() const { return value_; }
57
58 private:
59 std::string name_;
60 Json::Value value_;
61 };
62
63 class TypeInfo {
64 public:
TypeInfo(uint64_t id,const char * kind,const std::string & name,uint64_t size_bits)65 TypeInfo(uint64_t id, const char* kind, const std::string& name, uint64_t size_bits)
66 : id_(id), kind_(kind), name_(name), size_bits_(size_bits) {}
~TypeInfo()67 virtual ~TypeInfo() {}
68
id() const69 uint64_t id() const { return id_; }
70
kind() const71 const char* kind() const { return kind_; }
name() const72 const std::string& name() const { return name_; }
size() const73 uint64_t size() const { return size_bits_; }
74
75 virtual JsonNameValue GetJson() const = 0;
76
EqualsTo(const TypeInfo * other) const77 virtual bool EqualsTo(const TypeInfo* other) const {
78 // This is default implementation - should work for most TypeInfos
79 return kind_ == other->kind_ && size_bits_ == other->size_bits_ && name_ == other->name_;
80 }
81
82 // It usually is just a name but for classes and function it represents just
83 // the class or function name without 'class'/'func' prefix. Used to correctly
84 // resolve names for nested classes/unions/...
base_name() const85 virtual const std::string& base_name() const { return name(); }
86
87 private:
88 uint64_t id_;
89
90 protected:
91 const char* kind_;
92 std::string name_;
93 uint64_t size_bits_;
94
95 private:
96 DISALLOW_IMPLICIT_CONSTRUCTORS(TypeInfo);
97 };
98
// Prints the command line synopsis of the tool to stdout.
// |argv0| is the program name shown in the synopsis.
void usage(const char* argv0) {
  fprintf(stdout, "usage: %s [--filter=<path_to_filter_file>] <path_to_elf_file>\n", argv0);
}
102
// Reports a fatal error: writes the printf-style formatted message followed by
// a newline to stderr and terminates the process with exit code 1.
__attribute__((__noreturn__)) void error(const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);

  fputc('\n', stderr);
  exit(1);
}
111
// Writes the printf-style formatted message followed by a newline to stderr.
// Unlike error() this function returns to the caller.
void warning(const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);

  fputc('\n', stderr);
}
119
// TODO: This function does not provide the guarantees necessary for being
// able to compare anonymous types by name.
//
// There are a number of situations where a type does not have a name:
//   1. Anonymous function pointers.
//   2. Unnamed unions and structs inside other unions or structs.
// The current approach is to use a global counter.
//
// Note that there is no guarantee that these names are going to be the same
// for a library compiled for different architectures.
//
// Returns "#<n>", where <n> is 1 for the first call and grows by one with
// every further call.
std::string GenerateGlobalAnonName() {
  static size_t counter = 0;
  // std::to_string() is used instead of a printf-style format: the previously
  // used "%zd" is the conversion for a signed value and does not match size_t.
  return "#" + std::to_string(++counter);
}
134
135 class TypeInfoFunction : public TypeInfo {
136 public:
TypeInfoFunction(uint64_t id,const std::string & name,const std::string & base_name)137 TypeInfoFunction(uint64_t id, const std::string& name, const std::string& base_name)
138 : TypeInfo(id, kKindFunction, name, 0),
139 base_name_(base_name),
140 has_variadic_args_(false),
141 is_virtual_method_(false) {}
142
~TypeInfoFunction()143 virtual ~TypeInfoFunction() {}
144
SetReturnType(const std::string & return_type)145 void SetReturnType(const std::string& return_type) { return_type_ = return_type; }
146
SetHasVariadicArgs(bool has_variadic_args)147 void SetHasVariadicArgs(bool has_variadic_args) { has_variadic_args_ = has_variadic_args; }
148
SetCallingConvention(const std::string & calling_convention)149 void SetCallingConvention(const std::string& calling_convention) {
150 calling_convention_ = calling_convention;
151 }
152
AddParam(const std::string & param_name)153 void AddParam(const std::string& param_name) { params_.push_back(param_name); }
154
EqualsTo(const TypeInfo *) const155 virtual bool EqualsTo(const TypeInfo*) const override {
156 // This method is not applicable for function types.
157 return false;
158 }
159
GetJson() const160 virtual JsonNameValue GetJson() const override {
161 Json::Value obj(Json::objectValue);
162
163 obj["has_variadic_args"] = has_variadic_args_;
164 obj["is_virtual_method"] = is_virtual_method_;
165 obj["kind"] = kind_;
166 Json::Value params_array(Json::arrayValue);
167 for (const auto& param : params_) {
168 params_array.append(param);
169 }
170 obj["params"] = params_array;
171 obj["return_type"] = return_type_;
172 obj["size"] = Json::UInt64(size_bits_);
173
174 if (!calling_convention_.empty()) {
175 obj["calling_convention"] = calling_convention_;
176 }
177
178 return JsonNameValue(name_, obj);
179 }
180
base_name() const181 virtual const std::string& base_name() const override { return base_name_; }
182
183 private:
184 std::string base_name_;
185 bool has_variadic_args_;
186 bool is_virtual_method_;
187 std::string return_type_;
188 std::string calling_convention_;
189 std::vector<std::string> params_;
190 };
191
// The flavours of indirection handled by ParseReference().
enum class ReferenceType {
  pointer,           // "T*"
  reference,         // "T&"
  rvalue_reference,  // "T&&"
};
193
194 class TypeInfoReference : public TypeInfo {
195 public:
TypeInfoReference(uint64_t id,const char * kind,const std::string & name,uint64_t size_bits,const std::string & pointee_type)196 TypeInfoReference(uint64_t id,
197 const char* kind,
198 const std::string& name,
199 uint64_t size_bits,
200 const std::string& pointee_type)
201 : TypeInfo(id, kind, name, size_bits), pointee_type_(pointee_type) {}
202
~TypeInfoReference()203 virtual ~TypeInfoReference() {}
204
GetJson() const205 virtual JsonNameValue GetJson() const override {
206 Json::Value obj(Json::objectValue);
207
208 obj["kind"] = kind_;
209 obj["pointee_type"] = pointee_type_;
210 obj["size"] = Json::UInt64(size_bits_);
211
212 return JsonNameValue(name_, obj);
213 }
214
215 private:
216 std::string pointee_type_;
217 };
218
219 class TypeInfoModifier : public TypeInfo {
220 public:
TypeInfoModifier(uint64_t id,const char * kind,const std::string & name,uint64_t size_bits,const std::string & base_type)221 TypeInfoModifier(uint64_t id,
222 const char* kind,
223 const std::string& name,
224 uint64_t size_bits,
225 const std::string& base_type)
226 : TypeInfo(id, kind, name, size_bits), base_type_(base_type) {}
227
~TypeInfoModifier()228 virtual ~TypeInfoModifier() {}
229
GetJson() const230 virtual JsonNameValue GetJson() const override {
231 Json::Value obj(Json::objectValue);
232
233 obj["kind"] = kind_;
234 obj["base_type"] = base_type_;
235 obj["size"] = Json::UInt64(size_bits_);
236
237 return JsonNameValue(name_, obj);
238 }
239
240 private:
241 std::string base_type_;
242 };
243
244 class TypeInfoIncomplete : public TypeInfo {
245 public:
TypeInfoIncomplete(uint64_t id,const std::string & name,const std::string & base_name)246 TypeInfoIncomplete(uint64_t id, const std::string& name, const std::string& base_name)
247 : TypeInfo(id, kKindIncomplete, name, 0), base_name_(base_name) {}
~TypeInfoIncomplete()248 virtual ~TypeInfoIncomplete() {}
249
GetJson() const250 virtual JsonNameValue GetJson() const override {
251 Json::Value obj(Json::objectValue);
252
253 obj["kind"] = kind_;
254
255 return JsonNameValue(name_, obj);
256 }
257
base_name() const258 virtual const std::string& base_name() const override { return base_name_; }
259
260 private:
261 std::string base_name_;
262 };
263
264 class TypeInfoVoid : public TypeInfoIncomplete {
265 public:
TypeInfoVoid()266 TypeInfoVoid() : TypeInfoIncomplete(0, "void", "void") {}
~TypeInfoVoid()267 virtual ~TypeInfoVoid() {}
268 };
269
270 class TypeInfoBase : public TypeInfo {
271 public:
TypeInfoBase(uint64_t id,const std::string & name,uint64_t size_bits,const char * kind,bool is_signed)272 TypeInfoBase(uint64_t id,
273 const std::string& name,
274 uint64_t size_bits,
275 const char* kind,
276 bool is_signed)
277 : TypeInfo(id, kind, name, size_bits), is_signed_(is_signed) {}
~TypeInfoBase()278 virtual ~TypeInfoBase() {}
279
GetJson() const280 virtual JsonNameValue GetJson() const override {
281 Json::Value obj(Json::objectValue);
282
283 obj["kind"] = kind_;
284 obj["signed"] = is_signed_;
285 obj["size"] = Json::UInt64(size_bits_);
286
287 return JsonNameValue(name_, obj);
288 }
289
290 private:
291 bool is_signed_;
292 };
293
294 class TypeInfoArray : public TypeInfo {
295 public:
TypeInfoArray(uint64_t id,const std::string & name,uint64_t size_bits,const std::string & element_type)296 TypeInfoArray(uint64_t id,
297 const std::string& name,
298 uint64_t size_bits,
299 const std::string& element_type)
300 : TypeInfo(id, kKindArray, name, size_bits), element_type_(element_type) {}
~TypeInfoArray()301 virtual ~TypeInfoArray() {}
302
GetJson() const303 virtual JsonNameValue GetJson() const override {
304 Json::Value obj(Json::objectValue);
305
306 obj["kind"] = kind_;
307 obj["element_type"] = element_type_;
308 obj["size"] = Json::UInt64(size_bits_);
309
310 return JsonNameValue(name_, obj);
311 }
312
313 private:
314 std::string element_type_;
315 };
316
// A single data member of a class/struct/union: its name, the name of its type
// and its offset (in bits) inside of the enclosing type. Movable, not copyable.
class TypeInfoClassField {
 public:
  TypeInfoClassField() : offset_bits_(0) {}
  // |offset_bits| is 64 bits wide, same as the member it initializes and the
  // getter that returns it, so that large offsets are not truncated.
  TypeInfoClassField(const std::string& name, const std::string& type_name, uint64_t offset_bits)
      : name_(name), type_name_(type_name), offset_bits_(offset_bits) {}

  TypeInfoClassField(TypeInfoClassField&& that) = default;
  TypeInfoClassField& operator=(TypeInfoClassField&& that) = default;

  const std::string& name() const { return name_; }
  const std::string& type_name() const { return type_name_; }
  uint64_t offset_bits() const { return offset_bits_; }

 private:
  std::string name_;
  std::string type_name_;
  uint64_t offset_bits_;

  friend bool operator!=(const TypeInfoClassField& one, const TypeInfoClassField& two);

  // Copying is disabled.
  TypeInfoClassField(const TypeInfoClassField&) = delete;
  TypeInfoClassField& operator=(const TypeInfoClassField&) = delete;
};
339
operator !=(const TypeInfoClassField & one,const TypeInfoClassField & two)340 bool operator!=(const TypeInfoClassField& one, const TypeInfoClassField& two) {
341 return one.offset_bits_ != two.offset_bits_ || one.name_ != two.name_ /* ||
342 one.type_name_ != two.type_name_*/
343 ;
344 }
345
346 class TypeInfoClass : public TypeInfo {
347 public:
TypeInfoClass(uint64_t id,const char * kind,const std::string & name,uint64_t size_bits,const std::string & base_name)348 TypeInfoClass(uint64_t id,
349 const char* kind,
350 const std::string& name,
351 uint64_t size_bits,
352 const std::string& base_name)
353 : TypeInfo(id, kind, name, size_bits), base_name_(base_name) {}
~TypeInfoClass()354 virtual ~TypeInfoClass() {}
355
AddField(const std::string & name,const std::string & type_name,uint32_t offset_bits)356 void AddField(const std::string& name, const std::string& type_name, uint32_t offset_bits) {
357 fields_.push_back(TypeInfoClassField(name, type_name, offset_bits));
358 }
359
AddInheritance(const std::string & name)360 void AddInheritance(const std::string& name) { inheritance_types_.push_back(name); }
361
EqualsTo(const TypeInfo * other) const362 virtual bool EqualsTo(const TypeInfo* other) const override {
363 if (!TypeInfo::EqualsTo(other)) {
364 return false;
365 }
366
367 auto other_class = static_cast<const TypeInfoClass*>(other);
368
369 if (fields_.size() != other_class->fields_.size()) {
370 return false;
371 }
372
373 for (size_t i = 0; i < fields_.size(); ++i) {
374 if (fields_[i] != other_class->fields_[i]) {
375 return false;
376 }
377 }
378
379 return true;
380 }
381
GetJson() const382 virtual JsonNameValue GetJson() const override {
383 Json::Value fields(Json::arrayValue);
384
385 for (auto& field : fields_) {
386 Json::Value field_obj(Json::objectValue);
387 field_obj["name"] = field.name();
388 field_obj["offset"] = Json::UInt64(field.offset_bits());
389 field_obj["type"] = field.type_name();
390 fields.append(field_obj);
391 }
392
393 Json::Value inheritance_types_array(Json::arrayValue);
394 for (const auto& inheritance_type : inheritance_types_) {
395 inheritance_types_array.append(inheritance_type);
396 }
397
398 Json::Value obj(Json::objectValue);
399
400 obj["inheritance"] = inheritance_types_array;
401 obj["fields"] = fields;
402 obj["kind"] = kind_;
403 obj["size"] = Json::UInt64(size_bits_);
404
405 return JsonNameValue(name_, obj);
406 }
407
base_name() const408 virtual const std::string& base_name() const override { return base_name_; }
409
410 private:
411 std::string base_name_;
412 std::vector<TypeInfoClassField> fields_;
413 std::vector<std::string> inheritance_types_;
414 };
415
416 // Returns nullptr for 'void'
GetAtTypeDie(const nogrod::DwarfDie * die,const nogrod::DwarfInfo * info)417 const nogrod::DwarfDie* GetAtTypeDie(const nogrod::DwarfDie* die, const nogrod::DwarfInfo* info) {
418 auto offset = die->GetUint64Attribute(DW_AT_type);
419 if (offset) {
420 auto target_die = info->GetDieByOffset(offset.value());
421 if (target_die == nullptr) {
422 error("Couldn't find die for type of die at offset 0x%" PRIx64 " (DW_AT_type=0x%" PRIx64 ")",
423 die->offset(),
424 offset.value());
425 }
426
427 return target_die;
428 }
429
430 // If there is no DW_AT_type check DW_AT_specification
431 auto specification_offset = die->GetUint64Attribute(DW_AT_specification);
432 if (!specification_offset) { // this is 'void'
433 return nullptr;
434 }
435
436 auto specification_die = info->GetDieByOffset(specification_offset.value());
437 if (specification_die == nullptr) {
438 error("Couldn't find die for specification of die at offset 0x%" PRIx64
439 " (DW_AT_type=0x%" PRIx64 ")",
440 die->offset(),
441 specification_offset.value());
442 }
443
444 return GetAtTypeDie(specification_die, info);
445 }
446
ParseBaseType(const nogrod::DwarfDie * die)447 std::unique_ptr<TypeInfo> ParseBaseType(const nogrod::DwarfDie* die) {
448 auto encoding_attr = die->GetUint64Attribute(DW_AT_encoding);
449 if (!encoding_attr) {
450 error("Couldn't find DW_AT_encoding for DW_TAG_base_type at offset 0x%" PRIx64, die->offset());
451 }
452 uint64_t encoding = encoding_attr.value();
453
454 auto size_attr = die->GetUint64Attribute(DW_AT_byte_size);
455 uint64_t size = 0;
456
457 if ((encoding == DW_ATE_signed_char || encoding == DW_ATE_unsigned_char) && !size_attr) {
458 size = 1;
459 } else {
460 if (!size_attr) {
461 error("Couldn't find DW_AT_byte_size for DW_TAG_base_type at offset 0x%" PRIx64,
462 die->offset());
463 }
464 size = size_attr.value();
465 }
466
467 if (size > 128 || !powerof2(size)) {
468 error("Unsupported size %" PRId64 " for DW_TAG_base_type at offset 0x%" PRIx64
469 " - must be no greater than 128 and a power of 2",
470 size,
471 die->offset());
472 }
473
474 bool is_signed = false;
475 const char* kind;
476 const char* prefix;
477
478 switch (encoding) {
479 case DW_ATE_signed:
480 kind = "int";
481 prefix = "int";
482 is_signed = true;
483 break;
484 case DW_ATE_unsigned:
485 case DW_ATE_boolean:
486 kind = "int";
487 prefix = "unsigned int";
488 is_signed = false;
489 break;
490 case DW_ATE_float:
491 kind = "float";
492 prefix = "float";
493 is_signed = true;
494 break;
495 case DW_ATE_signed_char:
496 kind = "char";
497 prefix = "char";
498 is_signed = true;
499 break;
500 case DW_ATE_unsigned_char:
501 case DW_ATE_UTF:
502 kind = "char";
503 prefix = "unsigned char";
504 is_signed = false;
505 break;
506 default:
507 error("Unsupported DW_AT_encoding=0x%" PRIx64 " for DW_TAG_base_type at offset 0x%" PRIx64,
508 encoding,
509 die->offset());
510 }
511
512 std::string name = StringPrintf("%s%" PRId64, prefix, size * CHAR_BIT);
513
514 return std::unique_ptr<TypeInfoBase>(
515 new TypeInfoBase(die->offset(), name, size * CHAR_BIT, kind, is_signed));
516 }
517
ParseEnumType(const nogrod::DwarfDie * die)518 std::unique_ptr<TypeInfo> ParseEnumType(const nogrod::DwarfDie* die) {
519 auto size_attr = die->GetUint64Attribute(DW_AT_byte_size);
520 if (!size_attr) {
521 error("Couldn't find DW_AT_byte_size for DW_TAG_base_type at offset 0x%" PRIx64, die->offset());
522 }
523
524 uint64_t size = size_attr.value() * CHAR_BIT;
525
526 std::string name = StringPrintf("%s%" PRId64, "unsigned int", size);
527
528 return std::unique_ptr<TypeInfoBase>(new TypeInfoBase(die->offset(), name, size, "int", false));
529 }
530
GetDieName(const nogrod::DwarfDie * die)531 std::optional<std::string> GetDieName(const nogrod::DwarfDie* die) {
532 auto die_name = die->GetStringAttribute(DW_AT_linkage_name);
533
534 if (!die_name) {
535 die_name = die->GetStringAttribute(DW_AT_name);
536 }
537
538 return die_name;
539 }
540
541 const TypeInfo* ParseDie(const nogrod::DwarfDie* start,
542 const nogrod::DwarfDie* referenced_by,
543 const nogrod::DwarfInfo* dwarf_info,
544 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types);
545
ParseClass(const char * kind,const nogrod::DwarfDie * die,const nogrod::DwarfDie * referenced_by,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)546 const TypeInfo* ParseClass(const char* kind,
547 const nogrod::DwarfDie* die,
548 const nogrod::DwarfDie* referenced_by,
549 const nogrod::DwarfInfo* dwarf_info,
550 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
551 auto die_name = GetDieName(die);
552 auto die_tag = die->tag();
553 // Use typedef name in case if this class is part of
554 // "typedef struct { .. } blah;" declaration
555 if (!die_name && referenced_by != nullptr && referenced_by->tag() == DW_TAG_typedef) {
556 die_name = GetDieName(referenced_by);
557 die_tag = referenced_by->tag();
558 }
559
560 std::string class_name;
561 if (die_name) {
562 class_name = die_name.value();
563 } else {
564 class_name = GenerateGlobalAnonName();
565 }
566
567 auto parent_die = die->parent();
568
569 if (parent_die->tag() == DW_TAG_structure_type || parent_die->tag() == DW_TAG_class_type ||
570 parent_die->tag() == DW_TAG_union_type) {
571 const TypeInfo* parent_type_info = ParseDie(parent_die, nullptr, dwarf_info, types);
572 CHECK(parent_type_info != nullptr);
573 class_name = StringPrintf("%s::%s", parent_type_info->base_name().c_str(), class_name.c_str());
574 }
575
576 while (parent_die->tag() == DW_TAG_namespace) {
577 // Note: if type placed in anonymous namespace is used with template, e.g.,
578 // "icu_65::MaybeStackArray<icu_65::(anonymous namespace)::LocaleAndWeight, 20>"
579 // then string "(anonymous namespace)" is used by clang. But the namespace object
580 // itself doesn't have a name. Assign name "(anonymous namespace)" for consistency.
581 static constexpr const char* kAnonymousNamespaceName = "(anonymous namespace)";
582 auto parent_die_optional_name = GetDieName(parent_die);
583 const char* parent_die_name = parent_die_optional_name
584 ? parent_die_optional_name.value().c_str()
585 : kAnonymousNamespaceName;
586 class_name = StringPrintf("%s::%s", parent_die_name, class_name.c_str());
587 parent_die = parent_die->parent();
588 }
589
590 std::string name = StringPrintf("%s %s", kind, class_name.c_str());
591
592 // TODO: align????
593 bool incomplete = die->GetBoolAttributeOr(DW_AT_declaration, false);
594
595 if (incomplete) {
596 if (!die_name) {
597 warning("The incomplete type at offset 0x%" PRIx64 " referenced by \"%s\"@0x%" PRIx64
598 " is anonymous (ignoring)",
599 die->offset(),
600 referenced_by != nullptr ? GetDieName(referenced_by).value_or("<no name>").c_str()
601 : "<null>",
602 referenced_by != nullptr ? referenced_by->offset() : 0);
603 }
604
605 std::unique_ptr<TypeInfoIncomplete> incomplete_type_holder(
606 new TypeInfoIncomplete(die->offset(), name, class_name));
607 TypeInfoIncomplete* result = incomplete_type_holder.get();
608 (*types)[die->offset()] = std::move(incomplete_type_holder);
609 // An incomplete struct - find other dies by name and parse them too.
610 // This should solve the case where actual type is declared in another
611 // compilation unit. We could get some false positives - this is ok.
612 std::vector<const nogrod::DwarfDie*> dies = dwarf_info->FindDiesByName(class_name);
613 if (dies.empty()) {
614 warning(
615 "Couldn't find dies by name \"%s\" for incomplete type at the offset 0x%x (likely "
616 "because it had no name) - ignoring",
617 class_name.c_str(),
618 result->id());
619 }
620
621 for (auto namefellow_die : dies) {
622 // Limit to the tag of the original incomplete type
623 if (namefellow_die->tag() != die_tag) {
624 continue;
625 }
626 ParseDie(namefellow_die, nullptr, dwarf_info, types);
627 }
628 return result;
629 }
630
631 auto size = die->GetUint64Attribute(DW_AT_byte_size);
632
633 if (!size) {
634 error("No DW_AT_byte_size specified for type at offset 0x%" PRIx64, die->offset());
635 }
636
637 std::unique_ptr<TypeInfoClass> type_info_holder(
638 new TypeInfoClass(die->offset(), kind, name, size.value() * CHAR_BIT, class_name));
639 TypeInfoClass* type_info = type_info_holder.get();
640 (*types)[die->offset()] = std::move(type_info_holder);
641
642 const auto& children = die->children();
643 for (auto child : children) {
644 if (child->tag() == DW_TAG_subprogram) {
645 // TODO: is this correct way to handle these?
646 // Current implementation ignores member functions - we are going to do
647 // the same
648 continue;
649 }
650
651 // Skip nested types - they are parsed only if referenced by a DW_AT_member (see below).
652 if (child->tag() == DW_TAG_structure_type || child->tag() == DW_TAG_union_type ||
653 child->tag() == DW_TAG_class_type || child->tag() == DW_TAG_enumeration_type ||
654 child->tag() == DW_TAG_typedef) {
655 continue;
656 }
657
658 if (child->tag() == DW_TAG_inheritance) {
659 auto inheritance_die = GetAtTypeDie(child, dwarf_info);
660 CHECK(inheritance_die != nullptr); // voids are not allowed here.
661 auto inheritance_type_info = ParseDie(inheritance_die, die, dwarf_info, types);
662 type_info->AddInheritance(inheritance_type_info->name());
663 continue;
664 }
665
666 if (child->tag() == DW_TAG_template_type_parameter ||
667 child->tag() == DW_TAG_template_value_parameter ||
668 child->tag() == DW_TAG_GNU_template_parameter_pack ||
669 child->tag() == DW_TAG_GNU_template_template_param) {
670 // These types do not affect struct layout unless they are used
671 // for members. This is why we should probably ignore them here.
672 // auto type_die = GetAtTypeDie(child, dwarf_info);
673 // ParseDie(type_die, dwarf_info, types);
674 continue;
675 }
676
677 if (child->tag() != DW_TAG_member) { // see if this is the case...
678 error("Unexpected tag 0x%x for the die at offset 0x%" PRIx64 ", expected DW_TAG_member",
679 child->tag(),
680 child->offset());
681 }
682
683 if (child->GetBoolAttributeOr(DW_AT_external, false)) {
684 // DW_AT_external is dwarvish for static member
685 continue;
686 }
687
688 auto member_die = GetAtTypeDie(child, dwarf_info);
689 CHECK(member_die != nullptr);
690 auto member_type_info = ParseDie(member_die, die, dwarf_info, types);
691
692 auto name = child->GetStringAttribute(DW_AT_name);
693
694 // Nested unions and structs may not have a name.
695 if (!name && member_die->tag() != DW_TAG_union_type &&
696 member_die->tag() != DW_TAG_structure_type) {
697 error("DW_AT_name is not set for the die at offset 0x%" PRIx64, child->offset());
698 }
699
700 std::string type_name = member_type_info->name();
701
702 // TODO: handle bit offset
703 auto offset = child->GetUint64AttributeOr(DW_AT_data_member_location, 0);
704 type_info->AddField(name.value_or(""), type_name, offset * CHAR_BIT);
705 }
706
707 // is_polymorphic??
708
709 return type_info;
710 }
711
ParseFunction(const nogrod::DwarfDie * die,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)712 const TypeInfo* ParseFunction(const nogrod::DwarfDie* die,
713 const nogrod::DwarfInfo* dwarf_info,
714 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
715 auto die_name = GetDieName(die);
716 if (!die_name && die->tag() != DW_TAG_subroutine_type) {
717 error("Couldn't resolve name for die at offset=0x%" PRIx64, die->offset());
718 }
719
720 std::string function_name = die_name ? die_name.value() : GenerateGlobalAnonName();
721
722 std::string name = StringPrintf("func %s", function_name.c_str());
723
724 std::unique_ptr<TypeInfoFunction> type_info_holder(
725 new TypeInfoFunction(die->offset(), name, function_name));
726 TypeInfoFunction* type_info = type_info_holder.get();
727 (*types)[die->offset()] = std::move(type_info_holder);
728
729 auto return_die = GetAtTypeDie(die, dwarf_info);
730 type_info->SetReturnType(ParseDie(return_die, die, dwarf_info, types)->name());
731
732 // This is special case of hard-fp (AAPCS_VFP)
733 if (die->GetUint64AttributeOr(DW_AT_calling_convention, 0) == DW_CC_LLVM_AAPCS_VFP) {
734 type_info->SetCallingConvention("aapcs-vfp");
735 }
736
737 // parse parameters
738 const auto& children = die->children();
739 for (auto child : children) {
740 if (child->tag() == DW_TAG_formal_parameter) {
741 auto param_die = GetAtTypeDie(child, dwarf_info);
742 // presumably we cannot have void formal parameter... DW_AT_type is
743 // required here
744 CHECK(param_die != nullptr); // FAIL_IF?
745 type_info->AddParam(ParseDie(param_die, die, dwarf_info, types)->name());
746 } else if (child->tag() == DW_TAG_unspecified_parameters) {
747 type_info->SetHasVariadicArgs(true);
748 break; // No more formal_parameters after this. TODO: replace with stricter check maybe?
749 }
750 }
751
752 return type_info;
753 }
754
ParseReference(const ReferenceType reference_type,const nogrod::DwarfDie * die,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)755 std::unique_ptr<TypeInfo> ParseReference(
756 const ReferenceType reference_type,
757 const nogrod::DwarfDie* die,
758 const nogrod::DwarfInfo* dwarf_info,
759 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
760 auto referenced_die = GetAtTypeDie(die, dwarf_info);
761 std::string referenced_type_name = ParseDie(referenced_die, die, dwarf_info, types)->name();
762 std::string name = referenced_type_name;
763 const char* kind = nullptr;
764
765 switch (reference_type) {
766 case ReferenceType::pointer:
767 name += "*";
768 kind = "pointer";
769 break;
770 case ReferenceType::reference:
771 name += "&";
772 kind = "reference";
773 break;
774 case ReferenceType::rvalue_reference:
775 name += "&&";
776 kind = "rvalue_reference";
777 break;
778 }
779
780 return std::make_unique<TypeInfoReference>(
781 die->offset(),
782 kind,
783 name,
784 die->compilation_unit_header()->address_size() * CHAR_BIT,
785 referenced_type_name);
786 }
787
ParseModifier(const char * kind,const nogrod::DwarfDie * die,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)788 std::unique_ptr<TypeInfo> ParseModifier(
789 const char* kind,
790 const nogrod::DwarfDie* die,
791 const nogrod::DwarfInfo* dwarf_info,
792 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
793 // The only field we need is base_type
794 auto base_die = GetAtTypeDie(die, dwarf_info);
795 auto base_type = ParseDie(base_die, die, dwarf_info, types);
796 std::string base_type_name = base_type->name();
797 uint64_t base_type_size = base_type->size();
798
799 std::string name = StringPrintf("%s %s", base_type_name.c_str(), kind);
800
801 return std::make_unique<TypeInfoModifier>(
802 die->offset(), kind, name, base_type_size, base_type_name);
803 }
804
ParseArray(const nogrod::DwarfDie * die,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)805 std::unique_ptr<TypeInfo> ParseArray(
806 const nogrod::DwarfDie* die,
807 const nogrod::DwarfInfo* dwarf_info,
808 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
809 uint64_t count = 0;
810
811 auto element_die = GetAtTypeDie(die, dwarf_info);
812 if (element_die == nullptr) {
813 error("'void' cannot be element type of an array (die at offset 0x%" PRIx64 ")", die->offset());
814 }
815
816 auto element_type = ParseDie(element_die, die, dwarf_info, types);
817
818 auto children = die->children();
819
820 std::string name = element_type->name();
821
822 for (auto child : die->children()) {
823 if (child->tag() != DW_TAG_subrange_type) {
824 error("Unexpected tag 0x%x for the die at offset 0x%" PRIx64
825 ", expected DW_TAG_subrange_type",
826 child->tag(),
827 child->offset());
828 }
829
830 auto count_attr = child->GetUint64Attribute(DW_AT_count);
831 if (count_attr) {
832 count = count_attr.value();
833 } else { // use DW_AT_upper_bound/lower_bound
834 count = child->GetUint64AttributeOr(DW_AT_upper_bound, 0) -
835 child->GetUint64AttributeOr(DW_AT_lower_bound, 0) + 1;
836 }
837
838 name += StringPrintf("[%" PRId64 "]", count);
839 }
840
841 return std::make_unique<TypeInfoArray>(
842 die->offset(), name, count * element_type->size(), element_type->name());
843 }
844
ParseUnspecifiedType(const nogrod::DwarfDie * die)845 std::unique_ptr<TypeInfo> ParseUnspecifiedType(const nogrod::DwarfDie* die) {
846 // The only unspecified_type we support is nullptr_t
847 auto die_name = GetDieName(die);
848 if (!die_name) {
849 error("Couldn't resolve name for die at offset=0x%" PRIx64, die->offset());
850 }
851
852 if (die_name.value() != "decltype(nullptr)") {
853 error("Unspecified type \"%s\" at offset 0x%" PRIx64
854 " is not supported "
855 "(the only supported unspecified type is nullptr_t)",
856 die_name.value().c_str(),
857 die->offset());
858 }
859
860 return std::make_unique<TypeInfoBase>(die->offset(), die_name.value(), 32, "nullptr_t", false);
861 }
862
ParseDie(const nogrod::DwarfDie * die,const nogrod::DwarfDie * referenced_by,const nogrod::DwarfInfo * dwarf_info,std::unordered_map<uint64_t,std::unique_ptr<TypeInfo>> * types)863 const TypeInfo* ParseDie(const nogrod::DwarfDie* die,
864 const nogrod::DwarfDie* referenced_by,
865 const nogrod::DwarfInfo* dwarf_info,
866 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>>* types) {
867 if (die == nullptr) {
868 auto it = types->find(0);
869 if (it != types->end()) {
870 return it->second.get();
871 } else {
872 std::unique_ptr<TypeInfo> void_type(new TypeInfoVoid());
873 TypeInfo* result = void_type.get();
874 (*types)[0] = std::move(void_type);
875 return result;
876 }
877 }
878
879 auto it = types->find(die->offset());
880 if (it != types->end()) {
881 return it->second.get();
882 }
883
884 std::unique_ptr<TypeInfo> type_info;
885
886 switch (die->tag()) {
887 case DW_TAG_subprogram:
888 case DW_TAG_subroutine_type:
889 case DW_TAG_label:
890 return ParseFunction(die, dwarf_info, types);
891 case DW_TAG_pointer_type:
892 case DW_TAG_ptr_to_member_type:
893 type_info = ParseReference(ReferenceType::pointer, die, dwarf_info, types);
894 break;
895 case DW_TAG_reference_type:
896 type_info = ParseReference(ReferenceType::reference, die, dwarf_info, types);
897 break;
898 case DW_TAG_rvalue_reference_type:
899 type_info = ParseReference(ReferenceType::rvalue_reference, die, dwarf_info, types);
900 break;
901 case DW_TAG_atomic_type:
902 type_info = ParseModifier(kKindAtomic, die, dwarf_info, types);
903 break;
904 case DW_TAG_const_type:
905 type_info = ParseModifier(kKindConst, die, dwarf_info, types);
906 break;
907 case DW_TAG_restrict_type:
908 type_info = ParseModifier(kKindRestrict, die, dwarf_info, types);
909 break;
910 case DW_TAG_volatile_type:
911 type_info = ParseModifier(kKindVolatile, die, dwarf_info, types);
912 break;
913 case DW_TAG_typedef: {
914 auto typedef_type = GetAtTypeDie(die, dwarf_info);
915 return ParseDie(typedef_type, die, dwarf_info, types);
916 }
917 case DW_TAG_structure_type:
918 return ParseClass(kKindStruct, die, referenced_by, dwarf_info, types);
919 case DW_TAG_class_type:
920 return ParseClass(kKindClass, die, referenced_by, dwarf_info, types);
921 case DW_TAG_union_type:
922 return ParseClass(kKindUnion, die, referenced_by, dwarf_info, types);
923 case DW_TAG_base_type:
924 type_info = ParseBaseType(die);
925 break;
926 case DW_TAG_enumeration_type:
927 type_info = ParseEnumType(die);
928 break;
929 case DW_TAG_unspecified_type:
930 type_info = ParseUnspecifiedType(die);
931 break;
932 case DW_TAG_array_type:
933 type_info = ParseArray(die, dwarf_info, types);
934 break;
935 default:
936 error("Unsupported die tag: 0x%x at the offset 0x%x", die->tag(), die->offset());
937 }
938
939 CHECK(type_info);
940
941 const TypeInfo* result = type_info.get();
942 (*types)[die->offset()] = std::move(type_info);
943 return result;
944 }
945
IsModifierType(const TypeInfo * type)946 bool IsModifierType(const TypeInfo* type) {
947 std::string kind = type->kind();
948 return kind == kKindConst || kind == kKindVolatile || kind == kKindRestrict;
949 }
950
IsArrayType(const TypeInfo * type)951 bool IsArrayType(const TypeInfo* type) {
952 return type->kind() == kKindArray;
953 }
954
warning_too_many_dies(const std::string & symbol_name,const std::vector<const nogrod::DwarfDie * > & dies)955 void warning_too_many_dies(const std::string& symbol_name,
956 const std::vector<const nogrod::DwarfDie*>& dies) {
957 std::string offsets;
958 for (auto die : dies) {
959 offsets += StringPrintf("0x%" PRIx64 " ", die->offset());
960 }
961
962 warning("Too many DIEs for %s - offsets=[ %s] - will consider only the first one",
963 symbol_name.c_str(),
964 offsets.c_str());
965 }
966
error_unsuccessful_dedup(const std::string & type_name,const std::vector<const TypeInfo * > & types)967 __attribute__((__noreturn__)) void error_unsuccessful_dedup(
968 const std::string& type_name,
969 const std::vector<const TypeInfo*>& types) {
970 std::string type_infos;
971 for (auto type : types) {
972 type_infos += StringPrintf("(id=0x%" PRIx64 ", kind=\'%s\', name='%s', size=%" PRId64 ") ",
973 type->id(),
974 type->kind(),
975 type->name().c_str(),
976 type->size());
977 }
978
979 error("Unsuccessful dedup for %s, number of types left=%d, type_infos=[%s]",
980 type_name.c_str(),
981 types.size(),
982 type_infos.c_str());
983 }
984
// Picks the DIE that should represent the symbol |name|.
//
// Among all DIEs returned by FindDiesByName(name) the first variable DIE is
// preferred, then the first subprogram DIE, then the first label DIE. Extra
// DIEs of one of those tags only trigger a warning. When none of the three
// tags is present the first DIE is returned (with a warning if there were
// several). Returns nullptr when no DIE carries that name.
const nogrod::DwarfDie* FindBestDie(const nogrod::DwarfInfo* dwarf_info, const std::string& name) {
  const std::vector<const nogrod::DwarfDie*> candidates = dwarf_info->FindDiesByName(name);
  if (candidates.empty()) {
    return nullptr;
  }

  const nogrod::DwarfDie* first_variable = nullptr;
  const nogrod::DwarfDie* first_subprogram = nullptr;
  const nogrod::DwarfDie* first_label = nullptr;

  for (const nogrod::DwarfDie* candidate : candidates) {
    switch (candidate->tag()) {
      case DW_TAG_variable:
        if (first_variable == nullptr) {
          first_variable = candidate;
        } else {
          warning("Multiple variable DIEs for %s - will consider only the first one", name.c_str());
        }
        break;
      case DW_TAG_subprogram:
        if (first_subprogram == nullptr) {
          first_subprogram = candidate;
        } else {
          warning("Multiple subprogram DIEs for %s - will consider only the first one", name.c_str());
        }
        break;
      case DW_TAG_label:
        if (first_label == nullptr) {
          first_label = candidate;
        } else {
          warning("Multiple label DIEs for %s - will consider only the first one", name.c_str());
        }
        break;
      default:
        break;
    }
  }

  // Priority order: variable, then subprogram, then label.
  for (const nogrod::DwarfDie* preferred : {first_variable, first_subprogram, first_label}) {
    if (preferred != nullptr) {
      return preferred;
    }
  }

  if (candidates.size() > 1) {
    warning_too_many_dies(name, candidates);
  }
  return candidates.front();
}
1032
// Reads the text file |name| line by line and appends every line (without its
// line terminator) to |lines|. Existing elements of |lines| are kept.
//
// Returns false when the file cannot be opened or when the stream reports an
// unrecoverable read error; lines read before such an error stay in |lines|.
// Returns true otherwise, including for an empty file.
bool ReadFileToStringVector(const char* name, std::vector<std::string>* lines) {
  std::ifstream fs(name);
  if (!fs.is_open()) {
    return false;
  }

  for (std::string line; std::getline(fs, line);) {
    lines->push_back(line);
  }

  // Hitting end-of-file only sets eofbit/failbit. badbit means the read itself
  // failed, in which case the collected lines are incomplete and the caller
  // must not treat the result as a successful read.
  return !fs.bad();
}
1044
1045 } // namespace
1046
main(int argc,const char ** argv)1047 int main(int argc, const char** argv) {
1048 const char* elf_file_name = nullptr;
1049 const char* filter_file_name = nullptr;
1050
1051 if (argc == 2) {
1052 elf_file_name = argv[1];
1053 } else if (argc == 3 && strncmp(argv[1], "--filter=", 9) == 0) {
1054 filter_file_name = argv[1] + 9;
1055 elf_file_name = argv[2];
1056 } else {
1057 usage(argv[0]);
1058 return 0;
1059 }
1060
1061 std::string error_msg;
1062
1063 std::unique_ptr<nogrod::ElfFile> elf_file = nogrod::ElfFile::Load(elf_file_name, &error_msg);
1064
1065 if (!elf_file) {
1066 error("Error loading elf-file \"%s\": %s", elf_file_name, error_msg.c_str());
1067 }
1068
1069 std::vector<std::string> names;
1070
1071 if (filter_file_name) {
1072 if (!ReadFileToStringVector(filter_file_name, &names)) {
1073 error("Error reading symbols from \"%s\"", filter_file_name);
1074 }
1075 } else {
1076 if (!elf_file->ReadExportedSymbols(&names, &error_msg)) {
1077 error("Error reading exported symbols from \"%s\": %s", elf_file_name, error_msg.c_str());
1078 }
1079 }
1080
1081 std::unique_ptr<nogrod::DwarfInfo> dwarf_info = elf_file->ReadDwarfInfo(&error_msg);
1082 if (!dwarf_info) {
1083 error("Error loading dwarf_info from \"%s\": %s", elf_file_name, error_msg.c_str());
1084 }
1085
1086 // map: type id (offset) -> type
1087 std::unordered_map<uint64_t, std::unique_ptr<TypeInfo>> types;
1088
1089 // map: symbol name -> type id (offset)
1090 std::map<std::string, uint64_t> symbols;
1091
1092 for (const auto& name : names) {
1093 const nogrod::DwarfDie* die = FindBestDie(dwarf_info.get(), name);
1094 if (die == nullptr) {
1095 warning("Couldn't find compatible DIE for %s - skipping...", name.c_str());
1096 continue;
1097 }
1098
1099 if (die->tag() == DW_TAG_subprogram || die->tag() == DW_TAG_label) {
1100 const TypeInfo* subprogram_type = ParseDie(die, nullptr, dwarf_info.get(), &types);
1101 symbols[name] = subprogram_type->id();
1102 } else if (die->tag() == DW_TAG_variable) {
1103 auto variable_type_die = GetAtTypeDie(die, dwarf_info.get());
1104 const TypeInfo* variable_type = ParseDie(variable_type_die, die, dwarf_info.get(), &types);
1105 symbols[name] = variable_type->id();
1106 } else { // Something else
1107 // TODO(random-googler): parse something else meaningfully...
1108 ParseDie(die, nullptr, dwarf_info.get(), &types);
1109 }
1110 }
1111
1112 Json::Value root(Json::objectValue);
1113 Json::Value symbols_json(Json::objectValue);
1114 for (const auto& symbol : symbols) {
1115 auto& type_name = types[symbol.second]->name();
1116 symbols_json[symbol.first]["type"] = type_name;
1117 }
1118
1119 root["symbols"] = symbols_json;
1120
1121 // Sort types by name.
1122 std::map<std::string, std::vector<const TypeInfo*>> types_by_name;
1123 for (auto& elem : types) {
1124 const TypeInfo* type_info = elem.second.get();
1125 const std::string& name = type_info->name();
1126 std::vector<const TypeInfo*>& types_list = types_by_name[name];
1127 // Remove duplicate types.
1128 bool type_info_exists = berberis::ContainsIf(
1129 types_list, [type_info](const TypeInfo* element) { return element->EqualsTo(type_info); });
1130 if (!type_info_exists) {
1131 types_list.push_back(type_info);
1132 }
1133 }
1134
1135 // Second pass
1136 for (auto& entry : types_by_name) {
1137 auto& types = entry.second;
1138 if (types.size() == 1) {
1139 continue;
1140 }
1141
1142 // Remove incomplete types
1143 // TODO: Improve this by removing all types referencing the incomplete type.
1144 // Once it is done the next step (removing modifiers and arrays with size=0)
1145 // can be removed as well.
1146 types.erase(
1147 std::remove_if(types.begin(),
1148 types.end(),
1149 [](const TypeInfo* element) { return element->kind() == kKindIncomplete; }),
1150 types.end());
1151
1152 // Remove modifier and array types with size = 0
1153 // TODO: This is mostly correct, see TODO above for details.
1154 types.erase(std::remove_if(types.begin(),
1155 types.end(),
1156 [](const TypeInfo* element) {
1157 return (IsModifierType(element) || IsArrayType(element)) &&
1158 element->size() == 0;
1159 }),
1160 types.end());
1161
1162 if (types.size() != 1) {
1163 error_unsuccessful_dedup(entry.first, types);
1164 }
1165 }
1166
1167 Json::Value types_json(Json::objectValue);
1168 for (const auto& type : types_by_name) {
1169 auto json_with_name = type.second[0]->GetJson();
1170 types_json[json_with_name.name()] = json_with_name.value();
1171 }
1172
1173 root["types"] = types_json;
1174
1175 Json::StreamWriterBuilder factory;
1176 std::unique_ptr<Json::StreamWriter> const json_writer(factory.newStreamWriter());
1177 json_writer->write(root, &std::cout);
1178
1179 return 0;
1180 }
1181