1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 //
6 // Author: Dodji Seketeli
7
8 /// @file
9 ///
10 /// This file contains the definitions of the entry points to
11 /// de-serialize an instance of @ref abigail::corpus from a file in
12 /// elf format, containing dwarf information.
13
14 #include "config.h"
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18 #include <unistd.h>
19 #include <libgen.h>
20 #include <assert.h>
21 #include <limits.h>
22 #include <elfutils/libdwfl.h>
23 #include <dwarf.h>
24 #include <algorithm>
25 #include <cmath>
26 #include <cstring>
27 #include <deque>
28 #include <list>
29 #include <memory>
30 #include <ostream>
31 #include <sstream>
32 #include <stack>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <map>
36
37 #include "abg-ir-priv.h"
38 #include "abg-suppression-priv.h"
39 #include "abg-corpus-priv.h"
40 #include "abg-elf-helpers.h"
41 #include "abg-internal.h"
42
43 // <headers defining libabigail's API go under here>
44 ABG_BEGIN_EXPORT_DECLARATIONS
45
46 #include "abg-dwarf-reader.h"
47 #include "abg-sptr-utils.h"
48 #include "abg-symtab-reader.h"
49 #include "abg-tools-utils.h"
50
51 ABG_END_EXPORT_DECLARATIONS
52 // </headers defining libabigail's API>
53
54 #ifndef UINT64_MAX
55 #define UINT64_MAX 0xffffffffffffffff
56 #endif
57
58 using std::string;
59 using namespace abigail::elf_reader;
60
61 namespace abigail
62 {
63
64 using std::cerr;
65
66 /// The namespace for the DWARF reader.
67 namespace dwarf_reader
68 {
69
70 using std::dynamic_pointer_cast;
71 using std::static_pointer_cast;
72 using std::unordered_map;
73 using std::unordered_set;
74 using std::stack;
75 using std::deque;
76 using std::list;
77 using std::map;
78
79 using namespace elf_helpers; // TODO: avoid using namespace
80
/// The origin of a DIE.
///
/// A DIE can be read from the main debug info section, from the
/// alternate debug info section or from the type unit section.
enum die_source
{
  NO_DEBUG_INFO_DIE_SOURCE = 0,
  // The main debug info section.
  PRIMARY_DEBUG_INFO_DIE_SOURCE,
  // The alternate debug info section.
  ALT_DEBUG_INFO_DIE_SOURCE,
  // The type unit section.
  TYPE_UNIT_DIE_SOURCE,
  // Not a real source: this counts the enumerators above, so it must
  // always remain the last enumerator.
  NUMBER_OF_DIE_SOURCES
};
93
94 /// Prefix increment operator for @ref die_source.
95 ///
96 /// @param source the die_source to increment.
97 /// @return the incremented source.
98 static die_source&
operator ++(die_source & source)99 operator++(die_source& source)
100 {
101 source = static_cast<die_source>(source + 1);
102 return source;
103 }
104
105 /// A functor used by @ref dwfl_sptr.
106 struct dwfl_deleter
107 {
108 void
operator ()abigail::dwarf_reader::dwfl_deleter109 operator()(Dwfl* dwfl)
110 {dwfl_end(dwfl);}
111 };//end struct dwfl_deleter
112
113 /// A convenience typedef for a shared pointer to a Dwfl.
114 typedef shared_ptr<Dwfl> dwfl_sptr;
115
116 /// A convenience typedef for a vector of Dwarf_Off.
117 typedef vector<Dwarf_Off> dwarf_offsets_type;
118
119 /// Convenience typedef for a map which key is the offset of a dwarf
120 /// die and which value is the corresponding artefact.
121 typedef unordered_map<Dwarf_Off, type_or_decl_base_sptr> die_artefact_map_type;
122
123 /// Convenience typedef for a map which key is the offset of a dwarf
124 /// die, (given by dwarf_dieoffset()) and which value is the
125 /// corresponding class_decl.
126 typedef unordered_map<Dwarf_Off, class_decl_sptr> die_class_map_type;
127
128 /// Convenience typedef for a map which key is the offset of a dwarf
129 /// die, (given by dwarf_dieoffset()) and which value is the
130 /// corresponding class_or_union_sptr.
131 typedef unordered_map<Dwarf_Off, class_or_union_sptr> die_class_or_union_map_type;
132
133 /// Convenience typedef for a map which key the offset of a dwarf die
134 /// and which value is the corresponding function_decl.
135 typedef unordered_map<Dwarf_Off, function_decl_sptr> die_function_decl_map_type;
136
137 /// Convenience typedef for a map which key is the offset of a dwarf
138 /// die and which value is the corresponding function_type.
139 typedef unordered_map<Dwarf_Off, function_type_sptr> die_function_type_map_type;
140
141 /// Convenience typedef for a map which key is the offset of a
142 /// DW_TAG_compile_unit and the value is the corresponding @ref
143 /// translation_unit_sptr.
144 typedef unordered_map<Dwarf_Off, translation_unit_sptr> die_tu_map_type;
145
146 /// Convenience typedef for a map which key is the offset of a DIE and
147 /// the value is the corresponding qualified name of the DIE.
148 typedef unordered_map<Dwarf_Off, interned_string> die_istring_map_type;
149
150 /// Convenience typedef for a map which is an interned_string and
151 /// which value is a vector of offsets.
152 typedef unordered_map<interned_string,
153 dwarf_offsets_type,
154 hash_interned_string>
155 istring_dwarf_offsets_map_type;
156
157 /// Convenience typedef for a map which key is an elf address and
158 /// which value is an elf_symbol_sptr.
159 typedef unordered_map<GElf_Addr, elf_symbol_sptr> addr_elf_symbol_sptr_map_type;
160
161 /// Convenience typedef for a set of ELF addresses.
162 typedef unordered_set<GElf_Addr> address_set_type;
163
164 /// A hasher for a pair of Dwarf_Off. This is used as a hasher for
165 /// the type @ref dwarf_offset_pair_set_type.
166 struct dwarf_offset_pair_hash
167 {
168 size_t
operator ()abigail::dwarf_reader::dwarf_offset_pair_hash169 operator()(const std::pair<Dwarf_Off, Dwarf_Off>& p) const
170 {return abigail::hashing::combine_hashes(p.first, p.second);}
171 };// end struct dwarf_offset_pair_hash
172
173 typedef unordered_set<std::pair<Dwarf_Off,
174 Dwarf_Off>,
175 dwarf_offset_pair_hash> dwarf_offset_pair_set_type;
176
177 /// Convenience typedef for a shared pointer to an @ref address_set_type.
178 typedef shared_ptr<address_set_type> address_set_sptr;
179
180 /// Convenience typedef for a shared pointer to an
181 /// addr_elf_symbol_sptr_map_type.
182 typedef shared_ptr<addr_elf_symbol_sptr_map_type> addr_elf_symbol_sptr_map_sptr;
183
184 /// Convenience typedef for a map that associates an @ref
185 /// interned_string to a @ref function_type_sptr.
186 typedef unordered_map<interned_string,
187 function_type_sptr,
188 hash_interned_string> istring_fn_type_map_type;
189
190 /// Convenience typedef for a stack containing the scopes up to the
191 /// current point in the abigail Internal Representation (aka IR) tree
192 /// that is being built.
193 typedef stack<scope_decl*> scope_stack_type;
194
195 /// Convenience typedef for a map which key is a dwarf offset. The
196 /// value is also a dwarf offset.
197 typedef unordered_map<Dwarf_Off, Dwarf_Off> offset_offset_map_type;
198
199 /// Convenience typedef for a map which key is a string and which
200 /// value is a vector of smart pointer to a class.
201 typedef unordered_map<string, classes_type> string_classes_map;
202
203 /// Convenience typedef for a map which key is a string and which
204 /// value is a vector of smart pointer to a enum.
205 typedef unordered_map<string, enums_type> string_enums_map;
206
207 /// The abstraction of the place where a partial unit has been
208 /// imported. This is what the DW_TAG_imported_unit DIE expresses.
209 ///
210 /// This type thus contains:
211 /// - the offset to which the partial unit is imported
212 /// - the offset of the imported partial unit.
213 /// - the offset of the imported partial unit.
214 struct imported_unit_point
215 {
216 Dwarf_Off offset_of_import;
217 // The boolean below is true iff the imported unit comes from the
218 // alternate debug info file.
219 die_source imported_unit_die_source;
220 Dwarf_Off imported_unit_die_off;
221 Dwarf_Off imported_unit_cu_off;
222 Dwarf_Off imported_unit_child_off;
223
224 /// Default constructor for @ref the type imported_unit_point.
imported_unit_pointabigail::dwarf_reader::imported_unit_point225 imported_unit_point()
226 : offset_of_import(),
227 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
228 imported_unit_die_off(),
229 imported_unit_cu_off(),
230 imported_unit_child_off()
231 {}
232
233 /// Constructor of @ref the type imported_unit_point.
234 ///
235 /// @param import_off the offset of the point at which the unit has
236 /// been imported.
imported_unit_pointabigail::dwarf_reader::imported_unit_point237 imported_unit_point(Dwarf_Off import_off)
238 : offset_of_import(import_off),
239 imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
240 imported_unit_die_off(),
241 imported_unit_cu_off(),
242 imported_unit_child_off()
243 {}
244
245 /// Constructor of @ref the type imported_unit_point.
246 ///
247 /// @param import_off the offset of the point at which the unit has
248 /// been imported.
249 ///
250 /// @param from where the imported DIE comes from.
251 ///
252 /// @param imported_die the die of the unit that has been imported.
imported_unit_pointabigail::dwarf_reader::imported_unit_point253 imported_unit_point(Dwarf_Off import_off,
254 const Dwarf_Die& imported_die,
255 die_source from)
256 : offset_of_import(import_off),
257 imported_unit_die_source(from),
258 imported_unit_die_off(dwarf_dieoffset
259 (const_cast<Dwarf_Die*>(&imported_die))),
260 imported_unit_cu_off(),
261 imported_unit_child_off()
262 {
263 Dwarf_Die imported_unit_child;
264
265 ABG_ASSERT(dwarf_child(const_cast<Dwarf_Die*>(&imported_die),
266 &imported_unit_child) == 0);
267
268 imported_unit_child_off =
269 dwarf_dieoffset(const_cast<Dwarf_Die*>(&imported_unit_child));
270
271 Dwarf_Die cu_die_memory;
272 Dwarf_Die *cu_die;
273
274 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&imported_unit_child),
275 &cu_die_memory, 0, 0);
276 imported_unit_cu_off = dwarf_dieoffset(cu_die);
277 }
278 }; // struct imported_unit_point
279
280 /// Convenience typedef for a vector of @ref imported_unit_point.
281 typedef vector<imported_unit_point> imported_unit_points_type;
282
283 /// Convenience typedef for a vector of @ref imported_unit_point.
284 typedef unordered_map<Dwarf_Off, imported_unit_points_type>
285 tu_die_imported_unit_points_map_type;
286
287 /// "Less than" operator for instances of @ref imported_unit_point
288 /// type.
289 ///
290 /// @param the left hand side operand of the "Less than" operator.
291 ///
292 /// @param the right hand side operand of the "Less than" operator.
293 ///
294 /// @return true iff @p l is less than @p r.
295 static bool
operator <(const imported_unit_point & l,const imported_unit_point & r)296 operator<(const imported_unit_point& l, const imported_unit_point& r)
297 {return l.offset_of_import < r.offset_of_import;}
298
299 static bool
300 get_parent_die(const read_context& ctxt,
301 const Dwarf_Die* die,
302 Dwarf_Die& parent_die,
303 size_t where_offset);
304
305 static bool
306 get_scope_die(const read_context& ctxt,
307 const Dwarf_Die* die,
308 size_t where_offset,
309 Dwarf_Die& scope_die);
310
311 static Dwarf_Off
312 die_offset(Dwarf_Die* die);
313
314 static Dwarf_Off
315 die_offset(const Dwarf_Die* die);
316
317 static bool
318 die_is_anonymous(const Dwarf_Die* die);
319
320 static bool
321 die_is_type(const Dwarf_Die* die);
322
323 static bool
324 die_is_decl(const Dwarf_Die* die);
325
326 static bool
327 die_is_namespace(const Dwarf_Die* die);
328
329 static bool
330 die_is_unspecified(Dwarf_Die* die);
331
332 static bool
333 die_is_void_type(Dwarf_Die* die);
334
335 static bool
336 die_is_pointer_type(const Dwarf_Die* die);
337
338 static bool
339 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die);
340
341 static bool
342 die_is_reference_type(const Dwarf_Die* die);
343
344 static bool
345 die_is_pointer_or_reference_type(const Dwarf_Die* die);
346
347 static bool
348 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die);
349
350 static bool
351 die_is_class_type(const Dwarf_Die* die);
352
353 static bool
354 die_is_qualified_type(const Dwarf_Die* die);
355
356 static bool
357 die_is_function_type(const Dwarf_Die *die);
358
359 static bool
360 die_has_object_pointer(const Dwarf_Die* die,
361 Dwarf_Die& object_pointer);
362
363 static bool
364 die_has_children(const Dwarf_Die* die);
365
366 static bool
367 die_this_pointer_from_object_pointer(Dwarf_Die* die,
368 Dwarf_Die& this_pointer);
369
370 static bool
371 die_this_pointer_is_const(Dwarf_Die* die);
372
373 static bool
374 die_object_pointer_is_for_const_method(Dwarf_Die* die);
375
376 static bool
377 die_is_at_class_scope(const read_context& ctxt,
378 const Dwarf_Die* die,
379 size_t where_offset,
380 Dwarf_Die& class_scope_die);
381 static bool
382 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
383 size_t expr_len,
384 int64_t& value,
385 bool& is_tls_address);
386
387 static translation_unit::language
388 dwarf_language_to_tu_language(size_t l);
389
390 static bool
391 die_unsigned_constant_attribute(const Dwarf_Die* die,
392 unsigned attr_name,
393 uint64_t& cst);
394
395 static bool
396 die_signed_constant_attribute(const Dwarf_Die*die,
397 unsigned attr_name,
398 int64_t& cst);
399
400 static bool
401 die_constant_attribute(const Dwarf_Die *die,
402 unsigned attr_name,
403 bool is_signed,
404 array_type_def::subrange_type::bound_value &value);
405
406 static bool
407 form_is_DW_FORM_strx(unsigned form);
408
409 static bool
410 form_is_DW_FORM_line_strp(unsigned form);
411
412 static bool
413 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result);
414
415 static string
416 die_name(const Dwarf_Die* die);
417
418 static location
419 die_location(const read_context& ctxt, const Dwarf_Die* die);
420
421 static bool
422 die_location_address(Dwarf_Die* die,
423 Dwarf_Addr& address,
424 bool& is_tls_address);
425
426 static bool
427 die_die_attribute(const Dwarf_Die* die,
428 unsigned attr_name,
429 Dwarf_Die& result,
430 bool recursively = true);
431
432 static string
433 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die);
434
435 static string
436 build_internal_anonymous_die_name(const string &base_name,
437 size_t anonymous_type_index);
438
439 static string
440 get_internal_anonymous_die_name(Dwarf_Die *die,
441 size_t anonymous_type_index);
442
443 static string
444 build_internal_underlying_enum_type_name(const string &base_name,
445 bool is_anonymous,
446 uint64_t size);
447
448 static string
449 die_qualified_type_name(const read_context& ctxt,
450 const Dwarf_Die* die,
451 size_t where);
452
453 static string
454 die_qualified_decl_name(const read_context& ctxt,
455 const Dwarf_Die* die,
456 size_t where);
457
458 static string
459 die_qualified_name(const read_context& ctxt,
460 const Dwarf_Die* die,
461 size_t where);
462
463 static bool
464 die_qualified_type_name_empty(const read_context& ctxt,
465 const Dwarf_Die* die, size_t where,
466 string &qualified_name);
467
468 static void
469 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
470 const Dwarf_Die* die,
471 size_t where_offset,
472 bool pretty_print,
473 string &return_type_name,
474 string &class_name,
475 vector<string>& parm_names,
476 bool& is_const,
477 bool& is_static);
478
479 static string
480 die_function_signature(const read_context& ctxt,
481 const Dwarf_Die *die,
482 size_t where_offset);
483
484 static bool
485 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die);
486
487 static bool
488 die_function_type_is_method_type(const read_context& ctxt,
489 const Dwarf_Die *die,
490 size_t where_offset,
491 Dwarf_Die& object_pointer_die,
492 Dwarf_Die& class_die,
493 bool& is_static);
494
495 static string
496 die_pretty_print_type(read_context& ctxt,
497 const Dwarf_Die* die,
498 size_t where_offset);
499
500 static string
501 die_pretty_print_decl(read_context& ctxt,
502 const Dwarf_Die* die,
503 size_t where_offset);
504
505 static string
506 die_pretty_print(read_context& ctxt,
507 const Dwarf_Die* die,
508 size_t where_offset);
509
510 static void
511 maybe_canonicalize_type(const Dwarf_Die* die,
512 read_context& ctxt);
513
514 static void
515 maybe_canonicalize_type(const type_base_sptr& t,
516 read_context& ctxt);
517
518 static uint64_t
519 get_default_array_lower_bound(translation_unit::language l);
520
521 static bool
522 find_lower_bound_in_imported_unit_points(const imported_unit_points_type&,
523 Dwarf_Off,
524 imported_unit_points_type::const_iterator&);
525
526 static array_type_def::subrange_sptr
527 build_subrange_type(read_context& ctxt,
528 const Dwarf_Die* die,
529 size_t where_offset,
530 bool associate_type_to_die = true);
531
532 static void
533 build_subranges_from_array_type_die(read_context& ctxt,
534 const Dwarf_Die* die,
535 array_type_def::subranges_type& subranges,
536 size_t where_offset,
537 bool associate_type_to_die = true);
538
539 static bool
540 compare_dies(const read_context& ctxt,
541 const Dwarf_Die *l, const Dwarf_Die *r,
542 bool update_canonical_dies_on_the_fly);
543
544
545 /// Find the file name of the alternate debug info file.
546 ///
547 /// @param elf_module the elf module to consider.
548 ///
549 /// @param out parameter. Is set to the file name of the alternate
550 /// debug info file, iff this function returns true.
551 ///
552 /// @return true iff the location of the alternate debug info file was
553 /// found.
554 static bool
find_alt_debug_info_link(Dwfl_Module * elf_module,string & alt_file_name)555 find_alt_debug_info_link(Dwfl_Module *elf_module,
556 string &alt_file_name)
557 {
558 GElf_Addr bias = 0;
559 Dwarf *dwarf = dwfl_module_getdwarf(elf_module, &bias);
560 Elf *elf = dwarf_getelf(dwarf);
561 GElf_Ehdr ehmem, *elf_header;
562 elf_header = gelf_getehdr(elf, &ehmem);
563
564 Elf_Scn* section = 0;
565 while ((section = elf_nextscn(elf, section)) != 0)
566 {
567 GElf_Shdr header_mem, *header;
568 header = gelf_getshdr(section, &header_mem);
569 if (header->sh_type != SHT_PROGBITS)
570 continue;
571
572 const char *section_name = elf_strptr(elf,
573 elf_header->e_shstrndx,
574 header->sh_name);
575
576 char *alt_name = 0;
577 char *buildid = 0;
578 size_t buildid_len = 0;
579 if (section_name != 0
580 && strcmp(section_name, ".gnu_debugaltlink") == 0)
581 {
582 Elf_Data *data = elf_getdata(section, 0);
583 if (data != 0 && data->d_size != 0)
584 {
585 alt_name = (char*) data->d_buf;
586 char *end_of_alt_name =
587 (char *) memchr(alt_name, '\0', data->d_size);
588 buildid_len = data->d_size - (end_of_alt_name - alt_name + 1);
589 if (buildid_len == 0)
590 return false;
591 buildid = end_of_alt_name + 1;
592 }
593 }
594 else
595 continue;
596
597 if (buildid == 0 || alt_name == 0)
598 return false;
599
600 alt_file_name = alt_name;
601 return true;
602 }
603
604 return false;
605 }
606
607 /// Find alternate debuginfo file of a given "link" under a set of
608 /// root directories.
609 ///
610 /// The link is a string that is read by the function
611 /// find_alt_debug_info_link(). That link is a path that is relative
612 /// to a given debug info file, e.g, "../../../.dwz/something.debug".
613 /// It designates the alternate debug info file associated to a given
614 /// debug info file.
615 ///
616 /// This function will thus try to find the .dwz/something.debug file
617 /// under some given root directories.
618 ///
619 /// @param root_dirs the set of root directories to look from.
620 ///
621 /// @param alt_file_name a relative path to the alternate debug info
622 /// file to look for.
623 ///
624 /// @param alt_file_path the resulting absolute path to the alternate
625 /// debuginfo path denoted by @p alt_file_name and found under one of
626 /// the directories in @p root_dirs. This is set iff the function
627 /// returns true.
628 ///
629 /// @return true iff the function found the alternate debuginfo file.
630 static bool
find_alt_debug_info_path(const vector<char ** > root_dirs,const string & alt_file_name,string & alt_file_path)631 find_alt_debug_info_path(const vector<char**> root_dirs,
632 const string &alt_file_name,
633 string &alt_file_path)
634 {
635 if (alt_file_name.empty())
636 return false;
637
638 string altfile_name = tools_utils::trim_leading_string(alt_file_name, "../");
639
640 for (vector<char**>::const_iterator i = root_dirs.begin();
641 i != root_dirs.end();
642 ++i)
643 if (tools_utils::find_file_under_dir(**i, altfile_name, alt_file_path))
644 return true;
645
646 return false;
647 }
648
649 /// Return the alternate debug info associated to a given main debug
650 /// info file.
651 ///
652 /// @param elf_module the elf module to consider.
653 ///
654 /// @param debug_root_dirs a set of root debuginfo directories under
655 /// which too look for the alternate debuginfo file.
656 ///
657 /// @param alt_file_name output parameter. This is set to the file
658 /// path of the alternate debug info file associated to @p elf_module.
659 /// This is set iff the function returns a non-null result.
660 ///
661 /// @param alt_fd the file descriptor used to access the alternate
662 /// debug info. If this parameter is set by the function, then the
663 /// caller needs to fclose it, otherwise the file descriptor is going
664 /// to be leaked. Note however that on recent versions of elfutils
665 /// where libdw.h contains the function dwarf_getalt(), this parameter
666 /// is set to 0, so it doesn't need to be fclosed.
667 ///
668 /// Note that the alternate debug info file is a DWARF extension as of
669 /// DWARF 4 ans is decribed at
670 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
671 ///
672 /// @return the alternate debuginfo, or null. If @p alt_fd is
673 /// non-zero, then the caller of this function needs to call
674 /// dwarf_end() on the returned alternate debuginfo pointer,
675 /// otherwise, it's going to be leaked.
676 static Dwarf*
find_alt_debug_info(Dwfl_Module * elf_module,const vector<char ** > debug_root_dirs,string & alt_file_name,int & alt_fd)677 find_alt_debug_info(Dwfl_Module *elf_module,
678 const vector<char**> debug_root_dirs,
679 string& alt_file_name,
680 int& alt_fd)
681 {
682 if (elf_module == 0)
683 return 0;
684
685 Dwarf* result = 0;
686 find_alt_debug_info_link(elf_module, alt_file_name);
687
688 #ifdef LIBDW_HAS_DWARF_GETALT
689 // We are on recent versions of elfutils where the function
690 // dwarf_getalt exists, so let's use it.
691 Dwarf_Addr bias = 0;
692 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
693 result = dwarf_getalt(dwarf);
694 alt_fd = 0;
695 #else
696 // We are on an old version of elfutils where the function
697 // dwarf_getalt doesn't exist yet, so let's open code its
698 // functionality
699 char *alt_name = 0;
700 const char *file_name = 0;
701 void **user_data = 0;
702 Dwarf_Addr low_addr = 0;
703 char *alt_file = 0;
704
705 file_name = dwfl_module_info(elf_module, &user_data,
706 &low_addr, 0, 0, 0, 0, 0);
707
708 alt_fd = dwfl_standard_find_debuginfo(elf_module, user_data,
709 file_name, low_addr,
710 alt_name, file_name,
711 0, &alt_file);
712
713 result = dwarf_begin(alt_fd, DWARF_C_READ);
714 #endif
715
716 if (result == 0)
717 {
718 // So we didn't find the alternate debuginfo file from the
719 // information that is in the debuginfo file associated to
720 // elf_module. Maybe the alternate debuginfo file is located
721 // under one of the directories in debug_root_dirs. So let's
722 // look in there.
723 string alt_file_path;
724 if (!find_alt_debug_info_path(debug_root_dirs,
725 alt_file_name,
726 alt_file_path))
727 return result;
728
729 // If we reach this point it means we have found the path to the
730 // alternate debuginfo file and it's in alt_file_path. So let's
731 // open it and read it.
732 int fd = open(alt_file_path.c_str(), O_RDONLY);
733 if (fd == -1)
734 return result;
735 result = dwarf_begin(fd, DWARF_C_READ);
736
737 #ifdef LIBDW_HAS_DWARF_GETALT
738 Dwarf_Addr bias = 0;
739 Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
740 dwarf_setalt(dwarf, result);
741 #endif
742 }
743
744 return result;
745 }
746
747 /// Compare a symbol name against another name, possibly demangling
748 /// the symbol_name before performing the comparison.
749 ///
750 /// @param symbol_name the symbol_name to take in account.
751 ///
752 /// @param name the second name to take in account.
753 ///
754 /// @param demangle if true, demangle @p symbol_name and compare the
755 /// result of the demangling with @p name.
756 ///
757 /// @return true iff symbol_name equals name.
758 static bool
compare_symbol_name(const string & symbol_name,const string & name,bool demangle)759 compare_symbol_name(const string& symbol_name,
760 const string& name,
761 bool demangle)
762 {
763 if (demangle)
764 {
765 string m = demangle_cplus_mangled_name(symbol_name);
766 return m == name;
767 }
768 return symbol_name == name;
769 }
770
771 /// Lookup a symbol using the SysV ELF hash table.
772 ///
773 /// Note that this function hasn't been tested. So it hasn't been
774 /// debugged yet. IOW, it is not known to work. Or rather, it's
775 /// almost like it's surely doesn't work ;-)
776 ///
777 /// Use it at your own risks. :-)
778 ///
779 ///@parm env the environment we are operating from.
780 ///
781 /// @param elf_handle the elf_handle to use.
782 ///
783 /// @param sym_name the symbol name to look for.
784 ///
785 /// @param ht_index the index (in the section headers table) of the
786 /// hash table section to use.
787 ///
788 /// @param sym_tab_index the index (in the section headers table) of
789 /// the symbol table to use.
790 ///
791 /// @param demangle if true, demangle @p sym_name before comparing it
792 /// to names from the symbol table.
793 ///
794 /// @param syms_found a vector of symbols found with the name @p
795 /// sym_name. table.
796 static bool
lookup_symbol_from_sysv_hash_tab(const environment * env,Elf * elf_handle,const string & sym_name,size_t ht_index,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)797 lookup_symbol_from_sysv_hash_tab(const environment* env,
798 Elf* elf_handle,
799 const string& sym_name,
800 size_t ht_index,
801 size_t sym_tab_index,
802 bool demangle,
803 vector<elf_symbol_sptr>& syms_found)
804 {
805 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
806 ABG_ASSERT(sym_tab_section);
807
808 Elf_Data* sym_tab_data = elf_getdata(sym_tab_section, 0);
809 ABG_ASSERT(sym_tab_data);
810
811 GElf_Shdr sheader_mem;
812 GElf_Shdr* sym_tab_section_header = gelf_getshdr(sym_tab_section,
813 &sheader_mem);
814 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
815 ABG_ASSERT(hash_section);
816
817 // Poke at the different parts of the hash table and get them ready
818 // to be used.
819 unsigned long hash = elf_hash(sym_name.c_str());
820 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
821 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
822 size_t nb_buckets = ht_data[0];
823 size_t nb_chains = ht_data[1];
824
825 if (nb_buckets == 0)
826 // An empty hash table. Not sure if that is possible, but it
827 // would mean an empty table of exported symbols.
828 return false;
829
830 //size_t nb_chains = ht_data[1];
831 Elf32_Word* ht_buckets = &ht_data[2];
832 Elf32_Word* ht_chains = &ht_buckets[nb_buckets];
833
834 // Now do the real work.
835 size_t bucket = hash % nb_buckets;
836 size_t symbol_index = ht_buckets[bucket];
837
838 GElf_Sym symbol;
839 const char* sym_name_str;
840 size_t sym_size;
841 elf_symbol::type sym_type;
842 elf_symbol::binding sym_binding;
843 elf_symbol::visibility sym_visibility;
844 bool found = false;
845
846 do
847 {
848 ABG_ASSERT(gelf_getsym(sym_tab_data, symbol_index, &symbol));
849 sym_name_str = elf_strptr(elf_handle,
850 sym_tab_section_header->sh_link,
851 symbol.st_name);
852 if (sym_name_str
853 && compare_symbol_name(sym_name_str, sym_name, demangle))
854 {
855 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
856 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
857 sym_visibility =
858 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
859 sym_size = symbol.st_size;
860 elf_symbol::version ver;
861 if (get_version_for_symbol(elf_handle, symbol_index,
862 /*get_def_version=*/true, ver))
863 ABG_ASSERT(!ver.str().empty());
864 elf_symbol_sptr symbol_found =
865 elf_symbol::create(env,
866 symbol_index,
867 sym_size,
868 sym_name_str,
869 sym_type,
870 sym_binding,
871 symbol.st_shndx != SHN_UNDEF,
872 symbol.st_shndx == SHN_COMMON,
873 ver, sym_visibility);
874 syms_found.push_back(symbol_found);
875 found = true;
876 }
877 symbol_index = ht_chains[symbol_index];
878 } while (symbol_index != STN_UNDEF || symbol_index >= nb_chains);
879
880 return found;
881 }
882
883 /// Get the size of the elf class, in bytes.
884 ///
885 /// @param elf_handle the elf handle to use.
886 ///
887 /// @return the size computed.
888 static char
get_elf_class_size_in_bytes(Elf * elf_handle)889 get_elf_class_size_in_bytes(Elf* elf_handle)
890 {
891 char result = 0;
892 GElf_Ehdr hdr;
893
894 ABG_ASSERT(gelf_getehdr(elf_handle, &hdr));
895 int c = hdr.e_ident[EI_CLASS];
896
897 switch (c)
898 {
899 case ELFCLASS32:
900 result = 4;
901 break;
902 case ELFCLASS64:
903 result = 8;
904 break;
905 default:
906 ABG_ASSERT_NOT_REACHED;
907 }
908
909 return result;
910 }
911
912 /// Get a given word of a bloom filter, referred to by the index of
913 /// the word.
914 ///
915 /// The bloom word size depends on the current elf class (32 bits for
916 /// an ELFCLASS32 or 64 bits for an ELFCLASS64 one) and this function
917 /// abstracts that nicely.
918 ///
919 /// @param elf_handle the elf handle to use.
920 ///
921 /// @param bloom_filter the bloom filter to consider.
922 ///
923 /// @param index the index of the bloom filter to return.
924 ///
925 /// @return a 64 bits work containing the bloom word found at index @p
926 /// index. Note that if we are looking at an ELFCLASS32 binary, the 4
927 /// most significant bytes of the result are going to be zero.
928 static Elf64_Xword
bloom_word_at(Elf * elf_handle,Elf32_Word * bloom_filter,size_t index)929 bloom_word_at(Elf* elf_handle,
930 Elf32_Word* bloom_filter,
931 size_t index)
932 {
933 Elf64_Xword result = 0;
934 GElf_Ehdr h;
935 ABG_ASSERT(gelf_getehdr(elf_handle, &h));
936 int c;
937 c = h.e_ident[EI_CLASS];
938
939 switch(c)
940 {
941 case ELFCLASS32:
942 result = bloom_filter[index];
943 break ;
944 case ELFCLASS64:
945 {
946 Elf64_Xword* f= reinterpret_cast<Elf64_Xword*>(bloom_filter);
947 result = f[index];
948 }
949 break;
950 default:
951 abort();
952 }
953
954 return result;
955 }
956
957 /// The abstraction of the gnu elf hash table.
958 ///
959 /// The members of this struct are explained at
960 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
961 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
962 struct gnu_ht
963 {
964 size_t nb_buckets;
965 Elf32_Word* buckets;
966 Elf32_Word* chain;
967 size_t first_sym_index;
968 size_t bf_nwords;
969 size_t bf_size;
970 Elf32_Word* bloom_filter;
971 size_t shift;
972 size_t sym_count;
973 Elf_Scn* sym_tab_section;
974 GElf_Shdr sym_tab_section_header;
975
gnu_htabigail::dwarf_reader::gnu_ht976 gnu_ht()
977 : nb_buckets(0),
978 buckets(0),
979 chain(0),
980 first_sym_index(0),
981 bf_nwords(0),
982 bf_size(0),
983 bloom_filter(0),
984 shift(0),
985 sym_count(0),
986 sym_tab_section(0)
987 {}
988 }; // end struct gnu_ht
989
990 /// Setup the members of the gnu hash table.
991 ///
992 /// @param elf_handle a handle on the elf file to use.
993 ///
994 /// @param ht_index the index (into the elf section headers table) of
995 /// the hash table section to use.
996 ///
997 /// @param sym_tab_index the index (into the elf section headers
998 /// table) of the symbol table the gnu hash table is about.
999 ///
1000 /// @param ht the resulting hash table.
1001 ///
1002 /// @return true iff the hash table @ ht could be setup.
1003 static bool
setup_gnu_ht(Elf * elf_handle,size_t ht_index,size_t sym_tab_index,gnu_ht & ht)1004 setup_gnu_ht(Elf* elf_handle,
1005 size_t ht_index,
1006 size_t sym_tab_index,
1007 gnu_ht& ht)
1008 {
1009 ht.sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1010 ABG_ASSERT(ht.sym_tab_section);
1011 ABG_ASSERT(gelf_getshdr(ht.sym_tab_section, &ht.sym_tab_section_header));
1012 ht.sym_count =
1013 ht.sym_tab_section_header.sh_size / ht.sym_tab_section_header.sh_entsize;
1014 Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1015 ABG_ASSERT(hash_section);
1016
1017 // Poke at the different parts of the hash table and get them ready
1018 // to be used.
1019 Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1020 Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
1021
1022 ht.nb_buckets = ht_data[0];
1023 if (ht.nb_buckets == 0)
1024 // An empty hash table. Not sure if that is possible, but it
1025 // would mean an empty table of exported symbols.
1026 return false;
1027 ht.first_sym_index = ht_data[1];
1028 // The number of words used by the bloom filter. A size of a word
1029 // is ELFCLASS.
1030 ht.bf_nwords = ht_data[2];
1031 // The shift used by the bloom filter code.
1032 ht.shift = ht_data[3];
1033 // The data of the bloom filter proper.
1034 ht.bloom_filter = &ht_data[4];
1035 // The size of the bloom filter in 4 bytes word. This is going to
1036 // be used to index the 'bloom_filter' above, which is of type
1037 // Elf32_Word*; thus we need that bf_size be expressed in 4 bytes
1038 // words.
1039 ht.bf_size = (get_elf_class_size_in_bytes(elf_handle) / 4) * ht.bf_nwords;
1040 // The buckets of the hash table.
1041 ht.buckets = ht.bloom_filter + ht.bf_size;
1042 // The chain of the hash table.
1043 ht.chain = ht.buckets + ht.nb_buckets;
1044
1045 return true;
1046 }
1047
1048 /// Look into the symbol tables of the underlying elf file and find
1049 /// the symbol we are being asked.
1050 ///
1051 /// This function uses the GNU hash table for the symbol lookup.
1052 ///
1053 /// The reference of for the implementation of this function can be
1054 /// found at:
1055 /// - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1056 /// - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1057 ///
1058 /// @param elf_handle the elf handle to use.
1059 ///
1060 /// @param sym_name the name of the symbol to look for.
1061 ///
1062 /// @param ht_index the index of the hash table header to use.
1063 ///
1064 /// @param sym_tab_index the index of the symbol table header to use
1065 /// with this hash table.
1066 ///
1067 /// @param demangle if true, demangle @p sym_name.
1068 ///
1069 /// @param syms_found the vector of symbols found with the name @p
1070 /// sym_name.
1071 ///
1072 /// @return true if a symbol was actually found.
1073 static bool
lookup_symbol_from_gnu_hash_tab(const environment * env,Elf * elf_handle,const string & sym_name,size_t ht_index,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)1074 lookup_symbol_from_gnu_hash_tab(const environment* env,
1075 Elf* elf_handle,
1076 const string& sym_name,
1077 size_t ht_index,
1078 size_t sym_tab_index,
1079 bool demangle,
1080 vector<elf_symbol_sptr>& syms_found)
1081 {
1082 gnu_ht ht;
1083 if (!setup_gnu_ht(elf_handle, ht_index, sym_tab_index, ht))
1084 return false;
1085
1086 // Now do the real work.
1087
1088 // Compute bloom hashes (GNU hash and second bloom specific hashes).
1089 size_t h1 = elf_gnu_hash(sym_name.c_str());
1090 size_t h2 = h1 >> ht.shift;
1091 // The size of one of the words used in the bloom
1092 // filter, in bits.
1093 int c = get_elf_class_size_in_bytes(elf_handle) * 8;
1094 int n = (h1 / c) % ht.bf_nwords;
1095 // The bitmask of the bloom filter has a size of either 32-bits on
1096 // ELFCLASS32 binaries or 64-bits on ELFCLASS64 binaries. So we
1097 // need a 64-bits type to hold the bitmap, hence the Elf64_Xword
1098 // type used here. When dealing with 32bits binaries, the upper
1099 // bits of the bitmask will be zero anyway.
1100 Elf64_Xword bitmask = (1ul << (h1 % c)) | (1ul << (h2 % c));
1101
1102 // Test if the symbol is *NOT* present in this ELF file.
1103 if ((bloom_word_at(elf_handle, ht.bloom_filter, n) & bitmask) != bitmask)
1104 return false;
1105
1106 size_t i = ht.buckets[h1 % ht.nb_buckets];
1107 if (i == STN_UNDEF)
1108 return false;
1109
1110 Elf32_Word stop_word, *stop_wordp;
1111 elf_symbol::version ver;
1112 GElf_Sym symbol;
1113 const char* sym_name_str;
1114 bool found = false;
1115
1116 elf_symbol::type sym_type;
1117 elf_symbol::binding sym_binding;
1118 elf_symbol::visibility sym_visibility;
1119
1120 // Let's walk the hash table and record the versions of all the
1121 // symbols which name equal sym_name.
1122 for (i = ht.buckets[h1 % ht.nb_buckets],
1123 stop_wordp = &ht.chain[i - ht.first_sym_index];
1124 i != STN_UNDEF
1125 && (stop_wordp
1126 < ht.chain + (ht.sym_count - ht.first_sym_index));
1127 ++i, ++stop_wordp)
1128 {
1129 stop_word = *stop_wordp;
1130 if ((stop_word & ~ 1)!= (h1 & ~1))
1131 // A given bucket can reference several hashes. Here we
1132 // stumbled across a hash value different from the one we are
1133 // looking for. Let's keep walking.
1134 continue;
1135
1136 ABG_ASSERT(gelf_getsym(elf_getdata(ht.sym_tab_section, 0),
1137 i, &symbol));
1138 sym_name_str = elf_strptr(elf_handle,
1139 ht.sym_tab_section_header.sh_link,
1140 symbol.st_name);
1141 if (sym_name_str
1142 && compare_symbol_name(sym_name_str, sym_name, demangle))
1143 {
1144 // So we found a symbol (in the symbol table) that equals
1145 // sym_name. Now lets try to get its version and record it.
1146 sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
1147 sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
1148 sym_visibility =
1149 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
1150
1151 if (get_version_for_symbol(elf_handle, i,
1152 /*get_def_version=*/true,
1153 ver))
1154 ABG_ASSERT(!ver.str().empty());
1155
1156 elf_symbol_sptr symbol_found =
1157 elf_symbol::create(env, i,
1158 symbol.st_size,
1159 sym_name_str,
1160 sym_type, sym_binding,
1161 symbol.st_shndx != SHN_UNDEF,
1162 symbol.st_shndx == SHN_COMMON,
1163 ver, sym_visibility);
1164 syms_found.push_back(symbol_found);
1165 found = true;
1166 }
1167
1168 if (stop_word & 1)
1169 // The last bit of the stop_word is 1. That means we need to
1170 // stop here. We reached the end of the chain of values
1171 // referenced by the hask bucket.
1172 break;
1173 }
1174 return found;
1175 }
1176
1177 /// Look into the symbol tables of the underlying elf file and find
1178 /// the symbol we are being asked.
1179 ///
1180 /// This function uses the elf hash table (be it the GNU hash table or
1181 /// the sysv hash table) for the symbol lookup.
1182 ///
1183 /// @param env the environment we are operating from.
1184 ///
1185 /// @param elf_handle the elf handle to use.
1186 ///
1187 /// @param ht_kind the kind of hash table to use. This is returned by
1188 /// the function function find_hash_table_section_index.
1189 ///
1190 /// @param ht_index the index (in the section headers table) of the
1191 /// hash table section to use.
1192 ///
1193 /// @param sym_tab_index the index (in section headers table) of the
1194 /// symbol table index to use with this hash table.
1195 ///
1196 /// @param symbol_name the name of the symbol to look for.
1197 ///
1198 /// @param demangle if true, demangle @p sym_name.
1199 ///
1200 /// @param syms_found the symbols that were actually found with the
1201 /// name @p symbol_name.
1202 ///
1203 /// @return true iff the function found the symbol from the elf hash
1204 /// table.
1205 static bool
lookup_symbol_from_elf_hash_tab(const environment * env,Elf * elf_handle,hash_table_kind ht_kind,size_t ht_index,size_t symtab_index,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms_found)1206 lookup_symbol_from_elf_hash_tab(const environment* env,
1207 Elf* elf_handle,
1208 hash_table_kind ht_kind,
1209 size_t ht_index,
1210 size_t symtab_index,
1211 const string& symbol_name,
1212 bool demangle,
1213 vector<elf_symbol_sptr>& syms_found)
1214 {
1215 if (elf_handle == 0 || symbol_name.empty())
1216 return false;
1217
1218 if (ht_kind == NO_HASH_TABLE_KIND)
1219 return false;
1220
1221 if (ht_kind == SYSV_HASH_TABLE_KIND)
1222 return lookup_symbol_from_sysv_hash_tab(env,
1223 elf_handle, symbol_name,
1224 ht_index,
1225 symtab_index,
1226 demangle,
1227 syms_found);
1228 else if (ht_kind == GNU_HASH_TABLE_KIND)
1229 return lookup_symbol_from_gnu_hash_tab(env,
1230 elf_handle, symbol_name,
1231 ht_index,
1232 symtab_index,
1233 demangle,
1234 syms_found);
1235 return false;
1236 }
1237
1238 /// Lookup a symbol from the symbol table directly.
1239 ///
1240 ///
1241 /// @param env the environment we are operating from.
1242 ///
1243 /// @param elf_handle the elf handle to use.
1244 ///
1245 /// @param sym_name the name of the symbol to look up.
1246 ///
1247 /// @param sym_tab_index the index (in the section headers table) of
1248 /// the symbol table section.
1249 ///
1250 /// @param demangle if true, demangle the names found in the symbol
1251 /// table before comparing them with @p sym_name.
1252 ///
1253 /// @param sym_name_found the actual name of the symbol found.
1254 ///
1255 /// @param sym_type the type of the symbol found.
1256 ///
1257 /// @param sym_binding the binding of the symbol found.
1258 ///
1259 /// @param sym_versions the versions of the symbol found.
1260 ///
1261 /// @return true iff the symbol was found.
1262 static bool
lookup_symbol_from_symtab(const environment * env,Elf * elf_handle,const string & sym_name,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)1263 lookup_symbol_from_symtab(const environment* env,
1264 Elf* elf_handle,
1265 const string& sym_name,
1266 size_t sym_tab_index,
1267 bool demangle,
1268 vector<elf_symbol_sptr>& syms_found)
1269 {
1270 // TODO: read all of the symbol table, store it in memory in a data
1271 // structure that associates each symbol with its versions and in
1272 // which lookups of a given symbol is fast.
1273 Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1274 ABG_ASSERT(sym_tab_section);
1275
1276 GElf_Shdr header_mem;
1277 GElf_Shdr * sym_tab_header = gelf_getshdr(sym_tab_section,
1278 &header_mem);
1279
1280 size_t symcount = sym_tab_header->sh_size / sym_tab_header->sh_entsize;
1281 Elf_Data* symtab = elf_getdata(sym_tab_section, NULL);
1282 GElf_Sym* sym;
1283 char* name_str = 0;
1284 elf_symbol::version ver;
1285 bool found = false;
1286
1287 for (size_t i = 0; i < symcount; ++i)
1288 {
1289 GElf_Sym sym_mem;
1290 sym = gelf_getsym(symtab, i, &sym_mem);
1291 name_str = elf_strptr(elf_handle,
1292 sym_tab_header->sh_link,
1293 sym->st_name);
1294
1295 if (name_str && compare_symbol_name(name_str, sym_name, demangle))
1296 {
1297 elf_symbol::type sym_type =
1298 stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info));
1299 elf_symbol::binding sym_binding =
1300 stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info));
1301 elf_symbol::visibility sym_visibility =
1302 stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(sym->st_other));
1303 bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
1304 bool sym_is_common = sym->st_shndx == SHN_COMMON;
1305
1306 if (get_version_for_symbol(elf_handle, i,
1307 /*get_def_version=*/sym_is_defined,
1308 ver))
1309 ABG_ASSERT(!ver.str().empty());
1310 elf_symbol_sptr symbol_found =
1311 elf_symbol::create(env, i, sym->st_size,
1312 name_str, sym_type,
1313 sym_binding, sym_is_defined,
1314 sym_is_common, ver, sym_visibility);
1315 syms_found.push_back(symbol_found);
1316 found = true;
1317 }
1318 }
1319
1320 if (found)
1321 return true;
1322
1323 return false;
1324 }
1325
1326 /// Look into the symbol tables of the underlying elf file and see
1327 /// if we find a given symbol.
1328 ///
1329 /// @param env the environment we are operating from.
1330 ///
1331 /// @param symbol_name the name of the symbol to look for.
1332 ///
1333 /// @param demangle if true, try to demangle the symbol name found in
1334 /// the symbol table before comparing it to @p symbol_name.
1335 ///
1336 /// @param syms_found the list of symbols found, with the name @p
1337 /// symbol_name.
1338 ///
1339 /// @param sym_type this is set to the type of the symbol found. This
1340 /// shall b a standard elf.h value for symbol types, that is SHT_OBJECT,
1341 /// STT_FUNC, STT_IFUNC, etc ...
1342 ///
1343 /// Note that this parameter is set iff the function returns true.
1344 ///
1345 /// @param sym_binding this is set to the binding of the symbol found.
1346 /// This is a standard elf.h value of the symbol binding kind, that
1347 /// is, STB_LOCAL, STB_GLOBAL, or STB_WEAK.
1348 ///
1349 /// @param symbol_versions the versions of the symbol @p symbol_name,
1350 /// if it was found.
1351 ///
1352 /// @return true iff a symbol with the name @p symbol_name was found.
1353 static bool
lookup_symbol_from_elf(const environment * env,Elf * elf_handle,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms_found)1354 lookup_symbol_from_elf(const environment* env,
1355 Elf* elf_handle,
1356 const string& symbol_name,
1357 bool demangle,
1358 vector<elf_symbol_sptr>& syms_found)
1359 {
1360 size_t hash_table_index = 0, symbol_table_index = 0;
1361 hash_table_kind ht_kind = NO_HASH_TABLE_KIND;
1362
1363 if (!demangle)
1364 ht_kind = find_hash_table_section_index(elf_handle,
1365 hash_table_index,
1366 symbol_table_index);
1367
1368 if (ht_kind == NO_HASH_TABLE_KIND)
1369 {
1370 if (!find_symbol_table_section_index(elf_handle, symbol_table_index))
1371 return false;
1372
1373 return lookup_symbol_from_symtab(env,
1374 elf_handle,
1375 symbol_name,
1376 symbol_table_index,
1377 demangle,
1378 syms_found);
1379 }
1380
1381 return lookup_symbol_from_elf_hash_tab(env,
1382 elf_handle,
1383 ht_kind,
1384 hash_table_index,
1385 symbol_table_index,
1386 symbol_name,
1387 demangle,
1388 syms_found);
1389 }
1390
1391 /// Look into the symbol tables of the underlying elf file and see if
1392 /// we find a given public (global or weak) symbol of function type.
1393 ///
1394 /// @param env the environment we are operating from.
1395 ///
1396 /// @param elf_handle the elf handle to use for the query.
1397 ///
1398 /// @param symbol_name the function symbol to look for.
1399 ///
1400 /// @param func_syms the vector of public functions symbols found, if
1401 /// any.
1402 ///
1403 /// @return true iff the symbol was found.
1404 static bool
lookup_public_function_symbol_from_elf(const environment * env,Elf * elf_handle,const string & symbol_name,vector<elf_symbol_sptr> & func_syms)1405 lookup_public_function_symbol_from_elf(const environment* env,
1406 Elf* elf_handle,
1407 const string& symbol_name,
1408 vector<elf_symbol_sptr>& func_syms)
1409 {
1410 vector<elf_symbol_sptr> syms_found;
1411 bool found = false;
1412
1413 if (lookup_symbol_from_elf(env, elf_handle, symbol_name,
1414 /*demangle=*/false, syms_found))
1415 {
1416 for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
1417 i != syms_found.end();
1418 ++i)
1419 {
1420 elf_symbol::type type = (*i)->get_type();
1421 elf_symbol::binding binding = (*i)->get_binding();
1422
1423 if ((type == elf_symbol::FUNC_TYPE
1424 || type == elf_symbol::GNU_IFUNC_TYPE
1425 || type == elf_symbol::COMMON_TYPE)
1426 && (binding == elf_symbol::GLOBAL_BINDING
1427 || binding == elf_symbol::WEAK_BINDING))
1428 {
1429 func_syms.push_back(*i);
1430 found = true;
1431 }
1432 }
1433 }
1434
1435 return found;
1436 }
1437
1438 /// Get data tag information of an ELF file by looking up into its
1439 /// dynamic segment
1440 ///
1441 /// @param elf the elf handle to use for the query.
1442 ///
1443 /// @param dt_tag data tag to look for in dynamic segment
1444 /// @param dt_tag_data vector of found information for a given @p data_tag
1445 ///
1446 /// @return true iff data tag @p data_tag was found
1447
1448 bool
lookup_data_tag_from_dynamic_segment(Elf * elf,Elf64_Sxword data_tag,vector<string> & dt_tag_data)1449 lookup_data_tag_from_dynamic_segment(Elf* elf,
1450 Elf64_Sxword data_tag,
1451 vector<string>& dt_tag_data)
1452 {
1453 size_t num_prog_headers = 0;
1454 bool found = false;
1455 if (elf_getphdrnum(elf, &num_prog_headers) < 0)
1456 return found;
1457
1458 // Cycle through each program header.
1459 for (size_t i = 0; i < num_prog_headers; ++i)
1460 {
1461 GElf_Phdr phdr_mem;
1462 GElf_Phdr *phdr = gelf_getphdr(elf, i, &phdr_mem);
1463 if (phdr == NULL || phdr->p_type != PT_DYNAMIC)
1464 continue;
1465
1466 // Poke at the dynamic segment like a section, so that we can
1467 // get its section header information; also we'd like to read
1468 // the data of the segment by using elf_getdata() but that
1469 // function needs a Elf_Scn data structure to act on.
1470 // Elfutils doesn't really have any particular function to
1471 // access segment data, other than the functions used to
1472 // access section data.
1473 Elf_Scn *dynamic_section = gelf_offscn(elf, phdr->p_offset);
1474 GElf_Shdr shdr_mem;
1475 GElf_Shdr *dynamic_section_header = gelf_getshdr(dynamic_section,
1476 &shdr_mem);
1477 if (dynamic_section_header == NULL
1478 || dynamic_section_header->sh_type != SHT_DYNAMIC)
1479 continue;
1480
1481 // Get data of the dynamic segment (seen as a section).
1482 Elf_Data *data = elf_getdata(dynamic_section, NULL);
1483 if (data == NULL)
1484 continue;
1485
1486 // Get the index of the section headers string table.
1487 size_t string_table_index = 0;
1488 ABG_ASSERT (elf_getshdrstrndx(elf, &string_table_index) >= 0);
1489
1490 size_t dynamic_section_header_entry_size = gelf_fsize(elf,
1491 ELF_T_DYN, 1,
1492 EV_CURRENT);
1493
1494 GElf_Shdr link_mem;
1495 GElf_Shdr *link =
1496 gelf_getshdr(elf_getscn(elf,
1497 dynamic_section_header->sh_link),
1498 &link_mem);
1499 ABG_ASSERT(link != NULL);
1500
1501 size_t num_dynamic_section_entries =
1502 dynamic_section_header->sh_size / dynamic_section_header_entry_size;
1503
1504 // Now walk through all the DT_* data tags that are in the
1505 // segment/section
1506 for (size_t j = 0; j < num_dynamic_section_entries; ++j)
1507 {
1508 GElf_Dyn dynamic_section_mem;
1509 GElf_Dyn *dynamic_section = gelf_getdyn(data,
1510 j,
1511 &dynamic_section_mem);
1512 if (dynamic_section->d_tag == data_tag)
1513 {
1514 dt_tag_data.push_back(elf_strptr(elf,
1515 dynamic_section_header->sh_link,
1516 dynamic_section->d_un.d_val));
1517 found = true;
1518 }
1519 }
1520 }
1521 return found;
1522 }
1523
1524 /// Convert the type of ELF file into @ref elf_type.
1525 ///
1526 /// @param elf the elf handle to use for the query.
1527 ///
1528 /// @return the @ref elf_type for a given elf type.
1529 static elf_type
elf_file_type(Elf * elf)1530 elf_file_type(Elf* elf)
1531 {
1532 GElf_Ehdr ehdr_mem;
1533 GElf_Ehdr *header = gelf_getehdr (elf, &ehdr_mem);
1534 vector<string> dt_debug_data;
1535
1536 switch (header->e_type)
1537 {
1538 case ET_DYN:
1539 if (lookup_data_tag_from_dynamic_segment(elf, DT_DEBUG, dt_debug_data))
1540 return ELF_TYPE_PI_EXEC;
1541 else
1542 return ELF_TYPE_DSO;
1543 case ET_EXEC:
1544 return ELF_TYPE_EXEC;
1545 case ET_REL:
1546 return ELF_TYPE_RELOCATABLE;
1547 default:
1548 return ELF_TYPE_UNKNOWN;
1549 }
1550 }
1551
1552 // ---------------------------------------
1553 // <location expression evaluation types>
1554 // ---------------------------------------
1555
/// An abstraction of a value representing the result of the
/// evaluation of a dwarf expression.  This abstraction represents a
/// partial view on the possible values because we are only
/// interested in extracting the latest and longest constant
/// sub-expression of a given dwarf expression.
///
/// A value is made of a 64 bits integer and of a flag saying if that
/// integer is a known constant.  The binary operators returning a new
/// @ref expr_result yield a constant only if both operands are
/// constants.
class expr_result
{
  bool is_const_;
  int64_t const_value_;

  /// Negate a value using modular arithmetic, so that negating the
  /// smallest representable value is not a signed overflow.
  ///
  /// @param v the value to negate.
  ///
  /// @return the negated value.
  static int64_t
  wrapping_negate(int64_t v)
  {return static_cast<int64_t>(uint64_t(0) - static_cast<uint64_t>(v));}

public:
  /// Build a constant which value is zero.
  expr_result()
    : is_const_(true),
      const_value_(0)
  {}

  /// Build a value of zero which constness is given by the caller.
  ///
  /// @param is_const true iff the value is to be a constant.
  expr_result(bool is_const)
    : is_const_(is_const),
      const_value_(0)
  {}

  /// Build a constant of a given value.
  ///
  /// @param v the value of the constant.
  explicit expr_result(int64_t v)
    :is_const_(true),
     const_value_(v)
  {}

  /// @return true if the value is a constant.  Otherwise, return
  /// false, meaning the value represents a quantity for which we need
  /// inferior (a running program) state to determine the value.
  bool
  is_const() const
  {return is_const_;}

  /// @param f a flag saying if the value is set to a constant or not.
  void
  is_const(bool f)
  {is_const_ = f;}

  /// Get the current constant value iff this represents a
  /// constant.
  ///
  /// @param value the out parameter.  Is set to the constant value of
  /// the @ref expr_result.  This is set iff the function return true.
  ///
  ///@return true if this has a constant value, false otherwise.
  bool
  const_value(int64_t& value) const
  {
    if (is_const())
      {
	value = const_value_;
	return true;
      }
    return false;
  }

  /// Getter of the constant value of the current @ref expr_result.
  ///
  /// Note that the current @ref expr_result must be constant,
  /// otherwise the current process is aborted.
  ///
  /// @return the constant value of the current @ref expr_result.
  int64_t
  const_value() const
  {
    ABG_ASSERT(is_const());
    return const_value_;
  }

  /// Conversion to the constant value.  Like const_value(), this
  /// requires the current @ref expr_result to be constant.
  operator int64_t() const
  {return const_value();}

  /// Set the integer value.  The constness flag is left untouched.
  ///
  /// @param v the new value.
  expr_result&
  operator=(const int64_t v)
  {
    const_value_ = v;
    return *this;
  }

  /// Equality compares both the value and the constness flag.
  bool
  operator==(const expr_result& o) const
  {return const_value_ == o.const_value_ && is_const_ == o.is_const_;}

  // The relational operators below only look at the integer values.

  bool
  operator>=(const expr_result& o) const
  {return const_value_ >= o.const_value_;}

  bool
  operator<=(const expr_result& o) const
  {return const_value_ <= o.const_value_;}

  bool
  operator>(const expr_result& o) const
  {return const_value_ > o.const_value_;}

  bool
  operator<(const expr_result& o) const
  {return const_value_ < o.const_value_;}

  expr_result
  operator+(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ += v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Add to the integer value.  The constness flag is left untouched.
  expr_result&
  operator+=(int64_t v)
  {
    const_value_ += v;
    return *this;
  }

  expr_result
  operator-(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ -= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Remainder of the division of the current value by @p v.
  ///
  /// The remainder of a division by zero is not defined: in that
  /// case the value is left unchanged and the result is flagged as
  /// not being a constant.
  expr_result
  operator%(const expr_result& v) const
  {
    expr_result r(*this);
    r.is_const_ = r.is_const_ && v.is_const();
    if (v.const_value_ == 0)
      r.is_const_ = false;
    else if (v.const_value_ == -1)
      // Anything modulo -1 is zero.  Computing it by hand avoids the
      // overflow of INT64_MIN % -1.
      r.const_value_ = 0;
    else
      r.const_value_ %= v.const_value_;
    return r;
  }

  expr_result
  operator*(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ *= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const();
    return r;
  }

  expr_result
  operator|(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ |= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator^(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ ^= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator>>(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ = r.const_value_ >> v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator<<(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ = r.const_value_ << v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Bitwise complement of the value; the constness is kept as is.
  expr_result
  operator~() const
  {
    expr_result r(*this);
    r.const_value_ = ~r.const_value_;
    return r;
  }

  /// Arithmetic negation of the value; the constness is kept as is.
  expr_result
  neg() const
  {
    expr_result r(*this);
    r.const_value_ = wrapping_negate(r.const_value_);
    return r;
  }

  /// Absolute value.
  ///
  /// Note that the current @ref expr_result must be constant,
  /// otherwise the current process is aborted.
  ///
  /// This is computed with integer arithmetic only; going through a
  /// floating point type loses precision for big values on platforms
  /// where the mantissa has less than 64 bits.
  expr_result
  abs() const
  {
    expr_result r = *this;
    const int64_t v = r.const_value();
    r.const_value_ = v < 0 ? wrapping_negate(v) : v;
    return r;
  }

  expr_result
  operator&(const expr_result& o) const
  {
    expr_result r(*this);
    r.const_value_ &= o.const_value_;
    r.is_const_ = r.is_const_ && o.is_const_;
    return r;
  }

  /// Quotient of the division of the current value by @p o.
  ///
  /// The quotient is stored in the result explicitly.  Returning the
  /// raw integer from this function would not do: the constructor
  /// taking an integer is explicit, so the integer would be converted
  /// through the constructor taking a bool and the quotient would be
  /// lost.
  ///
  /// A division by zero is not defined: in that case the value is
  /// left unchanged and the result is flagged as not being a
  /// constant.
  expr_result
  operator/(const expr_result& o) const
  {
    expr_result r(*this);
    r.is_const_ = r.is_const_ && o.is_const_;
    if (o.const_value_ == 0)
      r.is_const_ = false;
    else if (o.const_value_ == -1)
      // Avoids the overflow of INT64_MIN / -1.
      r.const_value_ = wrapping_negate(r.const_value_);
    else
      r.const_value_ /= o.const_value_;
    return r;
  }
};// class end expr_result;
1776
1777 /// A class that implements a stack of @ref expr_result, to be used in
1778 /// the engine evaluating DWARF expressions.
1779 class expr_result_stack_type
1780 {
1781 vector<expr_result> elems_;
1782
1783 public:
1784
expr_result_stack_type()1785 expr_result_stack_type()
1786 {elems_.reserve(4);}
1787
1788 expr_result&
operator [](unsigned i)1789 operator[](unsigned i)
1790 {
1791 unsigned s = elems_.size();
1792 ABG_ASSERT(s > i);
1793 return elems_[s - 1 -i];
1794 }
1795
1796 const expr_result&
operator [](unsigned i) const1797 operator[](unsigned i) const
1798 {return const_cast<expr_result_stack_type*>(this)->operator[](i);}
1799
1800 unsigned
size() const1801 size() const
1802 {return elems_.size();}
1803
1804 vector<expr_result>::reverse_iterator
begin()1805 begin()
1806 {return elems_.rbegin();}
1807
1808 const vector<expr_result>::reverse_iterator
begin() const1809 begin() const
1810 {return const_cast<expr_result_stack_type*>(this)->begin();}
1811
1812 vector<expr_result>::reverse_iterator
end()1813 end()
1814 {return elems_.rend();}
1815
1816 const vector<expr_result>::reverse_iterator
end() const1817 end() const
1818 {return const_cast<expr_result_stack_type*>(this)->end();}
1819
1820 expr_result&
front()1821 front()
1822 {return elems_.back();}
1823
1824 const expr_result&
front() const1825 front() const
1826 {return const_cast<expr_result_stack_type*>(this)->front();}
1827
1828 void
push_front(expr_result e)1829 push_front(expr_result e)
1830 {elems_.push_back(e);}
1831
1832 expr_result
pop_front()1833 pop_front()
1834 {
1835 expr_result r = front();
1836 elems_.pop_back();
1837 return r;
1838 }
1839
1840 void
erase(vector<expr_result>::reverse_iterator i)1841 erase(vector<expr_result>::reverse_iterator i)
1842 {elems_.erase(--i.base());}
1843
1844 void
clear()1845 clear()
1846 {elems_.clear();}
1847 }; // end class expr_result_stack_type
1848
1849 /// Abstraction of the evaluation context of a dwarf expression.
1850 struct dwarf_expr_eval_context
1851 {
1852 expr_result accum;
1853 expr_result_stack_type stack;
1854 // Is set to true if the result of the expression that got evaluated
1855 // is a TLS address.
1856 bool set_tls_addr;
1857
dwarf_expr_eval_contextabigail::dwarf_reader::dwarf_expr_eval_context1858 dwarf_expr_eval_context()
1859 : accum(/*is_const=*/false),
1860 set_tls_addr(false)
1861 {
1862 stack.push_front(expr_result(true));
1863 }
1864
1865 void
resetabigail::dwarf_reader::dwarf_expr_eval_context1866 reset()
1867 {
1868 stack.clear();
1869 stack.push_front(expr_result(true));
1870 accum = expr_result(false);
1871 set_tls_addr = false;
1872 }
1873
1874 /// Set a flag to to tell that the result of the expression that got
1875 /// evaluated is a TLS address.
1876 ///
1877 /// @param f true iff the result of the expression that got
1878 /// evaluated is a TLS address, false otherwise.
1879 void
set_tls_addressabigail::dwarf_reader::dwarf_expr_eval_context1880 set_tls_address(bool f)
1881 {set_tls_addr = f;}
1882
1883 /// Getter for the flag that tells if the result of the expression
1884 /// that got evaluated is a TLS address.
1885 ///
1886 /// @return true iff the result of the expression that got evaluated
1887 /// is a TLS address.
1888 bool
set_tls_addressabigail::dwarf_reader::dwarf_expr_eval_context1889 set_tls_address() const
1890 {return set_tls_addr;}
1891
1892 expr_result
popabigail::dwarf_reader::dwarf_expr_eval_context1893 pop()
1894 {
1895 expr_result r = stack.front();
1896 stack.pop_front();
1897 return r;
1898 }
1899
1900 void
pushabigail::dwarf_reader::dwarf_expr_eval_context1901 push(const expr_result& v)
1902 {stack.push_front(v);}
1903 };//end class dwarf_expr_eval_context
1904
1905 // ---------------------------------------
1906 // </location expression evaluation types>
1907 // ---------------------------------------
1908
1909 /// The context used to build ABI corpus from debug info in DWARF
1910 /// format.
1911 ///
1912 /// This context is to be created by create_read_context(). It's then
1913 /// passed to all the routines that read specific dwarf bits as they
1914 /// get some important data from it.
1915 ///
/// When a new data member is added to this context, it must be
/// initialized by the read_context::initialize() function.  So please
/// do not forget.
1919 class read_context
1920 {
1921 public:
1922 struct options_type
1923 {
1924 environment* env;
1925 bool load_in_linux_kernel_mode;
1926 bool load_all_types;
1927 bool show_stats;
1928 bool do_log;
1929
options_typeabigail::dwarf_reader::read_context::options_type1930 options_type()
1931 : env(),
1932 load_in_linux_kernel_mode(),
1933 load_all_types(),
1934 show_stats(),
1935 do_log()
1936 {}
1937 };// read_context::options_type
1938
1939 /// A set of containers that contains one container per kind of @ref
1940 /// die_source. This allows to associate DIEs to things, depending
1941 /// on the source of the DIE.
1942 template <typename ContainerType>
1943 class die_source_dependant_container_set
1944 {
1945 ContainerType primary_debug_info_container_;
1946 ContainerType alt_debug_info_container_;
1947 ContainerType type_unit_container_;
1948
1949 public:
1950
1951 /// Getter for the container associated to DIEs coming from a
1952 /// given @ref die_source.
1953 ///
1954 /// @param source the die_source for which we want the container.
1955 ///
1956 /// @return the container that associates DIEs coming from @p
1957 /// source to something.
1958 ContainerType&
get_container(die_source source)1959 get_container(die_source source)
1960 {
1961 ContainerType *result = 0;
1962 switch (source)
1963 {
1964 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
1965 result = &primary_debug_info_container_;
1966 break;
1967 case ALT_DEBUG_INFO_DIE_SOURCE:
1968 result = &alt_debug_info_container_;
1969 break;
1970 case TYPE_UNIT_DIE_SOURCE:
1971 result = &type_unit_container_;
1972 break;
1973 case NO_DEBUG_INFO_DIE_SOURCE:
1974 case NUMBER_OF_DIE_SOURCES:
1975 ABG_ASSERT_NOT_REACHED;
1976 }
1977 return *result;
1978 }
1979
1980 /// Getter for the container associated to DIEs coming from a
1981 /// given @ref die_source.
1982 ///
1983 /// @param source the die_source for which we want the container.
1984 ///
1985 /// @return the container that associates DIEs coming from @p
1986 /// source to something.
1987 const ContainerType&
get_container(die_source source) const1988 get_container(die_source source) const
1989 {
1990 return const_cast<die_source_dependant_container_set*>(this)->
1991 get_container(source);
1992 }
1993
1994 /// Getter for the container associated to DIEs coming from the
1995 /// same source as a given DIE.
1996 ///
1997 /// @param ctxt the read context to consider.
1998 ///
1999 /// @param die the DIE which should have the same source as the
2000 /// source of the container we want.
2001 ///
2002 /// @return the container that associates DIEs coming from the
2003 /// same source as @p die.
2004 ContainerType&
get_container(const read_context & ctxt,const Dwarf_Die * die)2005 get_container(const read_context& ctxt, const Dwarf_Die *die)
2006 {
2007 const die_source source = ctxt.get_die_source(die);
2008 return get_container(source);
2009 }
2010
2011 /// Getter for the container associated to DIEs coming from the
2012 /// same source as a given DIE.
2013 ///
2014 /// @param ctxt the read context to consider.
2015 ///
2016 /// @param die the DIE which should have the same source as the
2017 /// source of the container we want.
2018 ///
2019 /// @return the container that associates DIEs coming from the
2020 /// same source as @p die.
2021 const ContainerType&
get_container(const read_context & ctxt,const Dwarf_Die * die) const2022 get_container(const read_context& ctxt, const Dwarf_Die *die) const
2023 {
2024 return const_cast<die_source_dependant_container_set*>(this)->
2025 get_container(ctxt, die);
2026 }
2027
2028 /// Clear the container set.
2029 void
clear()2030 clear()
2031 {
2032 primary_debug_info_container_.clear();
2033 alt_debug_info_container_.clear();
2034 type_unit_container_.clear();
2035 }
2036 }; // end die_dependant_container_set
2037
// The suppression specifications handed out by get_suppressions().
suppr::suppressions_type supprs_;
// The DWARF version, as accessed through dwarf_version().
unsigned short dwarf_version_;
// The callbacks given to dwfl_begin() in create_default_dwfl().
Dwfl_Callbacks offline_callbacks_;
// The set of directories under which to look for debug info.
vector<char**> debug_info_root_paths_;
// The Dwfl handle created by create_default_dwfl().
dwfl_sptr handle_;
// The main debug info, set by load_debug_info().
Dwarf* dwarf_;
// The alternate debug info.  Alternate debug info sections are a
// DWARF extension as of DWARF4 and are described at
// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.  Below are
// the file descriptor used to access the alternate debug info
// sections, and the representation of the DWARF debug info.  When
// alt_fd_ is non-zero, both are released by
// clear_alt_debug_info_data(), with close and dwarf_end.
int alt_fd_;
Dwarf* alt_dwarf_;
string alt_debug_info_path_;
// The address range of the offline elf file we are looking at.
Dwfl_Module* elf_module_;
// Lazily set by elf_handle(), hence mutable.
mutable Elf* elf_handle_;
string elf_path_;
mutable Elf_Scn* symtab_section_;
Dwarf_Die* cur_tu_die_;
mutable dwarf_expr_eval_context dwarf_expr_eval_context_;
// A set of maps (one per kind of die source) that associates a decl
// string representation with the DIEs (offsets) representing that
// decl.
mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
decl_die_repr_die_offsets_maps_;
// A set of maps (one per kind of die source) that associates a type
// string representation with the DIEs (offsets) representing that
// type.
mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
type_die_repr_die_offsets_maps_;
mutable die_source_dependant_container_set<die_istring_map_type>
die_qualified_name_maps_;
mutable die_source_dependant_container_set<die_istring_map_type>
die_pretty_repr_maps_;
mutable die_source_dependant_container_set<die_istring_map_type>
die_pretty_type_repr_maps_;
// A set of maps (one per kind of die source) that associates the
// offset of a decl die to its corresponding decl artifact.
mutable die_source_dependant_container_set<die_artefact_map_type>
decl_die_artefact_maps_;
// A set of maps (one per kind of die source) that associates the
// offset of a type die to its corresponding type artifact.
mutable die_source_dependant_container_set<die_artefact_map_type>
type_die_artefact_maps_;
/// A set of maps (one per kind of die source) that associates
/// the offset of a type DIE to the offset of its canonical DIE.
mutable die_source_dependant_container_set<offset_offset_map_type>
canonical_type_die_offsets_;
/// A set of maps (one per kind of die source) that associates
/// the offset of a decl DIE to the offset of its canonical DIE.
mutable die_source_dependant_container_set<offset_offset_map_type>
canonical_decl_die_offsets_;
/// A map that associates a function type representations to
/// function types, inside a translation unit.
mutable istring_fn_type_map_type per_tu_repr_to_fn_type_maps_;
// A map keyed by a pair of DIE offsets.  NOTE(review): judging by
// its name it counts visits of DIE pairs during DIE comparison --
// confirm against its users.
mutable std::unordered_map<std::pair<Dwarf_Off, Dwarf_Off>,
size_t,
dwarf_offset_pair_hash> die_comparison_visits_;

die_class_or_union_map_type die_wip_classes_map_;
die_class_or_union_map_type alternate_die_wip_classes_map_;
die_class_or_union_map_type type_unit_die_wip_classes_map_;
die_function_type_map_type die_wip_function_types_map_;
die_function_type_map_type alternate_die_wip_function_types_map_;
die_function_type_map_type type_unit_die_wip_function_types_map_;
die_function_decl_map_type die_function_with_no_symbol_map_;
vector<Dwarf_Off> types_to_canonicalize_;
vector<Dwarf_Off> alt_types_to_canonicalize_;
vector<Dwarf_Off> type_unit_types_to_canonicalize_;
vector<type_base_sptr> extra_types_to_canonicalize_;
string_classes_map decl_only_classes_map_;
string_enums_map decl_only_enums_map_;
die_tu_map_type die_tu_map_;
corpus_group_sptr cur_corpus_group_;
corpus_sptr cur_corpus_;
translation_unit_sptr cur_tu_;
scope_decl_sptr nil_scope_;
scope_stack_type scope_stack_;
offset_offset_map_type primary_die_parent_map_;
// A map that associates each tu die to a vector of unit import
// points, in the main debug info
tu_die_imported_unit_points_map_type tu_die_imported_unit_points_map_;
// A map that associates each tu die to a vector of unit import
// points, in the alternate debug info
tu_die_imported_unit_points_map_type alt_tu_die_imported_unit_points_map_;
// Same kind of map as the two above.  NOTE(review): presumably for
// the tu dies of type units -- confirm.
tu_die_imported_unit_points_map_type type_units_tu_die_imported_unit_points_map_;
// A DIE -> parent map for DIEs coming from the alternate debug info
// file.
offset_offset_map_type alternate_die_parent_map_;
offset_offset_map_type type_section_die_parent_map_;
list<var_decl_sptr> var_decls_to_add_;
vector<string> dt_needed_;
string dt_soname_;
string elf_architecture_;
corpus::exported_decls_builder* exported_decls_builder_;
options_type options_;
// The flag accessed through drop_undefined_syms().
bool drop_undefined_syms_;
// Default constructor; only declared here.
read_context();

private:
mutable symtab_reader::symtab_sptr symtab_;
2143
2144 public:
2145
2146 /// Constructor of read_context.
2147 ///
2148 /// @param elf_path the path to the elf file the context is to be
2149 /// used for.
2150 ///
2151 /// @param debug_info_root_paths a vector of pointers to the path to
2152 /// the root directory under which the debug info is to be found for
2153 /// @p elf_path. Leave this empty if the debug info is not in a
2154 /// split file.
2155 ///
2156 /// @param environment the environment used by the current context.
2157 /// This environment contains resources needed by the reader and by
2158 /// the types and declarations that are to be created later. Note
2159 /// that ABI artifacts that are to be compared all need to be
2160 /// created within the same environment.
2161 ///
2162 /// Please also note that the life time of this environment object
2163 /// must be greater than the life time of the resulting @ref
2164 /// read_context the context uses resources that are allocated in
2165 /// the environment.
2166 ///
2167 /// @param load_all_types if set to false only the types that are
2168 /// reachable from publicly exported declarations (of functions and
2169 /// variables) are read. If set to true then all types found in the
2170 /// debug information are loaded.
2171 ///
2172 /// @param linux_kernel_mode if set to true, then consider the special
2173 /// linux kernel symbol tables when determining if a symbol is
2174 /// exported or not.
read_context(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)2175 read_context(const string& elf_path,
2176 const vector<char**>& debug_info_root_paths,
2177 ir::environment* environment,
2178 bool load_all_types,
2179 bool linux_kernel_mode)
2180 {
2181 initialize(elf_path, debug_info_root_paths, environment,
2182 load_all_types, linux_kernel_mode);
2183 }
2184
2185 /// Initializer of read_context.
2186 ///
2187 /// @param elf_path the path to the elf file the context is to be
2188 /// used for.
2189 ///
2190 /// @param debug_info_root_paths a vector of pointers to the path to
2191 /// the root directory under which the debug info is to be found for
2192 /// @p elf_path. Leave this empty if the debug info is not in a
2193 /// split file.
2194 ///
2195 /// @param environment the environment used by the current context.
2196 /// This environment contains resources needed by the reader and by
2197 /// the types and declarations that are to be created later. Note
2198 /// that ABI artifacts that are to be compared all need to be
2199 /// created within the same environment.
2200 ///
2201 /// Please also note that the life time of this environment object
2202 /// must be greater than the life time of the resulting @ref
2203 /// read_context the context uses resources that are allocated in
2204 /// the environment.
2205 ///
2206 /// @param load_all_types if set to false only the types that are
2207 /// reachable from publicly exported declarations (of functions and
2208 /// variables) are read. If set to true then all types found in the
2209 /// debug information are loaded.
2210 ///
2211 /// @param linux_kernel_mode if set to true, then consider the
2212 /// special linux kernel symbol tables when determining if a symbol
2213 /// is exported or not.
2214 void
initialize(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)2215 initialize(const string& elf_path,
2216 const vector<char**>& debug_info_root_paths,
2217 ir::environment* environment,
2218 bool load_all_types,
2219 bool linux_kernel_mode)
2220 {
2221 dwarf_version_ = 0;
2222 dwarf_ = 0;
2223 handle_.reset();
2224 alt_fd_ = 0;
2225 alt_dwarf_ = 0;
2226 elf_module_ = 0;
2227 elf_handle_ = 0;
2228 elf_path_ = elf_path;
2229 symtab_section_ = 0;
2230 cur_tu_die_ = 0;
2231 exported_decls_builder_ = 0;
2232
2233 clear_alt_debug_info_data();
2234
2235 supprs_.clear();
2236 decl_die_repr_die_offsets_maps_.clear();
2237 type_die_repr_die_offsets_maps_.clear();
2238 die_qualified_name_maps_.clear();
2239 die_pretty_repr_maps_.clear();
2240 die_pretty_type_repr_maps_.clear();
2241 decl_die_artefact_maps_.clear();
2242 type_die_artefact_maps_.clear();
2243 canonical_type_die_offsets_.clear();
2244 canonical_decl_die_offsets_.clear();
2245 die_wip_classes_map_.clear();
2246 alternate_die_wip_classes_map_.clear();
2247 type_unit_die_wip_classes_map_.clear();
2248 die_wip_function_types_map_.clear();
2249 alternate_die_wip_function_types_map_.clear();
2250 type_unit_die_wip_function_types_map_.clear();
2251 die_function_with_no_symbol_map_.clear();
2252 types_to_canonicalize_.clear();
2253 alt_types_to_canonicalize_.clear();
2254 type_unit_types_to_canonicalize_.clear();
2255 extra_types_to_canonicalize_.clear();
2256 decl_only_classes_map_.clear();
2257 die_tu_map_.clear();
2258 cur_corpus_group_.reset();
2259 cur_corpus_.reset();
2260 cur_tu_.reset();
2261 primary_die_parent_map_.clear();
2262 tu_die_imported_unit_points_map_.clear();
2263 alt_tu_die_imported_unit_points_map_.clear();
2264 type_units_tu_die_imported_unit_points_map_.clear();
2265 alternate_die_parent_map_.clear();
2266 type_section_die_parent_map_.clear();
2267 var_decls_to_add_.clear();
2268 dt_needed_.clear();
2269 dt_soname_.clear();
2270 elf_architecture_.clear();
2271
2272 symtab_.reset();
2273
2274 clear_per_translation_unit_data();
2275
2276 memset(&offline_callbacks_, 0, sizeof(offline_callbacks_));
2277 create_default_dwfl(debug_info_root_paths);
2278 options_.env = environment;
2279 options_.load_in_linux_kernel_mode = linux_kernel_mode;
2280 options_.load_all_types = load_all_types;
2281 drop_undefined_syms_ = false;
2282 load_in_linux_kernel_mode(linux_kernel_mode);
2283 }
2284
2285 /// Clear the resources related to the alternate DWARF data.
2286 void
clear_alt_debug_info_data()2287 clear_alt_debug_info_data()
2288 {
2289 if (alt_fd_)
2290 {
2291 close(alt_fd_);
2292 alt_fd_ = 0;
2293 if (alt_dwarf_)
2294 {
2295 dwarf_end(alt_dwarf_);
2296 alt_dwarf_ = 0;
2297 }
2298 alt_debug_info_path_.clear();
2299 }
2300 }
2301
2302 /// Detructor of the @ref read_context type.
~read_context()2303 ~read_context()
2304 {
2305 clear_alt_debug_info_data();
2306 }
2307
2308 /// Clear the data that is relevant only for the current translation
2309 /// unit being read. The rest of the data is relevant for the
2310 /// entire ABI corpus.
2311 void
clear_per_translation_unit_data()2312 clear_per_translation_unit_data()
2313 {
2314 while (!scope_stack().empty())
2315 scope_stack().pop();
2316 var_decls_to_re_add_to_tree().clear();
2317 per_tu_repr_to_fn_type_maps().clear();
2318 }
2319
2320 /// Clear the data that is relevant for the current corpus being
2321 /// read.
2322 void
clear_per_corpus_data()2323 clear_per_corpus_data()
2324 {
2325 die_qualified_name_maps_.clear();
2326 die_pretty_repr_maps_.clear();
2327 die_pretty_type_repr_maps_.clear();
2328 clear_types_to_canonicalize();
2329 }
2330
2331 /// Getter for the current environment.
2332 ///
2333 /// @return the current environment.
2334 const ir::environment*
env() const2335 env() const
2336 {return options_.env;}
2337
2338 /// Getter for the current environment.
2339 ///
2340 /// @return the current environment.
2341 ir::environment*
env()2342 env()
2343 {return options_.env;}
2344
2345 /// Setter for the current environment.
2346 ///
2347 /// @param env the new current environment.
2348 void
env(ir::environment * env)2349 env(ir::environment* env)
2350 {options_.env = env;}
2351
2352 /// Getter for the flag that tells us if we are dropping functions
2353 /// and variables that have undefined symbols.
2354 ///
2355 /// @return true iff we are dropping functions and variables that have
2356 /// undefined symbols.
2357 bool
drop_undefined_syms() const2358 drop_undefined_syms() const
2359 {return drop_undefined_syms_;}
2360
2361 /// Setter for the flag that tells us if we are dropping functions
2362 /// and variables that have undefined symbols.
2363 ///
2364 /// @param f the new value of the flag.
2365 void
drop_undefined_syms(bool f)2366 drop_undefined_syms(bool f)
2367 {drop_undefined_syms_ = f;}
2368
2369 /// Getter of the suppression specifications to be used during
2370 /// ELF/DWARF parsing.
2371 ///
2372 /// @return the suppression specifications.
2373 const suppr::suppressions_type&
get_suppressions() const2374 get_suppressions() const
2375 {return supprs_;}
2376
2377 /// Getter of the suppression specifications to be used during
2378 /// ELF/DWARF parsing.
2379 ///
2380 /// @return the suppression specifications.
2381 suppr::suppressions_type&
get_suppressions()2382 get_suppressions()
2383 {return supprs_;}
2384
2385 /// Getter for the callbacks of the Dwarf Front End library of
2386 /// elfutils that is used by this reader to read dwarf.
2387 ///
2388 /// @return the callbacks.
2389 const Dwfl_Callbacks*
offline_callbacks() const2390 offline_callbacks() const
2391 {return &offline_callbacks_;}
2392
2393 /// Getter for the callbacks of the Dwarf Front End library of
2394 /// elfutils that is used by this reader to read dwarf.
2395 /// @returnthe callbacks
2396 Dwfl_Callbacks*
offline_callbacks()2397 offline_callbacks()
2398 {return &offline_callbacks_;}
2399
2400 /// Constructor for a default Dwfl handle that knows how to load debug
2401 /// info from a library or executable elf file.
2402 ///
2403 /// @param debug_info_root_paths a vector of pointers to the root
2404 /// path under which to look for the debug info of the elf files
2405 /// that are later handled by the Dwfl. This is for cases where the
2406 /// debug info is split into a different file from the binary we
2407 /// want to inspect. On Red Hat compatible systems, this root path
2408 /// is usually /usr/lib/debug by default. If this argument is set
2409 /// to the empty set, then "./debug" and /usr/lib/debug will be
2410 /// searched for sub-directories containing the debug info file.
2411 /// Note that for now, elfutils wants this path to be absolute
2412 /// otherwise things just don't work and the debug info is not
2413 /// found.
2414 ///
2415 /// @return the constructed Dwfl handle.
2416 void
create_default_dwfl(const vector<char ** > & debug_info_root_paths)2417 create_default_dwfl(const vector<char**>& debug_info_root_paths)
2418 {
2419 offline_callbacks()->find_debuginfo = dwfl_standard_find_debuginfo;
2420 offline_callbacks()->section_address = dwfl_offline_section_address;
2421 offline_callbacks()->debuginfo_path =
2422 debug_info_root_paths.empty() ? 0 : debug_info_root_paths.front();
2423 handle_.reset(dwfl_begin(offline_callbacks()),
2424 dwfl_deleter());
2425 debug_info_root_paths_ = debug_info_root_paths;
2426 }
2427
2428 unsigned short
dwarf_version() const2429 dwarf_version() const
2430 {return dwarf_version_;}
2431
2432 void
dwarf_version(unsigned short v)2433 dwarf_version(unsigned short v)
2434 {dwarf_version_ = v;}
2435
2436 /// Getter for a smart pointer to a handle on the dwarf front end
2437 /// library that we use to read dwarf.
2438 ///
2439 /// @return the dwfl handle.
2440 dwfl_sptr
dwfl_handle() const2441 dwfl_handle() const
2442 {return handle_;}
2443
2444 /// Setter for a smart pointer to a handle on the dwarf front end
2445 /// library that we use to read dwarf.
2446 ///
2447 /// @param h the new dwfl handle.
2448 void
dwfl_handle(dwfl_sptr & h)2449 dwfl_handle(dwfl_sptr& h)
2450 {handle_ = h;}
2451
2452 Dwfl_Module*
elf_module() const2453 elf_module() const
2454 {return elf_module_;}
2455
2456 /// Return the ELF descriptor for the binary we are analizing.
2457 ///
2458 /// @return a pointer to the Elf descriptor representing the binary
2459 /// we are analizing.
2460 Elf*
elf_handle() const2461 elf_handle() const
2462 {
2463 if (elf_handle_ == 0)
2464 {
2465 if (elf_module())
2466 {
2467 GElf_Addr bias = 0;
2468 elf_handle_ = dwfl_module_getelf(elf_module(), &bias);
2469 }
2470 }
2471 return elf_handle_;
2472 }
2473
2474 /// Return the ELF descriptor used for DWARF access.
2475 ///
2476 /// This can be the same as read_context::elf_handle() above, if the
2477 /// DWARF info is in the same ELF file as the one of the binary we
2478 /// are analizing. It is different if e.g, the debug info is split
2479 /// from the ELF file we are analizing.
2480 ///
2481 /// @return a pointer to the ELF descriptor used to access debug
2482 /// info.
2483 Elf*
dwarf_elf_handle() const2484 dwarf_elf_handle() const
2485 {return dwarf_getelf(dwarf());}
2486
2487 /// Test if the debug information is in a separate ELF file wrt the
2488 /// main ELF file of the program (application or shared library) we
2489 /// are analizing.
2490 ///
2491 /// @return true if the debug information is in a separate ELF file
2492 /// compared to the main ELF file of the program (application or
2493 /// shared library) that we are looking at.
2494 bool
dwarf_is_splitted() const2495 dwarf_is_splitted() const
2496 {return dwarf_elf_handle() != elf_handle();}
2497
2498 /// Add paths to the set of paths under which to look for split
2499 /// debuginfo files.
2500 ///
2501 /// @param debug_info_root_paths the paths to add.
2502 void
add_debug_info_root_paths(const vector<char ** > & debug_info_root_paths)2503 add_debug_info_root_paths(const vector<char **>& debug_info_root_paths)
2504 {
2505 debug_info_root_paths_.insert(debug_info_root_paths_.end(),
2506 debug_info_root_paths.begin(),
2507 debug_info_root_paths.end());
2508 }
2509
2510 /// Add a path to the set of paths under which to look for split
2511 /// debuginfo files.
2512 ///
2513 /// @param debug_info_root_path the path to add.
2514 void
add_debug_info_root_path(char ** debug_info_root_path)2515 add_debug_info_root_path(char** debug_info_root_path)
2516 {debug_info_root_paths_.push_back(debug_info_root_path);}
2517
2518 /// Find the alternate debuginfo file associated to a given elf file.
2519 ///
2520 /// @param elf_module represents the elf file to consider.
2521 ///
2522 /// @param alt_file_name the resulting path to the alternate
2523 /// debuginfo file found. This is set iff the function returns a
2524 /// non-nil value.
2525 Dwarf*
find_alt_debug_info(Dwfl_Module * elf_module,string & alt_file_name,int & alt_fd)2526 find_alt_debug_info(Dwfl_Module *elf_module,
2527 string& alt_file_name,
2528 int& alt_fd)
2529 {
2530 Dwarf *result = 0;
2531 result = dwarf_reader::find_alt_debug_info(elf_module,
2532 debug_info_root_paths_,
2533 alt_file_name, alt_fd);
2534 return result;
2535 }
2536
2537 /// Load the debug info associated with an elf file that is at a
2538 /// given path.
2539 ///
2540 /// @return a pointer to the DWARF debug info pointer upon
2541 /// successful debug info loading, NULL otherwise.
2542 Dwarf*
load_debug_info()2543 load_debug_info()
2544 {
2545 if (!dwfl_handle())
2546 return 0;
2547
2548 if (dwarf_)
2549 return dwarf_;
2550
2551 elf_module_ =
2552 dwfl_report_offline(dwfl_handle().get(),
2553 basename(const_cast<char*>(elf_path().c_str())),
2554 elf_path().c_str(),
2555 -1);
2556 dwfl_report_end(dwfl_handle().get(), 0, 0);
2557
2558 Dwarf_Addr bias = 0;
2559 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
2560 // Look for split debuginfo files under multiple possible
2561 // debuginfo roots.
2562 for (vector<char**>::const_iterator i = debug_info_root_paths_.begin();
2563 dwarf_ == 0 && i != debug_info_root_paths_.end();
2564 ++i)
2565 {
2566 offline_callbacks()->debuginfo_path = *i;
2567 dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
2568 }
2569
2570 if (!alt_dwarf_)
2571 alt_dwarf_ = find_alt_debug_info(elf_module_,
2572 alt_debug_info_path_,
2573 alt_fd_);
2574
2575 return dwarf_;
2576 }
2577
2578 /// Return the main debug info we are looking at.
2579 ///
2580 /// @return the main debug info.
2581 Dwarf*
dwarf() const2582 dwarf() const
2583 {return dwarf_;}
2584
2585 /// Return the alternate debug info we are looking at.
2586 ///
2587 /// Note that "alternate debug info sections" is a GNU extension as
2588 /// of DWARF4 and is described at
2589 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
2590 ///
2591 /// @return the alternate debug info.
2592 Dwarf*
alt_dwarf() const2593 alt_dwarf() const
2594 {return alt_dwarf_;}
2595
2596 /// Return the correct debug info, depending on the DIE source we
2597 /// are looking at.
2598 ///
2599 /// @param source the DIE source to consider.
2600 ///
2601 /// @return the right debug info, depending on @p source.
2602 Dwarf*
dwarf_per_die_source(die_source source) const2603 dwarf_per_die_source(die_source source) const
2604 {
2605 Dwarf *result = 0;
2606 switch(source)
2607 {
2608 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
2609 case TYPE_UNIT_DIE_SOURCE:
2610 result = dwarf();
2611 break;
2612 case ALT_DEBUG_INFO_DIE_SOURCE:
2613 result = alt_dwarf();
2614 break;
2615 case NO_DEBUG_INFO_DIE_SOURCE:
2616 case NUMBER_OF_DIE_SOURCES:
2617 ABG_ASSERT_NOT_REACHED;
2618 }
2619 return result;
2620 }
2621
2622 /// Return the path to the alternate debug info as contained in the
2623 /// .gnu_debugaltlink section of the main elf file.
2624 ///
2625 /// Note that "alternate debug info sections" is a GNU extension as
2626 /// of DWARF4 and is described at
2627 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
2628 ///
2629 /// @return the path to the alternate debug info file, or an empty
2630 /// path if no alternate debug info file is associated.
2631 const string&
alt_debug_info_path() const2632 alt_debug_info_path() const
2633 {return alt_debug_info_path_;}
2634
2635 /// Return the path to the ELF path we are reading.
2636 ///
2637 /// @return the elf path.
2638 const string&
elf_path() const2639 elf_path() const
2640 {return elf_path_;}
2641
2642 const Dwarf_Die*
cur_tu_die() const2643 cur_tu_die() const
2644 {return cur_tu_die_;}
2645
2646 void
cur_tu_die(Dwarf_Die * cur_tu_die)2647 cur_tu_die(Dwarf_Die* cur_tu_die)
2648 {cur_tu_die_ = cur_tu_die;}
2649
2650 dwarf_expr_eval_context&
dwarf_expr_eval_ctxt() const2651 dwarf_expr_eval_ctxt() const
2652 {return dwarf_expr_eval_context_;}
2653
2654 /// Getter of the maps set that associates a representation of a
2655 /// decl DIE to a vector of offsets of DIEs having that representation.
2656 ///
2657 /// @return the maps set that associates a representation of a decl
2658 /// DIE to a vector of offsets of DIEs having that representation.
2659 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
decl_die_repr_die_offsets_maps() const2660 decl_die_repr_die_offsets_maps() const
2661 {return decl_die_repr_die_offsets_maps_;}
2662
2663 /// Getter of the maps set that associates a representation of a
2664 /// decl DIE to a vector of offsets of DIEs having that representation.
2665 ///
2666 /// @return the maps set that associates a representation of a decl
2667 /// DIE to a vector of offsets of DIEs having that representation.
2668 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
decl_die_repr_die_offsets_maps()2669 decl_die_repr_die_offsets_maps()
2670 {return decl_die_repr_die_offsets_maps_;}
2671
2672 /// Getter of the maps set that associate a representation of a type
2673 /// DIE to a vector of offsets of DIEs having that representation.
2674 ///
2675 /// @return the maps set that associate a representation of a type
2676 /// DIE to a vector of offsets of DIEs having that representation.
2677 const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
type_die_repr_die_offsets_maps() const2678 type_die_repr_die_offsets_maps() const
2679 {return type_die_repr_die_offsets_maps_;}
2680
2681 /// Getter of the maps set that associate a representation of a type
2682 /// DIE to a vector of offsets of DIEs having that representation.
2683 ///
2684 /// @return the maps set that associate a representation of a type
2685 /// DIE to a vector of offsets of DIEs having that representation.
2686 die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
type_die_repr_die_offsets_maps()2687 type_die_repr_die_offsets_maps()
2688 {return type_die_repr_die_offsets_maps_;}
2689
2690
2691 /// Compute the offset of the canonical DIE of a given DIE.
2692 ///
2693 /// @param die the DIE to consider.
2694 ///
2695 /// @param canonical_die_offset out parameter. This is set to the
2696 /// resulting canonical DIE that was computed.
2697 ///
2698 /// @param die_as_type if yes, it means @p die has to be considered
2699 /// as a type.
2700 void
compute_canonical_die_offset(const Dwarf_Die * die,Dwarf_Off & canonical_die_offset,bool die_as_type) const2701 compute_canonical_die_offset(const Dwarf_Die *die,
2702 Dwarf_Off &canonical_die_offset,
2703 bool die_as_type) const
2704 {
2705 offset_offset_map_type &canonical_dies =
2706 die_as_type
2707 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2708 get_container(*this, die)
2709 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2710 get_container(*this, die);
2711
2712 Dwarf_Die canonical_die;
2713 compute_canonical_die(die, canonical_dies, canonical_die, die_as_type);
2714
2715 canonical_die_offset = dwarf_dieoffset(&canonical_die);
2716 }
2717
2718 /// Compute (find) the canonical DIE of a given DIE.
2719 ///
2720 /// @param die the DIE to consider.
2721 ///
2722 /// @param canonical_dies the vector in which the canonical dies ar
2723 /// stored. The index of each element is the offset of the DIE we
2724 /// want the canonical DIE for. And the value of the element at
2725 /// that index is the canonical DIE offset we are looking for.
2726 ///
2727 /// @param canonical_die_offset out parameter. This is set to the
2728 /// resulting canonical DIE that was computed.
2729 ///
2730 /// @param die_as_type if yes, it means @p die has to be considered
2731 /// as a type.
2732 void
compute_canonical_die(const Dwarf_Die * die,offset_offset_map_type & canonical_dies,Dwarf_Die & canonical_die,bool die_as_type) const2733 compute_canonical_die(const Dwarf_Die *die,
2734 offset_offset_map_type& canonical_dies,
2735 Dwarf_Die &canonical_die,
2736 bool die_as_type) const
2737 {
2738 const die_source source = get_die_source(die);
2739
2740 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2741
2742 compute_canonical_die(die_offset, source,
2743 canonical_dies,
2744 canonical_die, die_as_type);
2745 }
2746
2747 /// Compute (find) the canonical DIE of a given DIE.
2748 ///
2749 /// @param die_offset the offset of the DIE to consider.
2750 ///
2751 /// @param source the source of the DIE to consider.
2752 ///
2753 /// @param canonical_dies the vector in which the canonical dies ar
2754 /// stored. The index of each element is the offset of the DIE we
2755 /// want the canonical DIE for. And the value of the element at
2756 /// that index is the canonical DIE offset we are looking for.
2757 ///
2758 /// @param canonical_die_offset out parameter. This is set to the
2759 /// resulting canonical DIE that was computed.
2760 ///
2761 /// @param die_as_type if yes, it means @p die has to be considered
2762 /// as a type.
2763 void
compute_canonical_die(Dwarf_Off die_offset,die_source source,offset_offset_map_type & canonical_dies,Dwarf_Die & canonical_die,bool die_as_type) const2764 compute_canonical_die(Dwarf_Off die_offset,
2765 die_source source,
2766 offset_offset_map_type& canonical_dies,
2767 Dwarf_Die &canonical_die,
2768 bool die_as_type) const
2769 {
2770 // The map that associates the string representation of 'die'
2771 // with a vector of offsets of potentially equivalent DIEs.
2772 istring_dwarf_offsets_map_type& map =
2773 die_as_type
2774 ? (const_cast<read_context*>(this)->
2775 type_die_repr_die_offsets_maps().get_container(source))
2776 : (const_cast<read_context*>(this)->
2777 decl_die_repr_die_offsets_maps().get_container(source));
2778
2779 Dwarf_Die die;
2780 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
2781
2782 // The variable repr is the the string representation of 'die'.
2783 //
2784 // Even if die_as_type is true -- which means that 'die' is said
2785 // to be considered as a type -- we always consider a
2786 // DW_TAG_subprogram DIE as a decl here, as far as its string
2787 // representation is concerned.
2788 interned_string name =
2789 (die_as_type)
2790 ? get_die_pretty_type_representation(&die, /*where=*/0)
2791 : get_die_pretty_representation(&die, /*where=*/0);
2792
2793 Dwarf_Off canonical_die_offset = 0;
2794 istring_dwarf_offsets_map_type::iterator i = map.find(name);
2795 if (i == map.end())
2796 {
2797 dwarf_offsets_type offsets;
2798 offsets.push_back(die_offset);
2799 map[name] = offsets;
2800 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2801 get_die_from_offset(source, die_offset, &canonical_die);
2802 return;
2803 }
2804
2805 if (odr_is_relevant(&die))
2806 {
2807 // ODR is relevant for this DIE. In this case, all types with
2808 // the same name are considered equivalent. So the array
2809 // i->second shoud only have on element. If not, then
2810 // the DIEs referenced in the array should all compare equal.
2811 // Otherwise, this is an ODR violation. In any case, return
2812 // the first element of the array.
2813 // ABG_ASSERT(i->second.size() == 1);
2814 canonical_die_offset = i->second.front();
2815 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2816 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2817 return;
2818 }
2819
2820 Dwarf_Off cur_die_offset;
2821 Dwarf_Die potential_canonical_die;
2822 for (dwarf_offsets_type::const_iterator o = i->second.begin();
2823 o != i->second.end();
2824 ++o)
2825 {
2826 cur_die_offset = *o;
2827 get_die_from_offset(source, cur_die_offset, &potential_canonical_die);
2828 if (compare_dies(*this, &die, &potential_canonical_die,
2829 /*update_canonical_dies_on_the_fly=*/false))
2830 {
2831 canonical_die_offset = cur_die_offset;
2832 set_canonical_die_offset(canonical_dies, die_offset,
2833 canonical_die_offset);
2834 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2835 return;
2836 }
2837 }
2838
2839 canonical_die_offset = die_offset;
2840 i->second.push_back(die_offset);
2841 set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2842 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2843 }
2844
2845 /// Getter of the canonical DIE of a given DIE.
2846 ///
2847 /// @param die the DIE to consider.
2848 ///
2849 /// @param canonical_die output parameter. Is set to the resuling
2850 /// canonical die, if this function returns true.
2851 ///
2852 /// @param where the offset of the logical DIE we are supposed to be
2853 /// calling this function from. If set to zero this means this is
2854 /// to be ignored.
2855 ///
2856 /// @param die_as_type if set to yes, it means @p die is to be
2857 /// considered as a type DIE.
2858 ///
2859 /// @return true iff a canonical DIE was found for @p die.
2860 bool
get_canonical_die(const Dwarf_Die * die,Dwarf_Die & canonical_die,size_t where,bool die_as_type) const2861 get_canonical_die(const Dwarf_Die *die,
2862 Dwarf_Die &canonical_die,
2863 size_t where,
2864 bool die_as_type) const
2865 {
2866 const die_source source = get_die_source(die);
2867
2868 offset_offset_map_type &canonical_dies =
2869 die_as_type
2870 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2871 get_container(source)
2872 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2873 get_container(source);
2874
2875 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2876 if (Dwarf_Off canonical_die_offset =
2877 get_canonical_die_offset(canonical_dies, die_offset))
2878 {
2879 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2880 return true;
2881 }
2882
2883 // The map that associates the string representation of 'die'
2884 // with a vector of offsets of potentially equivalent DIEs.
2885 istring_dwarf_offsets_map_type& map =
2886 die_as_type
2887 ? (const_cast<read_context*>(this)->
2888 type_die_repr_die_offsets_maps().get_container(*this, die))
2889 : (const_cast<read_context*>(this)->
2890 decl_die_repr_die_offsets_maps().get_container(*this, die));
2891
2892 // The variable repr is the the string representation of 'die'.
2893 //
2894 // Even if die_as_type is true -- which means that 'die' is said
2895 // to be considered as a type -- we always consider a
2896 // DW_TAG_subprogram DIE as a decl here, as far as its string
2897 // representation is concerned.
2898 interned_string name =
2899 (die_as_type /*&& dwarf_tag(die) != DW_TAG_subprogram*/)
2900 ? get_die_pretty_type_representation(die, where)
2901 : get_die_pretty_representation(die, where);
2902
2903 istring_dwarf_offsets_map_type::iterator i = map.find(name);
2904 if (i == map.end())
2905 return false;
2906
2907 if (odr_is_relevant(die))
2908 {
2909 // ODR is relevant for this DIE. In this case, all types with
2910 // the same name are considered equivalent. So the array
2911 // i->second shoud only have on element. If not, then
2912 // the DIEs referenced in the array should all compare equal.
2913 // Otherwise, this is an ODR violation. In any case, return
2914 // the first element of the array.
2915 // ABG_ASSERT(i->second.size() == 1);
2916 Dwarf_Off canonical_die_offset = i->second.front();
2917 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2918 set_canonical_die_offset(canonical_dies,
2919 die_offset,
2920 canonical_die_offset);
2921 return true;
2922 }
2923
2924 Dwarf_Off cur_die_offset;
2925 for (dwarf_offsets_type::const_iterator o = i->second.begin();
2926 o != i->second.end();
2927 ++o)
2928 {
2929 cur_die_offset = *o;
2930 get_die_from_offset(source, cur_die_offset, &canonical_die);
2931 // compare die and canonical_die.
2932 if (compare_dies(*this, die, &canonical_die,
2933 /*update_canonical_dies_on_the_fly=*/true))
2934 {
2935 set_canonical_die_offset(canonical_dies,
2936 die_offset,
2937 cur_die_offset);
2938 return true;
2939 }
2940 }
2941
2942 return false;
2943 }
2944
2945 /// Retrieve the canonical DIE of a given DIE.
2946 ///
2947 /// The canonical DIE is a DIE that is structurally equivalent to
2948 /// this one.
2949 ///
2950 /// Note that this function caches the canonical DIE that was
2951 /// computed. Subsequent invocations of this function on the same
2952 /// DIE return the same cached DIE.
2953 ///
2954 /// @param die the DIE to get a canonical type for.
2955 ///
2956 /// @param canonical_die the resulting canonical DIE.
2957 ///
2958 /// @param where the offset of the logical DIE we are supposed to be
2959 /// calling this function from. If set to zero this means this is
2960 /// to be ignored.
2961 ///
2962 /// @param die_as_type if true, consider DIE is a type.
2963 ///
2964 /// @return true if an *existing* canonical DIE was found.
2965 /// Otherwise, @p die is considered as being a canonical DIE for
2966 /// itself. @p canonical_die is thus set to the canonical die in
2967 /// either cases.
2968 bool
get_or_compute_canonical_die(const Dwarf_Die * die,Dwarf_Die & canonical_die,size_t where,bool die_as_type) const2969 get_or_compute_canonical_die(const Dwarf_Die* die,
2970 Dwarf_Die& canonical_die,
2971 size_t where,
2972 bool die_as_type) const
2973 {
2974 const die_source source = get_die_source(die);
2975
2976 offset_offset_map_type &canonical_dies =
2977 die_as_type
2978 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2979 get_container(source)
2980 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2981 get_container(source);
2982
2983 Dwarf_Off initial_die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2984
2985 if (Dwarf_Off canonical_die_offset =
2986 get_canonical_die_offset(canonical_dies,
2987 initial_die_offset))
2988 {
2989 get_die_from_offset(source, canonical_die_offset, &canonical_die);
2990 return true;
2991 }
2992
2993 // The map that associates the string representation of 'die'
2994 // with a vector of offsets of potentially equivalent DIEs.
2995 istring_dwarf_offsets_map_type& map =
2996 die_as_type
2997 ? (const_cast<read_context*>(this)->
2998 type_die_repr_die_offsets_maps().get_container(*this, die))
2999 : (const_cast<read_context*>(this)->
3000 decl_die_repr_die_offsets_maps().get_container(*this, die));
3001
3002 // The variable repr is the the string representation of 'die'.
3003 //
3004 // Even if die_as_type is true -- which means that 'die' is said
3005 // to be considered as a type -- we always consider a
3006 // DW_TAG_subprogram DIE as a decl here, as far as its string
3007 // representation is concerned.
3008 interned_string name =
3009 (die_as_type)
3010 ? get_die_pretty_type_representation(die, where)
3011 : get_die_pretty_representation(die, where);
3012
3013 istring_dwarf_offsets_map_type::iterator i = map.find(name);
3014 if (i == map.end())
3015 {
3016 dwarf_offsets_type offsets;
3017 offsets.push_back(initial_die_offset);
3018 map[name] = offsets;
3019 get_die_from_offset(source, initial_die_offset, &canonical_die);
3020 set_canonical_die_offset(canonical_dies,
3021 initial_die_offset,
3022 initial_die_offset);
3023 return false;
3024 }
3025
3026 if (odr_is_relevant(die))
3027 {
3028 // ODR is relevant for this DIE. In this case, all types with
3029 // the same name are considered equivalent. So the array
3030 // i->second shoud only have on element. If not, then
3031 // the DIEs referenced in the array should all compare equal.
3032 // Otherwise, this is an ODR violation. In any case, return
3033 // the first element of the array.
3034 // ABG_ASSERT(i->second.size() == 1);
3035 Dwarf_Off die_offset = i->second.front();
3036 get_die_from_offset(source, die_offset, &canonical_die);
3037 set_canonical_die_offset(canonical_dies,
3038 initial_die_offset,
3039 die_offset);
3040 return true;
3041 }
3042
3043 // walk i->second without any iterator (using a while loop rather
3044 // than a for loop) because compare_dies might add new content to
3045 // the end of the i->second vector during the walking.
3046 dwarf_offsets_type::size_type n = 0, s = i->second.size();
3047 while (n < s)
3048 {
3049 Dwarf_Off die_offset = i->second[n];
3050 get_die_from_offset(source, die_offset, &canonical_die);
3051 // compare die and canonical_die.
3052 if (compare_dies(*this, die, &canonical_die,
3053 /*update_canonical_dies_on_the_fly=*/true))
3054 {
3055 set_canonical_die_offset(canonical_dies,
3056 initial_die_offset,
3057 die_offset);
3058 return true;
3059 }
3060 ++n;
3061 }
3062
3063 // We didn't find a canonical DIE for 'die'. So let's consider
3064 // that it is its own canonical DIE.
3065 get_die_from_offset(source, initial_die_offset, &canonical_die);
3066 i->second.push_back(initial_die_offset);
3067 set_canonical_die_offset(canonical_dies,
3068 initial_die_offset,
3069 initial_die_offset);
3070
3071 return false;
3072 }
3073
3074 /// Get the source of the DIE.
3075 ///
3076 /// The function returns an enumerator value saying if the DIE comes
3077 /// from the .debug_info section of the primary debug info file, the
3078 /// .debug_info section of the alternate debug info file, or the
3079 /// .debug_types section.
3080 ///
3081 /// @param die the DIE to get the source of.
3082 ///
3083 /// @return the source of the DIE if it could be determined,
3084 /// NO_DEBUG_INFO_DIE_SOURCE otherwise.
3085 die_source
get_die_source(const Dwarf_Die * die) const3086 get_die_source(const Dwarf_Die *die) const
3087 {
3088 die_source source = NO_DEBUG_INFO_DIE_SOURCE;
3089 ABG_ASSERT(die);
3090 ABG_ASSERT(get_die_source(*die, source));
3091 return source;
3092 }
3093
3094 /// Get the source of the DIE.
3095 ///
3096 /// The function returns an enumerator value saying if the DIE comes
3097 /// from the .debug_info section of the primary debug info file, the
3098 /// .debug_info section of the alternate debug info file, or the
3099 /// .debug_types section.
3100 ///
3101 /// @param die the DIE to get the source of.
3102 ///
3103 /// @param source out parameter. The function sets this parameter
3104 /// to the source of the DIE @p iff it returns true.
3105 ///
3106 /// @return true iff the source of the DIE could be determined and
3107 /// returned.
3108 bool
get_die_source(const Dwarf_Die & die,die_source & source) const3109 get_die_source(const Dwarf_Die &die, die_source &source) const
3110 {
3111 Dwarf_Die cu_die;
3112 Dwarf_Die cu_kind;
3113 uint8_t address_size = 0, offset_size = 0;
3114 if (!dwarf_diecu(const_cast<Dwarf_Die*>(&die),
3115 &cu_die, &address_size,
3116 &offset_size))
3117 return false;
3118
3119 Dwarf_Half version = 0;
3120 Dwarf_Off abbrev_offset = 0;
3121 uint64_t type_signature = 0;
3122 Dwarf_Off type_offset = 0;
3123 if (!dwarf_cu_die(cu_die.cu, &cu_kind,
3124 &version, &abbrev_offset,
3125 &address_size, &offset_size,
3126 &type_signature, &type_offset))
3127 return false;
3128
3129 int tag = dwarf_tag(&cu_kind);
3130
3131 if (tag == DW_TAG_compile_unit
3132 || tag == DW_TAG_partial_unit)
3133 {
3134 Dwarf *die_dwarf = dwarf_cu_getdwarf(cu_die.cu);
3135 if (dwarf() == die_dwarf)
3136 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
3137 else if (alt_dwarf() == die_dwarf)
3138 source = ALT_DEBUG_INFO_DIE_SOURCE;
3139 else
3140 ABG_ASSERT_NOT_REACHED;
3141 }
3142 else if (tag == DW_TAG_type_unit)
3143 source = TYPE_UNIT_DIE_SOURCE;
3144 else
3145 return false;
3146
3147 return true;
3148 }
3149
3150 /// Getter for the DIE designated by an offset.
3151 ///
3152 /// @param source the source of the DIE to get.
3153 ///
3154 /// @param offset the offset of the DIE to get.
3155 ///
3156 /// @param die the resulting DIE. The pointer has to point to an
3157 /// allocated memory region.
3158 void
get_die_from_offset(die_source source,Dwarf_Off offset,Dwarf_Die * die) const3159 get_die_from_offset(die_source source, Dwarf_Off offset, Dwarf_Die *die) const
3160 {
3161 if (source == TYPE_UNIT_DIE_SOURCE)
3162 ABG_ASSERT(dwarf_offdie_types(dwarf_per_die_source(source), offset, die));
3163 else
3164 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), offset, die));
3165 }
3166
3167 public:
3168
3169 /// Add an entry to the relevant die->decl map.
3170 ///
3171 /// @param die the DIE to add the the map.
3172 ///
3173 /// @param decl the decl to consider.
3174 ///
3175 /// @param where_offset where in the DIE stream we logically are.
3176 ///
3177 /// @param do_associate_by_repr if true then this function
3178 /// associates the representation string of @p die with the
3179 /// declaration @p decl, in a corpus-wide manner. That is, in the
3180 /// entire current corpus, there is going to be just one declaration
3181 /// associated with a DIE of the string representation of @p die.
3182 ///
3183 /// @param do_associate_by_repr_per_tu if true, then this function
3184 /// associates the representation string of @p die with the
3185 /// declaration @p decl in a translation unit wide manner. That is,
3186 /// in the entire current translation unit, there is going to be
3187 /// just one declaration associated with a DIE of the string
3188 /// representation of @p die.
3189 void
associate_die_to_decl(Dwarf_Die * die,decl_base_sptr decl,size_t where_offset,bool do_associate_by_repr=false)3190 associate_die_to_decl(Dwarf_Die* die,
3191 decl_base_sptr decl,
3192 size_t where_offset,
3193 bool do_associate_by_repr = false)
3194 {
3195 const die_source source = get_die_source(die);
3196
3197 die_artefact_map_type& m =
3198 decl_die_artefact_maps().get_container(source);
3199
3200 size_t die_offset;
3201 if (do_associate_by_repr)
3202 {
3203 Dwarf_Die equiv_die;
3204 get_or_compute_canonical_die(die, equiv_die, where_offset,
3205 /*die_as_type=*/false);
3206 die_offset = dwarf_dieoffset(&equiv_die);
3207 }
3208 else
3209 die_offset = dwarf_dieoffset(die);
3210
3211 m[die_offset] = decl;
3212 }
3213
3214 /// Lookup the decl for a given DIE.
3215 ///
3216 /// The returned decl is either the decl of the DIE that as the
3217 /// exact offset @p die_offset
3218 /// die_offset, or
3219 /// give
3220 ///
3221 /// @param die_offset the offset of the DIE to consider.
3222 ///
3223 /// @param source where the DIE represented by @p die_offset comes
3224 /// from.
3225 ///
3226 /// Note that "alternate debug info sections" is a GNU extension as
3227 /// of DWARF4 and is described at
3228 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3229 ///
3230 /// @return the resulting decl, or null if no decl is associated to
3231 /// the DIE represented by @p die_offset.
3232 decl_base_sptr
lookup_decl_from_die_offset(Dwarf_Off die_offset,die_source source)3233 lookup_decl_from_die_offset(Dwarf_Off die_offset, die_source source)
3234 {
3235 decl_base_sptr result =
3236 is_decl(lookup_artifact_from_die_offset(die_offset, source,
3237 /*die_as_type=*/false));
3238
3239 return result;
3240 }
3241
3242 /// Get the qualified name of a given DIE.
3243 ///
3244 /// If the name of the DIE was already computed before just return
3245 /// that name from a cache. Otherwise, build the name, cache it and
3246 /// return it.
3247 ///
3248 /// @param die the DIE to consider.
3249 ///
3250 /// @param where_offset where in the DIE stream we logically are.
3251 ///
3252 /// @return the interned string representing the qualified name of
3253 /// @p die.
3254 interned_string
get_die_qualified_name(Dwarf_Die * die,size_t where_offset)3255 get_die_qualified_name(Dwarf_Die *die, size_t where_offset)
3256 {
3257 ABG_ASSERT(die);
3258 die_istring_map_type& map =
3259 die_qualified_name_maps_.get_container(*this, die);
3260
3261 size_t die_offset = dwarf_dieoffset(die);
3262 die_istring_map_type::const_iterator i = map.find(die_offset);
3263
3264 if (i == map.end())
3265 {
3266 read_context& ctxt = *const_cast<read_context*>(this);
3267 string qualified_name = die_qualified_name(ctxt, die, where_offset);
3268 interned_string istr = env()->intern(qualified_name);
3269 map[die_offset] = istr;
3270 return istr;
3271 }
3272
3273 return i->second;
3274 }
3275
3276 /// Get the qualified name of a given DIE.
3277 ///
3278 /// If the name of the DIE was already computed before just return
3279 /// that name from a cache. Otherwise, build the name, cache it and
3280 /// return it.
3281 ///
3282 /// @param die the DIE to consider.
3283 ///
3284 /// @param where_offset where in the DIE stream we logically are.
3285 ///
3286 /// @return the interned string representing the qualified name of
3287 /// @p die.
3288 interned_string
get_die_qualified_name(Dwarf_Die * die,size_t where_offset) const3289 get_die_qualified_name(Dwarf_Die *die, size_t where_offset) const
3290 {
3291 return const_cast<read_context*>(this)->
3292 get_die_qualified_name(die, where_offset);
3293 }
3294
3295 /// Get the qualified name of a given DIE which is considered to be
3296 /// the DIE for a type.
3297 ///
3298 /// For instance, for a DW_TAG_subprogram DIE, this function
3299 /// computes the name of the function *type* that corresponds to the
3300 /// function.
3301 ///
3302 /// If the name of the DIE was already computed before just return
3303 /// that name from a cache. Otherwise, build the name, cache it and
3304 /// return it.
3305 ///
3306 /// @param die the DIE to consider.
3307 ///
3308 /// @param where_offset where in the DIE stream we logically are.
3309 ///
3310 /// @return the interned string representing the qualified name of
3311 /// @p die.
3312 interned_string
get_die_qualified_type_name(const Dwarf_Die * die,size_t where_offset) const3313 get_die_qualified_type_name(const Dwarf_Die *die, size_t where_offset) const
3314 {
3315 ABG_ASSERT(die);
3316
3317 // The name of the translation unit die is "".
3318 if (die == cur_tu_die())
3319 return env()->intern("");
3320
3321 die_istring_map_type& map =
3322 die_qualified_name_maps_.get_container(*const_cast<read_context*>(this),
3323 die);
3324
3325 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3326 die_istring_map_type::const_iterator i =
3327 map.find(die_offset);
3328
3329 if (i == map.end())
3330 {
3331 read_context& ctxt = *const_cast<read_context*>(this);
3332 string qualified_name;
3333 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
3334 if ((tag == DW_TAG_structure_type
3335 || tag == DW_TAG_class_type
3336 || tag == DW_TAG_union_type)
3337 && die_is_anonymous(die))
3338 {
3339 location l = die_location(*this, die);
3340 qualified_name = l ? l.expand() : "noloc";
3341 qualified_name = "unnamed-at-" + qualified_name;
3342 }
3343 else
3344 qualified_name =
3345 die_qualified_type_name(ctxt, die, where_offset);
3346
3347 interned_string istr = env()->intern(qualified_name);
3348 map[die_offset] = istr;
3349 return istr;
3350 }
3351
3352 return i->second;
3353 }
3354
3355 /// Get the pretty representation of a DIE that represents a type.
3356 ///
3357 /// For instance, for the DW_TAG_subprogram, this function computes
3358 /// the pretty representation of the type of the function, not the
3359 /// pretty representation of the function declaration.
3360 ///
3361 /// Once the pretty representation is computed, it's stored in a
3362 /// cache. Subsequent invocations of this function on the same DIE
3363 /// will yield the cached name.
3364 ///
3365 /// @param die the DIE to consider.
3366 ///
3367 /// @param where_offset where in the DIE stream we logically are.
3368 ///
3369 /// @return the interned_string that represents the pretty
3370 /// representation.
3371 interned_string
get_die_pretty_type_representation(const Dwarf_Die * die,size_t where_offset) const3372 get_die_pretty_type_representation(const Dwarf_Die *die,
3373 size_t where_offset) const
3374 {
3375 ABG_ASSERT(die);
3376 die_istring_map_type& map =
3377 die_pretty_type_repr_maps_.get_container(*const_cast<read_context*>(this),
3378 die);
3379
3380 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3381 die_istring_map_type::const_iterator i = map.find(die_offset);
3382
3383 if (i == map.end())
3384 {
3385 read_context& ctxt = *const_cast<read_context*>(this);
3386 string pretty_representation =
3387 die_pretty_print_type(ctxt, die, where_offset);
3388 interned_string istr = env()->intern(pretty_representation);
3389 map[die_offset] = istr;
3390 return istr;
3391 }
3392
3393 return i->second;
3394 }
3395
3396 /// Get the pretty representation of a DIE.
3397 ///
3398 /// Once the pretty representation is computed, it's stored in a
3399 /// cache. Subsequent invocations of this function on the same DIE
3400 /// will yield the cached name.
3401 ///
3402 /// @param die the DIE to consider.
3403 ///
3404 /// @param where_offset where in the DIE stream we logically are.
3405 ///
3406 /// @return the interned_string that represents the pretty
3407 /// representation.
3408 interned_string
get_die_pretty_representation(const Dwarf_Die * die,size_t where_offset) const3409 get_die_pretty_representation(const Dwarf_Die *die, size_t where_offset) const
3410 {
3411 ABG_ASSERT(die);
3412
3413 die_istring_map_type& map =
3414 die_pretty_repr_maps_.get_container(*const_cast<read_context*>(this),
3415 die);
3416
3417 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3418 die_istring_map_type::const_iterator i = map.find(die_offset);
3419
3420 if (i == map.end())
3421 {
3422 read_context& ctxt = *const_cast<read_context*>(this);
3423 string pretty_representation =
3424 die_pretty_print(ctxt, die, where_offset);
3425 interned_string istr = env()->intern(pretty_representation);
3426 map[die_offset] = istr;
3427 return istr;
3428 }
3429
3430 return i->second;
3431 }
3432
3433 /// Lookup the artifact that was built to represent a type that has
3434 /// the same pretty representation as the type denoted by a given
3435 /// DIE.
3436 ///
3437 /// Note that the DIE must have previously been associated with the
3438 /// artifact using the functions associate_die_to_decl or
3439 /// associate_die_to_type.
3440 ///
3441 /// Also, note that the scope of the lookup is the current ABI
3442 /// corpus.
3443 ///
3444 /// @param die the DIE to consider.
3445 ///
3446 /// @param where_offset where in the DIE stream we logically are.
3447 ///
3448 /// @return the type artifact found.
3449 type_or_decl_base_sptr
lookup_type_artifact_from_die(Dwarf_Die * die) const3450 lookup_type_artifact_from_die(Dwarf_Die *die) const
3451 {
3452 type_or_decl_base_sptr artifact =
3453 lookup_artifact_from_die(die, /*type_as_die=*/true);
3454 if (function_decl_sptr fn = is_function_decl(artifact))
3455 return fn->get_type();
3456 return artifact;
3457 }
3458
3459 /// Lookup the artifact that was built to represent a type or a
3460 /// declaration that has the same pretty representation as the type
3461 /// denoted by a given DIE.
3462 ///
3463 /// Note that the DIE must have previously been associated with the
3464 /// artifact using the functions associate_die_to_decl or
3465 /// associate_die_to_type.
3466 ///
3467 /// Also, note that the scope of the lookup is the current ABI
3468 /// corpus.
3469 ///
3470 /// @param die the DIE to consider.
3471 ///
3472 /// @param where_offset where in the DIE stream we logically are.
3473 ///
3474 /// @param die_as_type if true, it means the DIE is to be considered
3475 /// as a type.
3476 ///
3477 /// @return the artifact found.
3478 type_or_decl_base_sptr
lookup_artifact_from_die(const Dwarf_Die * die,bool die_as_type=false) const3479 lookup_artifact_from_die(const Dwarf_Die *die, bool die_as_type = false) const
3480 {
3481 Dwarf_Die equiv_die;
3482 if (!get_or_compute_canonical_die(die, equiv_die, /*where=*/0, die_as_type))
3483 return type_or_decl_base_sptr();
3484
3485 const die_artefact_map_type& m =
3486 die_as_type
3487 ? type_die_artefact_maps().get_container(*this, &equiv_die)
3488 : decl_die_artefact_maps().get_container(*this, &equiv_die);
3489
3490 size_t die_offset = dwarf_dieoffset(&equiv_die);
3491 die_artefact_map_type::const_iterator i = m.find(die_offset);
3492
3493 if (i == m.end())
3494 return type_or_decl_base_sptr();
3495 return i->second;
3496 }
3497
3498 /// Lookup the artifact that was built to represent a type or a
3499 /// declaration that has the same pretty representation as the type
3500 /// denoted by the offset of a given DIE.
3501 ///
3502 /// Note that the DIE must have previously been associated with the
3503 /// artifact using either associate_die_to_decl or
3504 /// associate_die_to_type.
3505 ///
3506 /// Also, note that the scope of the lookup is the current ABI
3507 /// corpus.
3508 ///
3509 /// @param die the DIE to consider.
3510 ///
3511 /// @param where_offset where in the DIE stream we logically are.
3512 ///
3513 /// @param die_as_type if true, it means the DIE is to be considered
3514 /// as a type.
3515 ///
3516 /// @return the artifact found.
3517 type_or_decl_base_sptr
lookup_artifact_from_die_offset(Dwarf_Off die_offset,die_source source,bool die_as_type=false) const3518 lookup_artifact_from_die_offset(Dwarf_Off die_offset,
3519 die_source source,
3520 bool die_as_type = false) const
3521 {
3522 const die_artefact_map_type& m =
3523 die_as_type
3524 ? type_die_artefact_maps().get_container(source)
3525 : decl_die_artefact_maps().get_container(source);
3526
3527 die_artefact_map_type::const_iterator i = m.find(die_offset);
3528 if (i == m.end())
3529 return type_or_decl_base_sptr();
3530 return i->second;
3531 }
3532
3533 /// Get the language used to generate a given DIE.
3534 ///
3535 /// @param die the DIE to consider.
3536 ///
3537 /// @param lang the resulting language.
3538 ///
3539 /// @return true iff the language of the DIE was found.
3540 bool
get_die_language(const Dwarf_Die * die,translation_unit::language & lang) const3541 get_die_language(const Dwarf_Die *die, translation_unit::language &lang) const
3542 {
3543 Dwarf_Die cu_die;
3544 ABG_ASSERT(dwarf_diecu(const_cast<Dwarf_Die*>(die), &cu_die, 0, 0));
3545
3546 uint64_t l = 0;
3547 if (!die_unsigned_constant_attribute(&cu_die, DW_AT_language, l))
3548 return false;
3549
3550 lang = dwarf_language_to_tu_language(l);
3551 return true;
3552 }
3553
3554 /// Test if a given DIE originates from a program written in the C
3555 /// language.
3556 ///
3557 /// @param die the DIE to consider.
3558 ///
3559 /// @return true iff @p die originates from a program in the C
3560 /// language.
3561 bool
die_is_in_c(const Dwarf_Die * die) const3562 die_is_in_c(const Dwarf_Die *die) const
3563 {
3564 translation_unit::language l = translation_unit::LANG_UNKNOWN;
3565 if (!get_die_language(die, l))
3566 return false;
3567 return is_c_language(l);
3568 }
3569
3570 /// Test if a given DIE originates from a program written in the C++
3571 /// language.
3572 ///
3573 /// @param die the DIE to consider.
3574 ///
3575 /// @return true iff @p die originates from a program in the C++
3576 /// language.
3577 bool
die_is_in_cplus_plus(const Dwarf_Die * die) const3578 die_is_in_cplus_plus(const Dwarf_Die *die) const
3579 {
3580 translation_unit::language l = translation_unit::LANG_UNKNOWN;
3581 if (!get_die_language(die, l))
3582 return false;
3583 return is_cplus_plus_language(l);
3584 }
3585
3586 /// Test if a given DIE originates from a program written either in
3587 /// C or C++.
3588 ///
3589 /// @param die the DIE to consider.
3590 ///
3591 /// @return true iff @p die originates from a program written either in
3592 /// C or C++.
3593 bool
die_is_in_c_or_cplusplus(const Dwarf_Die * die) const3594 die_is_in_c_or_cplusplus(const Dwarf_Die *die) const
3595 {
3596 translation_unit::language l = translation_unit::LANG_UNKNOWN;
3597 if (!get_die_language(die, l))
3598 return false;
3599 return (is_cplus_plus_language(l) || is_c_language(l));
3600 }
3601
3602 /// Check if we can assume the One Definition Rule[1] to be relevant
3603 /// for the current translation unit.
3604 ///
3605 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
3606 ///
3607 /// At the moment this returns true if the current translation unit
3608 /// is in C++ language. In that case, it's relevant to assume that
3609 /// we use optimizations based on the ODR.
3610 bool
odr_is_relevant() const3611 odr_is_relevant() const
3612 {return odr_is_relevant(cur_transl_unit()->get_language());}
3613
3614 /// Check if we can assume the One Definition Rule[1] to be relevant
3615 /// for a given language.
3616 ///
3617 /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
3618 ///
3619 /// At the moment this returns true if the language considered
3620 /// is C++, Java or Ada.
3621 bool
odr_is_relevant(translation_unit::language l) const3622 odr_is_relevant(translation_unit::language l) const
3623 {
3624 return (is_cplus_plus_language(l)
3625 || is_java_language(l)
3626 || is_ada_language(l));
3627 }
3628
3629 /// Check if we can assume the One Definition Rule to be relevant
3630 /// for a given DIE.
3631 ///
3632 /// @param die the DIE to consider.
3633 ///
3634 /// @return true if the ODR is relevant for @p die.
3635 bool
odr_is_relevant(Dwarf_Off die_offset,die_source source) const3636 odr_is_relevant(Dwarf_Off die_offset, die_source source) const
3637 {
3638 Dwarf_Die die;
3639 ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
3640 return odr_is_relevant(&die);
3641 }
3642
3643 /// Check if we can assume the One Definition Rule to be relevant
3644 /// for a given DIE.
3645 ///
3646 /// @param die the DIE to consider.
3647 ///
3648 /// @return true if the ODR is relevant for @p die.
3649 bool
odr_is_relevant(const Dwarf_Die * die) const3650 odr_is_relevant(const Dwarf_Die *die) const
3651 {
3652 translation_unit::language lang;
3653 if (!get_die_language(die, lang))
3654 return odr_is_relevant();
3655
3656 return odr_is_relevant(lang);
3657 }
3658
3659 /// Getter for the maps set that associates a decl DIE offset to an
3660 /// artifact.
3661 ///
3662 /// @return the maps set that associates a decl DIE offset to an
3663 /// artifact.
3664 die_source_dependant_container_set<die_artefact_map_type>&
decl_die_artefact_maps()3665 decl_die_artefact_maps()
3666 {return decl_die_artefact_maps_;}
3667
3668 /// Getter for the maps set that associates a decl DIE offset to an
3669 /// artifact.
3670 ///
3671 /// @return the maps set that associates a decl DIE offset to an
3672 /// artifact.
3673 const die_source_dependant_container_set<die_artefact_map_type>&
decl_die_artefact_maps() const3674 decl_die_artefact_maps() const
3675 {return decl_die_artefact_maps_;}
3676
3677 /// Getter for the maps set that associates a type DIE offset to an
3678 /// artifact.
3679 ///
3680 /// @return the maps set that associates a type DIE offset to an
3681 /// artifact.
3682 die_source_dependant_container_set<die_artefact_map_type>&
type_die_artefact_maps()3683 type_die_artefact_maps()
3684 {return type_die_artefact_maps_;}
3685
3686 /// Getter for the maps set that associates a type DIE offset to an
3687 /// artifact.
3688 ///
3689 /// @return the maps set that associates a type DIE offset to an
3690 /// artifact.
3691 const die_source_dependant_container_set<die_artefact_map_type>&
type_die_artefact_maps() const3692 type_die_artefact_maps() const
3693 {return type_die_artefact_maps_;}
3694
3695 /// Getter of the maps that associates function type representations
3696 /// to function types, inside a translation unit.
3697 ///
3698 /// @return the maps that associates function type representations
3699 /// to function types, inside a translation unit.
3700 istring_fn_type_map_type&
per_tu_repr_to_fn_type_maps()3701 per_tu_repr_to_fn_type_maps()
3702 {return per_tu_repr_to_fn_type_maps_;}
3703
3704 /// Getter of the maps that associates function type representations
3705 /// to function types, inside a translation unit.
3706 ///
3707 /// @return the maps that associates function type representations
3708 /// to function types, inside a translation unit.
3709 const istring_fn_type_map_type&
per_tu_repr_to_fn_type_maps() const3710 per_tu_repr_to_fn_type_maps() const
3711 {return per_tu_repr_to_fn_type_maps_;}
3712
3713 /// Associate the representation of a function type DIE to a given
3714 /// function type, inside the current translation unit.
3715 ///
3716 /// @param die the DIE to associate to the function type, using its
3717 /// representation.
3718 ///
3719 /// @param fn_type the function type to associate to @p die.
3720 void
associate_die_repr_to_fn_type_per_tu(const Dwarf_Die * die,const function_type_sptr & fn_type)3721 associate_die_repr_to_fn_type_per_tu(const Dwarf_Die *die,
3722 const function_type_sptr &fn_type)
3723 {
3724 if (!die_is_function_type(die))
3725 return;
3726
3727 interned_string repr =
3728 get_die_pretty_type_representation(die, /*where=*/0);
3729 ABG_ASSERT(!repr.empty());
3730
3731 per_tu_repr_to_fn_type_maps()[repr]= fn_type;
3732 }
3733
3734 /// Lookup the function type associated to a given function type
3735 /// DIE, in the current translation unit.
3736 ///
3737 /// @param die the DIE of function type to consider.
3738 ///
3739 /// @return the @ref function_type_sptr associated to @p die, or nil
3740 /// of no function_type is associated to @p die.
3741 function_type_sptr
lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die * die)3742 lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die *die)
3743 {
3744 if (!die_is_function_type(die))
3745 return function_type_sptr();
3746
3747 interned_string repr =
3748 get_die_pretty_representation(die, /*where=*/0);
3749 ABG_ASSERT(!repr.empty());
3750
3751 istring_fn_type_map_type::const_iterator i =
3752 per_tu_repr_to_fn_type_maps().find(repr);
3753
3754 if (i == per_tu_repr_to_fn_type_maps().end())
3755 return function_type_sptr();
3756
3757 return i->second;
3758 }
3759
3760 /// Set the canonical DIE offset of a given DIE.
3761 ///
3762 /// @param canonical_dies the vector that holds canonical DIEs.
3763 ///
3764 /// @param die_offset the offset of the DIE to set the canonical DIE
3765 /// for.
3766 ///
3767 /// @param canonical_die_offset the canonical DIE offset to
3768 /// associate to @p die_offset.
3769 void
set_canonical_die_offset(offset_offset_map_type & canonical_dies,Dwarf_Off die_offset,Dwarf_Off canonical_die_offset) const3770 set_canonical_die_offset(offset_offset_map_type &canonical_dies,
3771 Dwarf_Off die_offset,
3772 Dwarf_Off canonical_die_offset) const
3773 {
3774 canonical_dies[die_offset] = canonical_die_offset;}
3775
3776 /// Set the canonical DIE offset of a given DIE.
3777 ///
3778 ///
3779 /// @param die_offset the offset of the DIE to set the canonical DIE
3780 /// for.
3781 ///
3782 /// @param source the source of the DIE denoted by @p die_offset.
3783 ///
3784 /// @param canonical_die_offset the canonical DIE offset to
3785 /// associate to @p die_offset.
3786 ///
3787 /// @param die_as_type if true, it means that @p die_offset has to
3788 /// be considered as a type.
3789 void
set_canonical_die_offset(Dwarf_Off die_offset,die_source source,Dwarf_Off canonical_die_offset,bool die_as_type) const3790 set_canonical_die_offset(Dwarf_Off die_offset,
3791 die_source source,
3792 Dwarf_Off canonical_die_offset,
3793 bool die_as_type) const
3794 {
3795 offset_offset_map_type &canonical_dies =
3796 die_as_type
3797 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3798 get_container(source)
3799 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3800 get_container(source);
3801
3802 set_canonical_die_offset(canonical_dies,
3803 die_offset,
3804 canonical_die_offset);
3805 }
3806
3807 /// Set the canonical DIE offset of a given DIE.
3808 ///
3809 ///
3810 /// @param die the DIE to set the canonical DIE for.
3811 ///
3812 /// @param canonical_die_offset the canonical DIE offset to
3813 /// associate to @p die_offset.
3814 ///
3815 /// @param die_as_type if true, it means that @p die has to be
3816 /// considered as a type.
3817 void
set_canonical_die_offset(const Dwarf_Die * die,Dwarf_Off canonical_die_offset,bool die_as_type) const3818 set_canonical_die_offset(const Dwarf_Die *die,
3819 Dwarf_Off canonical_die_offset,
3820 bool die_as_type) const
3821 {
3822 const die_source source = get_die_source(die);
3823
3824 Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3825
3826 set_canonical_die_offset(die_offset, source,
3827 canonical_die_offset,
3828 die_as_type);
3829 }
3830
3831 /// Get the canonical DIE offset of a given DIE.
3832 ///
3833 /// @param canonical_dies the vector that contains canonical DIES.
3834 ///
3835 /// @param die_offset the offset of the DIE to consider.
3836 ///
3837 /// @return the canonical of the DIE denoted by @p die_offset, or
3838 /// zero if no canonical DIE was found.
3839 Dwarf_Off
get_canonical_die_offset(offset_offset_map_type & canonical_dies,Dwarf_Off die_offset) const3840 get_canonical_die_offset(offset_offset_map_type &canonical_dies,
3841 Dwarf_Off die_offset) const
3842 {
3843 offset_offset_map_type::const_iterator it = canonical_dies.find(die_offset);
3844 if (it == canonical_dies.end())
3845 return 0;
3846 return it->second;
3847 }
3848
3849 /// Get the canonical DIE offset of a given DIE.
3850 ///
3851 /// @param die_offset the offset of the DIE to consider.
3852 ///
3853 /// @param source the source of the DIE denoted by @p die_offset.
3854 ///
3855 /// @param die_as_type if true, it means that @p is to be considered
3856 /// as a type DIE.
3857 ///
3858 /// @return the canonical of the DIE denoted by @p die_offset, or
3859 /// zero if no canonical DIE was found.
3860 Dwarf_Off
get_canonical_die_offset(Dwarf_Off die_offset,die_source source,bool die_as_type) const3861 get_canonical_die_offset(Dwarf_Off die_offset,
3862 die_source source,
3863 bool die_as_type) const
3864 {
3865 offset_offset_map_type &canonical_dies =
3866 die_as_type
3867 ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3868 get_container(source)
3869 : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3870 get_container(source);
3871
3872 return get_canonical_die_offset(canonical_dies, die_offset);
3873 }
3874
3875 /// Associate a DIE (representing a type) to the type that it
3876 /// represents.
3877 ///
3878 /// @param die the DIE to consider.
3879 ///
3880 /// @param type the type to associate the DIE to.
3881 ///
3882 /// @param where_offset where in the DIE stream we logically are.
3883 void
associate_die_to_type(const Dwarf_Die * die,type_base_sptr type,size_t where)3884 associate_die_to_type(const Dwarf_Die *die,
3885 type_base_sptr type,
3886 size_t where)
3887 {
3888 if (!type)
3889 return;
3890
3891 Dwarf_Die equiv_die;
3892 get_or_compute_canonical_die(die, equiv_die, where, /*die_as_type=*/true);
3893
3894 die_artefact_map_type& m =
3895 type_die_artefact_maps().get_container(*this, &equiv_die);
3896
3897 size_t die_offset = dwarf_dieoffset(&equiv_die);
3898 m[die_offset] = type;
3899 }
3900
3901 /// Lookup the type associated to a given DIE.
3902 ///
3903 /// Note that the DIE must have been associated to type by a
3904 /// previous invocation of the function
3905 /// read_context::associate_die_to_type().
3906 ///
3907 /// @param die the DIE to consider.
3908 ///
3909 /// @return the type associated to the DIE or NULL if no type is
3910 /// associated to the DIE.
3911 type_base_sptr
lookup_type_from_die(const Dwarf_Die * die) const3912 lookup_type_from_die(const Dwarf_Die* die) const
3913 {
3914 type_or_decl_base_sptr artifact =
3915 lookup_artifact_from_die(die, /*die_as_type=*/true);
3916 if (function_decl_sptr fn = is_function_decl(artifact))
3917 return fn->get_type();
3918 return is_type(artifact);
3919 }
3920
3921 /// Lookup the type associated to a DIE at a given offset, from a
3922 /// given source.
3923 ///
3924 /// Note that the DIE must have been associated to type by a
3925 /// previous invocation of the function
3926 /// read_context::associate_die_to_type().
3927 ///
3928 /// @param die_offset the offset of the DIE to consider.
3929 ///
3930 /// @param source the source of the DIE to consider.
3931 ///
3932 /// @return the type associated to the DIE or NULL if no type is
3933 /// associated to the DIE.
3934 type_base_sptr
lookup_type_from_die_offset(size_t die_offset,die_source source) const3935 lookup_type_from_die_offset(size_t die_offset, die_source source) const
3936 {
3937 type_base_sptr result;
3938 const die_artefact_map_type& m =
3939 type_die_artefact_maps().get_container(source);
3940 die_artefact_map_type::const_iterator i = m.find(die_offset);
3941 if (i != m.end())
3942 {
3943 if (function_decl_sptr fn = is_function_decl(i->second))
3944 return fn->get_type();
3945 result = is_type(i->second);
3946 }
3947
3948 if (!result)
3949 {
3950 // Maybe we are looking for a class type being constructed?
3951 const die_class_or_union_map_type& m = die_wip_classes_map(source);
3952 die_class_or_union_map_type::const_iterator i = m.find(die_offset);
3953
3954 if (i != m.end())
3955 result = i->second;
3956 }
3957
3958 if (!result)
3959 {
3960 // Maybe we are looking for a function type being constructed?
3961 const die_function_type_map_type& m =
3962 die_wip_function_types_map(source);
3963 die_function_type_map_type::const_iterator i = m.find(die_offset);
3964
3965 if (i != m.end())
3966 result = i->second;
3967 }
3968
3969 return result;
3970 }
3971
3972 /// Getter of a map that associates a die that represents a
3973 /// class/struct with the declaration of the class, while the class
3974 /// is being constructed.
3975 ///
3976 /// @param source where the DIE is from.
3977 ///
3978 /// @return the map that associates a DIE to the class that is being
3979 /// built.
3980 const die_class_or_union_map_type&
die_wip_classes_map(die_source source) const3981 die_wip_classes_map(die_source source) const
3982 {return const_cast<read_context*>(this)->die_wip_classes_map(source);}
3983
3984 /// Getter of a map that associates a die that represents a
3985 /// class/struct with the declaration of the class, while the class
3986 /// is being constructed.
3987 ///
3988 /// @param source where the DIE comes from.
3989 ///
3990 /// @return the map that associates a DIE to the class that is being
3991 /// built.
3992 die_class_or_union_map_type&
die_wip_classes_map(die_source source)3993 die_wip_classes_map(die_source source)
3994 {
3995 switch (source)
3996 {
3997 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
3998 break;
3999 case ALT_DEBUG_INFO_DIE_SOURCE:
4000 return alternate_die_wip_classes_map_;
4001 case TYPE_UNIT_DIE_SOURCE:
4002 return type_unit_die_wip_classes_map_;
4003 case NO_DEBUG_INFO_DIE_SOURCE:
4004 case NUMBER_OF_DIE_SOURCES:
4005 ABG_ASSERT_NOT_REACHED;
4006 }
4007 return die_wip_classes_map_;
4008 }
4009
4010 /// Getter for a map that associates a die (that represents a
4011 /// function type) whith a function type, while the function type is
4012 /// being constructed (WIP == work in progress).
4013 ///
4014 /// @param source where the DIE comes from.n
4015 ///
4016 /// @return the map of wip function types.
4017 const die_function_type_map_type&
die_wip_function_types_map(die_source source) const4018 die_wip_function_types_map(die_source source) const
4019 {return const_cast<read_context*>(this)->die_wip_function_types_map(source);}
4020
4021 /// Getter for a map that associates a die (that represents a
4022 /// function type) whith a function type, while the function type is
4023 /// being constructed (WIP == work in progress).
4024 ///
4025 /// @param source where DIEs of the map come from.
4026 ///
4027 /// @return the map of wip function types.
4028 die_function_type_map_type&
die_wip_function_types_map(die_source source)4029 die_wip_function_types_map(die_source source)
4030 {
4031 switch (source)
4032 {
4033 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4034 break;
4035 case ALT_DEBUG_INFO_DIE_SOURCE:
4036 return alternate_die_wip_function_types_map_;
4037 case TYPE_UNIT_DIE_SOURCE:
4038 return type_unit_die_wip_function_types_map_;
4039 case NO_DEBUG_INFO_DIE_SOURCE:
4040 case NUMBER_OF_DIE_SOURCES:
4041 ABG_ASSERT_NOT_REACHED;
4042 }
4043 return die_wip_function_types_map_;
4044 }
4045
4046 /// Getter for a map that associates a die with a function decl
4047 /// which has a linkage name but no elf symbol yet.
4048 ///
4049 /// This is to fixup function decls with linkage names, but with no
4050 /// link to their underlying elf symbol. There are some DIEs like
4051 /// that in DWARF sometimes, especially when the compiler optimizes
4052 /// stuff aggressively.
4053 die_function_decl_map_type&
die_function_decl_with_no_symbol_map()4054 die_function_decl_with_no_symbol_map()
4055 {return die_function_with_no_symbol_map_;}
4056
4057 /// Return true iff a given offset is for the DIE of a class that is
4058 /// being built, but that is not fully built yet. WIP == "work in
4059 /// progress".
4060 ///
4061 /// @param offset the DIE offset to consider.
4062 ///
4063 /// @param source where the DIE of the map come from.
4064 ///
4065 /// @return true iff @p offset is the offset of the DIE of a class
4066 /// that is being currently built.
4067 bool
is_wip_class_die_offset(Dwarf_Off offset,die_source source) const4068 is_wip_class_die_offset(Dwarf_Off offset, die_source source) const
4069 {
4070 die_class_or_union_map_type::const_iterator i =
4071 die_wip_classes_map(source).find(offset);
4072 return (i != die_wip_classes_map(source).end());
4073 }
4074
4075 /// Return true iff a given offset is for the DIE of a function type
4076 /// that is being built at the moment, but is not fully built yet.
4077 /// WIP == work in progress.
4078 ///
4079 /// @param offset DIE offset to consider.
4080 ///
4081 /// @param source where the DIE comes from.
4082 ///
4083 /// @return true iff @p offset is the offset of the DIE of a
4084 /// function type that is being currently built.
4085 bool
is_wip_function_type_die_offset(Dwarf_Off offset,die_source source) const4086 is_wip_function_type_die_offset(Dwarf_Off offset, die_source source) const
4087 {
4088 die_function_type_map_type::const_iterator i =
4089 die_wip_function_types_map(source).find(offset);
4090 return (i != die_wip_function_types_map(source).end());
4091 }
4092
4093 /// Getter for the map of declaration-only classes that are to be
4094 /// resolved to their definition classes by the end of the corpus
4095 /// loading.
4096 ///
4097 /// @return a map of string -> vector of classes where the key is
4098 /// the fully qualified name of the class and the value is the
4099 /// vector of declaration-only class.
4100 const string_classes_map&
declaration_only_classes() const4101 declaration_only_classes() const
4102 {return decl_only_classes_map_;}
4103
4104 /// Getter for the map of declaration-only classes that are to be
4105 /// resolved to their definition classes by the end of the corpus
4106 /// loading.
4107 ///
4108 /// @return a map of string -> vector of classes where the key is
4109 /// the fully qualified name of the class and the value is the
4110 /// vector of declaration-only class.
4111 string_classes_map&
declaration_only_classes()4112 declaration_only_classes()
4113 {return decl_only_classes_map_;}
4114
4115 /// If a given class is a declaration-only class then stash it on
4116 /// the side so that at the end of the corpus reading we can resolve
4117 /// it to its definition.
4118 ///
4119 /// @param klass the class to consider.
4120 void
maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr & klass)4121 maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr& klass)
4122 {
4123 if (klass->get_is_declaration_only()
4124 && klass->get_definition_of_declaration() == 0)
4125 {
4126 string qn = klass->get_qualified_name();
4127 string_classes_map::iterator record =
4128 declaration_only_classes().find(qn);
4129 if (record == declaration_only_classes().end())
4130 declaration_only_classes()[qn].push_back(klass);
4131 else
4132 record->second.push_back(klass);
4133 }
4134 }
4135
4136 /// Test if a given declaration-only class has been scheduled for
4137 /// resolution to a defined class.
4138 ///
4139 /// @param klass the class to consider for the test.
4140 ///
4141 /// @return true iff @p klass is a declaration-only class and if
4142 /// it's been scheduled for resolution to a defined class.
4143 bool
is_decl_only_class_scheduled_for_resolution(class_decl_sptr & klass)4144 is_decl_only_class_scheduled_for_resolution(class_decl_sptr& klass)
4145 {
4146 if (klass->get_is_declaration_only())
4147 return (declaration_only_classes().find(klass->get_qualified_name())
4148 != declaration_only_classes().end());
4149
4150 return false;
4151 }
4152
4153 /// Compare two ABI artifacts in a context which canonicalization
4154 /// has not be done yet.
4155 ///
4156 /// @param l the left-hand-side operand of the comparison
4157 ///
4158 /// @param r the right-hand-side operand of the comparison.
4159 ///
4160 /// @return true if @p l equals @p r.
4161 bool
compare_before_canonicalisation(const type_or_decl_base_sptr & l,const type_or_decl_base_sptr & r)4162 compare_before_canonicalisation(const type_or_decl_base_sptr &l,
4163 const type_or_decl_base_sptr &r)
4164 {
4165 if (!l || !r)
4166 return !!l == !!r;
4167
4168 const environment* e = l->get_environment();
4169 ABG_ASSERT(!e->canonicalization_is_done());
4170
4171 bool s0 = e->decl_only_class_equals_definition();
4172 e->decl_only_class_equals_definition(true);
4173 bool equal = l == r;
4174 e->decl_only_class_equals_definition(s0);
4175 return equal;
4176 }
4177
4178 /// Walk the declaration-only classes that have been found during
4179 /// the building of the corpus and resolve them to their definitions.
4180 void
resolve_declaration_only_classes()4181 resolve_declaration_only_classes()
4182 {
4183 vector<string> resolved_classes;
4184
4185 for (string_classes_map::iterator i =
4186 declaration_only_classes().begin();
4187 i != declaration_only_classes().end();
4188 ++i)
4189 {
4190 bool to_resolve = false;
4191 for (classes_type::iterator j = i->second.begin();
4192 j != i->second.end();
4193 ++j)
4194 if ((*j)->get_is_declaration_only()
4195 && ((*j)->get_definition_of_declaration() == 0))
4196 to_resolve = true;
4197
4198 if (!to_resolve)
4199 {
4200 resolved_classes.push_back(i->first);
4201 continue;
4202 }
4203
4204 // Now, for each decl-only class that have the current name
4205 // 'i->first', let's try to poke at the fully defined class
4206 // that is defined in the same translation unit as the
4207 // declaration.
4208 //
4209 // If we find one class (defined in the TU of the declaration)
4210 // that defines the declaration, then the declaration can be
4211 // resolved to that class.
4212 //
4213 // If no defining class is found in the TU of the declaration,
4214 // then there are possibly three cases to consider:
4215 //
4216 // 1/ There is exactly one class that defines the
4217 // declaration and that class is defined in another TU. In
4218 // this case, the declaration is resolved to that
4219 // definition.
4220 //
4221 // 2/ There are more than one class that define that
4222 // declaration and none of them is defined in the TU of the
4223 // declaration. If those classes are all different, then
4224 // the declaration is left unresolved.
4225 //
4226 // 3/ No class defines the declaration. In this case, the
4227 // declaration is left unresoved.
4228
4229 // So get the classes that might define the current
4230 // declarations which name is i->first.
4231 const type_base_wptrs_type *classes =
4232 lookup_class_types(i->first, *current_corpus());
4233 if (!classes)
4234 continue;
4235
4236 // This is a map that associates the translation unit path to
4237 // the class (that potentially defines the declarations that
4238 // we consider) that are defined in that translation unit. It
4239 // should stay ordered by using the TU path as key to ensure
4240 // stability of the order of classe definitions in ABIXML
4241 // output.
4242 map<string, class_decl_sptr> per_tu_class_map;
4243 for (type_base_wptrs_type::const_iterator c = classes->begin();
4244 c != classes->end();
4245 ++c)
4246 {
4247 class_decl_sptr klass = is_class_type(type_base_sptr(*c));
4248 ABG_ASSERT(klass);
4249
4250 klass = is_class_type(look_through_decl_only_class(klass));
4251 if (klass->get_is_declaration_only())
4252 continue;
4253
4254 string tu_path = klass->get_translation_unit()->get_absolute_path();
4255 if (tu_path.empty())
4256 continue;
4257
4258 // Build a map that associates the translation unit path
4259 // to the class (that potentially defines the declarations
4260 // that we consider) that are defined in that translation unit.
4261 per_tu_class_map[tu_path] = klass;
4262 }
4263
4264 if (!per_tu_class_map.empty())
4265 {
4266 // Walk the declarations to resolve and resolve them
4267 // either to the definitions that are in the same TU as
4268 // the declaration, or to the definition found elsewhere,
4269 // if there is only one such definition.
4270 for (classes_type::iterator j = i->second.begin();
4271 j != i->second.end();
4272 ++j)
4273 {
4274 if ((*j)->get_is_declaration_only()
4275 && ((*j)->get_definition_of_declaration() == 0))
4276 {
4277 string tu_path =
4278 (*j)->get_translation_unit()->get_absolute_path();
4279 map<string, class_decl_sptr>::const_iterator e =
4280 per_tu_class_map.find(tu_path);
4281 if (e != per_tu_class_map.end())
4282 (*j)->set_definition_of_declaration(e->second);
4283 else if (per_tu_class_map.size() == 1)
4284 (*j)->set_definition_of_declaration
4285 (per_tu_class_map.begin()->second);
4286 else if (per_tu_class_map.size() > 1)
4287 {
4288 // We are in case where there are more than
4289 // one definition for the declaration. Let's
4290 // see if they are all equal. If they are,
4291 // then the declaration resolves to the
4292 // definition. Otherwise, we are in the case
4293 // 3/ described above.
4294 map<string,
4295 class_decl_sptr>::const_iterator it;
4296 class_decl_sptr first_class =
4297 per_tu_class_map.begin()->second;
4298 bool all_class_definitions_are_equal = true;
4299 for (it = per_tu_class_map.begin();
4300 it != per_tu_class_map.end();
4301 ++it)
4302 {
4303 if (it == per_tu_class_map.begin())
4304 continue;
4305 else
4306 {
4307 if (!compare_before_canonicalisation(it->second,
4308 first_class))
4309 {
4310 all_class_definitions_are_equal = false;
4311 break;
4312 }
4313 }
4314 }
4315 if (all_class_definitions_are_equal)
4316 (*j)->set_definition_of_declaration(first_class);
4317 }
4318 }
4319 }
4320 resolved_classes.push_back(i->first);
4321 }
4322 }
4323
4324 size_t num_decl_only_classes = declaration_only_classes().size(),
4325 num_resolved = resolved_classes.size();
4326 if (show_stats())
4327 cerr << "resolved " << num_resolved
4328 << " class declarations out of "
4329 << num_decl_only_classes
4330 << "\n";
4331
4332 for (vector<string>::const_iterator i = resolved_classes.begin();
4333 i != resolved_classes.end();
4334 ++i)
4335 declaration_only_classes().erase(*i);
4336
4337 for (string_classes_map::iterator i = declaration_only_classes().begin();
4338 i != declaration_only_classes().end();
4339 ++i)
4340 {
4341 if (show_stats())
4342 {
4343 if (i == declaration_only_classes().begin())
4344 cerr << "Here are the "
4345 << num_decl_only_classes - num_resolved
4346 << " unresolved class declarations:\n";
4347 else
4348 cerr << " " << i->first << "\n";
4349 }
4350 }
4351 }
4352
4353 /// Getter for the map of declaration-only enums that are to be
4354 /// resolved to their definition enums by the end of the corpus
4355 /// loading.
4356 ///
4357 /// @return a map of string -> vector of enums where the key is
4358 /// the fully qualified name of the enum and the value is the
4359 /// vector of declaration-only enum.
4360 const string_enums_map&
declaration_only_enums() const4361 declaration_only_enums() const
4362 {return decl_only_enums_map_;}
4363
4364 /// Getter for the map of declaration-only enums that are to be
4365 /// resolved to their definition enums by the end of the corpus
4366 /// loading.
4367 ///
4368 /// @return a map of string -> vector of enums where the key is
4369 /// the fully qualified name of the enum and the value is the
4370 /// vector of declaration-only enum.
4371 string_enums_map&
declaration_only_enums()4372 declaration_only_enums()
4373 {return decl_only_enums_map_;}
4374
4375 /// If a given enum is a declaration-only enum then stash it on
4376 /// the side so that at the end of the corpus reading we can resolve
4377 /// it to its definition.
4378 ///
4379 /// @param enom the enum to consider.
4380 void
maybe_schedule_declaration_only_enum_for_resolution(enum_type_decl_sptr & enom)4381 maybe_schedule_declaration_only_enum_for_resolution(enum_type_decl_sptr& enom)
4382 {
4383 if (enom->get_is_declaration_only()
4384 && enom->get_definition_of_declaration() == 0)
4385 {
4386 string qn = enom->get_qualified_name();
4387 string_enums_map::iterator record =
4388 declaration_only_enums().find(qn);
4389 if (record == declaration_only_enums().end())
4390 declaration_only_enums()[qn].push_back(enom);
4391 else
4392 record->second.push_back(enom);
4393 }
4394 }
4395
4396 /// Test if a given declaration-only enum has been scheduled for
4397 /// resolution to a defined enum.
4398 ///
4399 /// @param enom the enum to consider for the test.
4400 ///
4401 /// @return true iff @p enom is a declaration-only enum and if
4402 /// it's been scheduled for resolution to a defined enum.
4403 bool
is_decl_only_enum_scheduled_for_resolution(enum_type_decl_sptr & enom)4404 is_decl_only_enum_scheduled_for_resolution(enum_type_decl_sptr& enom)
4405 {
4406 if (enom->get_is_declaration_only())
4407 return (declaration_only_enums().find(enom->get_qualified_name())
4408 != declaration_only_enums().end());
4409
4410 return false;
4411 }
4412
4413 /// Walk the declaration-only enums that have been found during
4414 /// the building of the corpus and resolve them to their definitions.
4415 ///
4416 /// TODO: Do away with this function by factorizing it with
4417 /// resolve_declaration_only_classes. All declaration-only decls
4418 /// could be handled the same way as declaration-only-ness is a
4419 /// property of abigail::ir::decl_base now.
4420 void
resolve_declaration_only_enums()4421 resolve_declaration_only_enums()
4422 {
4423 vector<string> resolved_enums;
4424
4425 for (string_enums_map::iterator i =
4426 declaration_only_enums().begin();
4427 i != declaration_only_enums().end();
4428 ++i)
4429 {
4430 bool to_resolve = false;
4431 for (enums_type::iterator j = i->second.begin();
4432 j != i->second.end();
4433 ++j)
4434 if ((*j)->get_is_declaration_only()
4435 && ((*j)->get_definition_of_declaration() == 0))
4436 to_resolve = true;
4437
4438 if (!to_resolve)
4439 {
4440 resolved_enums.push_back(i->first);
4441 continue;
4442 }
4443
4444 // Now, for each decl-only enum that have the current name
4445 // 'i->first', let's try to poke at the fully defined enum
4446 // that is defined in the same translation unit as the
4447 // declaration.
4448 //
4449 // If we find one enum (defined in the TU of the declaration)
4450 // that defines the declaration, then the declaration can be
4451 // resolved to that enum.
4452 //
4453 // If no defining enum is found in the TU of the declaration,
4454 // then there are possibly three cases to consider:
4455 //
4456 // 1/ There is exactly one enum that defines the
4457 // declaration and that enum is defined in another TU. In
4458 // this case, the declaration is resolved to that
4459 // definition.
4460 //
4461 // 2/ There are more than one enum that define that
4462 // declaration and none of them is defined in the TU of the
4463 // declaration. In this case, the declaration is left
4464 // unresolved.
4465 //
4466 // 3/ No enum defines the declaration. In this case, the
4467 // declaration is left unresoved.
4468
4469 // So get the enums that might define the current
4470 // declarations which name is i->first.
4471 const type_base_wptrs_type *enums =
4472 lookup_enum_types(i->first, *current_corpus());
4473 if (!enums)
4474 continue;
4475
4476 unordered_map<string, enum_type_decl_sptr> per_tu_enum_map;
4477 for (type_base_wptrs_type::const_iterator c = enums->begin();
4478 c != enums->end();
4479 ++c)
4480 {
4481 enum_type_decl_sptr enom = is_enum_type(type_base_sptr(*c));
4482 ABG_ASSERT(enom);
4483
4484 enom = is_enum_type(look_through_decl_only_enum(enom));
4485 if (enom->get_is_declaration_only())
4486 continue;
4487
4488 string tu_path = enom->get_translation_unit()->get_absolute_path();
4489 if (tu_path.empty())
4490 continue;
4491
4492 // Build a map that associates the translation unit path
4493 // to the enum (that potentially defines the declarations
4494 // that we consider) that are defined in that translation unit.
4495 per_tu_enum_map[tu_path] = enom;
4496 }
4497
4498 if (!per_tu_enum_map.empty())
4499 {
4500 // Walk the declarations to resolve and resolve them
4501 // either to the definitions that are in the same TU as
4502 // the declaration, or to the definition found elsewhere,
4503 // if there is only one such definition.
4504 for (enums_type::iterator j = i->second.begin();
4505 j != i->second.end();
4506 ++j)
4507 {
4508 if ((*j)->get_is_declaration_only()
4509 && ((*j)->get_definition_of_declaration() == 0))
4510 {
4511 string tu_path =
4512 (*j)->get_translation_unit()->get_absolute_path();
4513 unordered_map<string, enum_type_decl_sptr>::const_iterator e =
4514 per_tu_enum_map.find(tu_path);
4515 if (e != per_tu_enum_map.end())
4516 (*j)->set_definition_of_declaration(e->second);
4517 else if (per_tu_enum_map.size() == 1)
4518 (*j)->set_definition_of_declaration
4519 (per_tu_enum_map.begin()->second);
4520 }
4521 }
4522 resolved_enums.push_back(i->first);
4523 }
4524 }
4525
4526 size_t num_decl_only_enums = declaration_only_enums().size(),
4527 num_resolved = resolved_enums.size();
4528 if (show_stats())
4529 cerr << "resolved " << num_resolved
4530 << " enum declarations out of "
4531 << num_decl_only_enums
4532 << "\n";
4533
4534 for (vector<string>::const_iterator i = resolved_enums.begin();
4535 i != resolved_enums.end();
4536 ++i)
4537 declaration_only_enums().erase(*i);
4538
4539 for (string_enums_map::iterator i = declaration_only_enums().begin();
4540 i != declaration_only_enums().end();
4541 ++i)
4542 {
4543 if (show_stats())
4544 {
4545 if (i == declaration_only_enums().begin())
4546 cerr << "Here are the "
4547 << num_decl_only_enums - num_resolved
4548 << " unresolved enum declarations:\n";
4549 else
4550 cerr << " " << i->first << "\n";
4551 }
4552 }
4553 }
4554
4555 /// Test if a symbol belongs to a function of the current ABI
4556 /// corpus.
4557 ///
4558 /// This is a sub-routine of fixup_functions_with_no_symbols.
4559 ///
4560 /// @param fn the function symbol to consider.
4561 ///
4562 /// @returnt true if @p fn belongs to a function of the current ABI
4563 /// corpus.
4564 bool
symbol_already_belongs_to_a_function(elf_symbol_sptr & fn)4565 symbol_already_belongs_to_a_function(elf_symbol_sptr& fn)
4566 {
4567 corpus_sptr corp = current_corpus();
4568 if (!corp)
4569 return false;
4570
4571 string id = fn->get_id_string();
4572
4573 const vector<function_decl*> *fns = corp->lookup_functions(id);
4574 if (!fns)
4575 return false;
4576
4577 for (vector<function_decl*>::const_iterator i = fns->begin();
4578 i != fns->end();
4579 ++i)
4580 {
4581 function_decl* f = *i;
4582 ABG_ASSERT(f);
4583 if (f->get_symbol())
4584 return true;
4585 }
4586 return false;
4587 }
4588
4589 /// Some functions described by DWARF may have their linkage name
4590 /// set, but no link to their actual underlying elf symbol. When
4591 /// these are virtual member functions, comparing the enclosing type
4592 /// against another one which has its underlying symbol properly set
4593 /// might lead to spurious type changes.
4594 ///
4595 /// If the corpus contains a symbol with the same name as the
4596 /// linkage name of the function, then set up the link between the
4597 /// function and its underlying symbol.
4598 ///
4599 /// Note that for the moment, only virtual member functions are
4600 /// fixed up like this. This is because they really are the only
4601 /// fuctions of functions that can affect types (in spurious ways).
4602 void
fixup_functions_with_no_symbols()4603 fixup_functions_with_no_symbols()
4604 {
4605 corpus_sptr corp = current_corpus();
4606 if (!corp)
4607 return;
4608
4609 die_function_decl_map_type &fns_with_no_symbol =
4610 die_function_decl_with_no_symbol_map();
4611
4612 if (do_log())
4613 cerr << fns_with_no_symbol.size()
4614 << " functions to fixup, potentially\n";
4615
4616 for (die_function_decl_map_type::iterator i = fns_with_no_symbol.begin();
4617 i != fns_with_no_symbol.end();
4618 ++i)
4619 if (elf_symbol_sptr sym =
4620 corp->lookup_function_symbol(i->second->get_linkage_name()))
4621 {
4622 // So i->second is a virtual member function that was
4623 // previously scheduled to be set a function symbol.
4624 //
4625 // But if it appears that it now has a symbol already set,
4626 // then do not set a symbol to it again.
4627 //
4628 // Or if it appears that another virtual member function
4629 // from the current ABI Corpus, with the same linkage
4630 // (mangled) name has already been set a symbol, then do not
4631 // set a symbol to this function either. Otherwise, there
4632 // will be two virtual member functions with the same symbol
4633 // in the class and that leads to spurious hard-to-debug
4634 // change reports later down the road.
4635 if (i->second->get_symbol()
4636 || symbol_already_belongs_to_a_function(sym))
4637 continue;
4638
4639 ABG_ASSERT(is_member_function(i->second));
4640 ABG_ASSERT(get_member_function_is_virtual(i->second));
4641 i->second->set_symbol(sym);
4642 // The function_decl now has an associated (public) ELF symbol so
4643 // it ought to be advertised as being public.
4644 i->second->set_is_in_public_symbol_table(true);
4645 // Add the function to the set of exported decls of the
4646 // current corpus.
4647 maybe_add_fn_to_exported_decls(i->second.get());
4648 if (do_log())
4649 cerr << "fixed up '"
4650 << i->second->get_pretty_representation()
4651 << "' with symbol '"
4652 << sym->get_id_string()
4653 << "'\n";
4654 }
4655
4656 fns_with_no_symbol.clear();
4657 }
4658
4659 /// Return a reference to the vector containing the offsets of the
4660 /// types that need late canonicalizing.
4661 ///
4662 /// @param source whe DIEs referred to by the offsets contained in
4663 /// the vector to return are from.
4664 vector<Dwarf_Off>&
types_to_canonicalize(die_source source)4665 types_to_canonicalize(die_source source)
4666 {
4667 switch (source)
4668 {
4669 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4670 break;
4671 case ALT_DEBUG_INFO_DIE_SOURCE:
4672 return alt_types_to_canonicalize_;
4673 case TYPE_UNIT_DIE_SOURCE:
4674 return type_unit_types_to_canonicalize_;
4675 case NO_DEBUG_INFO_DIE_SOURCE:
4676 case NUMBER_OF_DIE_SOURCES:
4677 ABG_ASSERT_NOT_REACHED;
4678 }
4679 return types_to_canonicalize_;
4680 }
4681
4682 /// Return a reference to the vector containing the offsets of the
4683 /// types that need late canonicalizing.
4684 ///
4685 /// @param source where the DIEs referred to by the offset in the
4686 /// returned vector are from.
4687 const vector<Dwarf_Off>&
types_to_canonicalize(die_source source) const4688 types_to_canonicalize(die_source source) const
4689 {return const_cast<read_context*>(this)->types_to_canonicalize(source);}
4690
4691 /// Return a reference to the vector containing the types created
4692 /// during the binary analysis but that are not tied to a given
4693 /// DWARF DIE.
4694 ///
4695 /// @return reference to the vector containing the types created
4696 /// during the binary analysis but that are not tied to a given
4697 /// DWARF DIE.
4698 const vector<type_base_sptr>&
extra_types_to_canonicalize() const4699 extra_types_to_canonicalize() const
4700 {return extra_types_to_canonicalize_;}
4701
4702 /// Clear the containers holding types to canonicalize.
4703 void
clear_types_to_canonicalize()4704 clear_types_to_canonicalize()
4705 {
4706 types_to_canonicalize_.clear();
4707 alt_types_to_canonicalize_.clear();
4708 type_unit_types_to_canonicalize_.clear();
4709 extra_types_to_canonicalize_.clear();
4710 }
4711
4712 /// Put the offset of a DIE representing a type on a side vector so
4713 /// that when the reading of the debug info of the current
4714 /// translation unit is done, we can get back to the type DIE and
4715 /// from there, to the type it's associated to, and then
4716 /// canonicalize it. This what we call late canonicalization.
4717 ///
4718 /// @param die the type DIE to schedule for late type
4719 /// canonicalization.
4720 void
schedule_type_for_late_canonicalization(const Dwarf_Die * die)4721 schedule_type_for_late_canonicalization(const Dwarf_Die *die)
4722 {
4723 Dwarf_Off o;
4724
4725 Dwarf_Die equiv_die;
4726 ABG_ASSERT(get_canonical_die(die, equiv_die,
4727 /*where=*/0,
4728 /*die_as_type=*/true));
4729
4730 const die_source source = get_die_source(&equiv_die);
4731 o = dwarf_dieoffset(&equiv_die);
4732
4733 const die_artefact_map_type& m =
4734 type_die_artefact_maps().get_container(*this, die);
4735
4736 die_artefact_map_type::const_iterator i = m.find(o);
4737 ABG_ASSERT(i != m.end());
4738
4739 // Then really do the scheduling.
4740 types_to_canonicalize(source).push_back(o);
4741 }
4742
4743 /// Types that were created but not tied to a particular DIE, must
4744 /// be scheduled for late canonicalization using this method.
4745 ///
4746 /// @param t the type to schedule for late canonicalization.
4747 void
schedule_type_for_late_canonicalization(const type_base_sptr & t)4748 schedule_type_for_late_canonicalization(const type_base_sptr &t)
4749 {
4750 extra_types_to_canonicalize_.push_back(t);
4751 }
4752
4753 /// Canonicalize types which DIE offsets are stored in vectors on
4754 /// the side. This is a sub-routine of
4755 /// read_context::perform_late_type_canonicalizing().
4756 ///
4757 /// @param source where the DIE of the types to canonicalize are
4758 /// from.
4759 void
canonicalize_types_scheduled(die_source source)4760 canonicalize_types_scheduled(die_source source)
4761 {
4762 tools_utils::timer cn_timer;
4763 if (do_log())
4764 {
4765 cerr << "going to canonicalize types";
4766 corpus_sptr c = current_corpus();
4767 if (c)
4768 cerr << " of corpus " << current_corpus()->get_path();
4769 cerr << " (DIEs source: " << source << ")\n";
4770 cn_timer.start();
4771 }
4772
4773 if (!types_to_canonicalize(source).empty()
4774 || !extra_types_to_canonicalize().empty())
4775 {
4776 tools_utils::timer single_type_cn_timer;
4777 size_t total = types_to_canonicalize(source).size();
4778 if (do_log())
4779 cerr << total << " types to canonicalize\n";
4780 for (size_t i = 0; i < total; ++i)
4781 {
4782 Dwarf_Off element = types_to_canonicalize(source)[i];
4783 type_base_sptr t =
4784 lookup_type_from_die_offset(element, source);
4785 ABG_ASSERT(t);
4786 if (do_log())
4787 {
4788 cerr << "canonicalizing type "
4789 << get_pretty_representation(t, false)
4790 << " [" << i + 1 << "/" << total << "]";
4791 if (corpus_sptr c = current_corpus())
4792 cerr << "@" << c->get_path();
4793 cerr << " ...";
4794 single_type_cn_timer.start();
4795 }
4796 canonicalize(t);
4797 if (do_log())
4798 {
4799 cerr << " DONE";
4800 single_type_cn_timer.stop();
4801 cerr << ":" <<single_type_cn_timer << "\n";
4802 }
4803 }
4804
4805 // Now canonicalize types that were created but not tied to
4806 // any DIE.
4807 if (!extra_types_to_canonicalize().empty())
4808 {
4809 tools_utils::timer single_type_cn_timer;
4810 size_t total = extra_types_to_canonicalize().size();
4811 if (do_log())
4812 cerr << total << " extra types to canonicalize\n";
4813 size_t i = 1;
4814 for (vector<type_base_sptr>::const_iterator it =
4815 extra_types_to_canonicalize().begin();
4816 it != extra_types_to_canonicalize().end();
4817 ++it, ++i)
4818 {
4819 if (do_log())
4820 {
4821 cerr << "canonicalizing extra type "
4822 << get_pretty_representation(*it, false)
4823 << " [" << i << "/" << total << "]";
4824 if (corpus_sptr c = current_corpus())
4825 cerr << "@" << c->get_path();
4826 cerr << " ...";
4827 single_type_cn_timer.start();
4828 }
4829 canonicalize(*it);
4830 if (do_log())
4831 {
4832 single_type_cn_timer.stop();
4833 cerr << "DONE:"
4834 << single_type_cn_timer
4835 << "\n";
4836 }
4837 }
4838 }
4839 }
4840
4841 if (do_log())
4842 {
4843 cn_timer.stop();
4844 cerr << "finished canonicalizing types";
4845 corpus_sptr c = current_corpus();
4846 if (c)
4847 cerr << " of corpus " << current_corpus()->get_path();
4848 cerr << " (DIEs source: "
4849 << source << "):"
4850 << cn_timer
4851 << "\n";
4852 }
4853 }
4854
4855 /// Compute the number of canonicalized and missed types in the late
4856 /// canonicalization phase.
4857 ///
4858 /// @param source where the DIEs of the canonicalized types are
4859 /// from.
4860 ///
4861 /// @param canonicalized the number of types that got canonicalized
4862 /// is added to the value already present in this parameter.
4863 ///
4864 /// @param missed the number of types scheduled for late
4865 /// canonicalization and which couldn't be canonicalized (for a
4866 /// reason) is added to the value already present in this parameter.
4867 void
add_late_canonicalized_types_stats(die_source source,size_t & canonicalized,size_t & missed) const4868 add_late_canonicalized_types_stats(die_source source,
4869 size_t& canonicalized,
4870 size_t& missed) const
4871 {
4872 for (vector<Dwarf_Off>::const_iterator i =
4873 types_to_canonicalize(source).begin();
4874 i != types_to_canonicalize(source).end();
4875 ++i)
4876 {
4877 type_base_sptr t = lookup_type_from_die_offset(*i, source);
4878 if (t->get_canonical_type())
4879 ++canonicalized;
4880 else
4881 ++missed;
4882 }
4883 }
4884
4885 /// Compute the number of canonicalized and missed types in the late
4886 /// canonicalization phase.
4887 ///
4888 /// @param canonicalized the number of types that got canonicalized
4889 /// is added to the value already present in this parameter.
4890 ///
4891 /// @param missed the number of types scheduled for late
4892 /// canonicalization and which couldn't be canonicalized (for a
4893 /// reason) is added to the value already present in this parameter.
4894 void
add_late_canonicalized_types_stats(size_t & canonicalized,size_t & missed) const4895 add_late_canonicalized_types_stats(size_t& canonicalized,
4896 size_t& missed) const
4897 {
4898 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4899 source < NUMBER_OF_DIE_SOURCES;
4900 ++source)
4901 add_late_canonicalized_types_stats(source, canonicalized, missed);
4902 }
4903
4904 // Look at the types that need to be canonicalized after the
4905 // translation unit has been constructed and canonicalize them.
4906 void
perform_late_type_canonicalizing()4907 perform_late_type_canonicalizing()
4908 {
4909 for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4910 source < NUMBER_OF_DIE_SOURCES;
4911 ++source)
4912 canonicalize_types_scheduled(source);
4913
4914 if (show_stats())
4915 {
4916 size_t num_canonicalized = 0, num_missed = 0, total = 0;
4917 add_late_canonicalized_types_stats(num_canonicalized,
4918 num_missed);
4919 total = num_canonicalized + num_missed;
4920 cerr << "binary: "
4921 << elf_path()
4922 << "\n";
4923 cerr << " # late canonicalized types: "
4924 << num_canonicalized;
4925 if (total)
4926 cerr << " (" << num_canonicalized * 100 / total << "%)";
4927 cerr << "\n"
4928 << " # missed canonicalization opportunities: "
4929 << num_missed;
4930 if (total)
4931 cerr << " (" << num_missed * 100 / total << "%)";
4932 cerr << "\n";
4933 }
4934
4935 }
4936
4937 const die_tu_map_type&
die_tu_map() const4938 die_tu_map() const
4939 {return die_tu_map_;}
4940
4941 die_tu_map_type&
die_tu_map()4942 die_tu_map()
4943 {return die_tu_map_;}
4944
4945 /// Getter for the map that associates a translation unit DIE to the
4946 /// vector of imported unit points that it contains.
4947 ///
4948 /// @param source where the DIEs are from.
4949 ///
4950 /// @return the map.
4951 const tu_die_imported_unit_points_map_type&
tu_die_imported_unit_points_map(die_source source) const4952 tu_die_imported_unit_points_map(die_source source) const
4953 {return const_cast<read_context*>(this)->tu_die_imported_unit_points_map(source);}
4954
4955 /// Getter for the map that associates a translation unit DIE to the
4956 /// vector of imported unit points that it contains.
4957 ///
4958 /// @param source where the DIEs are from.
4959 ///
4960 /// @return the map.
4961 tu_die_imported_unit_points_map_type&
tu_die_imported_unit_points_map(die_source source)4962 tu_die_imported_unit_points_map(die_source source)
4963 {
4964 switch (source)
4965 {
4966 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4967 break;
4968 case ALT_DEBUG_INFO_DIE_SOURCE:
4969 return alt_tu_die_imported_unit_points_map_;
4970 case TYPE_UNIT_DIE_SOURCE:
4971 return type_units_tu_die_imported_unit_points_map_;
4972 case NO_DEBUG_INFO_DIE_SOURCE:
4973 case NUMBER_OF_DIE_SOURCES:
4974 // We cannot reach this point.
4975 ABG_ASSERT_NOT_REACHED;
4976 }
4977 return tu_die_imported_unit_points_map_;
4978 }
4979
4980 /// Getter of the current corpus being constructed.
4981 ///
4982 /// @return the current corpus.
4983 const corpus_sptr
current_corpus() const4984 current_corpus() const
4985 {return cur_corpus_;}
4986
4987 /// Getter of the current corpus being constructed.
4988 ///
4989 /// @return the current corpus.
4990 corpus_sptr
current_corpus()4991 current_corpus()
4992 {return cur_corpus_;}
4993
4994 /// Setter of the current corpus being constructed.
4995 ///
4996 /// @param c the new corpus.
4997 void
current_corpus(const corpus_sptr & c)4998 current_corpus(const corpus_sptr& c)
4999 {
5000 if (c)
5001 cur_corpus_ = c;
5002 }
5003
5004 /// Reset the current corpus being constructed.
5005 ///
5006 /// This actually deletes the current corpus being constructed.
5007 void
reset_current_corpus()5008 reset_current_corpus()
5009 {cur_corpus_.reset();}
5010
5011 /// Getter of the current corpus group being constructed.
5012 ///
5013 /// @return current the current corpus being constructed, if any, or
5014 /// nil.
5015 const corpus_group_sptr
current_corpus_group() const5016 current_corpus_group() const
5017 {return cur_corpus_group_;}
5018
5019 /// Getter of the current corpus group being constructed.
5020 ///
5021 /// @return current the current corpus being constructed, if any, or
5022 /// nil.
5023 corpus_group_sptr
current_corpus_group()5024 current_corpus_group()
5025 {return cur_corpus_group_;}
5026
5027 /// Setter of the current corpus group being constructed.
5028 ///
5029 /// @param g the new corpus group.
5030 void
current_corpus_group(const corpus_group_sptr & g)5031 current_corpus_group(const corpus_group_sptr& g)
5032 {
5033 if (g)
5034 cur_corpus_group_ = g;
5035 }
5036
5037 /// Test if there is a corpus group being built.
5038 ///
5039 /// @return if there is a corpus group being built, false otherwise.
5040 bool
has_corpus_group() const5041 has_corpus_group() const
5042 {return bool(cur_corpus_group_);}
5043
5044 /// Return the main corpus from the current corpus group, if any.
5045 ///
5046 /// @return the main corpus of the current corpus group, if any, nil
5047 /// if no corpus group is being constructed.
5048 corpus_sptr
main_corpus_from_current_group()5049 main_corpus_from_current_group()
5050 {
5051 if (cur_corpus_group_)
5052 return cur_corpus_group_->get_main_corpus();
5053 return corpus_sptr();
5054 }
5055
5056 /// Return the main corpus from the current corpus group, if any.
5057 ///
5058 /// @return the main corpus of the current corpus group, if any, nil
5059 /// if no corpus group is being constructed.
5060 const corpus_sptr
main_corpus_from_current_group() const5061 main_corpus_from_current_group() const
5062 {return const_cast<read_context*>(this)->main_corpus_from_current_group();}
5063
5064 /// Test if the current corpus being built is the main corpus of the
5065 /// current corpus group.
5066 ///
5067 /// @return return true iff the current corpus being built is the
5068 /// main corpus of the current corpus group.
5069 bool
current_corpus_is_main_corpus_from_current_group() const5070 current_corpus_is_main_corpus_from_current_group() const
5071 {
5072 corpus_sptr main_corpus = main_corpus_from_current_group();
5073
5074 if (main_corpus && main_corpus.get() == cur_corpus_.get())
5075 return true;
5076
5077 return false;
5078 }
5079
5080 /// Return true if the current corpus is part of a corpus group
5081 /// being built and if it's not the main corpus of the group.
5082 ///
5083 /// For instance, this would return true if we are loading a linux
5084 /// kernel *module* that is part of the current corpus group that is
5085 /// being built. In this case, it means we should re-use types
5086 /// coming from the "vmlinux" binary that is the main corpus of the
5087 /// group.
5088 ///
5089 /// @return the corpus group the current corpus belongs to, if the
5090 /// current corpus is part of a corpus group being built. Nil otherwise.
5091 corpus_sptr
should_reuse_type_from_corpus_group() const5092 should_reuse_type_from_corpus_group() const
5093 {
5094 if (has_corpus_group() && is_c_language(cur_transl_unit()->get_language()))
5095 if (corpus_sptr main_corpus = main_corpus_from_current_group())
5096 if (!current_corpus_is_main_corpus_from_current_group())
5097 return current_corpus_group();
5098
5099 return corpus_sptr();
5100 }
5101
5102 /// Get the map that associates each DIE to its parent DIE. This is
5103 /// for DIEs coming from the main debug info sections.
5104 ///
5105 /// @param source where the DIEs in the map come from.
5106 ///
5107 /// @return the DIE -> parent map.
5108 const offset_offset_map_type&
die_parent_map(die_source source) const5109 die_parent_map(die_source source) const
5110 {return const_cast<read_context*>(this)->die_parent_map(source);}
5111
5112 /// Get the map that associates each DIE to its parent DIE. This is
5113 /// for DIEs coming from the main debug info sections.
5114 ///
5115 /// @param source where the DIEs in the map come from.
5116 ///
5117 /// @return the DIE -> parent map.
5118 offset_offset_map_type&
die_parent_map(die_source source)5119 die_parent_map(die_source source)
5120 {
5121 switch (source)
5122 {
5123 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5124 break;
5125 case ALT_DEBUG_INFO_DIE_SOURCE:
5126 return alternate_die_parent_map_;
5127 case TYPE_UNIT_DIE_SOURCE:
5128 return type_section_die_parent_map();
5129 case NO_DEBUG_INFO_DIE_SOURCE:
5130 case NUMBER_OF_DIE_SOURCES:
5131 ABG_ASSERT_NOT_REACHED;
5132 }
5133 return primary_die_parent_map_;
5134 }
5135
5136 const offset_offset_map_type&
type_section_die_parent_map() const5137 type_section_die_parent_map() const
5138 {return type_section_die_parent_map_;}
5139
5140 offset_offset_map_type&
type_section_die_parent_map()5141 type_section_die_parent_map()
5142 {return type_section_die_parent_map_;}
5143
5144 /// Getter of the current translation unit.
5145 ///
5146 /// @return the current translation unit being constructed.
5147 const translation_unit_sptr&
cur_transl_unit() const5148 cur_transl_unit() const
5149 {return cur_tu_;}
5150
5151 /// Getter of the current translation unit.
5152 ///
5153 /// @return the current translation unit being constructed.
5154 translation_unit_sptr&
cur_transl_unit()5155 cur_transl_unit()
5156 {return cur_tu_;}
5157
5158 /// Setter of the current translation unit.
5159 ///
5160 /// @param tu the current translation unit being constructed.
5161 void
cur_transl_unit(translation_unit_sptr tu)5162 cur_transl_unit(translation_unit_sptr tu)
5163 {
5164 if (tu)
5165 cur_tu_ = tu;
5166 }
5167
5168 /// Return the global scope of the current translation unit.
5169 ///
5170 /// @return the global scope of the current translation unit.
5171 const scope_decl_sptr&
global_scope() const5172 global_scope() const
5173 {return cur_transl_unit()->get_global_scope();}
5174
5175 /// Return a scope that is nil.
5176 ///
5177 /// @return a scope that is nil.
5178 const scope_decl_sptr&
nil_scope() const5179 nil_scope() const
5180 {return nil_scope_;}
5181
5182 const scope_stack_type&
scope_stack() const5183 scope_stack() const
5184 {return scope_stack_;}
5185
5186 scope_stack_type&
scope_stack()5187 scope_stack()
5188 {return scope_stack_;}
5189
5190 scope_decl*
current_scope()5191 current_scope()
5192 {
5193 if (scope_stack().empty())
5194 {
5195 if (cur_transl_unit())
5196 scope_stack().push(cur_transl_unit()->get_global_scope().get());
5197 }
5198 return scope_stack().top();
5199 }
5200
5201 list<var_decl_sptr>&
var_decls_to_re_add_to_tree()5202 var_decls_to_re_add_to_tree()
5203 {return var_decls_to_add_;}
5204
5205 /// The section containing the symbol table from the current ELF
5206 /// file.
5207 ///
5208 /// Note that after it's first invocation, this function caches the
5209 /// symbol table that it found. Subsequent invocations just return
5210 /// the cached symbol table section.
5211 ///
5212 /// @return the symbol table section if found
5213 Elf_Scn*
find_symbol_table_section() const5214 find_symbol_table_section() const
5215 {
5216 if (!symtab_section_)
5217 symtab_section_ = elf_helpers::find_symbol_table_section(elf_handle());
5218 return symtab_section_;
5219 }
5220
5221 /// Lookup an elf symbol, referred to by its index, from the .symtab
5222 /// section.
5223 ///
5224 /// The resulting symbol returned is an instance of a GElf_Sym, from
5225 /// the libelf library.
5226 ///
5227 /// @param symbol_index the index of the symbol to look up.
5228 ///
5229 /// @param elf_sym out parameter. This is set to the resulting ELF
5230 /// symbol iff the function returns TRUE, meaning the symbol was
5231 /// found.
5232 ///
5233 /// @return TRUE iff the symbol was found.
5234 bool
lookup_native_elf_symbol_from_index(size_t symbol_index,GElf_Sym & elf_sym)5235 lookup_native_elf_symbol_from_index(size_t symbol_index, GElf_Sym &elf_sym)
5236 {
5237 Elf_Scn* symtab_section = find_symbol_table_section();
5238 if (!symtab_section)
5239 return false;
5240
5241 Elf_Data* symtab = elf_getdata(symtab_section, 0);
5242 ABG_ASSERT(symtab);
5243
5244 if (!gelf_getsym(symtab, symbol_index, &elf_sym))
5245 return false;
5246
5247 return true;
5248 }
5249
5250 /// Test if a given function symbol has been exported.
5251 ///
5252 /// @param symbol_address the address of the symbol we are looking
5253 /// for. Note that this address must be a relative offset from the
5254 /// beginning of the .text section, just like the kind of addresses
5255 /// that are present in the .symtab section.
5256 ///
5257 /// @returnthe elf symbol if found, or nil otherwise.
5258 elf_symbol_sptr
function_symbol_is_exported(GElf_Addr symbol_address) const5259 function_symbol_is_exported(GElf_Addr symbol_address) const
5260 {
5261 elf_symbol_sptr symbol = symtab()->lookup_symbol(symbol_address);
5262 if (!symbol)
5263 return symbol;
5264
5265 if (!symbol->is_function() || !symbol->is_public())
5266 return elf_symbol_sptr();
5267
5268 address_set_sptr set;
5269 bool looking_at_linux_kernel_binary =
5270 load_in_linux_kernel_mode() && is_linux_kernel(elf_handle());
5271
5272 if (looking_at_linux_kernel_binary)
5273 {
5274 if (symbol->is_in_ksymtab())
5275 return symbol;
5276 return elf_symbol_sptr();
5277 }
5278
5279 return symbol;
5280 }
5281
5282 /// Test if a given variable symbol has been exported.
5283 ///
5284 /// @param symbol_address the address of the symbol we are looking
5285 /// for. Note that this address must be a relative offset from the
5286 /// beginning of the .text section, just like the kind of addresses
5287 /// that are present in the .symtab section.
5288 ///
5289 /// @returnthe elf symbol if found, or nil otherwise.
5290 elf_symbol_sptr
variable_symbol_is_exported(GElf_Addr symbol_address) const5291 variable_symbol_is_exported(GElf_Addr symbol_address) const
5292 {
5293 elf_symbol_sptr symbol = symtab()->lookup_symbol(symbol_address);
5294 if (!symbol)
5295 return symbol;
5296
5297 if (!symbol->is_variable() || !symbol->is_public())
5298 return elf_symbol_sptr();
5299
5300 address_set_sptr set;
5301 bool looking_at_linux_kernel_binary =
5302 load_in_linux_kernel_mode() && is_linux_kernel(elf_handle());
5303
5304 if (looking_at_linux_kernel_binary)
5305 {
5306 if (symbol->is_in_ksymtab())
5307 return symbol;
5308 return elf_symbol_sptr();
5309 }
5310
5311 return symbol;
5312 }
5313
5314 /// Getter for the symtab reader. Will load the symtab from the elf handle if
5315 /// not yet set.
5316 ///
5317 /// @return a shared pointer to the symtab object
5318 const symtab_reader::symtab_sptr&
symtab() const5319 symtab() const
5320 {
5321 if (!symtab_)
5322 symtab_ = symtab_reader::symtab::load
5323 (elf_handle(), options_.env,
5324 [&](const elf_symbol_sptr& symbol)
5325 {return is_elf_symbol_suppressed(symbol);});
5326
5327 if (!symtab_)
5328 std::cerr << "Symbol table of '" << elf_path_
5329 << "' could not be loaded\n";
5330 return symtab_;
5331 }
5332
5333 /// Getter for the ELF dt_needed tag.
5334 const vector<string>&
dt_needed() const5335 dt_needed() const
5336 {return dt_needed_;}
5337
5338 /// Getter for the ELF dt_soname tag.
5339 const string&
dt_soname() const5340 dt_soname() const
5341 {return dt_soname_;}
5342
5343 /// Getter for the ELF architecture of the current file.
5344 const string&
elf_architecture() const5345 elf_architecture() const
5346 {return elf_architecture_;}
5347
5348 /// Test if a given ELF symbol was suppressed by a suppression
5349 /// specification.
5350 ///
5351 /// @param symbol the ELF symbol to consider.
5352 ///
5353 /// @return true iff @p symbol is suppressed.
5354 bool
is_elf_symbol_suppressed(const elf_symbol_sptr & symbol) const5355 is_elf_symbol_suppressed(const elf_symbol_sptr& symbol) const
5356 {
5357 return (symbol
5358 && suppr::is_elf_symbol_suppressed(*this,
5359 symbol->get_name(),
5360 symbol->get_type()));
5361 }
5362
5363 /// Load the DT_NEEDED and DT_SONAME elf TAGS.
5364 ///
5365 void
load_dt_soname_and_needed()5366 load_dt_soname_and_needed()
5367 {
5368 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_NEEDED, dt_needed_);
5369
5370 vector<string> dt_tag_data;
5371 lookup_data_tag_from_dynamic_segment(elf_handle(), DT_SONAME, dt_tag_data);
5372 if (!dt_tag_data.empty())
5373 dt_soname_ = dt_tag_data[0];
5374 }
5375
5376 /// Read the string representing the architecture of the current ELF
5377 /// file.
5378 void
load_elf_architecture()5379 load_elf_architecture()
5380 {
5381 if (!elf_handle())
5382 return;
5383
5384 GElf_Ehdr eh_mem;
5385 GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
5386
5387 elf_architecture_ = e_machine_to_string(elf_header->e_machine);
5388 }
5389
5390 /// Load various ELF data.
5391 ///
5392 /// This function loads ELF data that are not symbol maps or debug
5393 /// info. That is, things like various tags, elf architecture and
5394 /// so on.
5395 void
load_elf_properties()5396 load_elf_properties()
5397 {
5398 load_dt_soname_and_needed();
5399 load_elf_architecture();
5400 }
5401
5402 /// This is a sub-routine of maybe_adjust_fn_sym_address and
5403 /// maybe_adjust_var_sym_address.
5404 ///
5405 /// Given an address that we got by looking at some debug
5406 /// information (e.g, a symbol's address referred to by a DWARF
5407 /// TAG), If the ELF file we are interested in is a shared library
5408 /// or an executable, then adjust the address to be coherent with
5409 /// where the executable (or shared library) is loaded. That way,
5410 /// the address can be used to look for symbols in the executable or
5411 /// shared library.
5412 ///
5413 /// @return the adjusted address, or the same address as @p addr if
5414 /// it didn't need any adjustment.
5415 Dwarf_Addr
maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const5416 maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const
5417 {
5418 if (addr == 0)
5419 return addr;
5420
5421 GElf_Ehdr eh_mem;
5422 GElf_Ehdr *elf_header = gelf_getehdr(elf_handle(), &eh_mem);
5423
5424 if (elf_header->e_type == ET_DYN || elf_header->e_type == ET_EXEC)
5425 {
5426 Dwarf_Addr dwarf_elf_load_address = 0, elf_load_address = 0;
5427 ABG_ASSERT(get_binary_load_address(dwarf_elf_handle(),
5428 dwarf_elf_load_address));
5429 ABG_ASSERT(get_binary_load_address(elf_handle(),
5430 elf_load_address));
5431 if (dwarf_is_splitted()
5432 && (dwarf_elf_load_address != elf_load_address))
5433 // This means that in theory the DWARF and the executable are
5434 // not loaded at the same address. And addr is meaningful
5435 // only in the context of the DWARF.
5436 //
5437 // So let's transform addr into an offset relative to where
5438 // the DWARF is loaded, and let's add that relative offset
5439 // to the load address of the executable. That way, addr
5440 // becomes meaningful in the context of the executable and
5441 // can thus be used to compare against the address of
5442 // symbols of the executable, for instance.
5443 addr = addr - dwarf_elf_load_address + elf_load_address;
5444 }
5445
5446 return addr;
5447 }
5448
5449 /// For a relocatable (*.o) elf file, this function expects an
5450 /// absolute address, representing a function symbol. It then
5451 /// extracts the address of the .text section from the symbol
5452 /// absolute address to get the relative address of the function
5453 /// from the beginning of the .text section.
5454 ///
5455 /// For executable or shared library, this function expects an
5456 /// address of a function symbol that was retrieved by looking at a
5457 /// DWARF "file". The function thus adjusts the address to make it
5458 /// be meaningful in the context of the ELF file.
5459 ///
5460 /// In both cases, the address can then be compared against the
5461 /// st_value field of a function symbol from the ELF file.
5462 ///
5463 /// @param addr an adress for a function symbol that was retrieved
5464 /// from a DWARF file.
5465 ///
5466 /// @return the (possibly) adjusted address, or just @p addr if no
5467 /// adjustment took place.
5468 Dwarf_Addr
maybe_adjust_fn_sym_address(Dwarf_Addr addr) const5469 maybe_adjust_fn_sym_address(Dwarf_Addr addr) const
5470 {
5471 if (addr == 0)
5472 return addr;
5473
5474 Elf* elf = elf_handle();
5475 GElf_Ehdr eh_mem;
5476 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
5477
5478 if (elf_header->e_type == ET_REL)
5479 // We are looking at a relocatable file. In this case, we don't
5480 // do anything because:
5481 //
5482 // 1/ the addresses from DWARF are absolute (relative to the
5483 // beginning of the relocatable file)
5484 //
5485 // 2/ The ELF symbol addresses that we store in our lookup
5486 // tables are translated from section-related to absolute as
5487 // well. So we don't have anything to do at this point for
5488 // ET_REL files.
5489 ;
5490 else
5491 addr = maybe_adjust_address_for_exec_or_dyn(addr);
5492
5493 return addr;
5494 }
5495
5496 /// For a relocatable (*.o) elf file, this function expects an
5497 /// absolute address, representing a global variable symbol. It
5498 /// then extracts the address of the {.data,.data1,.rodata,.bss}
5499 /// section from the symbol absolute address to get the relative
5500 /// address of the variable from the beginning of the data section.
5501 ///
5502 /// For executable or shared library, this function expects an
5503 /// address of a variable symbol that was retrieved by looking at a
5504 /// DWARF "file". The function thus adjusts the address to make it
5505 /// be meaningful in the context of the ELF file.
5506 ///
5507 /// In both cases, the address can then be compared against the
5508 /// st_value field of a function symbol from the ELF file.
5509 ///
5510 /// @param addr an address for a global variable symbol that was
5511 /// retrieved from a DWARF file.
5512 ///
5513 /// @return the (possibly) adjusted address, or just @p addr if no
5514 /// adjustment took place.
5515 Dwarf_Addr
maybe_adjust_var_sym_address(Dwarf_Addr addr) const5516 maybe_adjust_var_sym_address(Dwarf_Addr addr) const
5517 {
5518 Elf* elf = elf_handle();
5519 GElf_Ehdr eh_mem;
5520 GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
5521
5522 if (elf_header->e_type == ET_REL)
5523 // We are looking at a relocatable file. In this case, we don't
5524 // do anything because:
5525 //
5526 // 1/ the addresses from DWARF are absolute (relative to the
5527 // beginning of the relocatable file)
5528 //
5529 // 2/ The ELF symbol addresses that we store in our lookup
5530 // tables are translated from section-related to absolute as
5531 // well. So we don't have anything to do at this point for
5532 // ET_REL files.
5533 ;
5534 else
5535 addr = maybe_adjust_address_for_exec_or_dyn(addr);
5536
5537 return addr;
5538 }
5539
5540 /// Get the first exported function address in the set of addresses
5541 /// referred to by the DW_AT_ranges attribute of a given DIE.
5542 ///
5543 /// @param die the DIE we are considering.
5544 ///
5545 /// @param address output parameter. This is set to the first
5546 /// address found in the sequence pointed to by the DW_AT_ranges
5547 /// attribute found on the DIE @p die, iff the function returns
5548 /// true. Otherwise, no value is set into this output parameter.
5549 ///
5550 /// @return true iff the DIE @p die does have a DW_AT_ranges
5551 /// attribute and an address of an exported function was found in
5552 /// its sequence value.
5553 bool
get_first_exported_fn_address_from_DW_AT_ranges(Dwarf_Die * die,Dwarf_Addr & address) const5554 get_first_exported_fn_address_from_DW_AT_ranges(Dwarf_Die* die,
5555 Dwarf_Addr& address) const
5556 {
5557 Dwarf_Addr base;
5558 Dwarf_Addr end_addr;
5559 ptrdiff_t offset = 0;
5560
5561 do
5562 {
5563 Dwarf_Addr addr = 0, fn_addr = 0;
5564 if ((offset = dwarf_ranges(die, offset, &base, &addr, &end_addr)) >= 0)
5565 {
5566 fn_addr = maybe_adjust_fn_sym_address(addr);
5567 if (function_symbol_is_exported(fn_addr))
5568 {
5569 address = fn_addr;
5570 return true;
5571 }
5572 }
5573 } while (offset > 0);
5574 return false;
5575 }
5576
5577 /// Get the address of the function.
5578 ///
5579 /// The address of the function is considered to be the value of the
5580 /// DW_AT_low_pc attribute, possibly adjusted (in relocatable files
5581 /// only) to not point to an absolute address anymore, but rather to
5582 /// the address of the function inside the .text segment.
5583 ///
5584 /// @param function_die the die of the function to consider.
5585 ///
5586 /// @param address the resulting address iff the function returns
5587 /// true.
5588 ///
5589 /// @return true if the function address was found.
5590 bool
get_function_address(Dwarf_Die * function_die,Dwarf_Addr & address) const5591 get_function_address(Dwarf_Die* function_die, Dwarf_Addr& address) const
5592 {
5593 if (!die_address_attribute(function_die, DW_AT_low_pc, address))
5594 // So no DW_AT_low_pc was found. Let's see if the function DIE
5595 // has got a DW_AT_ranges attribute instead. If it does, the
5596 // first address of the set of addresses represented by the
5597 // value of that DW_AT_ranges represents the function (symbol)
5598 // address we are looking for.
5599 if (!get_first_exported_fn_address_from_DW_AT_ranges(function_die,
5600 address))
5601 return false;
5602
5603 address = maybe_adjust_fn_sym_address(address);
5604 return true;
5605 }
5606
5607 /// Get the address of the global variable.
5608 ///
5609 /// The address of the global variable is considered to be the value
5610 /// of the DW_AT_location attribute, possibly adjusted (in
5611 /// relocatable files only) to not point to an absolute address
5612 /// anymore, but rather to the address of the global variable inside
5613 /// the data segment.
5614 ///
5615 /// @param variable_die the die of the function to consider.
5616 ///
5617 /// @param address the resulting address iff this function returns
5618 /// true.
5619 ///
5620 /// @return true if the variable address was found.
5621 bool
get_variable_address(Dwarf_Die * variable_die,Dwarf_Addr & address) const5622 get_variable_address(Dwarf_Die* variable_die,
5623 Dwarf_Addr& address) const
5624 {
5625 bool is_tls_address = false;
5626 if (!die_location_address(variable_die, address, is_tls_address))
5627 return false;
5628 if (!is_tls_address)
5629 address = maybe_adjust_var_sym_address(address);
5630 return true;
5631 }
5632
5633 /// Tests if a suppression specification can match ABI artifacts
5634 /// coming from the binary being analyzed.
5635 ///
5636 /// This tests if the suppression can match the soname of and binary
5637 /// name of the ELF binary being analyzed. More precisely, if there
5638 /// are any soname or file name property in the suppression and if
5639 /// those do *NOT* match the current binary, then the function
5640 /// returns false.
5641 ///
5642 /// @param s the suppression specification to consider.
5643 ///
5644 /// @return true iff either there are no soname/filename related
5645 /// property on the suppression, or if none of the soname/filename
5646 /// properties of the suppression match the current binary.
5647 bool
suppression_can_match(const suppr::suppression_base & s) const5648 suppression_can_match(const suppr::suppression_base& s) const
5649 {
5650 if (!s.priv_->matches_soname(dt_soname()))
5651 if (s.has_soname_related_property())
5652 // The suppression has some SONAME related properties, but
5653 // none of them match the SONAME of the current binary. So
5654 // the suppression cannot match the current binary.
5655 return false;
5656
5657 if (!s.priv_->matches_binary_name(elf_path()))
5658 if (s.has_file_name_related_property())
5659 // The suppression has some file_name related properties, but
5660 // none of them match the file name of the current binary. So
5661 // the suppression cannot match the current binary.
5662 return false;
5663
5664 return true;
5665 }
5666
5667 /// Test whether if a given function suppression matches a function
5668 /// designated by a regular expression that describes its linkage
5669 /// name (symbol name).
5670 ///
5671 /// @param s the suppression specification to evaluate to see if it
5672 /// matches a given function linkage name
5673 ///
5674 /// @param fn_linkage_name the linkage name of the function of interest.
5675 ///
5676 /// @return true iff the suppression specification @p s matches the
5677 /// function whose linkage name is @p fn_linkage_name.
5678 bool
suppression_matches_function_sym_name(const suppr::function_suppression & s,const string & fn_linkage_name) const5679 suppression_matches_function_sym_name(const suppr::function_suppression& s,
5680 const string& fn_linkage_name) const
5681 {
5682 if (!suppression_can_match(s))
5683 return false;
5684
5685 return suppr::suppression_matches_function_sym_name(s, fn_linkage_name);
5686 }
5687
5688 /// Test whether if a given function suppression matches a function
5689 /// designated by a regular expression that describes its name.
5690 ///
5691 /// @param s the suppression specification to evaluate to see if it
5692 /// matches a given function name.
5693 ///
5694 /// @param fn_name the name of the function of interest. Note that
5695 /// this name must be *non* qualified.
5696 ///
5697 /// @return true iff the suppression specification @p s matches the
5698 /// function whose name is @p fn_name.
5699 bool
suppression_matches_function_name(const suppr::function_suppression & s,const string & fn_name) const5700 suppression_matches_function_name(const suppr::function_suppression& s,
5701 const string& fn_name) const
5702 {
5703 if (!suppression_can_match(s))
5704 return false;
5705
5706 return suppr::suppression_matches_function_name(s, fn_name);
5707 }
5708
5709 /// Test whether if a given variable suppression specification
5710 /// matches a variable denoted by its name.
5711 ///
5712 /// @param s the variable suppression specification to consider.
5713 ///
5714 /// @param var_name the name of the variable to consider.
5715 ///
5716 /// @return true iff the suppression specification @p s matches the
5717 /// variable whose name is @p var_name.
5718 bool
suppression_matches_variable_name(const suppr::variable_suppression & s,const string & var_name) const5719 suppression_matches_variable_name(const suppr::variable_suppression& s,
5720 const string& var_name) const
5721 {
5722 if (!suppression_can_match(s))
5723 return false;
5724
5725 return suppr::suppression_matches_variable_name(s, var_name);
5726 }
5727
5728 /// Test whether if a given variable suppression specification
5729 /// matches a variable denoted by its linkage name.
5730 ///
5731 /// @param s the variable suppression specification to consider.
5732 ///
5733 /// @param var_linkage_name the linkage name of the variable to consider.
5734 ///
5735 /// @return true iff variable suppression specification @p s matches
5736 /// the variable denoted by linkage name @p var_linkage_name.
5737 bool
suppression_matches_variable_sym_name(const suppr::variable_suppression & s,const string & var_linkage_name) const5738 suppression_matches_variable_sym_name(const suppr::variable_suppression& s,
5739 const string& var_linkage_name) const
5740 {
5741 if (!suppression_can_match(s))
5742 return false;
5743
5744 return suppr::suppression_matches_variable_sym_name(s, var_linkage_name);
5745 }
5746
5747 /// Test if a given type suppression specification matches a type
5748 /// designated by its name and location.
5749 ///
5750 /// @param s the suppression specification to consider.
5751 ///
5752 /// @param type_name the fully qualified type name to consider.
5753 ///
5754 /// @param type_location the type location to consider.
5755 ///
5756 /// @return true iff the type suppression specification matches a
5757 /// type of a given name and location.
5758 bool
suppression_matches_type_name_or_location(const suppr::type_suppression & s,const string & type_name,const location & type_location) const5759 suppression_matches_type_name_or_location(const suppr::type_suppression& s,
5760 const string& type_name,
5761 const location& type_location) const
5762 {
5763 if (!suppression_can_match(s))
5764 return false;
5765
5766 return suppr::suppression_matches_type_name_or_location(s, type_name,
5767 type_location);
5768 }
5769
5770 /// Getter of the exported decls builder object.
5771 ///
5772 /// @return the exported decls builder.
5773 corpus::exported_decls_builder*
exported_decls_builder()5774 exported_decls_builder()
5775 {return exported_decls_builder_;}
5776
5777 /// Setter of the exported decls builder object.
5778 ///
5779 /// Note that this @ref read_context is not responsible for the live
5780 /// time of the exported_decls_builder object. The corpus is.
5781 ///
5782 /// @param b the new builder.
5783 void
exported_decls_builder(corpus::exported_decls_builder * b)5784 exported_decls_builder(corpus::exported_decls_builder* b)
5785 {exported_decls_builder_ = b;}
5786
5787 /// Getter of the "load_all_types" flag. This flag tells if all the
5788 /// types (including those not reachable by public declarations) are
5789 /// to be read and represented in the final ABI corpus.
5790 ///
5791 /// @return the load_all_types flag.
5792 bool
load_all_types() const5793 load_all_types() const
5794 {return options_.load_all_types;}
5795
5796 /// Setter of the "load_all_types" flag. This flag tells if all the
5797 /// types (including those not reachable by public declarations) are
5798 /// to be read and represented in the final ABI corpus.
5799 ///
5800 /// @param f the new load_all_types flag.
5801 void
load_all_types(bool f)5802 load_all_types(bool f)
5803 {options_.load_all_types = f;}
5804
5805 bool
load_in_linux_kernel_mode() const5806 load_in_linux_kernel_mode() const
5807 {return options_.load_in_linux_kernel_mode;}
5808
5809 void
load_in_linux_kernel_mode(bool f)5810 load_in_linux_kernel_mode(bool f)
5811 {options_.load_in_linux_kernel_mode = f;}
5812
5813 /// Getter of the "show_stats" flag.
5814 ///
5815 /// This flag tells if we should emit statistics about various
5816 /// internal stuff.
5817 ///
5818 /// @return the value of the flag.
5819 bool
show_stats() const5820 show_stats() const
5821 {return options_.show_stats;}
5822
5823 /// Setter of the "show_stats" flag.
5824 ///
5825 /// This flag tells if we should emit statistics about various
5826 /// internal stuff.
5827 ///
5828 /// @param f the value of the flag.
5829 void
show_stats(bool f)5830 show_stats(bool f)
5831 {options_.show_stats = f;}
5832
5833 /// Getter of the "do_log" flag.
5834 ///
5835 /// This flag tells if we should log about various internal
5836 /// details.
5837 ///
5838 /// return the "do_log" flag.
5839 bool
do_log() const5840 do_log() const
5841 {return options_.do_log;}
5842
5843 /// Setter of the "do_log" flag.
5844 ///
5845 /// This flag tells if we should log about various internal details.
5846 ///
5847 /// @param f the new value of the flag.
5848 void
do_log(bool f)5849 do_log(bool f)
5850 {options_.do_log = f;}
5851
5852 /// If a given function decl is suitable for the set of exported
5853 /// functions of the current corpus, this function adds it to that
5854 /// set.
5855 ///
5856 /// @param fn the function to consider for inclusion into the set of
5857 /// exported functions of the current corpus.
5858 void
maybe_add_fn_to_exported_decls(function_decl * fn)5859 maybe_add_fn_to_exported_decls(function_decl* fn)
5860 {
5861 if (fn)
5862 if (corpus::exported_decls_builder* b = exported_decls_builder())
5863 b->maybe_add_fn_to_exported_fns(fn);
5864 }
5865
5866 /// If a given variable decl is suitable for the set of exported
5867 /// variables of the current corpus, this variable adds it to that
5868 /// set.
5869 ///
5870 /// @param fn the variable to consider for inclusion into the set of
5871 /// exported variables of the current corpus.
5872 void
maybe_add_var_to_exported_decls(var_decl * var)5873 maybe_add_var_to_exported_decls(var_decl* var)
5874 {
5875 if (var)
5876 if (corpus::exported_decls_builder* b = exported_decls_builder())
5877 b->maybe_add_var_to_exported_vars(var);
5878 }
5879
5880 /// Walk the DIEs under a given die and for each child, populate the
5881 /// die -> parent map to record the child -> parent relationship
5882 /// that
5883 /// exists between the child and the given die.
5884 ///
5885 /// The function also builds the vector of places where units are
5886 /// imported.
5887 ///
5888 /// This is done recursively as for each child DIE, this function
5889 /// walks its children as well.
5890 ///
5891 /// @param die the DIE whose children to walk recursively.
5892 ///
5893 /// @param source where the DIE @p die comes from.
5894 ///
5895 /// @param imported_units a vector containing all the offsets of the
5896 /// points where unit have been imported, under @p die.
5897 void
build_die_parent_relations_under(Dwarf_Die * die,die_source source,imported_unit_points_type & imported_units)5898 build_die_parent_relations_under(Dwarf_Die* die,
5899 die_source source,
5900 imported_unit_points_type & imported_units)
5901 {
5902 if (!die)
5903 return;
5904
5905 offset_offset_map_type& parent_of = die_parent_map(source);
5906
5907 Dwarf_Die child;
5908 if (dwarf_child(die, &child) != 0)
5909 return;
5910
5911 do
5912 {
5913 parent_of[dwarf_dieoffset(&child)] = dwarf_dieoffset(die);
5914 if (dwarf_tag(&child) == DW_TAG_imported_unit)
5915 {
5916 Dwarf_Die imported_unit;
5917 if (die_die_attribute(&child, DW_AT_import, imported_unit)
5918 // If the imported_unit has a sub-tree, let's record
5919 // this point at which the sub-tree is imported into
5920 // the current debug info.
5921 //
5922 // Otherwise, if the imported_unit has no sub-tree,
5923 // there is no point in recording where a non-existent
5924 // sub-tree is being imported.
5925 //
5926 // Note that the imported_unit_points_type type below
5927 // expects the imported_unit to have a sub-tree.
5928 && die_has_children(&imported_unit))
5929 {
5930 die_source imported_unit_die_source = NO_DEBUG_INFO_DIE_SOURCE;
5931 ABG_ASSERT(get_die_source(imported_unit, imported_unit_die_source));
5932 imported_units.push_back
5933 (imported_unit_point(dwarf_dieoffset(&child),
5934 imported_unit,
5935 imported_unit_die_source));
5936 }
5937 }
5938 build_die_parent_relations_under(&child, source, imported_units);
5939 }
5940 while (dwarf_siblingof(&child, &child) == 0);
5941
5942 }
5943
5944 /// Determine if we do have to build a DIE -> parent map, depending
5945 /// on a given language.
5946 ///
5947 /// Some languages like C++, Ada etc, do have the concept of
5948 /// namespace and yet, the DIE data structure doesn't provide us
5949 /// with a way to get the parent namespace of a given DIE. So for
5950 /// those languages, we need to build a DIE -> parent map so that we
5951 /// can get the namespace DIE (or more generally the scope DIE) of a given
5952 /// DIE as we need it.
5953 ///
5954 /// But then some more basic languages like C or assembly don't have
5955 /// that need.
5956 ///
5957 /// This function, depending on the language, tells us if we need to
5958 /// build the DIE -> parent map or not.
5959 ///
5960 /// @param lang the language to consider.
5961 ///
5962 /// @return true iff we need to build the DIE -> parent map for this
5963 /// language.
5964 bool
do_we_build_die_parent_maps(translation_unit::language lang)5965 do_we_build_die_parent_maps(translation_unit::language lang)
5966 {
5967 if (is_c_language(lang))
5968 return false;
5969
5970 switch (lang)
5971 {
5972 case translation_unit::LANG_UNKNOWN:
5973 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
5974 case translation_unit::LANG_Mips_Assembler:
5975 #endif
5976 return false;
5977 default:
5978 break;
5979 }
5980 return true;
5981 }
5982
5983 /// Walk all the DIEs accessible in the debug info (and in the
5984 /// alternate debug info as well) and build maps representing the
5985 /// relationship DIE -> parent. That is, make it so that we can get
5986 /// the parent for a given DIE.
5987 ///
5988 /// Note that the goal of this map is to be able to get the parent
5989 /// of a given DIE. This is to mainly to handle namespaces. For instance,
5990 /// when we get a DIE of a type, and we want to build an internal
5991 /// representation for it, we need to get its fully qualified name.
5992 /// For that, we need to know what is the parent DIE of that type
5993 /// DIE, so that we can know what the namespace of that type is.
5994 ///
5995 /// Note that as the C language doesn't have namespaces (all types
5996 /// are defined in the same global namespace), this function doesn't
5997 /// build the DIE -> parent map if the current translation unit
5998 /// comes from C. This saves time on big C ELF files with a lot of
5999 /// DIEs.
6000 void
build_die_parent_maps()6001 build_die_parent_maps()
6002 {
6003 bool we_do_have_to_build_die_parent_map = false;
6004 uint8_t address_size = 0;
6005 size_t header_size = 0;
6006 // Get the DIE of the current translation unit, look at it to get
6007 // its language. If that language is in C, then all types are in
6008 // the global namespace so we don't need to build the DIE ->
6009 // parent map. So we dont build it in that case.
6010 for (Dwarf_Off offset = 0, next_offset = 0;
6011 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6012 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6013 offset = next_offset)
6014 {
6015 Dwarf_Off die_offset = offset + header_size;
6016 Dwarf_Die cu;
6017 if (!dwarf_offdie(dwarf(), die_offset, &cu))
6018 continue;
6019
6020 uint64_t l = 0;
6021 die_unsigned_constant_attribute(&cu, DW_AT_language, l);
6022 translation_unit::language lang = dwarf_language_to_tu_language(l);
6023 if (do_we_build_die_parent_maps(lang))
6024 we_do_have_to_build_die_parent_map = true;
6025 }
6026
6027 if (!we_do_have_to_build_die_parent_map)
6028 return;
6029
6030 // Build the DIE -> parent relation for DIEs coming from the
6031 // .debug_info section in the alternate debug info file.
6032 die_source source = ALT_DEBUG_INFO_DIE_SOURCE;
6033 for (Dwarf_Off offset = 0, next_offset = 0;
6034 (dwarf_next_unit(alt_dwarf(), offset, &next_offset, &header_size,
6035 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6036 offset = next_offset)
6037 {
6038 Dwarf_Off die_offset = offset + header_size;
6039 Dwarf_Die cu;
6040 if (!dwarf_offdie(alt_dwarf(), die_offset, &cu))
6041 continue;
6042 cur_tu_die(&cu);
6043
6044 imported_unit_points_type& imported_units =
6045 tu_die_imported_unit_points_map(source)[die_offset] =
6046 imported_unit_points_type();
6047 build_die_parent_relations_under(&cu, source, imported_units);
6048 }
6049
6050 // Build the DIE -> parent relation for DIEs coming from the
6051 // .debug_info section of the main debug info file.
6052 source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
6053 address_size = 0;
6054 header_size = 0;
6055 for (Dwarf_Off offset = 0, next_offset = 0;
6056 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6057 NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6058 offset = next_offset)
6059 {
6060 Dwarf_Off die_offset = offset + header_size;
6061 Dwarf_Die cu;
6062 if (!dwarf_offdie(dwarf(), die_offset, &cu))
6063 continue;
6064 cur_tu_die(&cu);
6065 imported_unit_points_type& imported_units =
6066 tu_die_imported_unit_points_map(source)[die_offset] =
6067 imported_unit_points_type();
6068 build_die_parent_relations_under(&cu, source, imported_units);
6069 }
6070
6071 // Build the DIE -> parent relation for DIEs coming from the
6072 // .debug_types section.
6073 source = TYPE_UNIT_DIE_SOURCE;
6074 address_size = 0;
6075 header_size = 0;
6076 uint64_t type_signature = 0;
6077 Dwarf_Off type_offset;
6078 for (Dwarf_Off offset = 0, next_offset = 0;
6079 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6080 NULL, NULL, &address_size, NULL,
6081 &type_signature, &type_offset) == 0);
6082 offset = next_offset)
6083 {
6084 Dwarf_Off die_offset = offset + header_size;
6085 Dwarf_Die cu;
6086
6087 if (!dwarf_offdie_types(dwarf(), die_offset, &cu))
6088 continue;
6089 cur_tu_die(&cu);
6090 imported_unit_points_type& imported_units =
6091 tu_die_imported_unit_points_map(source)[die_offset] =
6092 imported_unit_points_type();
6093 build_die_parent_relations_under(&cu, source, imported_units);
6094 }
6095 }
6096 };// end class read_context.
6097
// Forward declarations of file-local (static) helpers.  They are
// only declared here; as they have internal linkage, their
// definitions have to live in this translation unit.  The default
// arguments below are specified on these declarations.

// Full overload: takes the scope explicitly.  By default,
// is_declaration_only is true and is_required_decl_spec is false.
static type_or_decl_base_sptr
build_ir_node_from_die(read_context& ctxt,
                       Dwarf_Die* die,
                       scope_decl* scope,
                       bool called_from_public_decl,
                       size_t where_offset,
                       bool is_declaration_only = true,
                       bool is_required_decl_spec = false);

// Overload that takes no explicit scope.
static type_or_decl_base_sptr
build_ir_node_from_die(read_context& ctxt,
                       Dwarf_Die* die,
                       bool called_from_public_decl,
                       size_t where_offset);

static class_decl_sptr
add_or_update_class_type(read_context& ctxt,
                         Dwarf_Die* die,
                         scope_decl* scope,
                         bool is_struct,
                         class_decl_sptr klass,
                         bool called_from_public_decl,
                         size_t where_offset,
                         bool is_declaration_only);

static union_decl_sptr
add_or_update_union_type(read_context& ctxt,
                         Dwarf_Die* die,
                         scope_decl* scope,
                         union_decl_sptr union_type,
                         bool called_from_public_decl,
                         size_t where_offset,
                         bool is_declaration_only);

static decl_base_sptr
build_ir_node_for_void_type(read_context& ctxt);

static decl_base_sptr
build_ir_node_for_variadic_parameter_type(read_context &ctxt);

static function_decl_sptr
build_function_decl(read_context& ctxt,
                    Dwarf_Die* die,
                    size_t where_offset,
                    function_decl_sptr fn);

static bool
function_is_suppressed(const read_context& ctxt,
                       const scope_decl* scope,
                       Dwarf_Die *function_die,
                       bool is_declaration_only);

static function_decl_sptr
build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
                                       scope_decl *scope,
                                       Dwarf_Die *die,
                                       size_t where_offset,
                                       bool is_declaration_only,
                                       function_decl_sptr f);

// By default, result is an empty var_decl_sptr.
static var_decl_sptr
build_var_decl(read_context& ctxt,
               Dwarf_Die *die,
               size_t where_offset,
               var_decl_sptr result = var_decl_sptr());

// By default, res is an empty var_decl_sptr and
// is_required_decl_spec is false.
static var_decl_sptr
build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
                                        scope_decl *scope,
                                        Dwarf_Die *die,
                                        size_t where_offset,
                                        var_decl_sptr res = var_decl_sptr(),
                                        bool is_required_decl_spec = false);
// By default, is_required_decl_spec is false.
static bool
variable_is_suppressed(const read_context& ctxt,
                       const scope_decl* scope,
                       Dwarf_Die *variable_die,
                       bool is_required_decl_spec = false);

static void
finish_member_function_reading(Dwarf_Die* die,
                               const function_decl_sptr& f,
                               const class_or_union_sptr klass,
                               read_context& ctxt);
6182
6183 /// Setter of the debug info root path for a dwarf reader context.
6184 ///
6185 /// @param ctxt the dwarf reader context to consider.
6186 ///
6187 /// @param path the new debug info root path. This must be a pointer to a
6188 /// character string which life time should be greater than the life
6189 /// time of the read context.
6190 void
set_debug_info_root_path(read_context & ctxt,char ** path)6191 set_debug_info_root_path(read_context& ctxt, char** path)
6192 {ctxt.offline_callbacks()->debuginfo_path = path;}
6193
6194 /// Setter of the debug info root path for a dwarf reader context.
6195 ///
6196 /// @param ctxt the dwarf reader context to consider.
6197 ///
6198 /// @return a pointer to the debug info root path.
6199 ///
6200 /// time of the read context.
6201 char**
get_debug_info_root_path(read_context & ctxt)6202 get_debug_info_root_path(read_context& ctxt)
6203 {return ctxt.offline_callbacks()->debuginfo_path;}
6204
6205 /// Getter of the "show_stats" flag.
6206 ///
6207 /// This flag tells if we should emit statistics about various
6208 /// internal stuff.
6209 ///
6210 /// @param ctx the read context to consider for this flag.
6211 ///
6212 /// @return the value of the flag.
6213 bool
get_show_stats(read_context & ctxt)6214 get_show_stats(read_context& ctxt)
6215 {return ctxt.show_stats();}
6216
6217 /// Setter of the "show_stats" flag.
6218 ///
6219 /// This flag tells if we should emit statistics about various
6220 /// internal stuff.
6221 ///
6222 /// @param ctxt the read context to consider for this flag.
6223 ///
6224 /// @param f the value of the flag.
6225 void
set_show_stats(read_context & ctxt,bool f)6226 set_show_stats(read_context& ctxt, bool f)
6227 {ctxt.show_stats(f);}
6228
6229 /// Setter of the "drop_undefined_syms" flag.
6230 ///
6231 /// This flag tells if we should drop functions or variables
6232 /// with undefined symbols.
6233 ///
6234 /// @param ctxt the read context to consider for this flag.
6235 ///
6236 /// @param f the value of the flag.
6237 void
set_drop_undefined_syms(read_context & ctxt,bool f)6238 set_drop_undefined_syms(read_context& ctxt, bool f)
6239 {ctxt.drop_undefined_syms(f);}
6240
6241 /// Setter of the "do_log" flag.
6242 ///
6243 /// This flag tells if we should emit verbose logs for various
6244 /// internal things related to DWARF reading.
6245 ///
6246 /// @param ctxt the DWARF reading context to consider.
6247 ///
6248 /// @param f the new value of the flag.
6249 void
set_do_log(read_context & ctxt,bool f)6250 set_do_log(read_context& ctxt, bool f)
6251 {ctxt.do_log(f);}
6252
6253 /// Get the offset of a DIE
6254 ///
6255 /// @param die the DIE to consider.
6256 ///
6257 /// @return the offset of the DIE.
6258 static Dwarf_Off
die_offset(Dwarf_Die * die)6259 die_offset(Dwarf_Die* die)
6260 {return dwarf_dieoffset(die);}
6261
6262 /// Get the offset of a DIE
6263 ///
6264 /// @param die the DIE to consider.
6265 ///
6266 /// @return the offset of the DIE.
6267 static Dwarf_Off
die_offset(const Dwarf_Die * die)6268 die_offset(const Dwarf_Die* die)
6269 {return die_offset(const_cast<Dwarf_Die*>(die));}
6270
6271 /// Test if a given DIE is anonymous
6272 ///
6273 /// @param die the DIE to consider.
6274 ///
6275 /// @return true iff @p die is anonymous.
6276 static bool
die_is_anonymous(const Dwarf_Die * die)6277 die_is_anonymous(const Dwarf_Die* die)
6278 {
6279 Dwarf_Attribute attr;
6280 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), DW_AT_name, &attr))
6281 return true;
6282 return false;
6283 }
6284
6285 /// Get the value of an attribute that is supposed to be a string, or
6286 /// an empty string if the attribute could not be found.
6287 ///
6288 /// @param die the DIE to get the attribute value from.
6289 ///
6290 /// @param attr_name the attribute name. Must come from dwarf.h and
6291 /// be an enumerator representing an attribute like, e.g, DW_AT_name.
6292 ///
6293 /// @return the string representing the value of the attribute, or an
6294 /// empty string if no string attribute could be found.
6295 static string
die_string_attribute(const Dwarf_Die * die,unsigned attr_name)6296 die_string_attribute(const Dwarf_Die* die, unsigned attr_name)
6297 {
6298 if (!die)
6299 return "";
6300
6301 Dwarf_Attribute attr;
6302 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
6303 return "";
6304
6305 const char* str = dwarf_formstring(&attr);
6306 return str ? str : "";
6307 }
6308
6309 /// Get the value of an attribute that is supposed to be an unsigned
6310 /// constant.
6311 ///
6312 /// @param die the DIE to read the information from.
6313 ///
6314 /// @param attr_name the DW_AT_* name of the attribute. Must come
6315 /// from dwarf.h and be an enumerator representing an attribute like,
6316 /// e.g, DW_AT_decl_line.
6317 ///
6318 ///@param cst the output parameter that is set to the value of the
6319 /// attribute @p attr_name. This parameter is set iff the function
6320 /// return true.
6321 ///
6322 /// @return true if there was an attribute of the name @p attr_name
6323 /// and with a value that is a constant, false otherwise.
6324 static bool
die_unsigned_constant_attribute(const Dwarf_Die * die,unsigned attr_name,uint64_t & cst)6325 die_unsigned_constant_attribute(const Dwarf_Die* die,
6326 unsigned attr_name,
6327 uint64_t& cst)
6328 {
6329 if (!die)
6330 return false;
6331
6332 Dwarf_Attribute attr;
6333 Dwarf_Word result = 0;
6334 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6335 || dwarf_formudata(&attr, &result))
6336 return false;
6337
6338 cst = result;
6339 return true;
6340 }
6341
6342 /// Read a signed constant value from a given attribute.
6343 ///
6344 /// The signed constant expected must be of constant form.
6345 ///
6346 /// @param die the DIE to get the attribute from.
6347 ///
6348 /// @param attr_name the attribute name.
6349 ///
6350 /// @param cst the resulting signed constant read.
6351 ///
6352 /// @return true iff a signed constant attribute of the name @p
6353 /// attr_name was found on the DIE @p die.
6354 static bool
die_signed_constant_attribute(const Dwarf_Die * die,unsigned attr_name,int64_t & cst)6355 die_signed_constant_attribute(const Dwarf_Die *die,
6356 unsigned attr_name,
6357 int64_t& cst)
6358 {
6359 if (!die)
6360 return false;
6361
6362 Dwarf_Attribute attr;
6363 Dwarf_Sword result = 0;
6364 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6365 || dwarf_formsdata(&attr, &result))
6366 return false;
6367
6368 cst = result;
6369 return true;
6370 }
6371
6372 /// Read the value of a constant attribute that is either signed or
6373 /// unsigned into a array_type_def::subrange_type::bound_value value.
6374 ///
6375 /// The bound_value instance will capture the actual signedness of the
6376 /// read attribute.
6377 ///
6378 /// @param die the DIE from which to read the value of the attribute.
6379 ///
6380 /// @param attr_name the attribute name to consider.
6381 ///
6382 /// @param is_signed true if the attribute value has to read as
6383 /// signed.
6384 ///
6385 /// @param value the resulting value read from attribute @p attr_name
6386 /// on DIE @p die.
6387 ///
6388 /// @return true iff DIE @p die has an attribute named @p attr_name
6389 /// with a constant value.
6390 static bool
die_constant_attribute(const Dwarf_Die * die,unsigned attr_name,bool is_signed,array_type_def::subrange_type::bound_value & value)6391 die_constant_attribute(const Dwarf_Die *die,
6392 unsigned attr_name,
6393 bool is_signed,
6394 array_type_def::subrange_type::bound_value &value)
6395 {
6396 if (!is_signed)
6397 {
6398 uint64_t l = 0;
6399 if (!die_unsigned_constant_attribute(die, attr_name, l))
6400 return false;
6401 value.set_unsigned(l);
6402 }
6403 else
6404 {
6405 int64_t l = 0;
6406 if (!die_signed_constant_attribute(die, attr_name, l))
6407 return false;
6408 value.set_signed(l);
6409 }
6410 return true;
6411 }
6412
/// Test if a DWARF form is one of DW_FORM_strx1, DW_FORM_strx2,
/// DW_FORM_strx3 or DW_FORM_strx4.
///
/// Unfortunately, these forms are enumerators of an untagged enum in
/// dwarf.h, so the form has to be passed as an unsigned int.
///
/// The comparison is only compiled in when all of the
/// HAVE_DW_FORM_strx{1,2,3,4} macros are defined; otherwise the
/// function always returns false.
///
/// @param form the form to consider.
///
/// @return true iff @p form is one of DW_FORM_strx{1,2,3,4}.
static bool
form_is_DW_FORM_strx(unsigned form)
{
  // A zero form never matches.
  if (form == 0)
    return false;

#if defined HAVE_DW_FORM_strx1 \
  && defined HAVE_DW_FORM_strx2 \
  && defined HAVE_DW_FORM_strx3 \
  && defined HAVE_DW_FORM_strx4
  switch (form)
    {
    case DW_FORM_strx1:
    case DW_FORM_strx2:
    case DW_FORM_strx3:
    case DW_FORM_strx4:
      return true;

    default:
      break;
    }
#endif

  return false;
}
6440
/// Test if a DWARF form is DW_FORM_line_strp.
///
/// Unfortunately, DW_FORM_line_strp is an enumerator of an untagged
/// enum in dwarf.h, so the form has to be passed as an unsigned int.
///
/// The comparison is only compiled in when HAVE_DW_FORM_line_strp is
/// defined; otherwise the function always returns false.
///
/// @param form the form to consider.
///
/// @return true iff @p form is DW_FORM_line_strp.
static bool
form_is_DW_FORM_line_strp(unsigned form)
{
  // A zero form never matches.
  if (form == 0)
    return false;

#if defined HAVE_DW_FORM_line_strp
  if (form == DW_FORM_line_strp)
    return true;
#endif

  return false;
}
6462
6463 /// Get the value of a DIE attribute; that value is meant to be a
6464 /// flag.
6465 ///
6466 /// @param die the DIE to get the attribute from.
6467 ///
6468 /// @param attr_name the DW_AT_* name of the attribute. Must come
6469 /// from dwarf.h and be an enumerator representing an attribute like,
6470 /// e.g, DW_AT_external.
6471 ///
6472 /// @param flag the output parameter to store the flag value into.
6473 /// This is set iff the function returns true.
6474 ///
6475 /// @param recursively if true, the function looks through the
6476 /// possible DW_AT_specification and DW_AT_abstract_origin attribute
6477 /// all the way down to the initial DIE that is cloned and look on
6478 /// that DIE to see if it has the @p attr_name attribute.
6479 ///
6480 /// @return true if the DIE has a flag attribute named @p attr_name,
6481 /// false otherwise.
6482 static bool
die_flag_attribute(const Dwarf_Die * die,unsigned attr_name,bool & flag,bool recursively=true)6483 die_flag_attribute(const Dwarf_Die* die,
6484 unsigned attr_name,
6485 bool& flag,
6486 bool recursively = true)
6487 {
6488 Dwarf_Attribute attr;
6489 if (recursively
6490 ? !dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6491 : !dwarf_attr(const_cast<Dwarf_Die*>(die), attr_name, &attr))
6492 return false;
6493
6494 bool f = false;
6495 if (dwarf_formflag(&attr, &f))
6496 return false;
6497
6498 flag = f;
6499 return true;
6500 }
6501
6502 /// Get the mangled name from a given DIE.
6503 ///
6504 /// @param die the DIE to read the mangled name from.
6505 ///
6506 /// @return the mangled name if it's present in the DIE, or just an
6507 /// empty string if it's not.
6508 static string
die_linkage_name(const Dwarf_Die * die)6509 die_linkage_name(const Dwarf_Die* die)
6510 {
6511 if (!die)
6512 return "";
6513
6514 string linkage_name = die_string_attribute(die, DW_AT_linkage_name);
6515 if (linkage_name.empty())
6516 linkage_name = die_string_attribute(die, DW_AT_MIPS_linkage_name);
6517 return linkage_name;
6518 }
6519
6520 /// Get the file path that is the value of the DW_AT_decl_file
6521 /// attribute on a given DIE, if the DIE is a decl DIE having that
6522 /// attribute.
6523 ///
6524 /// @param die the DIE to consider.
6525 ///
6526 /// @return a string containing the file path that is the logical
6527 /// value of the DW_AT_decl_file attribute. If the DIE @p die
6528 /// doesn't have a DW_AT_decl_file attribute, then the return value is
6529 /// just an empty string.
6530 static string
die_decl_file_attribute(const Dwarf_Die * die)6531 die_decl_file_attribute(const Dwarf_Die* die)
6532 {
6533 if (!die)
6534 return "";
6535
6536 const char* str = dwarf_decl_file(const_cast<Dwarf_Die*>(die));
6537
6538 return str ? str : "";
6539 }
6540
6541 /// Get the value of an attribute which value is supposed to be a
6542 /// reference to a DIE.
6543 ///
6544 /// @param die the DIE to read the value from.
6545 ///
6546 /// @param attr_name the DW_AT_* attribute name to read.
6547 ///
6548 /// @param result the DIE resulting from reading the attribute value.
6549 /// This is set iff the function returns true.
6550 ///
6551 /// @param recursively if true, the function looks through the
6552 /// possible DW_AT_specification and DW_AT_abstract_origin attribute
6553 /// all the way down to the initial DIE that is cloned and look on
6554 /// that DIE to see if it has the @p attr_name attribute.
6555 ///
6556 /// @return true if the DIE @p die contains an attribute named @p
6557 /// attr_name that is a DIE reference, false otherwise.
6558 static bool
die_die_attribute(const Dwarf_Die * die,unsigned attr_name,Dwarf_Die & result,bool recursively)6559 die_die_attribute(const Dwarf_Die* die,
6560 unsigned attr_name,
6561 Dwarf_Die& result,
6562 bool recursively)
6563 {
6564 Dwarf_Attribute attr;
6565 if (recursively
6566 ? !dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6567 : !dwarf_attr(const_cast<Dwarf_Die*>(die), attr_name, &attr))
6568 return false;
6569
6570 return dwarf_formref_die(&attr, &result);
6571 }
6572
6573 /// Read and return an addresss class attribute from a given DIE.
6574 ///
6575 /// @param die the DIE to consider.
6576 ///
6577 /// @param attr_name the name of the address class attribute to read
6578 /// the value from.
6579 ///
6580 /// @param the resulting address.
6581 ///
6582 /// @return true iff the attribute could be read, was of the expected
6583 /// address class and could thus be translated into the @p result.
6584 static bool
die_address_attribute(Dwarf_Die * die,unsigned attr_name,Dwarf_Addr & result)6585 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result)
6586 {
6587 Dwarf_Attribute attr;
6588 if (!dwarf_attr_integrate(die, attr_name, &attr))
6589 return false;
6590 return dwarf_formaddr(&attr, &result) == 0;
6591 }
6592
6593 /// Returns the source location associated with a decl DIE.
6594 ///
6595 /// @param ctxt the @ref read_context to use.
6596 ///
6597 /// @param die the DIE the read the source location from.
6598 ///
6599 /// @return the location associated with @p die.
6600 static location
die_location(const read_context & ctxt,const Dwarf_Die * die)6601 die_location(const read_context& ctxt, const Dwarf_Die* die)
6602 {
6603 if (!die)
6604 return location();
6605
6606 string file = die_decl_file_attribute(die);
6607 uint64_t line = 0;
6608 die_unsigned_constant_attribute(die, DW_AT_decl_line, line);
6609
6610 if (!file.empty() && line != 0)
6611 {
6612 translation_unit_sptr tu = ctxt.cur_transl_unit();
6613 location l = tu->get_loc_mgr().create_new_location(file, line, 1);
6614 return l;
6615 }
6616 return location();
6617 }
6618
6619 /// Return a copy of the name of a DIE.
6620 ///
6621 /// @param die the DIE to consider.
6622 ///
6623 /// @return a copy of the name of the DIE.
6624 static string
die_name(const Dwarf_Die * die)6625 die_name(const Dwarf_Die* die)
6626 {
6627 string name = die_string_attribute(die, DW_AT_name);
6628 return name;
6629 }
6630
6631 /// Return the location, the name and the mangled name of a given DIE.
6632 ///
6633 /// @param ctxt the read context to use.
6634 ///
6635 /// @param die the DIE to read location and names from.
6636 ///
6637 /// @param loc the location output parameter to set.
6638 ///
6639 /// @param name the name output parameter to set.
6640 ///
6641 /// @param linkage_name the linkage_name output parameter to set.
6642 static void
die_loc_and_name(const read_context & ctxt,Dwarf_Die * die,location & loc,string & name,string & linkage_name)6643 die_loc_and_name(const read_context& ctxt,
6644 Dwarf_Die* die,
6645 location& loc,
6646 string& name,
6647 string& linkage_name)
6648 {
6649 loc = die_location(ctxt, die);
6650 name = die_name(die);
6651 linkage_name = die_linkage_name(die);
6652 }
6653
6654 /// Get the size of a (type) DIE as the value for the parameter
6655 /// DW_AT_byte_size or DW_AT_bit_size.
6656 ///
6657 /// @param die the DIE to read the information from.
6658 ///
6659 /// @param size the resulting size in bits. This is set iff the
6660 /// function return true.
6661 ///
6662 /// @return true if the size attribute was found.
6663 static bool
die_size_in_bits(const Dwarf_Die * die,uint64_t & size)6664 die_size_in_bits(const Dwarf_Die* die, uint64_t& size)
6665 {
6666 if (!die)
6667 return false;
6668
6669 uint64_t byte_size = 0, bit_size = 0;
6670
6671 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
6672 {
6673 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
6674 return false;
6675 }
6676 else
6677 bit_size = byte_size * 8;
6678
6679 size = bit_size;
6680
6681 return true;
6682 }
6683
6684 /// Get the access specifier (from the DW_AT_accessibility attribute
6685 /// value) of a given DIE.
6686 ///
6687 /// @param die the DIE to consider.
6688 ///
6689 /// @param access the resulting access. This is set iff the function
6690 /// returns true.
6691 ///
6692 /// @return bool if the DIE contains the DW_AT_accessibility die.
6693 static bool
die_access_specifier(Dwarf_Die * die,access_specifier & access)6694 die_access_specifier(Dwarf_Die * die, access_specifier& access)
6695 {
6696 if (!die)
6697 return false;
6698
6699 uint64_t a = 0;
6700 if (!die_unsigned_constant_attribute(die, DW_AT_accessibility, a))
6701 return false;
6702
6703 access_specifier result = private_access;
6704
6705 switch (a)
6706 {
6707 case private_access:
6708 result = private_access;
6709 break;
6710
6711 case protected_access:
6712 result = protected_access;
6713 break;
6714
6715 case public_access:
6716 result = public_access;
6717 break;
6718
6719 default:
6720 break;
6721 }
6722
6723 access = result;
6724 return true;
6725 }
6726
6727 /// Test whether a given DIE represents a decl that is public. That
6728 /// is, one with the DW_AT_external attribute set.
6729 ///
6730 /// @param die the DIE to consider for testing.
6731 ///
6732 /// @return true if a DW_AT_external attribute is present and its
6733 /// value is set to the true; return false otherwise.
6734 static bool
die_is_public_decl(const Dwarf_Die * die)6735 die_is_public_decl(const Dwarf_Die* die)
6736 {
6737 bool is_public = false;
6738 die_flag_attribute(die, DW_AT_external, is_public);
6739 return is_public;
6740 }
6741
6742 /// Test if a DIE is effectively public.
6743 ///
6744 /// This is meant to return true when either the DIE is public or when
6745 /// it's a variable DIE that is at (global) namespace level.
6746 ///
6747 /// @return true iff either the DIE is public or is a variable DIE
6748 /// that is at (global) namespace level.
6749 static bool
die_is_effectively_public_decl(const read_context & ctxt,const Dwarf_Die * die)6750 die_is_effectively_public_decl(const read_context& ctxt,
6751 const Dwarf_Die* die)
6752 {
6753 if (die_is_public_decl(die))
6754 return true;
6755
6756 unsigned tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
6757 if (tag == DW_TAG_variable || tag == DW_TAG_member)
6758 {
6759 // The DIE is a variable.
6760 Dwarf_Die parent_die;
6761 size_t where_offset = 0;
6762 if (!get_parent_die(ctxt, die, parent_die, where_offset))
6763 return false;
6764
6765 tag = dwarf_tag(&parent_die);
6766 if (tag == DW_TAG_compile_unit
6767 || tag == DW_TAG_partial_unit
6768 || tag == DW_TAG_type_unit)
6769 // The DIE is at global scope.
6770 return true;
6771
6772 if (tag == DW_TAG_namespace)
6773 {
6774 string name = die_name(&parent_die);
6775 if (name.empty())
6776 // The DIE at unnamed namespace scope, so it's not public.
6777 return false;
6778 // The DIE is at namespace scope.
6779 return true;
6780 }
6781 }
6782 return false;
6783 }
6784
6785 /// Test whether a given DIE represents a declaration-only DIE.
6786 ///
6787 /// That is, if the DIE has the DW_AT_declaration flag set.
6788 ///
6789 /// @param die the DIE to consider.
6790 //
6791 /// @return true if a DW_AT_declaration is present, false otherwise.
6792 static bool
die_is_declaration_only(Dwarf_Die * die)6793 die_is_declaration_only(Dwarf_Die* die)
6794 {
6795 bool is_declaration_only = false;
6796 die_flag_attribute(die, DW_AT_declaration, is_declaration_only, false);
6797 return is_declaration_only;
6798 }
6799
6800 /// Tests whether a given DIE is artificial.
6801 ///
6802 /// @param die the test to test for.
6803 ///
6804 /// @return true if the DIE is artificial, false otherwise.
6805 static bool
die_is_artificial(Dwarf_Die * die)6806 die_is_artificial(Dwarf_Die* die)
6807 {
6808 bool is_artificial;
6809 return die_flag_attribute(die, DW_AT_artificial, is_artificial);
6810 }
6811
6812 ///@return true if a tag represents a type, false otherwise.
6813 ///
6814 ///@param tag the tag to consider.
6815 static bool
is_type_tag(unsigned tag)6816 is_type_tag(unsigned tag)
6817 {
6818 bool result = false;
6819
6820 switch (tag)
6821 {
6822 case DW_TAG_array_type:
6823 case DW_TAG_class_type:
6824 case DW_TAG_enumeration_type:
6825 case DW_TAG_pointer_type:
6826 case DW_TAG_reference_type:
6827 case DW_TAG_string_type:
6828 case DW_TAG_structure_type:
6829 case DW_TAG_subroutine_type:
6830 case DW_TAG_typedef:
6831 case DW_TAG_union_type:
6832 case DW_TAG_ptr_to_member_type:
6833 case DW_TAG_set_type:
6834 case DW_TAG_subrange_type:
6835 case DW_TAG_base_type:
6836 case DW_TAG_const_type:
6837 case DW_TAG_file_type:
6838 case DW_TAG_packed_type:
6839 case DW_TAG_thrown_type:
6840 case DW_TAG_volatile_type:
6841 case DW_TAG_restrict_type:
6842 case DW_TAG_interface_type:
6843 case DW_TAG_unspecified_type:
6844 case DW_TAG_shared_type:
6845 case DW_TAG_rvalue_reference_type:
6846 case DW_TAG_coarray_type:
6847 case DW_TAG_atomic_type:
6848 case DW_TAG_immutable_type:
6849 result = true;
6850 break;
6851
6852 default:
6853 result = false;
6854 break;
6855 }
6856
6857 return result;
6858 }
6859
6860 /// Test if a given DIE is a type to be canonicalized. note that a
6861 /// function DIE (DW_TAG_subprogram) is considered to be a
6862 /// canonicalize-able type too because we can consider that DIE as
6863 /// being the type of the function, as well as the function decl
6864 /// itself.
6865 ///
6866 /// @param tag the tag of the DIE to consider.
6867 ///
6868 /// @return true iff the DIE of tag @p tag is a canonicalize-able DIE.
6869 static bool
is_canonicalizeable_type_tag(unsigned tag)6870 is_canonicalizeable_type_tag(unsigned tag)
6871 {
6872 bool result = false;
6873
6874 switch (tag)
6875 {
6876 case DW_TAG_array_type:
6877 case DW_TAG_class_type:
6878 case DW_TAG_enumeration_type:
6879 case DW_TAG_pointer_type:
6880 case DW_TAG_reference_type:
6881 case DW_TAG_structure_type:
6882 case DW_TAG_subroutine_type:
6883 case DW_TAG_subprogram:
6884 case DW_TAG_typedef:
6885 case DW_TAG_union_type:
6886 case DW_TAG_base_type:
6887 case DW_TAG_const_type:
6888 case DW_TAG_volatile_type:
6889 case DW_TAG_restrict_type:
6890 case DW_TAG_rvalue_reference_type:
6891 result = true;
6892 break;
6893
6894 default:
6895 result = false;
6896 break;
6897 }
6898
6899 return result;
6900 }
6901
6902 /// Test if a DIE tag represents a declaration.
6903 ///
6904 /// @param tag the DWARF tag to consider.
6905 ///
6906 /// @return true iff @p tag is for a declaration.
6907 static bool
is_decl_tag(unsigned tag)6908 is_decl_tag(unsigned tag)
6909 {
6910 switch (tag)
6911 {
6912 case DW_TAG_formal_parameter:
6913 case DW_TAG_imported_declaration:
6914 case DW_TAG_member:
6915 case DW_TAG_unspecified_parameters:
6916 case DW_TAG_subprogram:
6917 case DW_TAG_variable:
6918 case DW_TAG_namespace:
6919 case DW_TAG_GNU_template_template_param:
6920 case DW_TAG_GNU_template_parameter_pack:
6921 case DW_TAG_GNU_formal_parameter_pack:
6922 return true;
6923 }
6924 return false;
6925 }
6926
6927 /// Test if a DIE represents a type DIE.
6928 ///
6929 /// @param die the DIE to consider.
6930 ///
6931 /// @return true if @p die represents a type, false otherwise.
6932 static bool
die_is_type(const Dwarf_Die * die)6933 die_is_type(const Dwarf_Die* die)
6934 {
6935 if (!die)
6936 return false;
6937 return is_type_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
6938 }
6939
6940 /// Test if a DIE represents a declaration.
6941 ///
6942 /// @param die the DIE to consider.
6943 ///
6944 /// @return true if @p die represents a decl, false otherwise.
6945 static bool
die_is_decl(const Dwarf_Die * die)6946 die_is_decl(const Dwarf_Die* die)
6947 {
6948 if (!die)
6949 return false;
6950 return is_decl_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
6951 }
6952
6953 /// Test if a DIE represents a namespace.
6954 ///
6955 /// @param die the DIE to consider.
6956 ///
6957 /// @return true if @p die represents a namespace, false otherwise.
6958 static bool
die_is_namespace(const Dwarf_Die * die)6959 die_is_namespace(const Dwarf_Die* die)
6960 {
6961 if (!die)
6962 return false;
6963 return (dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_namespace);
6964 }
6965
6966 /// Test if a DIE has tag DW_TAG_unspecified_type.
6967 ///
6968 /// @param die the DIE to consider.
6969 ///
6970 /// @return true if @p die has tag DW_TAG_unspecified_type.
6971 static bool
die_is_unspecified(Dwarf_Die * die)6972 die_is_unspecified(Dwarf_Die* die)
6973 {
6974 if (!die)
6975 return false;
6976 return (dwarf_tag(die) == DW_TAG_unspecified_type);
6977 }
6978
6979 /// Test if a DIE represents a void type.
6980 ///
6981 /// @param die the DIE to consider.
6982 ///
6983 /// @return true if @p die represents a void type, false otherwise.
6984 static bool
die_is_void_type(Dwarf_Die * die)6985 die_is_void_type(Dwarf_Die* die)
6986 {
6987 if (!die || dwarf_tag(die) != DW_TAG_base_type)
6988 return false;
6989
6990 string name = die_name(die);
6991 if (name == "void")
6992 return true;
6993
6994 return false;
6995 }
6996
6997 /// Test if a DIE represents a pointer type.
6998 ///
6999 /// @param die the die to consider.
7000 ///
7001 /// @return true iff @p die represents a pointer type.
7002 static bool
die_is_pointer_type(const Dwarf_Die * die)7003 die_is_pointer_type(const Dwarf_Die* die)
7004 {
7005 if (!die)
7006 return false;
7007
7008 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7009 if (tag == DW_TAG_pointer_type)
7010 return true;
7011
7012 return false;
7013 }
7014
7015 /// Test if a DIE is for a pointer, reference or qualified type to
7016 /// anonymous class or struct.
7017 ///
7018 /// @param die the DIE to consider.
7019 ///
7020 /// @return true iff @p is for a pointer, reference or qualified type
7021 /// to anonymous class or struct.
7022 static bool
pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die * die)7023 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die)
7024 {
7025 if (!die_is_pointer_or_reference_type(die)
7026 && !die_is_qualified_type(die))
7027 return false;
7028
7029 Dwarf_Die underlying_type_die;
7030 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
7031 return false;
7032
7033 if (!die_is_class_type(&underlying_type_die))
7034 return false;
7035
7036 string name = die_name(&underlying_type_die);
7037
7038 return name.empty();
7039 }
7040
7041 /// Test if a DIE represents a reference type.
7042 ///
7043 /// @param die the die to consider.
7044 ///
7045 /// @return true iff @p die represents a reference type.
7046 static bool
die_is_reference_type(const Dwarf_Die * die)7047 die_is_reference_type(const Dwarf_Die* die)
7048 {
7049 if (!die)
7050 return false;
7051
7052 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7053 if (tag == DW_TAG_reference_type || tag == DW_TAG_rvalue_reference_type)
7054 return true;
7055
7056 return false;
7057 }
7058
7059 /// Test if a DIE represents an array type.
7060 ///
7061 /// @param die the die to consider.
7062 ///
7063 /// @return true iff @p die represents an array type.
7064 static bool
die_is_array_type(const Dwarf_Die * die)7065 die_is_array_type(const Dwarf_Die* die)
7066 {
7067 if (!die)
7068 return false;
7069
7070 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7071 if (tag == DW_TAG_array_type)
7072 return true;
7073
7074 return false;
7075 }
7076
7077 /// Test if a DIE represents a pointer, reference or array type.
7078 ///
7079 /// @param die the die to consider.
7080 ///
7081 /// @return true iff @p die represents a pointer or reference type.
7082 static bool
die_is_pointer_or_reference_type(const Dwarf_Die * die)7083 die_is_pointer_or_reference_type(const Dwarf_Die* die)
7084 {return (die_is_pointer_type(die)
7085 || die_is_reference_type(die)
7086 || die_is_array_type(die));}
7087
7088 /// Test if a DIE represents a pointer, a reference or a typedef type.
7089 ///
7090 /// @param die the die to consider.
7091 ///
7092 /// @return true iff @p die represents a pointer, a reference or a
7093 /// typedef type.
7094 static bool
die_is_pointer_reference_or_typedef_type(const Dwarf_Die * die)7095 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die)
7096 {return (die_is_pointer_or_reference_type(die)
7097 || dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_typedef);}
7098
7099 /// Test if a DIE represents a class type.
7100 ///
7101 /// @param die the die to consider.
7102 ///
7103 /// @return true iff @p die represents a class type.
7104 static bool
die_is_class_type(const Dwarf_Die * die)7105 die_is_class_type(const Dwarf_Die* die)
7106 {
7107 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7108
7109 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
7110 return true;
7111
7112 return false;
7113 }
7114
7115 /// Test if a DIE is for a qualified type.
7116 ///
7117 /// @param die the DIE to consider.
7118 ///
7119 /// @return true iff @p die is for a qualified type.
7120 static bool
die_is_qualified_type(const Dwarf_Die * die)7121 die_is_qualified_type(const Dwarf_Die* die)
7122 {
7123 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7124 if (tag == DW_TAG_const_type
7125 || tag == DW_TAG_volatile_type
7126 || tag == DW_TAG_restrict_type)
7127 return true;
7128
7129 return false;
7130 }
7131
7132 /// Test if a DIE is for a function type.
7133 ///
7134 /// @param die the DIE to consider.
7135 ///
7136 /// @return true iff @p die is for a function type.
7137 static bool
die_is_function_type(const Dwarf_Die * die)7138 die_is_function_type(const Dwarf_Die *die)
7139 {
7140 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7141 if (tag == DW_TAG_subprogram || tag == DW_TAG_subroutine_type)
7142 return true;
7143
7144 return false;
7145 }
7146
7147 /// Test if a DIE for a function pointer or member function has an
7148 /// DW_AT_object_pointer attribute.
7149 ///
7150 /// @param die the DIE to consider.
7151 ///
7152 /// @param object_pointer out parameter. It's set to the DIE for the
7153 /// object pointer iff the function returns true.
7154 ///
7155 /// @return true iff the DIE @p die has an object pointer. In that
7156 /// case, the parameter @p object_pointer is set to the DIE of that
7157 /// object pointer.
7158 static bool
die_has_object_pointer(const Dwarf_Die * die,Dwarf_Die & object_pointer)7159 die_has_object_pointer(const Dwarf_Die* die, Dwarf_Die& object_pointer)
7160 {
7161 if (!die)
7162 return false;
7163
7164 if (die_die_attribute(die, DW_AT_object_pointer, object_pointer))
7165 return true;
7166
7167 return false;
7168 }
7169
7170 /// Test if a DIE has children DIEs.
7171 ///
7172 /// @param die the DIE to consider.
7173 ///
7174 /// @return true iff @p DIE has at least one child node.
7175 static bool
die_has_children(const Dwarf_Die * die)7176 die_has_children(const Dwarf_Die* die)
7177 {
7178 if (!die)
7179 return false;
7180
7181 Dwarf_Die child;
7182 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
7183 return true;
7184
7185 return false;
7186 }
7187
7188 /// When given the object pointer DIE of a function type or member
7189 /// function DIE, this function returns the "this" pointer that points
7190 /// to the associated class.
7191 ///
7192 /// @param die the DIE of the object pointer of the function or member
7193 /// function to consider.
7194 ///
7195 /// @param this_pointer_die out parameter. This is set to the DIE of
7196 /// the "this" pointer iff the function returns true.
7197 ///
7198 /// @return true iff the function found the "this" pointer from the
7199 /// object pointer DIE @p die. In that case, the parameter @p
7200 /// this_pointer_die is set to the DIE of that "this" pointer.
7201 static bool
die_this_pointer_from_object_pointer(Dwarf_Die * die,Dwarf_Die & this_pointer_die)7202 die_this_pointer_from_object_pointer(Dwarf_Die* die,
7203 Dwarf_Die& this_pointer_die)
7204 {
7205 ABG_ASSERT(die);
7206 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
7207
7208 if (die_die_attribute(die, DW_AT_type, this_pointer_die))
7209 return true;
7210
7211 return false;
7212 }
7213
7214 /// Test if a given "this" pointer that points to a particular class
7215 /// type is for a const class or not. If it's for a const class, then
7216 /// it means the function type or the member function associated to
7217 /// that "this" pointer is const.
7218 ///
7219 /// @param die the DIE of the "this" pointer to consider.
7220 ///
7221 /// @return true iff @p die points to a const class type.
7222 static bool
die_this_pointer_is_const(Dwarf_Die * die)7223 die_this_pointer_is_const(Dwarf_Die* die)
7224 {
7225 ABG_ASSERT(die);
7226
7227 if (dwarf_tag(die) == DW_TAG_pointer_type)
7228 {
7229 Dwarf_Die pointed_to_type_die;
7230 if (die_die_attribute(die, DW_AT_type, pointed_to_type_die))
7231 if (dwarf_tag(&pointed_to_type_die) == DW_TAG_const_type)
7232 return true;
7233 }
7234
7235 return false;
7236 }
7237
7238 /// Test if an object pointer (referred-to via a DW_AT_object_pointer
7239 /// attribute) points to a const implicit class and so is for a const
7240 /// method or or a const member function type.
7241 ///
7242 /// @param die the DIE of the object pointer to consider.
7243 ///
7244 /// @return true iff the object pointer represented by @p die is for a
7245 /// a const method or const member function type.
7246 static bool
die_object_pointer_is_for_const_method(Dwarf_Die * die)7247 die_object_pointer_is_for_const_method(Dwarf_Die* die)
7248 {
7249 ABG_ASSERT(die);
7250 ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
7251
7252 Dwarf_Die this_pointer_die;
7253 if (die_this_pointer_from_object_pointer(die, this_pointer_die))
7254 if (die_this_pointer_is_const(&this_pointer_die))
7255 return true;
7256
7257 return false;
7258 }
7259
7260 /// Test if a DIE represents an entity that is at class scope.
7261 ///
7262 /// @param ctxt the read context to use.
7263 ///
7264 /// @param die the DIE to consider.
7265 ///
7266 /// @param where_offset where we are logically at in the DIE stream.
7267 ///
7268 /// @param class_scope_die out parameter. Set to the DIE of the
7269 /// containing class iff @p die happens to be at class scope; that is,
7270 /// iff the function returns true.
7271 ///
7272 /// @return true iff @p die is at class scope. In that case, @p
7273 /// class_scope_die is set to the DIE of the class that contains @p
7274 /// die.
7275 static bool
die_is_at_class_scope(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & class_scope_die)7276 die_is_at_class_scope(const read_context& ctxt,
7277 const Dwarf_Die* die,
7278 size_t where_offset,
7279 Dwarf_Die& class_scope_die)
7280 {
7281 if (!get_scope_die(ctxt, die, where_offset, class_scope_die))
7282 return false;
7283
7284 int tag = dwarf_tag(&class_scope_die);
7285
7286 return (tag == DW_TAG_structure_type
7287 || tag == DW_TAG_class_type
7288 || tag == DW_TAG_union_type);
7289 }
7290
7291 /// Return the leaf object under a pointer, reference or qualified
7292 /// type DIE.
7293 ///
7294 /// @param die the DIE of the type to consider.
7295 ///
7296 /// @param peeled_die out parameter. Set to the DIE of the leaf
7297 /// object iff the function actually peeled anything.
7298 ///
7299 /// @return true upon successful completion.
7300 static bool
die_peel_qual_ptr(Dwarf_Die * die,Dwarf_Die & peeled_die)7301 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die)
7302 {
7303 if (!die)
7304 return false;
7305
7306 int tag = dwarf_tag(die);
7307
7308 if (tag == DW_TAG_const_type
7309 || tag == DW_TAG_volatile_type
7310 || tag == DW_TAG_restrict_type
7311 || tag == DW_TAG_pointer_type
7312 || tag == DW_TAG_reference_type
7313 || tag == DW_TAG_rvalue_reference_type)
7314 {
7315 if (!die_die_attribute(die, DW_AT_type, peeled_die))
7316 return false;
7317 }
7318 else
7319 return false;
7320
7321 while (tag == DW_TAG_const_type
7322 || tag == DW_TAG_volatile_type
7323 || tag == DW_TAG_restrict_type
7324 || tag == DW_TAG_pointer_type
7325 || tag == DW_TAG_reference_type
7326 || tag == DW_TAG_rvalue_reference_type)
7327 {
7328 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7329 break;
7330 tag = dwarf_tag(&peeled_die);
7331 }
7332
7333 return true;
7334 }
7335
7336 /// Return the leaf object under a typedef type DIE.
7337 ///
7338 /// @param die the DIE of the type to consider.
7339 ///
7340 /// @param peeled_die out parameter. Set to the DIE of the leaf
7341 /// object iff the function actually peeled anything.
7342 ///
7343 /// @return true upon successful completion.
7344 static bool
die_peel_typedef(Dwarf_Die * die,Dwarf_Die & peeled_die)7345 die_peel_typedef(Dwarf_Die *die, Dwarf_Die& peeled_die)
7346 {
7347 if (!die)
7348 return false;
7349
7350 int tag = dwarf_tag(die);
7351
7352 if (tag == DW_TAG_typedef)
7353 {
7354 if (!die_die_attribute(die, DW_AT_type, peeled_die))
7355 return false;
7356 }
7357 else
7358 return false;
7359
7360 while (tag == DW_TAG_typedef)
7361 {
7362 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7363 break;
7364 tag = dwarf_tag(&peeled_die);
7365 }
7366
7367 return true;
7368
7369 }
7370
7371 /// Return the leaf DIE under a pointer, a reference or a typedef DIE.
7372 ///
7373 /// @param die the DIE to consider.
7374 ///
7375 /// @param peeled_die the resulting peeled (or leaf) DIE. This is set
7376 /// iff the function returned true.
7377 ///
7378 /// @return true iff the function could peel @p die.
7379 static bool
die_peel_pointer_and_typedef(const Dwarf_Die * die,Dwarf_Die & peeled_die)7380 die_peel_pointer_and_typedef(const Dwarf_Die *die, Dwarf_Die& peeled_die)
7381 {
7382 if (!die)
7383 return false;
7384
7385 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7386
7387 if (tag == DW_TAG_pointer_type
7388 || tag == DW_TAG_reference_type
7389 || tag == DW_TAG_rvalue_reference_type
7390 || tag == DW_TAG_typedef)
7391 {
7392 if (!die_die_attribute(die, DW_AT_type, peeled_die))
7393 return false;
7394 }
7395 else
7396 return false;
7397
7398 while (tag == DW_TAG_pointer_type
7399 || tag == DW_TAG_reference_type
7400 || tag == DW_TAG_rvalue_reference_type
7401 || tag == DW_TAG_typedef)
7402 {
7403 if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7404 break;
7405 tag = dwarf_tag(&peeled_die);
7406 }
7407 return true;
7408 }
7409
7410 /// Test if a DIE for a function type represents a method type.
7411 ///
7412 /// @param ctxt the read context.
7413 ///
7414 /// @param die the DIE to consider.
7415 ///
7416 /// @param where_offset where we logically are in the stream of DIEs.
7417 ///
7418 /// @param object_pointer_die out parameter. This is set by the
7419 /// function to the DIE that refers to the formal function parameter
7420 /// which holds the implicit "this" pointer of the method. That die
7421 /// is called the object pointer DIE. This is set iff the function
7422 ///
7423 /// @param class_die out parameter. This is set by the function to
7424 /// the DIE that represents the class of the method type. This is set
7425 /// iff the function returns true.
7426 ///
7427 /// @param is_static out parameter. This is set to true by the
7428 /// function if @p die is a static method. This is set iff the
7429 /// function returns true.
7430 ///
7431 /// @return true iff @p die is a DIE for a method type.
7432 static bool
die_function_type_is_method_type(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & object_pointer_die,Dwarf_Die & class_die,bool & is_static)7433 die_function_type_is_method_type(const read_context& ctxt,
7434 const Dwarf_Die *die,
7435 size_t where_offset,
7436 Dwarf_Die& object_pointer_die,
7437 Dwarf_Die& class_die,
7438 bool& is_static)
7439 {
7440 if (!die)
7441 return false;
7442
7443 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7444 ABG_ASSERT(tag == DW_TAG_subroutine_type || tag == DW_TAG_subprogram);
7445
7446 bool has_object_pointer = false;
7447 is_static = false;
7448 if (tag == DW_TAG_subprogram)
7449 {
7450 Dwarf_Die spec_or_origin_die;
7451 if (die_die_attribute(die, DW_AT_specification,
7452 spec_or_origin_die)
7453 || die_die_attribute(die, DW_AT_abstract_origin,
7454 spec_or_origin_die))
7455 {
7456 if (die_has_object_pointer(&spec_or_origin_die,
7457 object_pointer_die))
7458 has_object_pointer = true;
7459 else
7460 {
7461 if (die_is_at_class_scope(ctxt, &spec_or_origin_die,
7462 where_offset, class_die))
7463 is_static = true;
7464 else
7465 return false;
7466 }
7467 }
7468 else
7469 {
7470 if (die_has_object_pointer(die, object_pointer_die))
7471 has_object_pointer = true;
7472 else
7473 {
7474 if (die_is_at_class_scope(ctxt, die, where_offset, class_die))
7475 is_static = true;
7476 else
7477 return false;
7478 }
7479 }
7480 }
7481 else
7482 {
7483 if (die_has_object_pointer(die, object_pointer_die))
7484 has_object_pointer = true;
7485 else
7486 return false;
7487 }
7488
7489 if (!is_static)
7490 {
7491 ABG_ASSERT(has_object_pointer);
7492 // The object pointer die points to a DW_TAG_formal_parameter which
7493 // is the "this" parameter. The type of the "this" parameter is a
7494 // pointer. Let's get that pointer type.
7495 Dwarf_Die this_type_die;
7496 if (!die_die_attribute(&object_pointer_die, DW_AT_type, this_type_die))
7497 return false;
7498
7499 // So the class type is the type pointed to by the type of the "this"
7500 // parameter.
7501 if (!die_peel_qual_ptr(&this_type_die, class_die))
7502 return false;
7503
7504 // And make we return a class type, rather than a typedef to a
7505 // class.
7506 die_peel_typedef(&class_die, class_die);
7507 }
7508
7509 return true;
7510 }
7511
/// The kinds of virtual-ness this reader distinguishes.
enum virtuality
{
  /// The entity is not virtual.
  VIRTUALITY_NOT_VIRTUAL = 0,
  /// The entity is virtual.
  VIRTUALITY_VIRTUAL = 1,
  /// The entity is pure virtual.
  VIRTUALITY_PURE_VIRTUAL = 2
};
7518
7519 /// Get the virtual-ness of a given DIE, that is, the value of the
7520 /// DW_AT_virtuality attribute.
7521 ///
7522 /// @param die the DIE to read from.
7523 ///
7524 /// @param virt the resulting virtuality attribute. This is set iff
7525 /// the function returns true.
7526 ///
7527 /// @return true if the virtual-ness could be determined.
7528 static bool
die_virtuality(const Dwarf_Die * die,virtuality & virt)7529 die_virtuality(const Dwarf_Die* die, virtuality& virt)
7530 {
7531 if (!die)
7532 return false;
7533
7534 uint64_t v = 0;
7535 die_unsigned_constant_attribute(die, DW_AT_virtuality, v);
7536
7537 if (v == DW_VIRTUALITY_virtual)
7538 virt = VIRTUALITY_VIRTUAL;
7539 else if (v == DW_VIRTUALITY_pure_virtual)
7540 virt = VIRTUALITY_PURE_VIRTUAL;
7541 else
7542 virt = VIRTUALITY_NOT_VIRTUAL;
7543
7544 return true;
7545 }
7546
7547 /// Test whether the DIE represent either a virtual base or function.
7548 ///
7549 /// @param die the DIE to consider.
7550 ///
7551 /// @return bool if the DIE represents a virtual base or function,
7552 /// false othersise.
7553 static bool
die_is_virtual(const Dwarf_Die * die)7554 die_is_virtual(const Dwarf_Die* die)
7555 {
7556 virtuality v;
7557 if (!die_virtuality(die, v))
7558 return false;
7559
7560 return v == VIRTUALITY_PURE_VIRTUAL || v == VIRTUALITY_VIRTUAL;
7561 }
7562
7563 /// Test if the DIE represents an entity that was declared inlined.
7564 ///
7565 /// @param die the DIE to test for.
7566 ///
7567 /// @return true if the DIE represents an entity that was declared
7568 /// inlined.
7569 static bool
die_is_declared_inline(Dwarf_Die * die)7570 die_is_declared_inline(Dwarf_Die* die)
7571 {
7572 uint64_t inline_value = 0;
7573 if (!die_unsigned_constant_attribute(die, DW_AT_inline, inline_value))
7574 return false;
7575 return inline_value == DW_INL_declared_inlined;
7576 }
7577
7578 /// This function is a fast routine (optimization) to compare the
7579 /// values of two string attributes of two DIEs.
7580 ///
7581 /// @param l the first DIE to consider.
7582 ///
7583 /// @param r the second DIE to consider.
7584 ///
7585 /// @param attr_name the name of the attribute to compare, on the two
7586 /// DIEs above.
7587 ///
7588 /// @param result out parameter. This is set to the result of the
7589 /// comparison. If the value of attribute @p attr_name on DIE @p l
7590 /// equals the value of attribute @p attr_name on DIE @p r, then the
7591 /// the argument of this parameter is set to true. Otherwise, it's
7592 /// set to false. Note that the argument of this parameter is set iff
7593 /// the function returned true.
7594 ///
7595 /// @return true iff the comparison could be performed. There are
7596 /// cases in which the comparison cannot be performed. For instance,
7597 /// if one of the DIEs does not have the attribute @p attr_name. In
7598 /// any case, if this function returns true, then the parameter @p
7599 /// result is set to the result of the comparison.
7600 static bool
compare_dies_string_attribute_value(const Dwarf_Die * l,const Dwarf_Die * r,unsigned attr_name,bool & result)7601 compare_dies_string_attribute_value(const Dwarf_Die *l, const Dwarf_Die *r,
7602 unsigned attr_name,
7603 bool &result)
7604 {
7605 Dwarf_Attribute l_attr, r_attr;
7606 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(l), attr_name, &l_attr)
7607 || !dwarf_attr_integrate(const_cast<Dwarf_Die*>(r), attr_name, &r_attr))
7608 return false;
7609
7610 ABG_ASSERT(l_attr.form == DW_FORM_strp
7611 || l_attr.form == DW_FORM_string
7612 || l_attr.form == DW_FORM_GNU_strp_alt
7613 || form_is_DW_FORM_strx(l_attr.form)
7614 || form_is_DW_FORM_line_strp(l_attr.form));
7615
7616 ABG_ASSERT(r_attr.form == DW_FORM_strp
7617 || r_attr.form == DW_FORM_string
7618 || r_attr.form == DW_FORM_GNU_strp_alt
7619 || form_is_DW_FORM_strx(r_attr.form)
7620 || form_is_DW_FORM_line_strp(r_attr.form));
7621
7622 if ((l_attr.form == DW_FORM_strp
7623 && r_attr.form == DW_FORM_strp)
7624 || (l_attr.form == DW_FORM_GNU_strp_alt
7625 && r_attr.form == DW_FORM_GNU_strp_alt)
7626 || (form_is_DW_FORM_strx(l_attr.form)
7627 && form_is_DW_FORM_strx(r_attr.form))
7628 || (form_is_DW_FORM_line_strp(l_attr.form)
7629 && form_is_DW_FORM_line_strp(r_attr.form)))
7630 {
7631 // So these string attributes are actually pointers into a
7632 // string table. The string table is most likely de-duplicated
7633 // so comparing the *values* of the pointers should be enough.
7634 //
7635 // This is the fast path.
7636 if (l_attr.valp == r_attr.valp)
7637 result = true;
7638 else if (l_attr.valp && r_attr.valp)
7639 result = *l_attr.valp == *r_attr.valp;
7640 else
7641 result = false;
7642 return true;
7643 }
7644
7645 // If we reached this point it means we couldn't use the fast path
7646 // because the string atttributes are strings that are "inline" in
7647 // the debug info section. Let's just compare them the slow and
7648 // obvious way.
7649 string l_str = die_string_attribute(l, attr_name),
7650 r_str = die_string_attribute(r, attr_name);
7651 result = l_str == r_str;
7652
7653 return true;
7654 }
7655
7656 /// Compare the file path of the compilation units (aka CUs)
7657 /// associated to two DIEs.
7658 ///
7659 /// If the DIEs are for pointers or typedefs, this function also
7660 /// compares the file paths of the CUs of the leaf DIEs (underlying
7661 /// DIEs of the pointer or the typedef).
7662 ///
7663 /// @param l the first type DIE to consider.
7664 ///
7665 /// @param r the second type DIE to consider.
7666 ///
7667 /// @return true iff the file paths of the DIEs of the two types are
7668 /// equal.
7669 static bool
compare_dies_cu_decl_file(const Dwarf_Die * l,const Dwarf_Die * r,bool & result)7670 compare_dies_cu_decl_file(const Dwarf_Die* l, const Dwarf_Die *r, bool &result)
7671 {
7672 Dwarf_Die l_cu, r_cu;
7673 if (!dwarf_diecu(const_cast<Dwarf_Die*>(l), &l_cu, 0, 0)
7674 ||!dwarf_diecu(const_cast<Dwarf_Die*>(r), &r_cu, 0, 0))
7675 return false;
7676
7677 bool compared =
7678 compare_dies_string_attribute_value(&l_cu, &r_cu,
7679 DW_AT_name,
7680 result);
7681 if (compared)
7682 {
7683 Dwarf_Die peeled_l, peeled_r;
7684 if (die_is_pointer_reference_or_typedef_type(l)
7685 && die_is_pointer_reference_or_typedef_type(r)
7686 && die_peel_pointer_and_typedef(l, peeled_l)
7687 && die_peel_pointer_and_typedef(r, peeled_r))
7688 {
7689 if (!dwarf_diecu(&peeled_l, &l_cu, 0, 0)
7690 ||!dwarf_diecu(&peeled_r, &r_cu, 0, 0))
7691 return false;
7692 compared =
7693 compare_dies_string_attribute_value(&l_cu, &r_cu,
7694 DW_AT_name,
7695 result);
7696 }
7697 }
7698
7699 return compared;
7700 }
7701
7702 // -----------------------------------
7703 // <location expression evaluation>
7704 // -----------------------------------
7705
7706 /// Get the value of a given DIE attribute, knowing that it must be a
7707 /// location expression.
7708 ///
7709 /// @param die the DIE to read the attribute from.
7710 ///
7711 /// @param attr_name the name of the attribute to read the value for.
7712 ///
7713 /// @param expr the pointer to allocate and fill with the resulting
7714 /// array of operators + operands forming a dwarf expression. This is
7715 /// set iff the function returns true.
7716 ///
7717 /// @param expr_len the length of the resulting dwarf expression.
7718 /// This is set iff the function returns true.
7719 ///
7720 /// @return true if the attribute exists and has a non-empty dwarf expression
7721 /// as value. In that case the expr and expr_len arguments are set to the
7722 /// resulting dwarf expression.
7723 static bool
die_location_expr(const Dwarf_Die * die,unsigned attr_name,Dwarf_Op ** expr,size_t * expr_len)7724 die_location_expr(const Dwarf_Die* die,
7725 unsigned attr_name,
7726 Dwarf_Op** expr,
7727 size_t* expr_len)
7728 {
7729 if (!die)
7730 return false;
7731
7732 Dwarf_Attribute attr;
7733 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
7734 return false;
7735
7736 size_t len = 0;
7737 bool result = (dwarf_getlocation(&attr, expr, &len) == 0);
7738
7739 // Ignore location expressions where reading them succeeded but
7740 // their length is 0.
7741 result &= len > 0;
7742
7743 if (result)
7744 *expr_len = len;
7745
7746 return result;
7747 }
7748
7749 /// If the current operation in the dwarf expression represents a push
7750 /// of a constant value onto the dwarf expr virtual machine (aka
7751 /// DEVM), perform the operation and update the DEVM.
7752 ///
7753 /// If the result of the operation is a constant, update the DEVM
7754 /// accumulator with its value. Otherwise, the DEVM accumulator is
7755 /// left with its previous value.
7756 ///
7757 /// @param ops the array of the dwarf expression operations to consider.
7758 ///
7759 /// @param ops_len the lengths of @p ops array above.
7760 ///
7761 /// @param index the index of the operation to interpret, in @p ops.
7762 ///
7763 /// @param next_index the index of the operation to interpret at the
7764 /// next step, after this function completed and returned. This is
7765 /// set an output parameter that is set iff the function returns true.
7766 ///
7767 /// @param ctxt the DEVM evaluation context.
7768 ///
7769 /// @return true if the current operation actually pushes a constant
7770 /// value onto the DEVM stack, false otherwise.
7771 static bool
op_pushes_constant_value(Dwarf_Op * ops,size_t ops_len,size_t index,size_t & next_index,dwarf_expr_eval_context & ctxt)7772 op_pushes_constant_value(Dwarf_Op* ops,
7773 size_t ops_len,
7774 size_t index,
7775 size_t& next_index,
7776 dwarf_expr_eval_context& ctxt)
7777 {
7778 ABG_ASSERT(index < ops_len);
7779
7780 Dwarf_Op& op = ops[index];
7781 int64_t value = 0;
7782
7783 switch (op.atom)
7784 {
7785 case DW_OP_addr:
7786 value = ops[index].number;
7787 break;
7788
7789 case DW_OP_const1u:
7790 case DW_OP_const1s:
7791 case DW_OP_const2u:
7792 case DW_OP_const2s:
7793 case DW_OP_const4u:
7794 case DW_OP_const4s:
7795 case DW_OP_const8u:
7796 case DW_OP_const8s:
7797 case DW_OP_constu:
7798 case DW_OP_consts:
7799 value = ops[index].number;
7800 break;
7801
7802 case DW_OP_lit0:
7803 value = 0;
7804 break;
7805 case DW_OP_lit1:
7806 value = 1;
7807 break;
7808 case DW_OP_lit2:
7809 value = 2;
7810 break;
7811 case DW_OP_lit3:
7812 value = 3;
7813 break;
7814 case DW_OP_lit4:
7815 value = 4;
7816 break;
7817 case DW_OP_lit5:
7818 value = 5;
7819 break;
7820 case DW_OP_lit6:
7821 value = 6;
7822 break;
7823 case DW_OP_lit7:
7824 value = 7;
7825 break;
7826 case DW_OP_lit8:
7827 value = 8;
7828 break;
7829 case DW_OP_lit9:
7830 value = 9;
7831 break;
7832 case DW_OP_lit10:
7833 value = 10;
7834 break;
7835 case DW_OP_lit11:
7836 value = 11;
7837 break;
7838 case DW_OP_lit12:
7839 value = 12;
7840 break;
7841 case DW_OP_lit13:
7842 value = 13;
7843 break;
7844 case DW_OP_lit14:
7845 value = 14;
7846 break;
7847 case DW_OP_lit15:
7848 value = 15;
7849 break;
7850 case DW_OP_lit16:
7851 value = 16;
7852 break;
7853 case DW_OP_lit17:
7854 value = 17;
7855 break;
7856 case DW_OP_lit18:
7857 value = 18;
7858 break;
7859 case DW_OP_lit19:
7860 value = 19;
7861 break;
7862 case DW_OP_lit20:
7863 value = 20;
7864 break;
7865 case DW_OP_lit21:
7866 value = 21;
7867 break;
7868 case DW_OP_lit22:
7869 value = 22;
7870 break;
7871 case DW_OP_lit23:
7872 value = 23;
7873 break;
7874 case DW_OP_lit24:
7875 value = 24;
7876 break;
7877 case DW_OP_lit25:
7878 value = 25;
7879 break;
7880 case DW_OP_lit26:
7881 value = 26;
7882 break;
7883 case DW_OP_lit27:
7884 value = 27;
7885 break;
7886 case DW_OP_lit28:
7887 value = 28;
7888 break;
7889 case DW_OP_lit29:
7890 value = 29;
7891 break;
7892 case DW_OP_lit30:
7893 value = 30;
7894 break;
7895 case DW_OP_lit31:
7896 value = 31;
7897 break;
7898
7899 default:
7900 return false;
7901 }
7902
7903 expr_result r(value);
7904 ctxt.push(r);
7905 ctxt.accum = r;
7906 next_index = index + 1;
7907
7908 return true;
7909 }
7910
7911 /// If the current operation in the dwarf expression represents a push
7912 /// of a non-constant value onto the dwarf expr virtual machine (aka
7913 /// DEVM), perform the operation and update the DEVM. A non-constant
7914 /// is namely a quantity for which we need inferior (a running program
7915 /// image) state to know the exact value.
7916 ///
7917 /// Upon successful completion, as the result of the operation is a
7918 /// non-constant the DEVM accumulator value is left to its state as of
7919 /// before the invocation of this function.
7920 ///
7921 /// @param ops the array of the dwarf expression operations to consider.
7922 ///
7923 /// @param ops_len the lengths of @p ops array above.
7924 ///
7925 /// @param index the index of the operation to interpret, in @p ops.
7926 ///
7927 /// @param next_index the index of the operation to interpret at the
7928 /// next step, after this function completed and returned. This is
7929 /// set an output parameter that is set iff the function returns true.
7930 ///
7931 /// @param ctxt the DEVM evaluation context.
7932 ///
7933 /// @return true if the current operation actually pushes a
7934 /// non-constant value onto the DEVM stack, false otherwise.
7935 static bool
op_pushes_non_constant_value(Dwarf_Op * ops,size_t ops_len,size_t index,size_t & next_index,dwarf_expr_eval_context & ctxt)7936 op_pushes_non_constant_value(Dwarf_Op* ops,
7937 size_t ops_len,
7938 size_t index,
7939 size_t& next_index,
7940 dwarf_expr_eval_context& ctxt)
7941 {
7942 ABG_ASSERT(index < ops_len);
7943 Dwarf_Op& op = ops[index];
7944
7945 switch (op.atom)
7946 {
7947 case DW_OP_reg0:
7948 case DW_OP_reg1:
7949 case DW_OP_reg2:
7950 case DW_OP_reg3:
7951 case DW_OP_reg4:
7952 case DW_OP_reg5:
7953 case DW_OP_reg6:
7954 case DW_OP_reg7:
7955 case DW_OP_reg8:
7956 case DW_OP_reg9:
7957 case DW_OP_reg10:
7958 case DW_OP_reg11:
7959 case DW_OP_reg12:
7960 case DW_OP_reg13:
7961 case DW_OP_reg14:
7962 case DW_OP_reg15:
7963 case DW_OP_reg16:
7964 case DW_OP_reg17:
7965 case DW_OP_reg18:
7966 case DW_OP_reg19:
7967 case DW_OP_reg20:
7968 case DW_OP_reg21:
7969 case DW_OP_reg22:
7970 case DW_OP_reg23:
7971 case DW_OP_reg24:
7972 case DW_OP_reg25:
7973 case DW_OP_reg26:
7974 case DW_OP_reg27:
7975 case DW_OP_reg28:
7976 case DW_OP_reg29:
7977 case DW_OP_reg30:
7978 case DW_OP_reg31:
7979 next_index = index + 1;
7980 break;
7981
7982 case DW_OP_breg0:
7983 case DW_OP_breg1:
7984 case DW_OP_breg2:
7985 case DW_OP_breg3:
7986 case DW_OP_breg4:
7987 case DW_OP_breg5:
7988 case DW_OP_breg6:
7989 case DW_OP_breg7:
7990 case DW_OP_breg8:
7991 case DW_OP_breg9:
7992 case DW_OP_breg10:
7993 case DW_OP_breg11:
7994 case DW_OP_breg12:
7995 case DW_OP_breg13:
7996 case DW_OP_breg14:
7997 case DW_OP_breg15:
7998 case DW_OP_breg16:
7999 case DW_OP_breg17:
8000 case DW_OP_breg18:
8001 case DW_OP_breg19:
8002 case DW_OP_breg20:
8003 case DW_OP_breg21:
8004 case DW_OP_breg22:
8005 case DW_OP_breg23:
8006 case DW_OP_breg24:
8007 case DW_OP_breg25:
8008 case DW_OP_breg26:
8009 case DW_OP_breg27:
8010 case DW_OP_breg28:
8011 case DW_OP_breg29:
8012 case DW_OP_breg30:
8013 case DW_OP_breg31:
8014 next_index = index + 1;
8015 break;
8016
8017 case DW_OP_regx:
8018 next_index = index + 2;
8019 break;
8020
8021 case DW_OP_fbreg:
8022 next_index = index + 1;
8023 break;
8024
8025 case DW_OP_bregx:
8026 next_index = index + 1;
8027 break;
8028
8029 default:
8030 return false;
8031 }
8032
8033 expr_result r(false);
8034 ctxt.push(r);
8035
8036 return true;
8037 }
8038
8039 /// If the current operation in the dwarf expression represents a
8040 /// manipulation of the stack of the DWARF Expression Virtual Machine
8041 /// (aka DEVM), this function performs the operation and updates the
8042 /// state of the DEVM. If the result of the operation represents a
8043 /// constant value, then the accumulator of the DEVM is set to that
8044 /// result's value, Otherwise, the DEVM accumulator is left with its
8045 /// previous value.
8046 ///
8047 /// @param expr the array of the dwarf expression operations to consider.
8048 ///
8049 /// @param expr_len the lengths of @p ops array above.
8050 ///
8051 /// @param index the index of the operation to interpret, in @p ops.
8052 ///
8053 /// @param next_index the index of the operation to interpret at the
8054 /// next step, after this function completed and returned. This is
8055 /// set an output parameter that is set iff the function returns true.
8056 ///
8057 /// @param ctxt the DEVM evaluation context.
8058 ///
8059 /// @return true if the current operation actually manipulates the
8060 /// DEVM stack, false otherwise.
8061 static bool
op_manipulates_stack(Dwarf_Op * expr,size_t expr_len,size_t index,size_t & next_index,dwarf_expr_eval_context & ctxt)8062 op_manipulates_stack(Dwarf_Op* expr,
8063 size_t expr_len,
8064 size_t index,
8065 size_t& next_index,
8066 dwarf_expr_eval_context& ctxt)
8067 {
8068 Dwarf_Op& op = expr[index];
8069 expr_result v;
8070
8071 switch (op.atom)
8072 {
8073 case DW_OP_dup:
8074 v = ctxt.stack.front();
8075 ctxt.push(v);
8076 break;
8077
8078 case DW_OP_drop:
8079 v = ctxt.stack.front();
8080 ctxt.pop();
8081 break;
8082
8083 case DW_OP_over:
8084 ABG_ASSERT(ctxt.stack.size() > 1);
8085 v = ctxt.stack[1];
8086 ctxt.push(v);
8087 break;
8088
8089 case DW_OP_pick:
8090 ABG_ASSERT(index + 1 < expr_len);
8091 v = op.number;
8092 ctxt.push(v);
8093 break;
8094
8095 case DW_OP_swap:
8096 ABG_ASSERT(ctxt.stack.size() > 1);
8097 v = ctxt.stack[1];
8098 ctxt.stack.erase(ctxt.stack.begin() + 1);
8099 ctxt.push(v);
8100 break;
8101
8102 case DW_OP_rot:
8103 ABG_ASSERT(ctxt.stack.size() > 2);
8104 v = ctxt.stack[2];
8105 ctxt.stack.erase(ctxt.stack.begin() + 2);
8106 ctxt.push(v);
8107 break;
8108
8109 case DW_OP_deref:
8110 case DW_OP_deref_size:
8111 ABG_ASSERT(ctxt.stack.size() > 0);
8112 ctxt.pop();
8113 v.is_const(false);
8114 ctxt.push(v);
8115 break;
8116
8117 case DW_OP_xderef:
8118 case DW_OP_xderef_size:
8119 ABG_ASSERT(ctxt.stack.size() > 1);
8120 ctxt.pop();
8121 ctxt.pop();
8122 v.is_const(false);
8123 ctxt.push(v);
8124 break;
8125
8126 case DW_OP_push_object_address:
8127 v.is_const(false);
8128 ctxt.push(v);
8129 break;
8130
8131 case DW_OP_form_tls_address:
8132 case DW_OP_GNU_push_tls_address:
8133 ABG_ASSERT(ctxt.stack.size() > 0);
8134 v = ctxt.pop();
8135 if (op.atom == DW_OP_form_tls_address)
8136 v.is_const(false);
8137 ctxt.push(v);
8138 break;
8139
8140 case DW_OP_call_frame_cfa:
8141 v.is_const(false);
8142 ctxt.push(v);
8143 break;
8144
8145 default:
8146 return false;
8147 }
8148
8149 if (v.is_const())
8150 ctxt.accum = v;
8151
8152 if (op.atom == DW_OP_form_tls_address
8153 || op.atom == DW_OP_GNU_push_tls_address)
8154 ctxt.set_tls_address(true);
8155 else
8156 ctxt.set_tls_address(false);
8157
8158 next_index = index + 1;
8159
8160 return true;
8161 }
8162
8163 /// If the current operation in the dwarf expression represents a push
8164 /// of an arithmetic or logic operation onto the dwarf expr virtual
8165 /// machine (aka DEVM), perform the operation and update the DEVM.
8166 ///
8167 /// If the result of the operation is a constant, update the DEVM
8168 /// accumulator with its value. Otherwise, the DEVM accumulator is
8169 /// left with its previous value.
8170 ///
8171 /// @param expr the array of the dwarf expression operations to consider.
8172 ///
8173 /// @param expr_len the lengths of @p expr array above.
8174 ///
8175 /// @param index the index of the operation to interpret, in @p expr.
8176 ///
8177 /// @param next_index the index of the operation to interpret at the
8178 /// next step, after this function completed and returned. This is
8179 /// set an output parameter that is set iff the function returns true.
8180 ///
8181 /// @param ctxt the DEVM evaluation context.
8182 ///
8183 /// @return true if the current operation actually represent an
8184 /// arithmetic or logic operation.
8185 static bool
op_is_arith_logic(Dwarf_Op * expr,size_t expr_len,size_t index,size_t & next_index,dwarf_expr_eval_context & ctxt)8186 op_is_arith_logic(Dwarf_Op* expr,
8187 size_t expr_len,
8188 size_t index,
8189 size_t& next_index,
8190 dwarf_expr_eval_context& ctxt)
8191 {
8192 ABG_ASSERT(index < expr_len);
8193
8194 Dwarf_Op& op = expr[index];
8195 expr_result val1, val2;
8196
8197 switch (op.atom)
8198 {
8199 case DW_OP_abs:
8200 val1 = ctxt.pop();
8201 val1 = val1.abs();
8202 ctxt.push(val1);
8203 break;
8204
8205 case DW_OP_and:
8206 ABG_ASSERT(ctxt.stack.size() > 1);
8207 val1 = ctxt.pop();
8208 val2 = ctxt.pop();
8209 ctxt.push(val1 & val2);
8210 break;
8211
8212 case DW_OP_div:
8213 val1 = ctxt.pop();
8214 val2 = ctxt.pop();
8215 if (!val1.is_const())
8216 val1 = 1;
8217 ctxt.push(val2 / val1);
8218 break;
8219
8220 case DW_OP_minus:
8221 val1 = ctxt.pop();
8222 val2 = ctxt.pop();
8223 ctxt.push(val2 - val1);
8224 break;
8225
8226 case DW_OP_mod:
8227 val1 = ctxt.pop();
8228 val2 = ctxt.pop();
8229 ctxt.push(val2 % val1);
8230 break;
8231
8232 case DW_OP_mul:
8233 val1 = ctxt.pop();
8234 val2 = ctxt.pop();
8235 ctxt.push(val2 * val1);
8236 break;
8237
8238 case DW_OP_neg:
8239 val1 = ctxt.pop();
8240 ctxt.push(-val1);
8241 break;
8242
8243 case DW_OP_not:
8244 val1 = ctxt.pop();
8245 ctxt.push(~val1);
8246 break;
8247
8248 case DW_OP_or:
8249 val1 = ctxt.pop();
8250 val2 = ctxt.pop();
8251 ctxt.push(val1 | val2);
8252 break;
8253
8254 case DW_OP_plus:
8255 val1 = ctxt.pop();
8256 val2 = ctxt.pop();
8257 ctxt.push(val2 + val1);
8258 break;
8259
8260 case DW_OP_plus_uconst:
8261 val1 = ctxt.pop();
8262 val1 += op.number;
8263 ctxt.push(val1);
8264 break;
8265
8266 case DW_OP_shl:
8267 val1 = ctxt.pop();
8268 val2 = ctxt.pop();
8269 ctxt.push(val2 << val1);
8270 break;
8271
8272 case DW_OP_shr:
8273 case DW_OP_shra:
8274 val1 = ctxt.pop();
8275 val2 = ctxt.pop();
8276 ctxt.push(val2 >> val1);
8277 break;
8278
8279 case DW_OP_xor:
8280 val1 = ctxt.pop();
8281 val2 = ctxt.pop();
8282 ctxt.push(val2 ^ val1);
8283 break;
8284
8285 default:
8286 return false;
8287 }
8288
8289 if (ctxt.stack.front().is_const())
8290 ctxt.accum = ctxt.stack.front();
8291
8292 next_index = index + 1;
8293 return true;
8294 }
8295
8296 /// If the current operation in the dwarf expression represents a push
8297 /// of a control flow operation onto the dwarf expr virtual machine
8298 /// (aka DEVM), perform the operation and update the DEVM.
8299 ///
8300 /// If the result of the operation is a constant, update the DEVM
8301 /// accumulator with its value. Otherwise, the DEVM accumulator is
8302 /// left with its previous value.
8303 ///
8304 /// @param expr the array of the dwarf expression operations to consider.
8305 ///
8306 /// @param expr_len the lengths of @p expr array above.
8307 ///
8308 /// @param index the index of the operation to interpret, in @p expr.
8309 ///
8310 /// @param next_index the index of the operation to interpret at the
8311 /// next step, after this function completed and returned. This is
8312 /// set an output parameter that is set iff the function returns true.
8313 ///
8314 /// @param ctxt the DEVM evaluation context.
8315 ///
8316 /// @return true if the current operation actually represents a
8317 /// control flow operation, false otherwise.
8318 static bool
op_is_control_flow(Dwarf_Op * expr,size_t expr_len,size_t index,size_t & next_index,dwarf_expr_eval_context & ctxt)8319 op_is_control_flow(Dwarf_Op* expr,
8320 size_t expr_len,
8321 size_t index,
8322 size_t& next_index,
8323 dwarf_expr_eval_context& ctxt)
8324 {
8325 ABG_ASSERT(index < expr_len);
8326
8327 Dwarf_Op& op = expr[index];
8328 expr_result val1, val2;
8329
8330 switch (op.atom)
8331 {
8332 case DW_OP_eq:
8333 case DW_OP_ge:
8334 case DW_OP_gt:
8335 case DW_OP_le:
8336 case DW_OP_lt:
8337 case DW_OP_ne:
8338 {
8339 bool value = true;
8340 val1 = ctxt.pop();
8341 val2 = ctxt.pop();
8342 if (op.atom == DW_OP_eq)
8343 value = val2 == val1;
8344 else if (op.atom == DW_OP_ge)
8345 value = val2 >= val1;
8346 else if (op.atom == DW_OP_gt)
8347 value = val2 > val1;
8348 else if (op.atom == DW_OP_le)
8349 value = val2 <= val1;
8350 else if (op.atom == DW_OP_lt)
8351 value = val2 < val1;
8352 else if (op.atom == DW_OP_ne)
8353 value = val2 != val1;
8354
8355 val1 = value ? 1 : 0;
8356 ctxt.push(val1);
8357 }
8358 break;
8359
8360 case DW_OP_skip:
8361 if (op.number > 0)
8362 index += op.number - 1;
8363 break;
8364
8365 case DW_OP_bra:
8366 val1 = ctxt.pop();
8367 if (val1.const_value() != 0)
8368 index += val1.const_value() - 1;
8369 break;
8370
8371 case DW_OP_call2:
8372 case DW_OP_call4:
8373 case DW_OP_call_ref:
8374 case DW_OP_nop:
8375 break;
8376
8377 default:
8378 return false;
8379 }
8380
8381 if (ctxt.stack.front().is_const())
8382 ctxt.accum = ctxt.stack.front();
8383
8384 next_index = index + 1;
8385 return true;
8386 }
8387
8388 /// This function quickly evaluates a DWARF expression that is a
8389 /// constant.
8390 ///
8391 /// This is a "fast path" function that quickly evaluates a DWARF
8392 /// expression that is only made of a DW_OP_plus_uconst operator.
8393 ///
8394 /// This is a sub-routine of die_member_offset.
8395 ///
8396 /// @param expr the DWARF expression to evaluate.
8397 ///
8398 /// @param expr_len the length of the expression @p expr.
8399 ///
8400 /// @param value out parameter. This is set to the result of the
8401 /// evaluation of @p expr, iff this function returns true.
8402 ///
8403 /// @return true iff the evaluation of @p expr went OK.
8404 static bool
eval_quickly(Dwarf_Op * expr,uint64_t expr_len,int64_t & value)8405 eval_quickly(Dwarf_Op* expr,
8406 uint64_t expr_len,
8407 int64_t& value)
8408 {
8409 if (expr_len == 1 && (expr[0].atom == DW_OP_plus_uconst))
8410 {
8411 value = expr[0].number;
8412 return true;
8413 }
8414 return false;
8415 }
8416
8417 /// Evaluate the value of the last sub-expression that is a constant,
8418 /// inside a given DWARF expression.
8419 ///
8420 /// @param expr the DWARF expression to consider.
8421 ///
8422 /// @param expr_len the length of the expression to consider.
8423 ///
8424 /// @param value the resulting value of the last constant
8425 /// sub-expression of the DWARF expression. This is set iff the
8426 /// function returns true.
8427 ///
8428 /// @param is_tls_address out parameter. This is set to true iff
8429 /// the resulting value of the evaluation is a TLS (thread local
8430 /// storage) address.
8431 ///
8432 /// @param eval_ctxt the evaluation context to (re)use. Note that
8433 /// this function initializes this context before using it.
8434 ///
8435 /// @return true if the function could find a constant sub-expression
8436 /// to evaluate, false otherwise.
8437 static bool
eval_last_constant_dwarf_sub_expr(Dwarf_Op * expr,size_t expr_len,int64_t & value,bool & is_tls_address,dwarf_expr_eval_context & eval_ctxt)8438 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
8439 size_t expr_len,
8440 int64_t& value,
8441 bool& is_tls_address,
8442 dwarf_expr_eval_context &eval_ctxt)
8443 {
8444 // Reset the evaluation context before evaluating the constant sub
8445 // expression contained in the DWARF expression 'expr'.
8446 eval_ctxt.reset();
8447
8448 size_t index = 0, next_index = 0;
8449 do
8450 {
8451 if (op_is_arith_logic(expr, expr_len, index,
8452 next_index, eval_ctxt)
8453 || op_pushes_constant_value(expr, expr_len, index,
8454 next_index, eval_ctxt)
8455 || op_manipulates_stack(expr, expr_len, index,
8456 next_index, eval_ctxt)
8457 || op_pushes_non_constant_value(expr, expr_len, index,
8458 next_index, eval_ctxt)
8459 || op_is_control_flow(expr, expr_len, index,
8460 next_index, eval_ctxt))
8461 ;
8462 else
8463 next_index = index + 1;
8464
8465 ABG_ASSERT(next_index > index);
8466 index = next_index;
8467 } while (index < expr_len);
8468
8469 is_tls_address = eval_ctxt.set_tls_address();
8470 if (eval_ctxt.accum.is_const())
8471 {
8472 value = eval_ctxt.accum;
8473 return true;
8474 }
8475 return false;
8476 }
8477
8478 /// Evaluate the value of the last sub-expression that is a constant,
8479 /// inside a given DWARF expression.
8480 ///
8481 /// @param expr the DWARF expression to consider.
8482 ///
8483 /// @param expr_len the length of the expression to consider.
8484 ///
8485 /// @param value the resulting value of the last constant
8486 /// sub-expression of the DWARF expression. This is set iff the
8487 /// function returns true.
8488 ///
8489 /// @return true if the function could find a constant sub-expression
8490 /// to evaluate, false otherwise.
8491 static bool
eval_last_constant_dwarf_sub_expr(Dwarf_Op * expr,size_t expr_len,int64_t & value,bool & is_tls_address)8492 eval_last_constant_dwarf_sub_expr(Dwarf_Op* expr,
8493 size_t expr_len,
8494 int64_t& value,
8495 bool& is_tls_address)
8496 {
8497 dwarf_expr_eval_context eval_ctxt;
8498 return eval_last_constant_dwarf_sub_expr(expr, expr_len, value,
8499 is_tls_address, eval_ctxt);
8500 }
8501
8502 // -----------------------------------
8503 // </location expression evaluation>
8504 // -----------------------------------
8505
8506 /// Convert a DW_AT_bit_offset attribute value into the same value as
8507 /// DW_AT_data_bit_offset - 8 * DW_AT_data_member_location.
8508 ///
8509 /// On big endian machines, the value of the DW_AT_bit_offset
8510 /// attribute + 8 * the value of the DW_AT_data_member_location
8511 /// attribute is the same as the value of the DW_AT_data_bit_offset
8512 /// attribute.
8513 ///
8514 /// On little endian machines however, the situation is different.
8515 /// The DW_AT_bit_offset value for a bit field is the number of bits
8516 /// to the left of the most significant bit of the bit field, within
8517 /// the integer value at DW_AT_data_member_location.
8518 ///
8519 /// The DW_AT_data_bit_offset offset value is the number of bits to
8520 /// the right of the least significant bit of the bit field, again
8521 /// relative to the containing integer value.
8522 ///
8523 /// In other words, DW_AT_data_bit_offset is what everybody would
8524 /// instinctively think of as being the "offset of the bit field". 8 *
8525 /// DW_AT_data_member_location + DW_AT_bit_offset however is very
8526 /// counter-intuitive on little endian machines.
8527 ///
8528 /// This function thus reads the value of a DW_AT_bit_offset property
8529 /// of a DIE and converts it into what the DW_AT_data_bit_offset would
8530 /// have been if it was present, ignoring the contribution of
8531 /// DW_AT_data_member_location.
8532 ///
8533 /// Note that DW_AT_bit_offset has been made obsolete starting from
8534 /// DWARF5 (for GCC; Clang still emits it).
8535 ///
8536 /// If you like coffee and it's not too late, now might be a good time
8537 /// to have a coffee break. Otherwise if it's late at night, you
8538 /// might want to consider an herbal tea break. Then come back to
8539 /// read this.
8540 ///
8541 ///
8542 /// In what follows, the bit fields are all contained within the first
8543 /// whole int of the struct, so DW_AT_data_member_location is 0.
8544 ///
8545 /// Okay, to have a better idea of what DW_AT_bit_offset and
8546 /// DW_AT_data_bit_offset represent, let's consider a struct 'S' which
8547 /// have bit fields data members defined as:
8548 ///
8549 /// struct S
8550 /// {
8551 /// int j:5;
8552 /// int k:6;
8553 /// int m:5;
8554 /// int n:8;
8555 /// };
8556 ///
8557 /// The below wonderful (at least!) ASCII art sketch describes the
8558 /// layout of the bitfields of 'struct S' on a little endian machine.
8559 /// You need to read the sketch from the bottom-up.
8560 ///
8561 /// So please scroll down to its bottom. Note how the 32 bits integer
8562 /// word containing the bit fields is laid out with its least
8563 /// significant bit starting on the right hand side, at index 0.
8564 ///
8565 /// Then slowly scroll up starting from there, and take the time to
8566 /// read each line and see how the bit fields are laid out and what
8567 /// DW_AT_bit_offset and DW_AT_data_bit_offset represent for each of
8568 /// the bit fields.
8569 ///
8570 /// DW_AT_bit_offset(n)
8571 /// < - - - - - - >
8572 /// | | n |
8573 /// ^ ^< - - - - >^
8574 /// DW_AT_data_bit_offset(n)
8575 /// < - - - - - - - - - - - - - - - >
8576 /// | |
8577 /// ^ ^
8578 /// DW_AT_bit_offset(m)
8579 /// <--------------------------------->
8580 /// | | m |
8581 /// ^ ^< - >^
8582 /// DW_AT_data_bit_offset(m)
8583 /// < - - - - - - - - - - >
8584 /// | |
8585 /// ^ ^
8586 /// DW_AT_bit_offset(k)
8587 /// <-------------------------------------------->
8588 /// | | k |
8589 /// ^ ^< - - >^
8590 /// DW_AT_data_bit_offset(k)
8591 /// < - - - - >
8592 /// | |
8593 /// ^ ^
8594 /// DW_AT_bit_offset(j)
8595 /// <-------------------------------------------------------->
8596 /// | |
8597 /// ^ ^
8598 /// n m k j
8599 /// < - - - - - - > < - - - > < - - - - > < - - - >
8600 ///
8601 /// | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
8602 /// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
8603 /// 31 27 23 16 15 11 10 6 5 4 0
8604 ///
8605 /// So, the different bit fields all fit in one 32 bits word, assuming
8606 /// the bit fields are tightly packed.
8607 ///
8608 /// Let's look at what DW_AT_bit_offset of the 'j' bit field would be
8609 /// on this little endian machine and let's see how it relates to
8610 /// DW_AT_data_bit_offset of j.
8611 ///
8612 /// DW_AT_bit_offset(j) would be equal to the number of bits from the
8613 /// left of the 32 bits word (i.e from bit number 31) to the most
8614 /// significant bit of the j bit field (i.e, bit number 4). Thus:
8615 ///
8616 /// DW_AT_bit_offset(j) =
8617 /// sizeof_in_bits(int) - size_in_bits_of(j) = 32 - 5 = 27.
8618 ///
8619 /// DW_AT_data_bit_offset(j) is the number of bits from the right of the
/// 32 bits word (i.e, bit number 0) to the least significant bit of
8621 /// the 'j' bit field (ie, bit number 0). Thus:
8622 ///
8623 /// DW_AT_data_bit_offset(j) = 0.
8624 ///
8625 /// More generally, we can notice that:
8626 ///
8627 /// sizeof_in_bits(int) =
8628 /// DW_AT_bit_offset(j) + sizeof_in_bits(j) + DW_AT_data_bit_offset(j).
8629 ///
8630 /// It follows that:
8631 ///
8632 /// DW_AT_data_bit_offset(j) =
8633 /// sizeof_in_bits(int) - sizeof_in_bits(j) - DW_AT_bit_offset(j);
8634 ///
8635 /// Thus:
8636 ///
8637 /// DW_AT_data_bit_offset(j) = 32 - 27 - 5 = 0;
8638 ///
8639 /// Note that DW_AT_data_bit_offset(j) is the offset of 'j' starting
8640 /// from the right hand side of the word. It is what we would
8641 /// intuitively think it is. DW_AT_bit_offset however is super
8642 /// counter-intuitive, pfff.
8643 ///
8644 /// Anyway, this general equation holds true for all bit fields.
8645 ///
8646 /// Similarly, it follows that:
8647 ///
8648 /// DW_AT_bit_offset(k) =
8649 /// sizeof_in_bits(int) - sizeof_in_bits(k) - DW_AT_data_bit_offset(k);
8650 ///
8651 /// Thus:
8652 /// DW_AT_bit_offset(k) = 32 - 6 - 5 = 21.
8653 ///
8654 ///
8655 /// Likewise:
8656 ///
8657 /// DW_AT_bit_offset(m) =
8658 /// sizeof_in_bits(int) - sizeof_in_bits(m) - DW_AT_data_bit_offset(m);
8659 ///
8660 ///
8661 /// Thus:
8662 /// DW_AT_bit_offset(m) = 32 - 5 - (5 + 6) = 16.
8663 ///
/// Lastly:
///
///   DW_AT_bit_offset(n) =
///     sizeof_in_bits(int) - sizeof_in_bits(n) - DW_AT_data_bit_offset(n);
8671 ///
8672 /// Thus:
8673 /// DW_AT_bit_offset(n) = 32 - 8 - (5 + 6 + 5) = 8.
8674 ///
8675 /// Luckily, the body of the function is much smaller than this
8676 /// comment. Enjoy!
8677 ///
8678 /// @param die the DIE to consider.
8679 ///
8680 /// @param is_big_endian this is true iff the machine we are looking at
8681 /// is big endian.
8682 ///
8683 /// @param offset this is the output parameter into which the value of
8684 /// the DW_AT_bit_offset is put, converted as if it was the value of
8685 /// the DW_AT_data_bit_offset parameter, less the contribution of
8686 /// DW_AT_data_member_location. This parameter is set iff the
8687 /// function returns true.
8688 ///
8689 /// @return true if DW_AT_bit_offset was found on @p die.
8690 static bool
read_and_convert_DW_at_bit_offset(const Dwarf_Die * die,bool is_big_endian,uint64_t & offset)8691 read_and_convert_DW_at_bit_offset(const Dwarf_Die* die,
8692 bool is_big_endian,
8693 uint64_t &offset)
8694 {
8695 uint64_t off = 0;
8696 if (!die_unsigned_constant_attribute(die, DW_AT_bit_offset, off))
8697 return false;
8698
8699 if (is_big_endian)
8700 {
8701 offset = off;
8702 return true;
8703 }
8704
8705 // Okay, we are looking at a little endian machine. We need to
8706 // convert DW_AT_bit_offset into what DW_AT_data_bit_offset would
8707 // have been. To understand this, you really need to read the
8708 // preliminary comment of this function.
8709 uint64_t containing_anonymous_object_size = 0;
8710 ABG_ASSERT(die_unsigned_constant_attribute(die, DW_AT_byte_size,
8711 containing_anonymous_object_size));
8712 containing_anonymous_object_size *= 8;
8713
8714 uint64_t bitfield_size = 0;
8715 ABG_ASSERT(die_unsigned_constant_attribute(die, DW_AT_bit_size,
8716 bitfield_size));
8717
8718 // As noted in the the preliminary comment of this function if we
8719 // want to get the DW_AT_data_bit_offset of a bit field 'k' from the
8720 // its DW_AT_bit_offset value, the equation is:
8721 //
8722 // DW_AT_data_bit_offset(k) =
8723 // sizeof_in_bits(containing_anonymous_object_size)
8724 // - DW_AT_data_bit_offset(k)
8725 // - sizeof_in_bits(k)
8726 offset = containing_anonymous_object_size - off - bitfield_size;
8727
8728 return true;
8729 }
8730
8731 /// Get the value of the DW_AT_data_member_location of the given DIE
8732 /// attribute as an constant.
8733 ///
8734 /// @param die the DIE to read the attribute from.
8735 ///
8736 /// @param offset the attribute as a constant value. This is set iff
8737 /// the function returns true.
8738 ///
8739 /// @return true if the attribute exists and has a constant value. In
8740 /// that case the offset is set to the value.
8741 static bool
die_constant_data_member_location(const Dwarf_Die * die,int64_t & offset)8742 die_constant_data_member_location(const Dwarf_Die *die,
8743 int64_t& offset)
8744 {
8745 if (!die)
8746 return false;
8747
8748 Dwarf_Attribute attr;
8749 if (!dwarf_attr(const_cast<Dwarf_Die*>(die),
8750 DW_AT_data_member_location,
8751 &attr))
8752 return false;
8753
8754 Dwarf_Word val;
8755 if (dwarf_formudata(&attr, &val) != 0)
8756 return false;
8757
8758 offset = val;
8759 return true;
8760 }
8761
8762 /// Get the offset of a struct/class member as represented by the
8763 /// value of the DW_AT_data_member_location attribute.
8764 ///
8765 /// There is a huge gotcha in here. The value of the
8766 /// DW_AT_data_member_location is not necessarily a constant that one
8767 /// would just read and be done with it. Rather, it can be a DWARF
8768 /// expression that one has to interpret. In general, the offset can
8769 /// be given by the DW_AT_data_bit_offset or by the
8770 /// DW_AT_data_member_location attribute and optionally the
8771 /// DW_AT_bit_offset attribute. The bit offset attributes are
8772 /// always simple constants, but the DW_AT_data_member_location
8773 /// attribute is a DWARF location expression.
8774 ///
8775 /// When it's the DW_AT_data_member_location that is present,
8776 /// there are three cases to possibly take into account:
8777 ///
8778 /// 1/ The offset in the vtable where the offset of a virtual base
8779 /// can be found, aka vptr offset. Given the address of a
8780 /// given object O, the vptr offset for B is given by the
8781 /// (DWARF) expression:
8782 ///
8783 /// address(O) + *(*address(0) - VIRTUAL_OFFSET)
8784 ///
8785 /// where VIRTUAL_OFFSET is a constant value; In this case,
8786 /// this function returns the constant VIRTUAL_OFFSET, as this
8787 /// is enough to detect changes in a given virtual base
8788 /// relative to the other virtual bases.
8789 ///
8790 /// 2/ The offset of a regular data member. Given the address of
8791 /// a struct object named O, the memory location for a
8792 /// particular data member is given by the (DWARF) expression:
8793 ///
8794 /// address(O) + OFFSET
8795 ///
8796 /// where OFFSET is a constant. In this case, this function
8797 /// returns the OFFSET constant.
8798 ///
8799 /// 3/ The offset of a virtual member function in the virtual
8800 /// pointer. The DWARF expression is a constant that designates
8801 /// the offset of the function in the vtable. In this case this
8802 /// function returns that constant.
8803 ///
8804 /// @param ctxt the read context to consider.
8805 ///
8806 /// @param die the DIE to read the information from.
8807 ///
8808 /// @param offset the resulting constant offset, in bits. This
8809 /// argument is set iff the function returns true.
8810 static bool
die_member_offset(const read_context & ctxt,const Dwarf_Die * die,int64_t & offset)8811 die_member_offset(const read_context& ctxt,
8812 const Dwarf_Die* die,
8813 int64_t& offset)
8814 {
8815 Dwarf_Op* expr = NULL;
8816 size_t expr_len = 0;
8817 uint64_t bit_offset = 0;
8818
8819 // First let's see if the DW_AT_data_bit_offset attribute is
8820 // present.
8821 if (die_unsigned_constant_attribute(die, DW_AT_data_bit_offset, bit_offset))
8822 {
8823 offset = bit_offset;
8824 return true;
8825 }
8826
8827 // First try to read DW_AT_data_member_location as a plain constant.
8828 // We do this because the generic method using die_location_expr
8829 // might hit a bug in elfutils libdw dwarf_location_expression only
8830 // fixed in elfutils 0.184+. The bug only triggers if the attribute
8831 // is expressed as a (DWARF 5) DW_FORM_implicit_constant. But we
8832 // handle all constants here because that is more consistent (and
8833 // slightly faster in the general case where the attribute isn't a
8834 // full DWARF expression).
8835 if (!die_constant_data_member_location(die, offset))
8836 {
8837 // Otherwise, let's see if the DW_AT_data_member_location
8838 // attribute and, optionally, the DW_AT_bit_offset attributes
8839 // are present.
8840 if (!die_location_expr(die, DW_AT_data_member_location,
8841 &expr, &expr_len))
8842 return false;
8843
8844 // The DW_AT_data_member_location attribute is present. Let's
8845 // evaluate it and get its constant sub-expression and return
8846 // that one.
8847 if (!eval_quickly(expr, expr_len, offset))
8848 {
8849 bool is_tls_address = false;
8850 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len,
8851 offset, is_tls_address,
8852 ctxt.dwarf_expr_eval_ctxt()))
8853 return false;
8854 }
8855 }
8856 offset *= 8;
8857
8858 // On little endian machines, we need to convert the
8859 // DW_AT_bit_offset attribute into a relative offset to 8 *
8860 // DW_AT_data_member_location equal to what DW_AT_data_bit_offset
8861 // would be if it were used instead.
8862 //
8863 // In other words, before adding it to 8 *
8864 // DW_AT_data_member_location, DW_AT_bit_offset needs to be
8865 // converted into a human-understandable form that represents the
8866 // offset of the bitfield data member it describes. For details
8867 // about the conversion, please read the extensive comments of
8868 // read_and_convert_DW_at_bit_offset.
8869 bool is_big_endian = architecture_is_big_endian(ctxt.elf_handle());
8870 if (read_and_convert_DW_at_bit_offset(die, is_big_endian, bit_offset))
8871 offset += bit_offset;
8872
8873 return true;
8874 }
8875
8876 /// Read the value of the DW_AT_location attribute from a DIE,
8877 /// evaluate the resulting DWARF expression and, if it's a constant
8878 /// expression, return it.
8879 ///
8880 /// @param die the DIE to consider.
8881 ///
8882 /// @param address the resulting constant address. This is set iff
8883 /// the function returns true.
8884 ///
8885 /// @return true iff the whole sequence of action described above
8886 /// could be completed normally.
8887 static bool
die_location_address(Dwarf_Die * die,Dwarf_Addr & address,bool & is_tls_address)8888 die_location_address(Dwarf_Die* die,
8889 Dwarf_Addr& address,
8890 bool& is_tls_address)
8891 {
8892 Dwarf_Op* expr = NULL;
8893 size_t expr_len = 0;
8894
8895 is_tls_address = false;
8896
8897 if (!die)
8898 return false;
8899
8900 Dwarf_Attribute attr;
8901 if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), DW_AT_location, &attr))
8902 return false;
8903
8904 if (dwarf_getlocation(&attr, &expr, &expr_len))
8905 return false;
8906 // Ignore location expressions where reading them succeeded but
8907 // their length is 0.
8908 if (expr_len == 0)
8909 return false;
8910
8911 Dwarf_Attribute result;
8912 if (!dwarf_getlocation_attr(&attr, expr, &result))
8913 // A location that has been interpreted as an address.
8914 return !dwarf_formaddr(&result, &address);
8915
8916 // Just get the address out of the number field.
8917 address = expr->number;
8918 return true;
8919 }
8920
8921 /// Return the index of a function in its virtual table. That is,
8922 /// return the value of the DW_AT_vtable_elem_location attribute.
8923 ///
8924 /// @param die the DIE of the function to consider.
8925 ///
8926 /// @param vindex the resulting index. This is set iff the function
8927 /// returns true.
8928 ///
8929 /// @return true if the DIE has a DW_AT_vtable_elem_location
8930 /// attribute.
8931 static bool
die_virtual_function_index(Dwarf_Die * die,int64_t & vindex)8932 die_virtual_function_index(Dwarf_Die* die,
8933 int64_t& vindex)
8934 {
8935 if (!die)
8936 return false;
8937
8938 Dwarf_Op* expr = NULL;
8939 size_t expr_len = 0;
8940 if (!die_location_expr(die, DW_AT_vtable_elem_location,
8941 &expr, &expr_len))
8942 return false;
8943
8944 int64_t i = 0;
8945 bool is_tls_addr = false;
8946 if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, i, is_tls_addr))
8947 return false;
8948
8949 vindex = i;
8950 return true;
8951 }
8952
8953 /// Test if a given DIE represents an anonymous type.
8954 ///
8955 /// Anonymous types we are interested in are classes, unions and
8956 /// enumerations.
8957 ///
8958 /// @param die the DIE to consider.
8959 ///
8960 /// @return true iff @p die represents an anonymous type.
8961 bool
is_anonymous_type_die(Dwarf_Die * die)8962 is_anonymous_type_die(Dwarf_Die *die)
8963 {
8964 int tag = dwarf_tag(die);
8965
8966 if (tag == DW_TAG_class_type
8967 || tag == DW_TAG_structure_type
8968 || tag == DW_TAG_union_type
8969 || tag == DW_TAG_enumeration_type)
8970 return die_is_anonymous(die);
8971
8972 return false;
8973 }
8974
8975 /// Return the base of the internal name to represent an anonymous
8976 /// type.
8977 ///
8978 /// Typically, anonymous enums would be named
8979 /// __anonymous_enum__<number>, anonymous struct or classes would be
8980 /// named __anonymous_struct__<number> and anonymous unions would be
8981 /// named __anonymous_union__<number>. The first part of these
8982 /// anonymous names (i.e, __anonymous_{enum,struct,union}__ is called
8983 /// the base name. This function returns that base name, depending on
8984 /// the kind of type DIE we are looking at.
8985 ///
8986 /// @param die the type DIE to look at. This function expects a type
8987 /// DIE with an empty DW_AT_name property value (anonymous).
8988 ///
8989 /// @return a string representing the base of the internal anonymous
8990 /// name.
8991 static string
get_internal_anonymous_die_prefix_name(const Dwarf_Die * die)8992 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die)
8993 {
8994 ABG_ASSERT(die_is_type(die));
8995 ABG_ASSERT(die_string_attribute(die, DW_AT_name) == "");
8996
8997 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
8998 string type_name;
8999 if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
9000 type_name = tools_utils::get_anonymous_struct_internal_name_prefix();
9001 else if (tag == DW_TAG_union_type)
9002 type_name = tools_utils::get_anonymous_union_internal_name_prefix();
9003 else if (tag == DW_TAG_enumeration_type)
9004 type_name = tools_utils::get_anonymous_enum_internal_name_prefix();
9005
9006 return type_name;
9007 }
9008
/// Build a full internal anonymous type name out of a base name and
/// an index.
///
/// @param base_name this is the base name as returned by the function
/// @ref get_internal_anonymous_die_prefix_name.
///
/// @param anonymous_type_index this is the index of the anonymous
/// type in its scope.  That is, if there is more than one anonymous
/// type of a given kind in a scope, this index is what tells them
/// apart, starting from 0.
///
/// @return @p base_name followed by @p anonymous_type_index.  Note
/// that nothing is appended when the index is 0 or when @p base_name
/// is empty: @p base_name is then returned unchanged.
static string
build_internal_anonymous_die_name(const string &base_name,
                                  size_t anonymous_type_index)
{
  if (anonymous_type_index == 0 || base_name.empty())
    return base_name;

  std::ostringstream out;
  out << base_name << anonymous_type_index;
  return out.str();
}
9034
/// Build the internal name of the underlying type of an enum.
///
/// The name has the form "enum-<base_name>-underlying-type-<size>",
/// or "unnamed-enum-underlying-type-<size>" for an anonymous
/// underlying type.
///
/// @param base_name the (unqualified) name of the enum the underlying
/// type is destined to.  It is not used if @p is_anonymous is true.
///
/// @param is_anonymous true if the underlying type of the enum is to
/// be anonymous.
///
/// @param size the number that is appended at the end of the name.
///
/// @return the built name.
static string
build_internal_underlying_enum_type_name(const string &base_name,
                                         bool is_anonymous,
                                         uint64_t size)
{
  std::ostringstream out;
  out << (is_anonymous ? string("unnamed-enum") : "enum-" + base_name)
      << "-underlying-type-"
      << size;
  return out.str();
}
9058
9059 /// Build a full internal anonymous type name.
9060 ///
9061 /// @param die the DIE representing the anonymous type to consider.
9062 ///
9063 /// @param anonymous_type_index the index of the anonymous type
9064 /// represented by @p DIE, in its scope. That is, if there are
9065 /// several different anonymous types of the same kind as @p die, this
9066 /// index is what tells them appart.
9067 ///
9068 /// @return the internal name of the anonymous type represented by @p
9069 /// DIE.
9070 static string
get_internal_anonymous_die_name(Dwarf_Die * die,size_t anonymous_type_index)9071 get_internal_anonymous_die_name(Dwarf_Die *die,
9072 size_t anonymous_type_index)
9073 {
9074 string name = get_internal_anonymous_die_prefix_name(die);
9075 name = build_internal_anonymous_die_name(name, anonymous_type_index);
9076 return name;
9077 }
9078
9079 // ------------------------------------
9080 // <DIE pretty printer>
9081 // ------------------------------------
9082
9083 /// Compute the qualified name of a DIE that represents a type.
9084 ///
9085 /// For instance, if the DIE tag is DW_TAG_subprogram then this
9086 /// function computes the name of the function *type*.
9087 ///
9088 /// @param ctxt the read context.
9089 ///
9090 /// @param die the DIE to consider.
9091 ///
9092 /// @param where_offset where in the are logically are in the DIE
9093 /// stream.
9094 ///
9095 /// @return a copy of the qualified name of the type.
9096 static string
die_qualified_type_name(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9097 die_qualified_type_name(const read_context& ctxt,
9098 const Dwarf_Die* die,
9099 size_t where_offset)
9100 {
9101 if (!die)
9102 return "";
9103
9104 int tag = dwarf_tag (const_cast<Dwarf_Die*>(die));
9105 if (tag == DW_TAG_compile_unit
9106 || tag == DW_TAG_partial_unit
9107 || tag == DW_TAG_type_unit)
9108 return "";
9109
9110 string name = die_name(die);
9111
9112 Dwarf_Die scope_die;
9113 if (!get_scope_die(ctxt, die, where_offset, scope_die))
9114 return "";
9115
9116 string parent_name = die_qualified_name(ctxt, &scope_die, where_offset);
9117 bool colon_colon = die_is_type(die) || die_is_namespace(die);
9118 string separator = colon_colon ? "::" : ".";
9119
9120 string repr;
9121
9122 switch (tag)
9123 {
9124 case DW_TAG_unspecified_type:
9125 break;
9126
9127 case DW_TAG_base_type:
9128 {
9129 abigail::ir::integral_type int_type;
9130 if (parse_integral_type(name, int_type))
9131 repr = int_type;
9132 else
9133 repr = name;
9134 }
9135 break;
9136
9137 case DW_TAG_typedef:
9138 case DW_TAG_enumeration_type:
9139 case DW_TAG_structure_type:
9140 case DW_TAG_class_type:
9141 case DW_TAG_union_type:
9142 {
9143 if (tag == DW_TAG_typedef)
9144 {
9145 // If the underlying type of the typedef is unspecified,
9146 // bail out as we don't support that yet.
9147 Dwarf_Die underlying_type_die;
9148 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
9149 {
9150 string n = die_qualified_type_name(ctxt, &underlying_type_die,
9151 where_offset);
9152 if (die_is_unspecified(&underlying_type_die)
9153 || n.empty())
9154 break;
9155 }
9156 }
9157
9158 if (name.empty())
9159 // TODO: handle cases where there are more than one
9160 // anonymous type of the same kind in the same scope. In
9161 // that case, their name must be built with the function
9162 // get_internal_anonymous_die_name or something of the same
9163 // kind.
9164 name = get_internal_anonymous_die_prefix_name(die);
9165
9166 ABG_ASSERT(!name.empty());
9167 repr = parent_name.empty() ? name : parent_name + separator + name;
9168 }
9169 break;
9170
9171 case DW_TAG_const_type:
9172 case DW_TAG_volatile_type:
9173 case DW_TAG_restrict_type:
9174 {
9175 Dwarf_Die underlying_type_die;
9176 bool has_underlying_type_die =
9177 die_die_attribute(die, DW_AT_type, underlying_type_die);
9178
9179 if (has_underlying_type_die && die_is_unspecified(&underlying_type_die))
9180 break;
9181
9182 if (tag == DW_TAG_const_type)
9183 {
9184 if (has_underlying_type_die
9185 && die_is_reference_type(&underlying_type_die))
9186 // A reference is always const. So, to lower false
9187 // positive reports in diff computations, we consider a
9188 // const reference just as a reference. But we need to
9189 // keep the qualified-ness of the type. So we introduce
9190 // a 'no-op' qualifier here. Please remember that this
9191 // has to be kept in sync with what is done in
9192 // get_name_of_qualified_type. So if you change this
9193 // here, you have to change that code there too.
9194 repr = "";
9195 else if (!has_underlying_type_die
9196 || die_is_void_type(&underlying_type_die))
9197 {
9198 repr = "void";
9199 break;
9200 }
9201 else
9202 repr = "const";
9203 }
9204 else if (tag == DW_TAG_volatile_type)
9205 repr = "volatile";
9206 else if (tag == DW_TAG_restrict_type)
9207 repr = "restrict";
9208 else
9209 ABG_ASSERT_NOT_REACHED;
9210
9211 string underlying_type_repr;
9212 if (has_underlying_type_die)
9213 underlying_type_repr =
9214 die_qualified_type_name(ctxt, &underlying_type_die, where_offset);
9215 else
9216 underlying_type_repr = "void";
9217
9218 if (underlying_type_repr.empty())
9219 repr.clear();
9220 else
9221 {
9222 if (has_underlying_type_die
9223 && die_is_pointer_or_reference_type(&underlying_type_die))
9224 repr = underlying_type_repr + " " + repr;
9225 else
9226 repr += " " + underlying_type_repr;
9227 }
9228 }
9229 break;
9230
9231 case DW_TAG_pointer_type:
9232 case DW_TAG_reference_type:
9233 case DW_TAG_rvalue_reference_type:
9234 {
9235 Dwarf_Die pointed_to_type_die;
9236 if (!die_die_attribute(die, DW_AT_type, pointed_to_type_die))
9237 {
9238 if (tag == DW_TAG_pointer_type)
9239 repr = "void*";
9240 break;
9241 }
9242
9243 if (die_is_unspecified(&pointed_to_type_die))
9244 break;
9245
9246 string pointed_type_repr =
9247 die_qualified_type_name(ctxt, &pointed_to_type_die, where_offset);
9248
9249 repr = pointed_type_repr;
9250 if (repr.empty())
9251 break;
9252
9253 if (tag == DW_TAG_pointer_type)
9254 repr += "*";
9255 else if (tag == DW_TAG_reference_type)
9256 repr += "&";
9257 else if (tag == DW_TAG_rvalue_reference_type)
9258 repr += "&&";
9259 else
9260 ABG_ASSERT_NOT_REACHED;
9261 }
9262 break;
9263
9264 case DW_TAG_subrange_type:
9265 {
9266 // In Ada, this one can be generated on its own, that is, not
9267 // as a sub-type of an array. So we need to support it on its
9268 // own. Note that when it's emitted as the sub-type of an
9269 // array like in C and C++, this is handled differently, for
9270 // now. But we try to make this usable by other languages
9271 // that are not Ada, even if we modelled it after Ada.
9272
9273 // So we build a subrange type for the sole purpose of using
9274 // the ::as_string() method of that type. So we don't add
9275 // that type to the current type tree being built.
9276 array_type_def::subrange_sptr s =
9277 build_subrange_type(const_cast<read_context&>(ctxt),
9278 die, where_offset,
9279 /*associate_die_to_type=*/false);
9280 repr += s->as_string();
9281 break;
9282 }
9283
9284 case DW_TAG_array_type:
9285 {
9286 Dwarf_Die element_type_die;
9287 if (!die_die_attribute(die, DW_AT_type, element_type_die))
9288 break;
9289 string element_type_name =
9290 die_qualified_type_name(ctxt, &element_type_die, where_offset);
9291 if (element_type_name.empty())
9292 break;
9293
9294 array_type_def::subranges_type subranges;
9295 build_subranges_from_array_type_die(const_cast<read_context&>(ctxt),
9296 die, subranges, where_offset,
9297 /*associate_type_to_die=*/false);
9298
9299 repr = element_type_name;
9300 repr += array_type_def::subrange_type::vector_as_string(subranges);
9301 }
9302 break;
9303
9304 case DW_TAG_subroutine_type:
9305 case DW_TAG_subprogram:
9306 {
9307 string return_type_name;
9308 string class_name;
9309 vector<string> parm_names;
9310 bool is_const = false;
9311 bool is_static = false;
9312
9313 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
9314 /*pretty_print=*/true,
9315 return_type_name, class_name,
9316 parm_names, is_const,
9317 is_static);
9318 if (return_type_name.empty())
9319 return_type_name = "void";
9320
9321 repr = return_type_name;
9322
9323 if (!class_name.empty())
9324 {
9325 // This is a method, so print the class name.
9326 repr += " (" + class_name + "::*)";
9327 }
9328
9329 // Now parameters.
9330 repr += " (";
9331 for (vector<string>::const_iterator i = parm_names.begin();
9332 i != parm_names.end();
9333 ++i)
9334 {
9335 if (i != parm_names.begin())
9336 repr += ", ";
9337 repr += *i;
9338 }
9339 repr += ")";
9340
9341 }
9342 break;
9343
9344 case DW_TAG_string_type:
9345 case DW_TAG_ptr_to_member_type:
9346 case DW_TAG_set_type:
9347 case DW_TAG_file_type:
9348 case DW_TAG_packed_type:
9349 case DW_TAG_thrown_type:
9350 case DW_TAG_interface_type:
9351 case DW_TAG_shared_type:
9352 break;
9353 }
9354
9355 return repr;
9356 }
9357
9358 /// Compute the qualified name of a decl represented by a given DIE.
9359 ///
9360 /// For instance, for a DIE of tag DW_TAG_subprogram this function
9361 /// computes the signature of the function *declaration*.
9362 ///
9363 /// @param ctxt the read context.
9364 ///
9365 /// @param die the DIE to consider.
9366 ///
9367 /// @param where_offset where we are logically at in the DIE stream.
9368 ///
9369 /// @return a copy of the computed name.
9370 static string
die_qualified_decl_name(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9371 die_qualified_decl_name(const read_context& ctxt,
9372 const Dwarf_Die* die,
9373 size_t where_offset)
9374 {
9375 if (!die || !die_is_decl(die))
9376 return "";
9377
9378 string name = die_name(die);
9379
9380 Dwarf_Die scope_die;
9381 if (!get_scope_die(ctxt, die, where_offset, scope_die))
9382 return "";
9383
9384 string scope_name = die_qualified_name(ctxt, &scope_die, where_offset);
9385 string separator = "::";
9386
9387 string repr;
9388
9389 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9390 switch (tag)
9391 {
9392 case DW_TAG_namespace:
9393 case DW_TAG_member:
9394 case DW_TAG_variable:
9395 repr = scope_name.empty() ? name : scope_name + separator + name;
9396 break;
9397 case DW_TAG_subprogram:
9398 repr = die_function_signature(ctxt, die, where_offset);
9399 break;
9400
9401 case DW_TAG_unspecified_parameters:
9402 repr = "...";
9403 break;
9404
9405 case DW_TAG_formal_parameter:
9406 case DW_TAG_imported_declaration:
9407 case DW_TAG_GNU_template_template_param:
9408 case DW_TAG_GNU_template_parameter_pack:
9409 case DW_TAG_GNU_formal_parameter_pack:
9410 break;
9411 }
9412 return repr;
9413 }
9414
9415 /// Compute the qualified name of the artifact represented by a given
9416 /// DIE.
9417 ///
9418 /// If the DIE represents a type, then the function computes the name
9419 /// of the type. Otherwise, if the DIE represents a decl then the
9420 /// function computes the name of the decl. Note that a DIE of tag
9421 /// DW_TAG_subprogram is going to be considered as a "type" -- just
9422 /// like if it was a DW_TAG_subroutine_type.
9423 ///
9424 /// @param ctxt the read context.
9425 ///
9426 /// @param die the DIE to consider.
9427 ///
9428 /// @param where_offset where we are logically at in the DIE stream.
9429 ///
9430 /// @return a copy of the computed name.
9431 static string
die_qualified_name(const read_context & ctxt,const Dwarf_Die * die,size_t where)9432 die_qualified_name(const read_context& ctxt, const Dwarf_Die* die, size_t where)
9433 {
9434 if (die_is_type(die))
9435 return die_qualified_type_name(ctxt, die, where);
9436 else if (die_is_decl(die))
9437 return die_qualified_decl_name(ctxt, die, where);
9438 return "";
9439 }
9440
9441 /// Test if the qualified name of a given type should be empty.
9442 ///
9443 /// The reason why the name of a DIE with a given tag would be empty
9444 /// is that libabigail's internal representation doesn't yet support
9445 /// that tag; or if the DIE's qualified name is built from names of
9446 /// sub-types DIEs whose tags are not yet supported.
9447 ///
9448 /// @param ctxt the reading context.
9449 ///
9450 /// @param die the DIE to consider.
9451 ///
9452 /// @param where where we are logically at, in the DIE stream.
9453 ///
9454 /// @param qualified_name the qualified name of the DIE. This is set
9455 /// only iff the function returns false.
9456 ///
9457 /// @return true if the qualified name of the DIE is empty.
9458 static bool
die_qualified_type_name_empty(const read_context & ctxt,const Dwarf_Die * die,size_t where,string & qualified_name)9459 die_qualified_type_name_empty(const read_context& ctxt,
9460 const Dwarf_Die* die,
9461 size_t where, string &qualified_name)
9462 {
9463 if (!die)
9464 return true;
9465
9466 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9467
9468 string qname;
9469 if (tag == DW_TAG_typedef
9470 || tag == DW_TAG_pointer_type
9471 || tag == DW_TAG_reference_type
9472 || tag == DW_TAG_rvalue_reference_type
9473 || tag == DW_TAG_array_type
9474 || tag == DW_TAG_const_type
9475 || tag == DW_TAG_volatile_type
9476 || tag == DW_TAG_restrict_type)
9477 {
9478 Dwarf_Die underlying_type_die;
9479 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
9480 {
9481 string name =
9482 die_qualified_type_name(ctxt, &underlying_type_die, where);
9483 if (name.empty())
9484 return true;
9485 }
9486 }
9487 else
9488 {
9489 string name = die_qualified_type_name(ctxt, die, where);
9490 if (name.empty())
9491 return true;
9492 }
9493
9494 qname = die_qualified_type_name(ctxt, die, where);
9495 if (qname.empty())
9496 return true;
9497
9498 qualified_name = qname;
9499 return false;
9500 }
9501
9502 /// Given the DIE that represents a function type, compute the names
9503 /// of the following properties the function's type:
9504 ///
9505 /// - return type
9506 /// - enclosing class (if the function is a member function)
9507 /// - function parameter types
9508 ///
9509 /// When the function we are looking at is a member function, it also
9510 /// tells if it's const.
9511 ///
9512 /// @param ctxt the reading context.
9513 ///
9514 /// @param die the DIE of the function or function type we are looking
9515 /// at.
9516 ///
9517 /// @param where_offset where we are logically at in the DIE stream.
9518 ///
9519 /// @param pretty_print if set to yes, the type names are going to be
9520 /// pretty-printed names; otherwise, they are just qualified type
9521 /// names.
9522 ///
9523 /// @param return_type_name out parameter. This contains the name of
9524 /// the return type of the function.
9525 ///
9526 /// @param class_name out parameter. If the function is a member
9527 /// function, this contains the name of the enclosing class.
9528 ///
9529 /// @param parm_names out parameter. This vector is set to the names
9530 /// of the types of the parameters of the function.
9531 ///
9532 /// @param is_const out parameter. If the function is a member
9533 /// function, this is set to true iff the member function is const.
9534 ///
9535 /// @param is_static out parameter. If the function is a static
9536 /// member function, then this is set to true.
9537 static void
die_return_and_parm_names_from_fn_type_die(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,bool pretty_print,string & return_type_name,string & class_name,vector<string> & parm_names,bool & is_const,bool & is_static)9538 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
9539 const Dwarf_Die* die,
9540 size_t where_offset,
9541 bool pretty_print,
9542 string &return_type_name,
9543 string &class_name,
9544 vector<string>& parm_names,
9545 bool& is_const,
9546 bool& is_static)
9547 {
9548 Dwarf_Die child;
9549 Dwarf_Die ret_type_die;
9550 if (!die_die_attribute(die, DW_AT_type, ret_type_die))
9551 return_type_name = "void";
9552 else
9553 return_type_name =
9554 pretty_print
9555 ? ctxt.get_die_pretty_representation(&ret_type_die, where_offset)
9556 : ctxt.get_die_qualified_type_name(&ret_type_die, where_offset);
9557
9558 if (return_type_name.empty())
9559 return_type_name = "void";
9560
9561 Dwarf_Die object_pointer_die, class_die;
9562 bool is_method_type =
9563 die_function_type_is_method_type(ctxt, die, where_offset,
9564 object_pointer_die,
9565 class_die, is_static);
9566
9567 is_const = false;
9568 if (is_method_type)
9569 {
9570 class_name = ctxt.get_die_qualified_type_name(&class_die, where_offset);
9571
9572 Dwarf_Die this_pointer_die;
9573 Dwarf_Die pointed_to_die;
9574 if (!is_static
9575 && die_die_attribute(&object_pointer_die, DW_AT_type,
9576 this_pointer_die))
9577 if (die_die_attribute(&this_pointer_die, DW_AT_type, pointed_to_die))
9578 if (dwarf_tag(&pointed_to_die) == DW_TAG_const_type)
9579 is_const = true;
9580
9581 string fn_name = die_name(die);
9582 string non_qualified_class_name = die_name(&class_die);
9583 bool is_ctor = fn_name == non_qualified_class_name;
9584 bool is_dtor = !fn_name.empty() && fn_name[0] == '~';
9585
9586 if (is_ctor || is_dtor)
9587 return_type_name.clear();
9588 }
9589
9590 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
9591 do
9592 {
9593 int child_tag = dwarf_tag(&child);
9594 if (child_tag == DW_TAG_formal_parameter)
9595 {
9596 Dwarf_Die parm_type_die;
9597 if (!die_die_attribute(&child, DW_AT_type, parm_type_die))
9598 continue;
9599 string qualified_name =
9600 pretty_print
9601 ? ctxt.get_die_pretty_representation(&parm_type_die, where_offset)
9602 : ctxt.get_die_qualified_type_name(&parm_type_die, where_offset);
9603
9604 if (qualified_name.empty())
9605 continue;
9606 parm_names.push_back(qualified_name);
9607 }
9608 else if (child_tag == DW_TAG_unspecified_parameters)
9609 {
9610 // This is a variadic function parameter.
9611 parm_names.push_back("variadic parameter type");
9612 // After a DW_TAG_unspecified_parameters tag, we shouldn't
9613 // keep reading for parameters. The
9614 // unspecified_parameters TAG should be the last parameter
9615 // that we record. For instance, if there are multiple
9616 // DW_TAG_unspecified_parameters DIEs then we should care
9617 // only for the first one.
9618 break;
9619 }
9620 }
9621 while (dwarf_siblingof(&child, &child) == 0);
9622
9623 if (class_name.empty())
9624 {
9625 Dwarf_Die parent_die;
9626 if (get_parent_die(ctxt, die, parent_die, where_offset))
9627 {
9628 if (die_is_class_type(&parent_die))
9629 class_name =
9630 ctxt.get_die_qualified_type_name(&parent_die, where_offset);
9631 }
9632 }
9633 }
9634
9635 /// This computes the signature of the a function declaration
9636 /// represented by a DIE.
9637 ///
9638 /// @param ctxt the reading context.
9639 ///
9640 /// @param fn_die the DIE of the function to consider.
9641 ///
9642 /// @param where_offset where we are logically at in the stream of
9643 /// DIEs.
9644 ///
9645 /// @return a copy of the computed function signature string.
9646 static string
die_function_signature(const read_context & ctxt,const Dwarf_Die * fn_die,size_t where_offset)9647 die_function_signature(const read_context& ctxt,
9648 const Dwarf_Die *fn_die,
9649 size_t where_offset)
9650 {
9651
9652 translation_unit::language lang;
9653 bool has_lang = false;
9654 if ((has_lang = ctxt.get_die_language(fn_die, lang)))
9655 {
9656 // In a binary originating from the C language, it's OK to use
9657 // the linkage name of the function as a key for the map which
9658 // is meant to reduce the number of DIE comparisons involved
9659 // during DIE canonicalization computation.
9660 if (is_c_language(lang))
9661 {
9662 string fn_name = die_linkage_name(fn_die);
9663 if (fn_name.empty())
9664 fn_name = die_name(fn_die);
9665 return fn_name;
9666 }
9667 }
9668
9669 // TODO: When we can structurally compare DIEs originating from C++
9670 // as well, we can use the linkage name of functions in C++ too, to
9671 // reduce the number of comparisons involved during DIE
9672 // canonicalization.
9673
9674 string return_type_name;
9675 Dwarf_Die ret_type_die;
9676 if (die_die_attribute(fn_die, DW_AT_type, ret_type_die))
9677 return_type_name = ctxt.get_die_qualified_type_name(&ret_type_die,
9678 where_offset);
9679
9680 if (return_type_name.empty())
9681 return_type_name = "void";
9682
9683 Dwarf_Die scope_die;
9684 string scope_name;
9685 if (get_scope_die(ctxt, fn_die, where_offset, scope_die))
9686 scope_name = ctxt.get_die_qualified_name(&scope_die, where_offset);
9687 string fn_name = die_name(fn_die);
9688 if (!scope_name.empty())
9689 fn_name = scope_name + "::" + fn_name;
9690
9691 string class_name;
9692 vector<string> parm_names;
9693 bool is_const = false;
9694 bool is_static = false;
9695
9696 die_return_and_parm_names_from_fn_type_die(ctxt, fn_die, where_offset,
9697 /*pretty_print=*/false,
9698 return_type_name, class_name,
9699 parm_names, is_const, is_static);
9700
9701 bool is_virtual = die_is_virtual(fn_die);
9702
9703 string repr = class_name.empty() ? "function" : "method";
9704 if (is_virtual)
9705 repr += " virtual";
9706
9707 if (!return_type_name.empty())
9708 repr += " " + return_type_name;
9709
9710 repr += " " + fn_name;
9711
9712 // Now parameters.
9713 repr += "(";
9714 bool some_parm_emitted = false;
9715 for (vector<string>::const_iterator i = parm_names.begin();
9716 i != parm_names.end();
9717 ++i)
9718 {
9719 if (i != parm_names.begin())
9720 {
9721 if (some_parm_emitted)
9722 repr += ", ";
9723 }
9724 else
9725 if (!is_static && !class_name.empty())
9726 // We are printing a non-static method name, skip the implicit "this"
9727 // parameter type.
9728 continue;
9729 repr += *i;
9730 some_parm_emitted = true;
9731 }
9732 repr += ")";
9733
9734 if (is_const)
9735 {
9736 ABG_ASSERT(!class_name.empty());
9737 repr += " const";
9738 }
9739
9740 return repr;
9741 }
9742
9743 /// Return a pretty string representation of a type, for internal purposes.
9744 ///
9745 /// By internal purpose, we mean things like key-ing types for lookup
9746 /// purposes and so on.
9747 ///
9748 /// Note that this function is also used to pretty print functions.
9749 /// For functions, it prints the *type* of the function.
9750 ///
9751 /// @param ctxt the context to use.
9752 ///
9753 /// @param the DIE of the type to pretty print.
9754 ///
9755 /// @param where_offset where we logically are placed when calling
9756 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
9757 /// entries.
9758 ///
9759 /// @return the resulting pretty representation.
9760 static string
die_pretty_print_type(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9761 die_pretty_print_type(read_context& ctxt,
9762 const Dwarf_Die* die,
9763 size_t where_offset)
9764 {
9765 if (!die
9766 || (!die_is_type(die)
9767 && dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_subprogram))
9768 return "";
9769
9770 string repr;
9771
9772 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9773 switch (tag)
9774 {
9775 case DW_TAG_string_type:
9776 // For now, we won't try to go get the actual representation of
9777 // the string because this would make things more complicated;
9778 // for that we'd need to interpret some location expressions to
9779 // get the length of the string. And for dynamically allocated
9780 // strings, the result of the location expression evaluation
9781 // might not even be a constant. So at the moment I consider
9782 // this to be a lot of hassle for no great return. Until proven
9783 // otherwise, of course.
9784 repr = "string type";
9785 break;
9786
9787 case DW_TAG_unspecified_type:
9788 case DW_TAG_ptr_to_member_type:
9789 break;
9790
9791 case DW_TAG_namespace:
9792 repr = "namespace " + ctxt.get_die_qualified_type_name(die, where_offset);
9793 break;
9794
9795 case DW_TAG_base_type:
9796 repr = ctxt.get_die_qualified_type_name(die, where_offset);
9797 break;
9798
9799 case DW_TAG_typedef:
9800 {
9801 string qualified_name;
9802 if (!die_qualified_type_name_empty(ctxt, die,
9803 where_offset,
9804 qualified_name))
9805 repr = "typedef " + qualified_name;
9806 }
9807 break;
9808
9809 case DW_TAG_const_type:
9810 case DW_TAG_volatile_type:
9811 case DW_TAG_restrict_type:
9812 case DW_TAG_pointer_type:
9813 case DW_TAG_reference_type:
9814 case DW_TAG_rvalue_reference_type:
9815 repr = ctxt.get_die_qualified_type_name(die, where_offset);
9816 break;
9817
9818 case DW_TAG_enumeration_type:
9819 {
9820 string qualified_name =
9821 ctxt.get_die_qualified_type_name(die, where_offset);
9822 repr = "enum " + qualified_name;
9823 }
9824 break;
9825
9826 case DW_TAG_structure_type:
9827 case DW_TAG_class_type:
9828 {
9829 string qualified_name =
9830 ctxt.get_die_qualified_type_name(die, where_offset);
9831 repr = "class " + qualified_name;
9832 }
9833 break;
9834
9835 case DW_TAG_union_type:
9836 {
9837 string qualified_name =
9838 ctxt.get_die_qualified_type_name(die, where_offset);
9839 repr = "union " + qualified_name;
9840 }
9841 break;
9842
9843 case DW_TAG_array_type:
9844 {
9845 Dwarf_Die element_type_die;
9846 if (!die_die_attribute(die, DW_AT_type, element_type_die))
9847 break;
9848 string element_type_name =
9849 ctxt.get_die_qualified_type_name(&element_type_die, where_offset);
9850 if (element_type_name.empty())
9851 break;
9852
9853 array_type_def::subranges_type subranges;
9854 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset,
9855 /*associate_type_to_die=*/false);
9856
9857 repr = element_type_name;
9858 repr += array_type_def::subrange_type::vector_as_string(subranges);
9859 }
9860 break;
9861
9862 case DW_TAG_subrange_type:
9863 {
9864 // So this can be generated by Ada, on its own; that is, not
9865 // as a subtype of an array. In that case we need to handle
9866 // it properly.
9867
9868 // For now, we consider that the pretty printed name of the
9869 // subrange type is its name. We might need something more
9870 // advance, should the needs of the users get more
9871 // complicated.
9872 repr += die_qualified_type_name(ctxt, die, where_offset);
9873 }
9874 break;
9875
9876 case DW_TAG_subroutine_type:
9877 case DW_TAG_subprogram:
9878 {
9879 string return_type_name;
9880 string class_name;
9881 vector<string> parm_names;
9882 bool is_const = false;
9883 bool is_static = false;
9884
9885 die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
9886 /*pretty_print=*/true,
9887 return_type_name, class_name,
9888 parm_names, is_const,
9889 is_static);
9890 if (class_name.empty())
9891 repr = "function type";
9892 else
9893 repr = "method type";
9894 repr += " " + ctxt.get_die_qualified_type_name(die, where_offset);
9895 }
9896 break;
9897
9898 case DW_TAG_set_type:
9899 case DW_TAG_file_type:
9900 case DW_TAG_packed_type:
9901 case DW_TAG_thrown_type:
9902 case DW_TAG_interface_type:
9903 case DW_TAG_shared_type:
9904 ABG_ASSERT_NOT_REACHED;
9905 }
9906
9907 return repr;
9908 }
9909
9910 /// Return a pretty string representation of a declaration, for
9911 /// internal purposes.
9912 ///
9913 /// By internal purpose, we mean things like key-ing declarations for
9914 /// lookup purposes and so on.
9915 ///
9916 /// Note that this function is also used to pretty print functions.
9917 /// For functions, it prints the signature of the function.
9918 ///
9919 /// @param ctxt the context to use.
9920 ///
9921 /// @param the DIE of the declaration to pretty print.
9922 ///
9923 /// @param where_offset where we logically are placed when calling
9924 /// this. It's useful to handle inclusion of DW_TAG_compile_unit
9925 /// entries.
9926 ///
9927 /// @return the resulting pretty representation.
9928 static string
die_pretty_print_decl(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9929 die_pretty_print_decl(read_context& ctxt,
9930 const Dwarf_Die* die,
9931 size_t where_offset)
9932 {
9933 if (!die || !die_is_decl(die))
9934 return "";
9935
9936 string repr;
9937
9938 int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9939 switch (tag)
9940 {
9941 case DW_TAG_namespace:
9942 repr = "namespace " + die_qualified_name(ctxt, die, where_offset);
9943 break;
9944
9945 case DW_TAG_member:
9946 case DW_TAG_variable:
9947 {
9948 string type_repr = "void";
9949 Dwarf_Die type_die;
9950 if (die_die_attribute(die, DW_AT_type, type_die))
9951 type_repr = die_qualified_type_name(ctxt, &type_die, where_offset);
9952 repr = die_qualified_name(ctxt, die, where_offset);
9953 if (!repr.empty())
9954 repr = type_repr + " " + repr;
9955 }
9956 break;
9957
9958 case DW_TAG_subprogram:
9959 repr = die_function_signature(ctxt, die, where_offset);
9960 break;
9961
9962 default:
9963 break;
9964 }
9965 return repr;
9966 }
9967
9968 /// Compute the pretty printed representation of an artifact
9969 /// represented by a DIE.
9970 ///
9971 /// If the DIE is a type, compute the its pretty representation as a
9972 /// type; otherwise, if it's a declaration, compute its pretty
9973 /// representation as a declaration. Note for For instance, that a
9974 /// DW_TAG_subprogram DIE is going to be represented as a function
9975 /// *type*.
9976 ///
9977 /// @param ctxt the reading context.
9978 ///
9979 /// @param die the DIE to consider.
9980 ///
9981 /// @param where_offset we in the DIE stream we are logically at.
9982 ///
9983 /// @return a copy of the pretty printed artifact.
9984 static string
die_pretty_print(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9985 die_pretty_print(read_context& ctxt, const Dwarf_Die* die, size_t where_offset)
9986 {
9987 if (die_is_type(die))
9988 return die_pretty_print_type(ctxt, die, where_offset);
9989 else if (die_is_decl(die))
9990 return die_pretty_print_decl(ctxt, die, where_offset);
9991 return "";
9992 }
9993
9994 // -----------------------------------
9995 // </die pretty printer>
9996 // -----------------------------------
9997
9998
9999 // ----------------------------------
10000 // <die comparison engine>
10001 // ---------------------------------
10002
10003 /// Compares two decls DIEs
10004 ///
10005 /// This works only for DIEs emitted by the C language.
10006 ///
10007 /// This implementation doesn't yet support namespaces.
10008 ///
10009 /// This is a subroutine of compare_dies.
10010 ///
10011 /// @return true iff @p l equals @p r.
10012 static bool
compare_as_decl_dies(const Dwarf_Die * l,const Dwarf_Die * r)10013 compare_as_decl_dies(const Dwarf_Die *l, const Dwarf_Die *r)
10014 {
10015 ABG_ASSERT(l && r);
10016
10017 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
10018 int r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
10019 if (l_tag != r_tag)
10020 return false;
10021
10022 bool result = false;
10023
10024 if (l_tag == DW_TAG_subprogram || l_tag == DW_TAG_variable)
10025 {
10026 // Fast path for functions and global variables.
10027 if (compare_dies_string_attribute_value(l, r, DW_AT_linkage_name,
10028 result)
10029 || compare_dies_string_attribute_value(l, r, DW_AT_MIPS_linkage_name,
10030 result))
10031 {
10032 if (!result)
10033 return false;
10034 }
10035
10036 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
10037 result))
10038 {
10039 if (!result)
10040 return false;
10041 }
10042 return true;
10043 }
10044
10045 // Fast path for types.
10046 if (compare_dies_string_attribute_value(l, r, DW_AT_name,
10047 result))
10048 return result;
10049 return true;
10050 }
10051
10052 /// Compares two type DIEs
10053 ///
10054 /// This is a subroutine of compare_dies.
10055 ///
10056 /// @param l the left operand of the comparison operator.
10057 ///
10058 /// @param r the right operand of the comparison operator.
10059 ///
10060 /// @return true iff @p l equals @p r.
10061 static bool
compare_as_type_dies(const Dwarf_Die * l,const Dwarf_Die * r)10062 compare_as_type_dies(const Dwarf_Die *l, const Dwarf_Die *r)
10063 {
10064 ABG_ASSERT(l && r);
10065 ABG_ASSERT(die_is_type(l));
10066 ABG_ASSERT(die_is_type(r));
10067
10068 if (dwarf_tag(const_cast<Dwarf_Die*>(l)) == DW_TAG_string_type
10069 && dwarf_tag(const_cast<Dwarf_Die*>(r)) == DW_TAG_string_type
10070 && (dwarf_dieoffset(const_cast<Dwarf_Die*>(l))
10071 != dwarf_dieoffset(const_cast<Dwarf_Die*>(r))))
10072 // For now, we cannot compare DW_TAG_string_type because of its
10073 // string_length attribute that is a location descriptor that is
10074 // not necessarily a constant. So it's super hard to evaluate it
10075 // in a libabigail context. So for now, we just say that all
10076 // DW_TAG_string_type DIEs are different, by default.
10077 return false;
10078
10079 uint64_t l_size = 0, r_size = 0;
10080 die_size_in_bits(l, l_size);
10081 die_size_in_bits(r, r_size);
10082
10083 return l_size == r_size;
10084 }
10085
10086 /// Test if two DIEs representing function declarations have the same
10087 /// linkage name, and thus are considered equal if they are C or C++,
10088 /// because the two DIEs represent functions in the same binary.
10089 ///
10090 /// If the DIEs don't have a linkage name, the function compares their
10091 /// name. But in that case, the caller of the function must know that
10092 /// in C++ for instance, that doesn't imply that the two functions are
10093 /// equal.
10094 ///
10095 /// @param ctxt the @ref read_context to consider.
10096 ///
10097 /// @param l the first function DIE to consider.
10098 ///
10099 /// @param r the second function DIE to consider.
10100 ///
10101 /// @return true iff the function represented by @p l have the same
10102 /// linkage name as the function represented by @p r.
10103 static bool
fn_die_equal_by_linkage_name(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r)10104 fn_die_equal_by_linkage_name(const read_context &ctxt,
10105 const Dwarf_Die *l,
10106 const Dwarf_Die *r)
10107 {
10108 if (!!l != !!r)
10109 return false;
10110
10111 if (!l)
10112 return false;
10113
10114 int tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
10115 ABG_ASSERT(tag == DW_TAG_subprogram);
10116 tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
10117 ABG_ASSERT(tag == DW_TAG_subprogram);
10118
10119 string lname = die_name(l), rname = die_name(r);
10120 string llinkage_name = die_linkage_name(l),
10121 rlinkage_name = die_linkage_name(r);
10122
10123 if (ctxt.die_is_in_c_or_cplusplus(l)
10124 && ctxt.die_is_in_c_or_cplusplus(r))
10125 {
10126 if (!llinkage_name.empty() && !rlinkage_name.empty())
10127 return llinkage_name == rlinkage_name;
10128 else if (!!llinkage_name.empty() != !!rlinkage_name.empty())
10129 return false;
10130 else
10131 return lname == rname;
10132 }
10133
10134 return (!llinkage_name.empty()
10135 && !rlinkage_name.empty()
10136 && llinkage_name == rlinkage_name);
10137 }
10138
10139 /// Test if the pair of offset {p1,p2} is present in a set.
10140 ///
10141 /// @param set the set of pairs of DWARF offsets to consider.
10142 ///
10143 /// @param p1 the first value of the pair.
10144 ///
10145 /// @param p2 the second value of the pair.
10146 ///
10147 /// @return if the pair {p1,p2} is present in the set.
10148 static bool
has_offset_pair(const dwarf_offset_pair_set_type & set,Dwarf_Off p1,Dwarf_Off p2)10149 has_offset_pair(const dwarf_offset_pair_set_type& set,
10150 Dwarf_Off p1, Dwarf_Off p2)
10151 {
10152 if (set.find(std::make_pair(p1, p2)) != set.end())
10153 return true;
10154 return false;
10155 }
10156
10157 /// Insert a new pair of offset into the set of pair.
10158 ///
10159 /// @param set the set of pairs of DWARF offsets to consider.
10160 ///
10161 /// @param p1 the first value of the pair.
10162 ///
10163 /// @param p2 the second value of the pair.
10164 static void
insert_offset_pair(dwarf_offset_pair_set_type & set,Dwarf_Off p1,Dwarf_Off p2)10165 insert_offset_pair(dwarf_offset_pair_set_type& set, Dwarf_Off p1, Dwarf_Off p2)
10166 {set.insert(std::make_pair(p1, p2));}
10167
10168 /// Erase a pair of DWARF offset from a set of pairs.
10169 ///
10170 ///
10171 /// @param set the set of pairs of DWARF offsets to consider.
10172 ///
10173 /// @param p1 the first value of the pair.
10174 ///
10175 /// @param p2 the second value of the pair.
10176 static void
erase_offset_pair(dwarf_offset_pair_set_type & set,Dwarf_Off p1,Dwarf_Off p2)10177 erase_offset_pair(dwarf_offset_pair_set_type& set, Dwarf_Off p1, Dwarf_Off p2)
10178 {
10179 std::pair<Dwarf_Off, Dwarf_Off> p(p1, p2);
10180 set.erase(p);
10181 }
10182
10183 /// Compare two DIEs emitted by a C compiler.
10184 ///
10185 /// @param ctxt the read context used to load the DWARF information.
10186 ///
10187 /// @param l the left-hand-side argument of this comparison operator.
10188 ///
/// @param r the right-hand-side argument of this comparison operator.
10190 ///
/// @param aggregates_being_compared this holds the pairs of DIE
/// offsets of the aggregates currently being compared.  It's used
/// by the comparison function to avoid recursing infinitely when
/// faced with types referencing themselves through pointers or
/// references.  By default, just pass an empty instance of @ref
/// dwarf_offset_pair_set_type to it.
10197 ///
10198 /// @param update_canonical_dies_on_the_fly if true, when two
10199 /// sub-types compare equal (during the comparison of @p l and @p r)
10200 /// update their canonical type. That way, two types of the same name
10201 /// are structurally compared to each other only once. So the
10202 /// non-linear structural comparison of two types of the same name
10203 /// only happen once.
10204 ///
10205 /// @return true iff @p l equals @p r.
10206 static bool
compare_dies(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r,dwarf_offset_pair_set_type & aggregates_being_compared,bool update_canonical_dies_on_the_fly)10207 compare_dies(const read_context& ctxt,
10208 const Dwarf_Die *l, const Dwarf_Die *r,
10209 dwarf_offset_pair_set_type& aggregates_being_compared,
10210 bool update_canonical_dies_on_the_fly)
10211 {
10212 ABG_ASSERT(l);
10213 ABG_ASSERT(r);
10214
10215 int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l)),
10216 r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
10217
10218 if (l_tag != r_tag)
10219 return false;
10220
10221 Dwarf_Off l_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(l)),
10222 r_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(r));
10223
10224 if (l_offset == r_offset)
10225 return true;
10226 auto& visit = ctxt.die_comparison_visits_[std::make_pair(l_offset, r_offset)];
10227 if (visit == 10000)
10228 return true;
10229 else
10230 ++visit;
10231
10232 Dwarf_Off l_canonical_die_offset = 0, r_canonical_die_offset = 0;
10233 const die_source l_die_source = ctxt.get_die_source(l);
10234 const die_source r_die_source = ctxt.get_die_source(r);
10235
10236 // If 'l' and 'r' already have canonical DIEs, then just compare the
10237 // offsets of their canonical DIEs.
10238 bool l_has_canonical_die_offset =
10239 (l_canonical_die_offset =
10240 ctxt.get_canonical_die_offset(l_offset, l_die_source,
10241 /*die_as_type=*/true));
10242
10243 bool r_has_canonical_die_offset =
10244 (r_canonical_die_offset =
10245 ctxt.get_canonical_die_offset(r_offset, r_die_source,
10246 /*die_as_type=*/true));
10247
10248 if (l_has_canonical_die_offset && r_has_canonical_die_offset)
10249 return l_canonical_die_offset == r_canonical_die_offset;
10250
10251 bool result = true;
10252 bool aggregate_redundancy_detected = false;
10253
10254 switch (l_tag)
10255 {
10256 case DW_TAG_base_type:
10257 case DW_TAG_string_type:
10258 if (!compare_as_type_dies(l, r)
10259 || !compare_as_decl_dies(l, r))
10260 result = false;
10261 break;
10262
10263 case DW_TAG_typedef:
10264 case DW_TAG_pointer_type:
10265 case DW_TAG_reference_type:
10266 case DW_TAG_rvalue_reference_type:
10267 case DW_TAG_const_type:
10268 case DW_TAG_volatile_type:
10269 case DW_TAG_restrict_type:
10270 {
10271 if (!compare_as_type_dies(l, r))
10272 {
10273 result = false;
10274 break;
10275 }
10276
10277 bool from_the_same_tu = false;
10278 if (!pointer_or_qual_die_of_anonymous_class_type(l)
10279 && compare_dies_cu_decl_file(l, r, from_the_same_tu)
10280 && from_the_same_tu)
10281 {
10282 // These two typedefs, pointer, reference, or qualified
10283 // types have the same name and are defined in the same TU.
10284 // They thus ought to be the same.
10285 //
10286 // Note that pointers, reference or qualified types to
10287 // anonymous types are not taking into account here because
10288 // those always need to be structurally compared.
10289 result = true;
10290 break;
10291 }
10292 }
10293
10294 {
10295 // No fancy optimization in this case. We need to
10296 // structurally compare the two DIEs.
10297 Dwarf_Die lu_type_die, ru_type_die;
10298 bool lu_is_void, ru_is_void;
10299
10300 lu_is_void = !die_die_attribute(l, DW_AT_type, lu_type_die);
10301 ru_is_void = !die_die_attribute(r, DW_AT_type, ru_type_die);
10302
10303 if (lu_is_void && ru_is_void)
10304 result = true;
10305 else if (lu_is_void != ru_is_void)
10306 result = false;
10307 else
10308 result = compare_dies(ctxt, &lu_type_die, &ru_type_die,
10309 aggregates_being_compared,
10310 update_canonical_dies_on_the_fly);
10311 }
10312 break;
10313
10314 case DW_TAG_enumeration_type:
10315 if (!compare_as_type_dies(l, r)
10316 || !compare_as_decl_dies(l, r))
10317 result = false;
10318 else
10319 {
10320 // Walk the enumerators.
10321 Dwarf_Die l_enumtor, r_enumtor;
10322 bool found_l_enumtor, found_r_enumtor;
10323
10324 for (found_l_enumtor = dwarf_child(const_cast<Dwarf_Die*>(l),
10325 &l_enumtor) == 0,
10326 found_r_enumtor = dwarf_child(const_cast<Dwarf_Die*>(r),
10327 &r_enumtor) == 0;
10328 found_l_enumtor && found_r_enumtor;
10329 found_l_enumtor = dwarf_siblingof(&l_enumtor, &l_enumtor) == 0,
10330 found_r_enumtor = dwarf_siblingof(&r_enumtor, &r_enumtor) == 0)
10331 {
10332 int l_tag = dwarf_tag(&l_enumtor), r_tag = dwarf_tag(&r_enumtor);
10333 if ( l_tag != r_tag)
10334 {
10335 result = false;
10336 break;
10337 }
10338
10339 if (l_tag != DW_TAG_enumerator)
10340 continue;
10341
10342 uint64_t l_val = 0, r_val = 0;
10343 die_unsigned_constant_attribute(&l_enumtor,
10344 DW_AT_const_value,
10345 l_val);
10346 die_unsigned_constant_attribute(&r_enumtor,
10347 DW_AT_const_value,
10348 r_val);
10349 if (l_val != r_val)
10350 {
10351 result = false;
10352 break;
10353 }
10354 }
10355 if (found_l_enumtor != found_r_enumtor )
10356 result = false;
10357
10358 }
10359 break;
10360
10361 case DW_TAG_structure_type:
10362 case DW_TAG_union_type:
10363 {
10364 if (has_offset_pair(aggregates_being_compared,
10365 die_offset(l), die_offset(r)))
10366 {
10367 result = true;
10368 aggregate_redundancy_detected = true;
10369 break;
10370 }
10371 else if (!compare_as_decl_dies(l, r) || !compare_as_type_dies(l, r))
10372 result = false;
10373 else
10374 {
10375 insert_offset_pair(aggregates_being_compared,
10376 die_offset(l), die_offset(r));
10377 Dwarf_Die l_member, r_member;
10378 bool found_l_member, found_r_member;
10379 for (found_l_member = dwarf_child(const_cast<Dwarf_Die*>(l),
10380 &l_member) == 0,
10381 found_r_member = dwarf_child(const_cast<Dwarf_Die*>(r),
10382 &r_member) == 0;
10383 found_l_member && found_r_member;
10384 found_l_member = dwarf_siblingof(&l_member, &l_member) == 0,
10385 found_r_member = dwarf_siblingof(&r_member, &r_member) == 0)
10386 {
10387 int l_tag = dwarf_tag(&l_member), r_tag = dwarf_tag(&r_member);
10388 if (l_tag != r_tag)
10389 {
10390 result = false;
10391 break;
10392 }
10393
10394 if (l_tag != DW_TAG_member && l_tag != DW_TAG_variable)
10395 continue;
10396
10397 if (!compare_dies(ctxt, &l_member, &r_member,
10398 aggregates_being_compared,
10399 update_canonical_dies_on_the_fly))
10400 {
10401 result = false;
10402 break;
10403 }
10404 }
10405 if (found_l_member != found_r_member)
10406 result = false;
10407
10408 erase_offset_pair(aggregates_being_compared,
10409 die_offset(l), die_offset(r));
10410 }
10411 }
10412 break;
10413
10414 case DW_TAG_array_type:
10415 {
10416 Dwarf_Die l_child, r_child;
10417 bool found_l_child, found_r_child;
10418 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
10419 &l_child) == 0,
10420 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
10421 &r_child) == 0;
10422 found_l_child && found_r_child;
10423 found_l_child = dwarf_siblingof(&l_child, &l_child) == 0,
10424 found_r_child = dwarf_siblingof(&r_child, &r_child) == 0)
10425 {
10426 int l_child_tag = dwarf_tag(&l_child),
10427 r_child_tag = dwarf_tag(&r_child);
10428 if (l_child_tag == DW_TAG_subrange_type
10429 || r_child_tag == DW_TAG_subrange_type)
10430 if (!compare_dies(ctxt, &l_child, &r_child,
10431 aggregates_being_compared,
10432 update_canonical_dies_on_the_fly))
10433 {
10434 result = false;
10435 break;
10436 }
10437 }
10438 if (found_l_child != found_r_child)
10439 result = false;
10440 // Compare the types of the elements of the array.
10441 Dwarf_Die ltype_die, rtype_die;
10442 bool found_ltype = die_die_attribute(l, DW_AT_type, ltype_die);
10443 bool found_rtype = die_die_attribute(r, DW_AT_type, rtype_die);
10444 ABG_ASSERT(found_ltype && found_rtype);
10445
10446 if (!compare_dies(ctxt, <ype_die, &rtype_die,
10447 aggregates_being_compared,
10448 update_canonical_dies_on_the_fly))
10449 return false;
10450 }
10451 break;
10452
10453 case DW_TAG_subrange_type:
10454 {
10455 uint64_t l_lower_bound = 0, r_lower_bound = 0,
10456 l_upper_bound = 0, r_upper_bound = 0;
10457 die_unsigned_constant_attribute(l, DW_AT_lower_bound, l_lower_bound);
10458 die_unsigned_constant_attribute(r, DW_AT_lower_bound, r_lower_bound);
10459 if (!die_unsigned_constant_attribute(l, DW_AT_upper_bound,
10460 l_upper_bound))
10461 {
10462 uint64_t l_count = 0;
10463 if (die_unsigned_constant_attribute(l, DW_AT_count, l_count))
10464 {
10465 l_upper_bound = l_lower_bound + l_count;
10466 if (l_upper_bound)
10467 --l_upper_bound;
10468 }
10469 }
10470 if (!die_unsigned_constant_attribute(r, DW_AT_upper_bound,
10471 r_upper_bound))
10472 {
10473 uint64_t r_count = 0;
10474 if (die_unsigned_constant_attribute(l, DW_AT_count, r_count))
10475 {
10476 r_upper_bound = r_lower_bound + r_count;
10477 if (r_upper_bound)
10478 --r_upper_bound;
10479 }
10480 }
10481
10482 if ((l_lower_bound != r_lower_bound)
10483 || (l_upper_bound != r_upper_bound))
10484 result = false;
10485 }
10486 break;
10487
10488 case DW_TAG_subroutine_type:
10489 case DW_TAG_subprogram:
10490 {
10491 interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
10492 interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
10493
10494 if (has_offset_pair(aggregates_being_compared, die_offset(l),
10495 die_offset(r)))
10496 {
10497 result = true;
10498 aggregate_redundancy_detected = true;
10499 break;
10500 }
10501 else if (l_tag == DW_TAG_subroutine_type)
10502 {
10503 // So, we are looking at types that are pointed to by a
10504 // function pointer. These are not real concrete function
10505 // types, rather, they denote interfaces of functions.
10506 //
10507 // If the textual representations are different, then
10508 // obviously they are different DIEs.
10509 if (ln != rn)
10510 {
10511 result = false;
10512 break;
10513 }
10514
10515 // So if their textual representation are the same and
10516 // they come from the same TU, then they represent the
10517 // same DIE.
10518 bool from_the_same_tu = false;
10519 if (compare_dies_cu_decl_file(l, r, from_the_same_tu)
10520 && from_the_same_tu)
10521 {
10522 result = true;
10523 break;
10524 }
10525 }
10526
10527 if (l_tag == DW_TAG_subprogram
10528 && !fn_die_equal_by_linkage_name(ctxt, l, r))
10529 {
10530 result = false;
10531 break;
10532 }
10533 else if (l_tag == DW_TAG_subprogram
10534 && ctxt.die_is_in_c(l) && ctxt.die_is_in_c(r)
10535 /*&& fn_die_equal_by_linkage_name(ctxt, l, r)*/)
10536 {
10537 result = true;
10538 break;
10539 }
10540 else if (!ctxt.die_is_in_c(l) && !ctxt.die_is_in_c(r))
10541 {
10542 // In C, we cannot have two different functions with the
10543 // same linkage name in a given binary. But here we are
10544 // looking at DIEs that don't originate from C. So we
10545 // need to compare return types and parameter types.
10546 Dwarf_Die l_return_type, r_return_type;
10547 bool l_return_type_is_void = !die_die_attribute(l, DW_AT_type,
10548 l_return_type);
10549 bool r_return_type_is_void = !die_die_attribute(r, DW_AT_type,
10550 r_return_type);
10551 if (l_return_type_is_void != r_return_type_is_void
10552 || (!l_return_type_is_void
10553 && !compare_dies(ctxt,
10554 &l_return_type, &r_return_type,
10555 aggregates_being_compared,
10556 update_canonical_dies_on_the_fly)))
10557 result = false;
10558 else
10559 {
10560 Dwarf_Die l_child, r_child;
10561 bool found_l_child, found_r_child;
10562 for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
10563 &l_child) == 0,
10564 found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
10565 &r_child) == 0;
10566 found_l_child && found_r_child;
10567 found_l_child = dwarf_siblingof(&l_child,
10568 &l_child) == 0,
10569 found_r_child = dwarf_siblingof(&r_child,
10570 &r_child)==0)
10571 {
10572 int l_child_tag = dwarf_tag(&l_child);
10573 int r_child_tag = dwarf_tag(&r_child);
10574 if (l_child_tag != r_child_tag
10575 || (l_child_tag == DW_TAG_formal_parameter
10576 && !compare_dies(ctxt, &l_child, &r_child,
10577 aggregates_being_compared,
10578 update_canonical_dies_on_the_fly)))
10579 {
10580 result = false;
10581 break;
10582 }
10583 }
10584 if (found_l_child != found_r_child)
10585 result = false;
10586 }
10587 }
10588
10589 erase_offset_pair(aggregates_being_compared,
10590 die_offset(l), die_offset(r));
10591 }
10592 break;
10593
10594 case DW_TAG_formal_parameter:
10595 {
10596 Dwarf_Die l_type, r_type;
10597 bool l_type_is_void = !die_die_attribute(l, DW_AT_type, l_type);
10598 bool r_type_is_void = !die_die_attribute(r, DW_AT_type, r_type);
10599 if ((l_type_is_void != r_type_is_void)
10600 || !compare_dies(ctxt, &l_type, &r_type,
10601 aggregates_being_compared,
10602 update_canonical_dies_on_the_fly))
10603 result = false;
10604 }
10605 break;
10606
10607 case DW_TAG_variable:
10608 case DW_TAG_member:
10609 if (compare_as_decl_dies(l, r))
10610 {
10611 // Compare the offsets of the data members
10612 if (l_tag == DW_TAG_member)
10613 {
10614 int64_t l_offset_in_bits = 0, r_offset_in_bits = 0;
10615 die_member_offset(ctxt, l, l_offset_in_bits);
10616 die_member_offset(ctxt, r, r_offset_in_bits);
10617 if (l_offset_in_bits != r_offset_in_bits)
10618 result = false;
10619 }
10620 if (result)
10621 {
10622 // Compare the types of the data members or variables.
10623 Dwarf_Die l_type, r_type;
10624 ABG_ASSERT(die_die_attribute(l, DW_AT_type, l_type));
10625 ABG_ASSERT(die_die_attribute(r, DW_AT_type, r_type));
10626 if (!compare_dies(ctxt, &l_type, &r_type,
10627 aggregates_being_compared,
10628 update_canonical_dies_on_the_fly))
10629 result = false;
10630 }
10631 }
10632 else
10633 result = false;
10634 break;
10635
10636 case DW_TAG_class_type:
10637 case DW_TAG_enumerator:
10638 case DW_TAG_packed_type:
10639 case DW_TAG_set_type:
10640 case DW_TAG_file_type:
10641 case DW_TAG_ptr_to_member_type:
10642 case DW_TAG_thrown_type:
10643 case DW_TAG_interface_type:
10644 case DW_TAG_unspecified_type:
10645 case DW_TAG_shared_type:
10646 case DW_TAG_compile_unit:
10647 case DW_TAG_namespace:
10648 case DW_TAG_module:
10649 case DW_TAG_constant:
10650 case DW_TAG_partial_unit:
10651 case DW_TAG_imported_unit:
10652 case DW_TAG_dwarf_procedure:
10653 case DW_TAG_imported_declaration:
10654 case DW_TAG_entry_point:
10655 case DW_TAG_label:
10656 case DW_TAG_lexical_block:
10657 case DW_TAG_unspecified_parameters:
10658 case DW_TAG_variant:
10659 case DW_TAG_common_block:
10660 case DW_TAG_common_inclusion:
10661 case DW_TAG_inheritance:
10662 case DW_TAG_inlined_subroutine:
10663 case DW_TAG_with_stmt:
10664 case DW_TAG_access_declaration:
10665 case DW_TAG_catch_block:
10666 case DW_TAG_friend:
10667 case DW_TAG_namelist:
10668 case DW_TAG_namelist_item:
10669 case DW_TAG_template_type_parameter:
10670 case DW_TAG_template_value_parameter:
10671 case DW_TAG_try_block:
10672 case DW_TAG_variant_part:
10673 case DW_TAG_imported_module:
10674 case DW_TAG_condition:
10675 case DW_TAG_type_unit:
10676 case DW_TAG_template_alias:
10677 case DW_TAG_lo_user:
10678 case DW_TAG_MIPS_loop:
10679 case DW_TAG_format_label:
10680 case DW_TAG_function_template:
10681 case DW_TAG_class_template:
10682 case DW_TAG_GNU_BINCL:
10683 case DW_TAG_GNU_EINCL:
10684 case DW_TAG_GNU_template_template_param:
10685 case DW_TAG_GNU_template_parameter_pack:
10686 case DW_TAG_GNU_formal_parameter_pack:
10687 case DW_TAG_GNU_call_site:
10688 case DW_TAG_GNU_call_site_parameter:
10689 case DW_TAG_hi_user:
10690 ABG_ASSERT_NOT_REACHED;
10691 }
10692
10693 if (result == true
10694 && !aggregate_redundancy_detected
10695 && update_canonical_dies_on_the_fly
10696 && is_canonicalizeable_type_tag(l_tag))
10697 {
10698 // If 'l' has no canonical DIE and if 'r' has one, then propagage
10699 // the canonical DIE of 'r' to 'l'.
10700 //
10701 // In case 'r' has no canonical DIE, then compute it, and then
10702 // propagate that canonical DIE to 'r'.
10703 const die_source l_source = ctxt.get_die_source(l);
10704 const die_source r_source = ctxt.get_die_source(r);
10705
10706 if (!l_has_canonical_die_offset
10707 // A DIE can be equivalent only to another DIE of the same
10708 // source.
10709 && l_source == r_source)
10710 {
10711 if (!r_has_canonical_die_offset)
10712 ctxt.compute_canonical_die_offset(r, r_canonical_die_offset,
10713 /*die_as_type=*/true);
10714 ABG_ASSERT(r_canonical_die_offset);
10715 ctxt.set_canonical_die_offset(l, r_canonical_die_offset,
10716 /*die_as_type=*/true);
10717 }
10718 }
10719 return result;
10720 }
10721
10722 /// Compare two DIEs emitted by a C compiler.
10723 ///
10724 /// @param ctxt the read context used to load the DWARF information.
10725 ///
10726 /// @param l the left-hand-side argument of this comparison operator.
10727 ///
10728 /// @param r the righ-hand-side argument of this comparison operator.
10729 ///
10730 /// @param update_canonical_dies_on_the_fly if yes, then this function
10731 /// updates the canonical DIEs of sub-type DIEs of 'l' and 'r', while
10732 /// comparing l and r. This helps in making so that sub-type DIEs of
10733 /// 'l' and 'r' are compared structurally only once. This is how we
10734 /// turn this exponential comparison problem into a problem that is a
10735 /// closer to a linear one.
10736 ///
10737 /// @return true iff @p l equals @p r.
10738 static bool
compare_dies(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r,bool update_canonical_dies_on_the_fly)10739 compare_dies(const read_context& ctxt,
10740 const Dwarf_Die *l,
10741 const Dwarf_Die *r,
10742 bool update_canonical_dies_on_the_fly)
10743 {
10744 dwarf_offset_pair_set_type aggregates_being_compared;
10745 return compare_dies(ctxt, l, r, aggregates_being_compared,
10746 update_canonical_dies_on_the_fly);
10747 }
10748
10749 // ----------------------------------
10750 // </die comparison engine>
10751 // ---------------------------------
10752
10753 /// Get the point where a DW_AT_import DIE is used to import a given
10754 /// (unit) DIE, between two DIEs.
10755 ///
10756 /// @param ctxt the dwarf reading context to consider.
10757 ///
10758 /// @param partial_unit_offset the imported unit for which we want to
10759 /// know the insertion point. This is usually a partial unit (with
10760 /// tag DW_TAG_partial_unit) but it does not necessarily have to be
10761 /// so.
10762 ///
10763 /// @param first_die_offset the offset of the DIE from which this
10764 /// function starts looking for the import point of
10765 /// @partial_unit_offset. Note that this offset is excluded from the
10766 /// set of potential solutions.
10767 ///
10768 /// @param first_die_cu_offset the offset of the (compilation) unit
10769 /// that @p first_die_cu_offset belongs to.
10770 ///
10771 /// @param source where the DIE of first_die_cu_offset unit comes
10772 /// from.
10773 ///
10774 /// @param last_die_offset the offset of the last DIE of the up to
10775 /// which this function looks for the import point of @p
10776 /// partial_unit_offset. Note that this offset is excluded from the
10777 /// set of potential solutions.
10778 ///
10779 /// @param imported_point_offset. The resulting
10780 /// imported_point_offset. Note that if the imported DIE @p
10781 /// partial_unit_offset is not found between @p first_die_offset and
10782 /// @p last_die_offset, this parameter is left untouched by this
10783 /// function.
10784 ///
10785 /// @return true iff an imported unit is found between @p
10786 /// first_die_offset and @p last_die_offset.
10787 static bool
find_import_unit_point_between_dies(const read_context & ctxt,size_t partial_unit_offset,Dwarf_Off first_die_offset,Dwarf_Off first_die_cu_offset,die_source source,size_t last_die_offset,size_t & imported_point_offset)10788 find_import_unit_point_between_dies(const read_context& ctxt,
10789 size_t partial_unit_offset,
10790 Dwarf_Off first_die_offset,
10791 Dwarf_Off first_die_cu_offset,
10792 die_source source,
10793 size_t last_die_offset,
10794 size_t& imported_point_offset)
10795 {
10796 const tu_die_imported_unit_points_map_type& tu_die_imported_unit_points_map =
10797 ctxt.tu_die_imported_unit_points_map(source);
10798
10799 tu_die_imported_unit_points_map_type::const_iterator iter =
10800 tu_die_imported_unit_points_map.find(first_die_cu_offset);
10801
10802 ABG_ASSERT(iter != tu_die_imported_unit_points_map.end());
10803
10804 const imported_unit_points_type& imported_unit_points = iter->second;
10805 if (imported_unit_points.empty())
10806 return false;
10807
10808 imported_unit_points_type::const_iterator b = imported_unit_points.begin();
10809 imported_unit_points_type::const_iterator e = imported_unit_points.end();
10810
10811 find_lower_bound_in_imported_unit_points(imported_unit_points,
10812 first_die_offset,
10813 b);
10814
10815 if (last_die_offset != static_cast<size_t>(-1))
10816 find_lower_bound_in_imported_unit_points(imported_unit_points,
10817 last_die_offset,
10818 e);
10819
10820 if (e != imported_unit_points.end())
10821 {
10822 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
10823 if (i->imported_unit_die_off == partial_unit_offset)
10824 {
10825 imported_point_offset = i->offset_of_import ;
10826 return true;
10827 }
10828
10829 for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
10830 {
10831 if (find_import_unit_point_between_dies(ctxt,
10832 partial_unit_offset,
10833 i->imported_unit_child_off,
10834 i->imported_unit_cu_off,
10835 i->imported_unit_die_source,
10836 /*(Dwarf_Off)*/-1,
10837 imported_point_offset))
10838 return true;
10839 }
10840 }
10841 else
10842 {
10843 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
10844 if (i->imported_unit_die_off == partial_unit_offset)
10845 {
10846 imported_point_offset = i->offset_of_import ;
10847 return true;
10848 }
10849
10850 for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
10851 {
10852 if (find_import_unit_point_between_dies(ctxt,
10853 partial_unit_offset,
10854 i->imported_unit_child_off,
10855 i->imported_unit_cu_off,
10856 i->imported_unit_die_source,
10857 /*(Dwarf_Off)*/-1,
10858 imported_point_offset))
10859 return true;
10860 }
10861 }
10862
10863 return false;
10864 }
10865
10866 /// In the current translation unit, get the last point where a
10867 /// DW_AT_import DIE is used to import a given (unit) DIE, before a
10868 /// given DIE is found. That given DIE is called the limit DIE.
10869 ///
10870 /// Said otherwise, this function returns the last import point of a
10871 /// unit, before a limit.
10872 ///
10873 /// @param ctxt the dwarf reading context to consider.
10874 ///
10875 /// @param partial_unit_offset the imported unit for which we want to
10876 /// know the insertion point of. This is usually a partial unit (with
10877 /// tag DW_TAG_partial_unit) but it does not necessarily have to be
10878 /// so.
10879 ///
10880 /// @param where_offset the offset of the limit DIE.
10881 ///
10882 /// @param imported_point_offset. The resulting imported_point_offset.
10883 /// Note that if the imported DIE @p partial_unit_offset is not found
10884 /// before @p die_offset, this is set to the last @p
10885 /// partial_unit_offset found under @p parent_die.
10886 ///
10887 /// @return true iff an imported unit is found before @p die_offset.
10888 /// Note that if an imported unit is found after @p die_offset then @p
10889 /// imported_point_offset is set and the function return false.
10890 static bool
find_import_unit_point_before_die(const read_context & ctxt,size_t partial_unit_offset,size_t where_offset,size_t & imported_point_offset)10891 find_import_unit_point_before_die(const read_context& ctxt,
10892 size_t partial_unit_offset,
10893 size_t where_offset,
10894 size_t& imported_point_offset)
10895 {
10896 size_t import_point_offset = 0;
10897 Dwarf_Die first_die_of_tu;
10898
10899 if (dwarf_child(const_cast<Dwarf_Die*>(ctxt.cur_tu_die()),
10900 &first_die_of_tu) != 0)
10901 return false;
10902
10903 Dwarf_Die cu_die_memory;
10904 Dwarf_Die *cu_die;
10905
10906 cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&first_die_of_tu),
10907 &cu_die_memory, 0, 0);
10908
10909 if (find_import_unit_point_between_dies(ctxt, partial_unit_offset,
10910 dwarf_dieoffset(&first_die_of_tu),
10911 dwarf_dieoffset(cu_die),
10912 /*source=*/PRIMARY_DEBUG_INFO_DIE_SOURCE,
10913 where_offset,
10914 import_point_offset))
10915 {
10916 imported_point_offset = import_point_offset;
10917 return true;
10918 }
10919
10920 if (import_point_offset)
10921 {
10922 imported_point_offset = import_point_offset;
10923 return true;
10924 }
10925
10926 return false;
10927 }
10928
10929 /// Return the parent DIE for a given DIE.
10930 ///
10931 /// Note that the function build_die_parent_map() must have been
10932 /// called before this one can work. This function either succeeds or
10933 /// aborts the current process.
10934 ///
10935 /// @param ctxt the read context to consider.
10936 ///
10937 /// @param die the DIE for which we want the parent.
10938 ///
10939 /// @param parent_die the output parameter set to the parent die of
10940 /// @p die. Its memory must be allocated and handled by the caller.
10941 ///
10942 /// @param where_offset the offset of the DIE where we are "logically"
10943 /// positionned at, in the DIE tree. This is useful when @p die is
10944 /// e.g, DW_TAG_partial_unit that can be included in several places in
10945 /// the DIE tree.
10946 ///
10947 /// @return true if the function could get a parent DIE, false
10948 /// otherwise.
10949 static bool
get_parent_die(const read_context & ctxt,const Dwarf_Die * die,Dwarf_Die & parent_die,size_t where_offset)10950 get_parent_die(const read_context& ctxt,
10951 const Dwarf_Die* die,
10952 Dwarf_Die& parent_die,
10953 size_t where_offset)
10954 {
10955 ABG_ASSERT(ctxt.dwarf());
10956
10957 const die_source source = ctxt.get_die_source(die);
10958
10959 const offset_offset_map_type& m = ctxt.die_parent_map(source);
10960 offset_offset_map_type::const_iterator i =
10961 m.find(dwarf_dieoffset(const_cast<Dwarf_Die*>(die)));
10962
10963 if (i == m.end())
10964 return false;
10965
10966 switch (source)
10967 {
10968 case PRIMARY_DEBUG_INFO_DIE_SOURCE:
10969 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(), i->second, &parent_die));
10970 break;
10971 case ALT_DEBUG_INFO_DIE_SOURCE:
10972 ABG_ASSERT(dwarf_offdie(ctxt.alt_dwarf(), i->second, &parent_die));
10973 break;
10974 case TYPE_UNIT_DIE_SOURCE:
10975 ABG_ASSERT(dwarf_offdie_types(ctxt.dwarf(), i->second, &parent_die));
10976 break;
10977 case NO_DEBUG_INFO_DIE_SOURCE:
10978 case NUMBER_OF_DIE_SOURCES:
10979 ABG_ASSERT_NOT_REACHED;
10980 }
10981
10982 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit)
10983 {
10984 if (where_offset == 0)
10985 {
10986 parent_die = *ctxt.cur_tu_die();
10987 return true;
10988 }
10989 size_t import_point_offset = 0;
10990 bool found =
10991 find_import_unit_point_before_die(ctxt,
10992 dwarf_dieoffset(&parent_die),
10993 where_offset,
10994 import_point_offset);
10995 if (!found)
10996 // It looks like parent_die (which comes from the alternate
10997 // debug info file) hasn't been imported into this TU. So,
10998 // Let's assume its logical parent is the DIE of the current
10999 // TU.
11000 parent_die = *ctxt.cur_tu_die();
11001 else
11002 {
11003 ABG_ASSERT(import_point_offset);
11004 Dwarf_Die import_point_die;
11005 ABG_ASSERT(dwarf_offdie(ctxt.dwarf(),
11006 import_point_offset,
11007 &import_point_die));
11008 return get_parent_die(ctxt, &import_point_die,
11009 parent_die, where_offset);
11010 }
11011 }
11012
11013 return true;
11014 }
11015
11016 /// Get the DIE representing the scope of a given DIE.
11017 ///
11018 /// Please note that when the DIE we are looking at has a
11019 /// DW_AT_specification or DW_AT_abstract_origin attribute, the scope
11020 /// DIE is the parent DIE of the DIE referred to by that attribute.
11021 /// This is the only case where a scope DIE is different from the
11022 /// parent DIE of a given DIE.
11023 ///
11024 /// Also note that if the current translation unit is from C, then
11025 /// this returns the global scope.
11026 ///
11027 /// @param ctxt the reading context to use.
11028 ///
11029 /// @param die the DIE to consider.
11030 ///
11031 /// @param where_offset where we are logically at in the DIE stream.
11032 ///
11033 /// @param scope_die out parameter. This is set to the resulting
11034 /// scope DIE iff the function returns true.
11035 static bool
get_scope_die(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & scope_die)11036 get_scope_die(const read_context& ctxt,
11037 const Dwarf_Die* die,
11038 size_t where_offset,
11039 Dwarf_Die& scope_die)
11040 {
11041 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
11042 {
11043 ABG_ASSERT(dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_member);
11044 return dwarf_diecu(const_cast<Dwarf_Die*>(die), &scope_die, 0, 0);
11045 }
11046
11047 Dwarf_Die logical_parent_die;
11048 if (die_die_attribute(die, DW_AT_specification,
11049 logical_parent_die, false)
11050 || die_die_attribute(die, DW_AT_abstract_origin,
11051 logical_parent_die, false))
11052 return get_scope_die(ctxt, &logical_parent_die, where_offset, scope_die);
11053
11054 if (!get_parent_die(ctxt, die, scope_die, where_offset))
11055 return false;
11056
11057 if (dwarf_tag(&scope_die) == DW_TAG_subprogram
11058 || dwarf_tag(&scope_die) == DW_TAG_subroutine_type
11059 || dwarf_tag(&scope_die) == DW_TAG_array_type)
11060 return get_scope_die(ctxt, &scope_die, where_offset, scope_die);
11061
11062 return true;
11063 }
11064
11065 /// Return the abigail IR node representing the scope of a given DIE.
11066 ///
11067 /// Note that it is the logical scope that is returned. That is, if
11068 /// the DIE has a DW_AT_specification or DW_AT_abstract_origin
11069 /// attribute, it's the scope of the referred-to DIE (via these
11070 /// attributes) that is returned.
11071 ///
11072 /// Also note that if the current translation unit is from C, then
11073 /// this returns the global scope.
11074 ///
11075 /// @param ctxt the dwarf reading context to use.
11076 ///
11077 /// @param die the DIE to get the scope for.
11078 ///
11079 /// @param called_from_public_decl is true if this function has been
11080 /// initially called within the context of a public decl.
11081 ///
11082 /// @param where_offset the offset of the DIE where we are "logically"
11083 /// positionned at, in the DIE tree. This is useful when @p die is
11084 /// e.g, DW_TAG_partial_unit that can be included in several places in
11085 /// the DIE tree.
11086 static scope_decl_sptr
get_scope_for_die(read_context & ctxt,Dwarf_Die * die,bool called_for_public_decl,size_t where_offset)11087 get_scope_for_die(read_context& ctxt,
11088 Dwarf_Die* die,
11089 bool called_for_public_decl,
11090 size_t where_offset)
11091 {
11092 const die_source source_of_die = ctxt.get_die_source(die);
11093
11094 translation_unit::language die_lang = translation_unit::LANG_UNKNOWN;
11095 ctxt.get_die_language(die, die_lang);
11096 if (is_c_language(die_lang))
11097 {
11098 ABG_ASSERT(dwarf_tag(die) != DW_TAG_member);
11099 return ctxt.global_scope();
11100 }
11101
11102 Dwarf_Die cloned_die;
11103 if (die_die_attribute(die, DW_AT_specification, cloned_die, false)
11104 || die_die_attribute(die, DW_AT_abstract_origin, cloned_die, false))
11105 return get_scope_for_die(ctxt, &cloned_die,
11106 called_for_public_decl,
11107 where_offset);
11108
11109 Dwarf_Die parent_die;
11110
11111 if (!get_parent_die(ctxt, die, parent_die, where_offset))
11112 return ctxt.nil_scope();
11113
11114 if (dwarf_tag(&parent_die) == DW_TAG_compile_unit
11115 || dwarf_tag(&parent_die) == DW_TAG_partial_unit
11116 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
11117 {
11118 if (dwarf_tag(&parent_die) == DW_TAG_partial_unit
11119 || dwarf_tag(&parent_die) == DW_TAG_type_unit)
11120 {
11121 ABG_ASSERT(source_of_die == ALT_DEBUG_INFO_DIE_SOURCE
11122 || source_of_die == TYPE_UNIT_DIE_SOURCE);
11123 return ctxt.cur_transl_unit()->get_global_scope();
11124 }
11125
11126 // For top level DIEs like DW_TAG_compile_unit, we just want to
11127 // return the global scope for the corresponding translation
11128 // unit. This must have been set by
11129 // build_translation_unit_and_add_to_ir if we already started to
11130 // build the translation unit of parent_die. Otherwise, just
11131 // return the global scope of the current translation unit.
11132 die_tu_map_type::const_iterator i =
11133 ctxt.die_tu_map().find(dwarf_dieoffset(&parent_die));
11134 if (i != ctxt.die_tu_map().end())
11135 return i->second->get_global_scope();
11136 return ctxt.cur_transl_unit()->get_global_scope();
11137 }
11138
11139 scope_decl_sptr s;
11140 type_or_decl_base_sptr d;
11141 if (dwarf_tag(&parent_die) == DW_TAG_subprogram
11142 || dwarf_tag(&parent_die) == DW_TAG_array_type)
11143 // this is an entity defined in a scope that is a function.
11144 // Normally, I would say that this should be dropped. But I have
11145 // seen a case where a typedef DIE needed by a function parameter
11146 // was defined right before the parameter, under the scope of the
11147 // function. Yeah, weird. So if I drop the typedef DIE, I'd drop
11148 // the function parm too. So for that case, let's say that the
11149 // scope is the scope of the function itself. Note that this is
11150 // an error of the DWARF emitter. We should never see this DIE in
11151 // this context.
11152 {
11153 scope_decl_sptr s = get_scope_for_die(ctxt, &parent_die,
11154 called_for_public_decl,
11155 where_offset);
11156 if (is_anonymous_type_die(die))
11157 // For anonymous type that have nothing to do in a function or
11158 // array type context, let's put it in the containing
11159 // namespace. That is, do not let it be in a containing class
11160 // or union where it has nothing to do.
11161 while (is_class_or_union_type(s))
11162 {
11163 if (!get_parent_die(ctxt, &parent_die, parent_die, where_offset))
11164 return ctxt.nil_scope();
11165 s = get_scope_for_die(ctxt, &parent_die,
11166 called_for_public_decl,
11167 where_offset);
11168 }
11169 return s;
11170 }
11171 else
11172 d = build_ir_node_from_die(ctxt, &parent_die,
11173 called_for_public_decl,
11174 where_offset);
11175 s = dynamic_pointer_cast<scope_decl>(d);
11176 if (!s)
11177 // this is an entity defined in someting that is not a scope.
11178 // Let's drop it.
11179 return ctxt.nil_scope();
11180
11181 class_decl_sptr cl = dynamic_pointer_cast<class_decl>(d);
11182 if (cl && cl->get_is_declaration_only())
11183 {
11184 scope_decl_sptr scop =
11185 dynamic_pointer_cast<scope_decl>(cl->get_definition_of_declaration());
11186 if (scop)
11187 s = scop;
11188 else
11189 s = cl;
11190 }
11191 return s;
11192 }
11193
11194 /// Convert a DWARF constant representing the value of the
11195 /// DW_AT_language property into the translation_unit::language
11196 /// enumerator.
11197 ///
11198 /// @param l the DWARF constant to convert.
11199 ///
11200 /// @return the resulting translation_unit::language enumerator.
11201 static translation_unit::language
dwarf_language_to_tu_language(size_t l)11202 dwarf_language_to_tu_language(size_t l)
11203 {
11204 switch (l)
11205 {
11206 case DW_LANG_C89:
11207 return translation_unit::LANG_C89;
11208 case DW_LANG_C:
11209 return translation_unit::LANG_C;
11210 case DW_LANG_Ada83:
11211 return translation_unit::LANG_Ada83;
11212 case DW_LANG_C_plus_plus:
11213 return translation_unit::LANG_C_plus_plus;
11214 case DW_LANG_Cobol74:
11215 return translation_unit::LANG_Cobol74;
11216 case DW_LANG_Cobol85:
11217 return translation_unit::LANG_Cobol85;
11218 case DW_LANG_Fortran77:
11219 return translation_unit::LANG_Fortran77;
11220 case DW_LANG_Fortran90:
11221 return translation_unit::LANG_Fortran90;
11222 case DW_LANG_Pascal83:
11223 return translation_unit::LANG_Pascal83;
11224 case DW_LANG_Modula2:
11225 return translation_unit::LANG_Modula2;
11226 case DW_LANG_Java:
11227 return translation_unit::LANG_Java;
11228 case DW_LANG_C99:
11229 return translation_unit::LANG_C99;
11230 case DW_LANG_Ada95:
11231 return translation_unit::LANG_Ada95;
11232 case DW_LANG_Fortran95:
11233 return translation_unit::LANG_Fortran95;
11234 case DW_LANG_PLI:
11235 return translation_unit::LANG_PLI;
11236 case DW_LANG_ObjC:
11237 return translation_unit::LANG_ObjC;
11238 case DW_LANG_ObjC_plus_plus:
11239 return translation_unit::LANG_ObjC_plus_plus;
11240
11241 #ifdef HAVE_DW_LANG_Rust_enumerator
11242 case DW_LANG_Rust:
11243 return translation_unit::LANG_Rust;
11244 #endif
11245
11246 #ifdef HAVE_DW_LANG_UPC_enumerator
11247 case DW_LANG_UPC:
11248 return translation_unit::LANG_UPC;
11249 #endif
11250
11251 #ifdef HAVE_DW_LANG_D_enumerator
11252 case DW_LANG_D:
11253 return translation_unit::LANG_D;
11254 #endif
11255
11256 #ifdef HAVE_DW_LANG_Python_enumerator
11257 case DW_LANG_Python:
11258 return translation_unit::LANG_Python;
11259 #endif
11260
11261 #ifdef HAVE_DW_LANG_Go_enumerator
11262 case DW_LANG_Go:
11263 return translation_unit::LANG_Go;
11264 #endif
11265
11266 #ifdef HAVE_DW_LANG_C11_enumerator
11267 case DW_LANG_C11:
11268 return translation_unit::LANG_C11;
11269 #endif
11270
11271 #ifdef HAVE_DW_LANG_C_plus_plus_03_enumerator
11272 case DW_LANG_C_plus_plus_03:
11273 return translation_unit::LANG_C_plus_plus_03;
11274 #endif
11275
11276 #ifdef HAVE_DW_LANG_C_plus_plus_11_enumerator
11277 case DW_LANG_C_plus_plus_11:
11278 return translation_unit::LANG_C_plus_plus_11;
11279 #endif
11280
11281 #ifdef HAVE_DW_LANG_C_plus_plus_14_enumerator
11282 case DW_LANG_C_plus_plus_14:
11283 return translation_unit::LANG_C_plus_plus_14;
11284 #endif
11285
11286 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
11287 case DW_LANG_Mips_Assembler:
11288 return translation_unit::LANG_Mips_Assembler;
11289 #endif
11290
11291 default:
11292 return translation_unit::LANG_UNKNOWN;
11293 }
11294 }
11295
11296 /// Get the default array lower bound value as defined by the DWARF
11297 /// specification, version 4, depending on the language of the
11298 /// translation unit.
11299 ///
11300 /// @param l the language of the translation unit.
11301 ///
11302 /// @return the default array lower bound value.
11303 static uint64_t
get_default_array_lower_bound(translation_unit::language l)11304 get_default_array_lower_bound(translation_unit::language l)
11305 {
11306 int value = 0;
11307 switch (l)
11308 {
11309 case translation_unit::LANG_UNKNOWN:
11310 value = 0;
11311 break;
11312 case translation_unit::LANG_Cobol74:
11313 case translation_unit::LANG_Cobol85:
11314 value = 1;
11315 break;
11316 case translation_unit::LANG_C89:
11317 case translation_unit::LANG_C99:
11318 case translation_unit::LANG_C11:
11319 case translation_unit::LANG_C:
11320 case translation_unit::LANG_C_plus_plus_03:
11321 case translation_unit::LANG_C_plus_plus_11:
11322 case translation_unit::LANG_C_plus_plus_14:
11323 case translation_unit::LANG_C_plus_plus:
11324 case translation_unit::LANG_ObjC:
11325 case translation_unit::LANG_ObjC_plus_plus:
11326 case translation_unit::LANG_Rust:
11327 value = 0;
11328 break;
11329 case translation_unit::LANG_Fortran77:
11330 case translation_unit::LANG_Fortran90:
11331 case translation_unit::LANG_Fortran95:
11332 case translation_unit::LANG_Ada83:
11333 case translation_unit::LANG_Ada95:
11334 case translation_unit::LANG_Pascal83:
11335 case translation_unit::LANG_Modula2:
11336 value = 1;
11337 break;
11338 case translation_unit::LANG_Java:
11339 value = 0;
11340 break;
11341 case translation_unit::LANG_PLI:
11342 value = 1;
11343 break;
11344 case translation_unit::LANG_UPC:
11345 case translation_unit::LANG_D:
11346 case translation_unit::LANG_Python:
11347 case translation_unit::LANG_Go:
11348 case translation_unit::LANG_Mips_Assembler:
11349 value = 0;
11350 break;
11351 }
11352
11353 return value;
11354 }
11355
11356 /// For a given offset, find the lower bound of a sorted vector of
11357 /// imported unit point offset.
11358 ///
11359 /// The lower bound is the smallest point (the point with the smallest
11360 /// offset) which is the greater than a given offset.
11361 ///
11362 /// @param imported_unit_points_type the sorted vector of imported
11363 /// unit points.
11364 ///
11365 /// @param val the offset to consider when looking for the lower
11366 /// bound.
11367 ///
11368 /// @param r an iterator to the lower bound found. This parameter is
11369 /// set iff the function returns true.
11370 ///
11371 /// @return true iff the lower bound has been found.
11372 static bool
find_lower_bound_in_imported_unit_points(const imported_unit_points_type & p,Dwarf_Off val,imported_unit_points_type::const_iterator & r)11373 find_lower_bound_in_imported_unit_points(const imported_unit_points_type& p,
11374 Dwarf_Off val,
11375 imported_unit_points_type::const_iterator& r)
11376 {
11377 imported_unit_point v(val);
11378 imported_unit_points_type::const_iterator result =
11379 std::lower_bound(p.begin(), p.end(), v);
11380
11381 bool is_ok = result != p.end();
11382
11383 if (is_ok)
11384 r = result;
11385
11386 return is_ok;
11387 }
11388
11389 /// Given a DW_TAG_compile_unit, build and return the corresponding
11390 /// abigail::translation_unit ir node. Note that this function
11391 /// recursively reads the children dies of the current DIE and
11392 /// populates the resulting translation unit.
11393 ///
11394 /// @param ctxt the read_context to use.
11395 ///
11396 /// @param die the DW_TAG_compile_unit DIE to consider.
11397 ///
11398 /// @param address_size the size of the addresses expressed in this
11399 /// translation unit in general.
11400 ///
11401 /// @return a pointer to the resulting translation_unit.
11402 static translation_unit_sptr
build_translation_unit_and_add_to_ir(read_context & ctxt,Dwarf_Die * die,char address_size)11403 build_translation_unit_and_add_to_ir(read_context& ctxt,
11404 Dwarf_Die* die,
11405 char address_size)
11406 {
11407 translation_unit_sptr result;
11408
11409 if (!die)
11410 return result;
11411 ABG_ASSERT(dwarf_tag(die) == DW_TAG_compile_unit);
11412
11413 // Clear the part of the context that is dependent on the translation
11414 // unit we are reading.
11415 ctxt.clear_per_translation_unit_data();
11416
11417 ctxt.cur_tu_die(die);
11418
11419 string path = die_string_attribute(die, DW_AT_name);
11420 if (path == "<artificial>")
11421 {
11422 // This is a file artificially generated by the compiler, so its
11423 // name is '<artificial>'. As we want all different translation
11424 // units to have unique path names, let's suffix this path name
11425 // with its die offset.
11426 std::ostringstream o;
11427 o << path << "-" << std::hex << dwarf_dieoffset(die);
11428 path = o.str();
11429 }
11430 string compilation_dir = die_string_attribute(die, DW_AT_comp_dir);
11431
11432 // See if the same translation unit exits already in the current
11433 // corpus. Sometimes, the same translation unit can be present
11434 // several times in the same debug info. The content of the
11435 // different instances of the translation unit are different. So to
11436 // represent that, we are going to re-use the same translation
11437 // unit. That is, it's going to be the union of all the translation
11438 // units of the same path.
11439 {
11440 const string& abs_path =
11441 compilation_dir.empty() ? path : compilation_dir + "/" + path;
11442 result = ctxt.current_corpus()->find_translation_unit(abs_path);
11443 }
11444
11445 if (!result)
11446 {
11447 result.reset(new translation_unit(ctxt.env(),
11448 path,
11449 address_size));
11450 result->set_compilation_dir_path(compilation_dir);
11451 ctxt.current_corpus()->add(result);
11452 uint64_t l = 0;
11453 die_unsigned_constant_attribute(die, DW_AT_language, l);
11454 result->set_language(dwarf_language_to_tu_language(l));
11455 }
11456
11457 ctxt.cur_transl_unit(result);
11458 ctxt.die_tu_map()[dwarf_dieoffset(die)] = result;
11459
11460 Dwarf_Die child;
11461 if (dwarf_child(die, &child) != 0)
11462 return result;
11463
11464 result->set_is_constructed(false);
11465
11466 do
11467 build_ir_node_from_die(ctxt, &child,
11468 die_is_public_decl(&child),
11469 dwarf_dieoffset(&child));
11470 while (dwarf_siblingof(&child, &child) == 0);
11471
11472 if (!ctxt.var_decls_to_re_add_to_tree().empty())
11473 for (list<var_decl_sptr>::const_iterator v =
11474 ctxt.var_decls_to_re_add_to_tree().begin();
11475 v != ctxt.var_decls_to_re_add_to_tree().end();
11476 ++v)
11477 {
11478 if (is_member_decl(*v))
11479 continue;
11480
11481 ABG_ASSERT((*v)->get_scope());
11482 string demangled_name =
11483 demangle_cplus_mangled_name((*v)->get_linkage_name());
11484 if (!demangled_name.empty())
11485 {
11486 std::list<string> fqn_comps;
11487 fqn_to_components(demangled_name, fqn_comps);
11488 string mem_name = fqn_comps.back();
11489 fqn_comps.pop_back();
11490 class_decl_sptr class_type;
11491 string ty_name;
11492 if (!fqn_comps.empty())
11493 {
11494 ty_name = components_to_type_name(fqn_comps);
11495 class_type =
11496 lookup_class_type(ty_name, *ctxt.cur_transl_unit());
11497 }
11498 if (class_type)
11499 {
11500 // So we are seeing a member variable for which there
11501 // is a global variable definition DIE not having a
11502 // reference attribute pointing back to the member
11503 // variable declaration DIE. Thus remove the global
11504 // variable definition from its current non-class
11505 // scope ...
11506 decl_base_sptr d;
11507 if ((d = lookup_var_decl_in_scope(mem_name, class_type)))
11508 // This is the data member with the same name in cl.
11509 // We just need to flag it as static.
11510 ;
11511 else
11512 {
11513 // In this case there is no data member with the
11514 // same name in cl already. Let's add it there then
11515 // ...
11516 remove_decl_from_scope(*v);
11517 d = add_decl_to_scope(*v, class_type);
11518 }
11519
11520 ABG_ASSERT(dynamic_pointer_cast<var_decl>(d));
11521 // Let's flag the data member as static.
11522 set_member_is_static(d, true);
11523 }
11524 }
11525 }
11526 ctxt.var_decls_to_re_add_to_tree().clear();
11527
11528 result->set_is_constructed(true);
11529
11530 return result;
11531 }
11532
11533 /// Build a abigail::namespace_decl out of a DW_TAG_namespace or
11534 /// DW_TAG_module (for fortran) DIE.
11535 ///
11536 /// Note that this function connects the DW_TAG_namespace to the IR
11537 /// being currently created, reads the children of the DIE and
11538 /// connects them to the IR as well.
11539 ///
11540 /// @param ctxt the read context to use.
11541 ///
11542 /// @param die the DIE to read from. Must be either DW_TAG_namespace
11543 /// or DW_TAG_module.
11544 ///
11545 /// @param where_offset the offset of the DIE where we are "logically"
11546 /// positionned at, in the DIE tree. This is useful when @p die is
11547 /// e.g, DW_TAG_partial_unit that can be included in several places in
11548 /// the DIE tree.
11549 ///
11550 /// @return the resulting @ref abigail::namespace_decl or NULL if it
11551 /// couldn't be created.
11552 static namespace_decl_sptr
build_namespace_decl_and_add_to_ir(read_context & ctxt,Dwarf_Die * die,size_t where_offset)11553 build_namespace_decl_and_add_to_ir(read_context& ctxt,
11554 Dwarf_Die* die,
11555 size_t where_offset)
11556 {
11557 namespace_decl_sptr result;
11558
11559 if (!die)
11560 return result;
11561
11562 unsigned tag = dwarf_tag(die);
11563 if (tag != DW_TAG_namespace && tag != DW_TAG_module)
11564 return result;
11565
11566 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
11567 /*called_for_public_decl=*/false,
11568 where_offset);
11569
11570 string name, linkage_name;
11571 location loc;
11572 die_loc_and_name(ctxt, die, loc, name, linkage_name);
11573
11574 result.reset(new namespace_decl(ctxt.env(), name, loc));
11575 add_decl_to_scope(result, scope.get());
11576 ctxt.associate_die_to_decl(die, result, where_offset);
11577
11578 Dwarf_Die child;
11579 if (dwarf_child(die, &child) != 0)
11580 return result;
11581
11582 ctxt.scope_stack().push(result.get());
11583 do
11584 build_ir_node_from_die(ctxt, &child,
11585 /*called_from_public_decl=*/false,
11586 where_offset);
11587 while (dwarf_siblingof(&child, &child) == 0);
11588 ctxt.scope_stack().pop();
11589
11590 return result;
11591 }
11592
11593 /// Build a @ref type_decl out of a DW_TAG_base_type DIE.
11594 ///
11595 /// @param ctxt the read context to use.
11596 ///
11597 /// @param die the DW_TAG_base_type to consider.
11598 ///
11599 /// @param where_offset where we are logically at in the DIE stream.
11600 ///
11601 /// @return the resulting decl_base_sptr.
11602 static type_decl_sptr
build_type_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset)11603 build_type_decl(read_context& ctxt, Dwarf_Die* die, size_t where_offset)
11604 {
11605 type_decl_sptr result;
11606
11607 if (!die)
11608 return result;
11609 ABG_ASSERT(dwarf_tag(die) == DW_TAG_base_type);
11610
11611 uint64_t byte_size = 0, bit_size = 0;
11612 if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
11613 if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
11614 return result;
11615
11616 if (bit_size == 0 && byte_size != 0)
11617 // Update the bit size.
11618 bit_size = byte_size * 8;
11619
11620 string type_name, linkage_name;
11621 location loc;
11622 die_loc_and_name(ctxt, die, loc, type_name, linkage_name);
11623
11624 if (byte_size == 0)
11625 {
11626 // The size of the type is zero, that must mean that we are
11627 // looking at the definition of the void type.
11628 if (type_name == "void")
11629 result = is_type_decl(build_ir_node_for_void_type(ctxt));
11630 else
11631 // A type of size zero that is not void? Hmmh, I am not sure
11632 // what that means. Return nil for now.
11633 return result;
11634 }
11635
11636 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
11637 {
11638 string normalized_type_name = type_name;
11639 integral_type int_type;
11640 if (parse_integral_type(type_name, int_type))
11641 normalized_type_name = int_type.to_string();
11642 result = lookup_basic_type(normalized_type_name, *corp);
11643 }
11644
11645 if (!result)
11646 if (corpus_sptr corp = ctxt.current_corpus())
11647 result = lookup_basic_type(type_name, *corp);
11648 if (!result)
11649 result.reset(new type_decl(ctxt.env(), type_name, bit_size,
11650 /*alignment=*/0, loc, linkage_name));
11651 ctxt.associate_die_to_type(die, result, where_offset);
11652 return result;
11653 }
11654
11655 /// Construct the type that is to be used as the underlying type of an
11656 /// enum.
11657 ///
11658 /// @param ctxt the read context to use.
11659 ///
11660 /// @param enum_name the name of the enum that this type is going to
11661 /// be the underlying type of.
11662 ///
11663 /// @param enum_size the size of the enum.
11664 ///
11665 /// @param is_anonymous whether the underlying type is anonymous or
11666 /// not. By default, this should be set to true as before c++11 (and
11667 /// in C), it's almost the case.
11668 static type_decl_sptr
build_enum_underlying_type(read_context & ctxt,string enum_name,uint64_t enum_size,bool is_anonymous=true)11669 build_enum_underlying_type(read_context& ctxt,
11670 string enum_name,
11671 uint64_t enum_size,
11672 bool is_anonymous = true)
11673 {
11674 string underlying_type_name =
11675 build_internal_underlying_enum_type_name(enum_name, is_anonymous,
11676 enum_size);
11677
11678 type_decl_sptr result(new type_decl(ctxt.env(), underlying_type_name,
11679 enum_size, enum_size, location()));
11680 result->set_is_anonymous(is_anonymous);
11681 result->set_is_artificial(true);
11682 translation_unit_sptr tu = ctxt.cur_transl_unit();
11683 decl_base_sptr d = add_decl_to_scope(result, tu->get_global_scope().get());
11684 result = dynamic_pointer_cast<type_decl>(d);
11685 ABG_ASSERT(result);
11686 canonicalize(result);
11687 return result;
11688 }
11689
11690 /// Build an enum_type_decl from a DW_TAG_enumeration_type DIE.
11691 ///
11692 /// @param ctxt the read context to use.
11693 ///
11694 /// @param die the DIE to read from.
11695 ///
11696 /// @param scope the scope of the final enum. Note that this function
11697 /// does *NOT* add the built type to this scope. The scope is just so
11698 /// that the function knows how to name anonymous enums.
11699 ///
11700 /// @param is_declaration_only is true if the DIE denoted by @p die is
11701 /// a declaration-only DIE.
11702 ///
11703 /// @return the built enum_type_decl or NULL if it could not be built.
11704 static enum_type_decl_sptr
build_enum_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,size_t where_offset,bool is_declaration_only)11705 build_enum_type(read_context& ctxt,
11706 Dwarf_Die* die,
11707 scope_decl* scope,
11708 size_t where_offset,
11709 bool is_declaration_only)
11710 {
11711 enum_type_decl_sptr result;
11712 if (!die)
11713 return result;
11714
11715 unsigned tag = dwarf_tag(die);
11716 if (tag != DW_TAG_enumeration_type)
11717 return result;
11718
11719 string name, linkage_name;
11720 location loc;
11721 die_loc_and_name(ctxt, die, loc, name, linkage_name);
11722
11723 bool is_anonymous = false;
11724 // If the enum is anonymous, let's give it a name.
11725 if (name.empty())
11726 {
11727 name = get_internal_anonymous_die_prefix_name(die);
11728 ABG_ASSERT(!name.empty());
11729 // But we remember that the type is anonymous.
11730 is_anonymous = true;
11731
11732 if (size_t s = scope->get_num_anonymous_member_enums())
11733 name = build_internal_anonymous_die_name(name, s);
11734 }
11735
11736 bool use_odr = ctxt.odr_is_relevant(die);
11737 // If the type has location, then associate it to its
11738 // representation. This way, all occurences of types with the same
11739 // representation (name) and location can be later detected as being
11740 // for the same type.
11741
11742 if (!is_anonymous)
11743 {
11744 if (use_odr)
11745 {
11746 if (enum_type_decl_sptr pre_existing_enum =
11747 is_enum_type(ctxt.lookup_artifact_from_die(die)))
11748 result = pre_existing_enum;
11749 }
11750 else if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
11751 {
11752 if (loc)
11753 result = lookup_enum_type_per_location(loc.expand(), *corp);
11754 }
11755 else if (loc)
11756 {
11757 if (enum_type_decl_sptr pre_existing_enum =
11758 is_enum_type(ctxt.lookup_artifact_from_die(die)))
11759 if (pre_existing_enum->get_location() == loc)
11760 result = pre_existing_enum;
11761 }
11762
11763 if (result)
11764 {
11765 ctxt.associate_die_to_type(die, result, where_offset);
11766 return result;
11767 }
11768 }
11769 // TODO: for anonymous enums, maybe have a map of loc -> enums so that
11770 // we can look them up?
11771
11772 uint64_t size = 0;
11773 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
11774 size *= 8;
11775 bool is_artificial = die_is_artificial(die);
11776
11777 // for now we consider that underlying types of enums are all anonymous
11778 bool enum_underlying_type_is_anonymous= true;
11779
11780 enum_type_decl::enumerators enms;
11781 Dwarf_Die child;
11782 if (dwarf_child(die, &child) == 0)
11783 {
11784 do
11785 {
11786 if (dwarf_tag(&child) != DW_TAG_enumerator)
11787 continue;
11788
11789 string n, m;
11790 location l;
11791 die_loc_and_name(ctxt, &child, l, n, m);
11792 uint64_t val = 0;
11793 die_unsigned_constant_attribute(&child, DW_AT_const_value, val);
11794 enms.push_back(enum_type_decl::enumerator(ctxt.env(), n, val));
11795 }
11796 while (dwarf_siblingof(&child, &child) == 0);
11797 }
11798
11799 // DWARF up to version 4 (at least) doesn't seem to carry the
11800 // underlying type, so let's create an artificial one here, which
11801 // sole purpose is to be passed to the constructor of the
11802 // enum_type_decl type.
11803 type_decl_sptr t =
11804 build_enum_underlying_type(ctxt, name, size,
11805 enum_underlying_type_is_anonymous);
11806 t->set_is_declaration_only(is_declaration_only);
11807
11808 result.reset(new enum_type_decl(name, loc, t, enms, linkage_name));
11809 result->set_is_anonymous(is_anonymous);
11810 result->set_is_declaration_only(is_declaration_only);
11811 result->set_is_artificial(is_artificial);
11812 ctxt.associate_die_to_type(die, result, where_offset);
11813
11814 ctxt.maybe_schedule_declaration_only_enum_for_resolution(result);
11815
11816 return result;
11817 }
11818
11819 /// Once a function_decl has been built and added to a class as a
11820 /// member function, this function updates the information of the
11821 /// function_decl concerning the properties of its relationship with
11822 /// the member class. That is, it updates properties like
11823 /// virtualness, access, constness, cdtorness, etc ...
11824 ///
11825 /// @param die the DIE of the function_decl that has been just built.
11826 ///
11827 /// @param f the function_decl that has just been built from @p die.
11828 ///
11829 /// @param klass the @ref class_or_union that @p f belongs to.
11830 ///
11831 /// @param ctxt the context used to read the ELF/DWARF information.
11832 static void
finish_member_function_reading(Dwarf_Die * die,const function_decl_sptr & f,const class_or_union_sptr klass,read_context & ctxt)11833 finish_member_function_reading(Dwarf_Die* die,
11834 const function_decl_sptr& f,
11835 const class_or_union_sptr klass,
11836 read_context& ctxt)
11837 {
11838 ABG_ASSERT(klass);
11839
11840 method_decl_sptr m = is_method_decl(f);
11841 ABG_ASSERT(m);
11842
11843 method_type_sptr method_t = is_method_type(m->get_type());
11844 ABG_ASSERT(method_t);
11845
11846 bool is_ctor = (f->get_name() == klass->get_name());
11847 bool is_dtor = (!f->get_name().empty()
11848 && static_cast<string>(f->get_name())[0] == '~');
11849 bool is_virtual = die_is_virtual(die);
11850 int64_t vindex = -1;
11851 if (is_virtual)
11852 die_virtual_function_index(die, vindex);
11853 access_specifier access = public_access;
11854 if (class_decl_sptr c = is_class_type(klass))
11855 if (!c->is_struct())
11856 access = private_access;
11857 die_access_specifier(die, access);
11858
11859 bool is_static = false;
11860 {
11861 // Let's see if the first parameter is a pointer to an instance of
11862 // the same class type as the current class and has a
11863 // DW_AT_artificial attribute flag set. We are not looking at
11864 // DW_AT_object_pointer (for DWARF 3) because it wasn't being
11865 // emitted in GCC 4_4, which was already DWARF 3.
11866 function_decl::parameter_sptr first_parm;
11867 if (!f->get_parameters().empty())
11868 first_parm = f->get_parameters()[0];
11869
11870 bool is_artificial = first_parm && first_parm->get_is_artificial();
11871 type_base_sptr this_ptr_type, other_klass;
11872
11873 if (is_artificial)
11874 this_ptr_type = first_parm->get_type();
11875
11876 // Sometimes, the type of the "this" pointer is "const class_type* const".
11877 //
11878 // Meaning that the "this pointer" itself is const qualified. So
11879 // let's get the underlying underlying non-qualified pointer.
11880 if (qualified_type_def_sptr q = is_qualified_type(this_ptr_type))
11881 this_ptr_type = q->get_underlying_type();
11882
11883 // Now, get the pointed-to type.
11884 if (pointer_type_def_sptr p = is_pointer_type(this_ptr_type))
11885 other_klass = p->get_pointed_to_type();
11886
11887 // Sometimes, other_klass can be qualified; e.g, volatile. In
11888 // that case, let's get the unqualified version of other_klass.
11889 if (qualified_type_def_sptr q = is_qualified_type(other_klass))
11890 other_klass = q->get_underlying_type();
11891
11892 if (other_klass
11893 && get_type_name(other_klass) == klass->get_qualified_name())
11894 ;
11895 else
11896 is_static = true;
11897
11898 if (is_static)
11899 {
11900 // If we are looking at a DWARF version that is high enough
11901 // for the DW_AT_object_pointer attribute to be present, let's
11902 // see if it's present. If it is, then the current member
11903 // function is not static.
11904 Dwarf_Die object_pointer_die;
11905 if (die_has_object_pointer(die, object_pointer_die))
11906 is_static = false;
11907 }
11908 }
11909 set_member_access_specifier(m, access);
11910 if (vindex != -1)
11911 set_member_function_vtable_offset(m, vindex);
11912 if (is_virtual)
11913 set_member_function_is_virtual(m, is_virtual);
11914 set_member_is_static(m, is_static);
11915 set_member_function_is_ctor(m, is_ctor);
11916 set_member_function_is_dtor(m, is_dtor);
11917 set_member_function_is_const(m, method_t->get_is_const());
11918
11919 ABG_ASSERT(is_member_function(m));
11920
11921 if (is_virtual && !f->get_linkage_name().empty() && !f->get_symbol())
11922 {
11923 // This is a virtual member function which has a linkage name
11924 // but has no underlying symbol set.
11925 //
11926 // The underlying elf symbol to set to this function can show up
11927 // later in the DWARF input or it can be that, because of some
11928 // compiler optimization, the relation between this function and
11929 // its underlying elf symbol is simply not emitted in the DWARF.
11930 //
11931 // Let's thus schedule this function for a later fixup pass
11932 // (performed by
11933 // read_context::fixup_functions_with_no_symbols()) that will
11934 // set its underlying symbol.
11935 //
11936 // Note that if the underying symbol is encountered later in the
11937 // DWARF input, then the part of build_function_decl() that
11938 // updates the function to set its underlying symbol will
11939 // de-schedule this function wrt fixup pass.
11940 Dwarf_Off die_offset = dwarf_dieoffset(die);
11941 die_function_decl_map_type &fns_with_no_symbol =
11942 ctxt.die_function_decl_with_no_symbol_map();
11943 die_function_decl_map_type::const_iterator i =
11944 fns_with_no_symbol.find(die_offset);
11945 if (i == fns_with_no_symbol.end())
11946 fns_with_no_symbol[die_offset] = f;
11947 }
11948
11949 }
11950
11951 /// If a function DIE has attributes which have not yet been read and
11952 /// added to the internal representation that represents that function
11953 /// then read those extra attributes and update the internal
11954 /// representation.
11955 ///
11956 /// @param ctxt the read context to use.
11957 ///
11958 /// @param die the function DIE to consider.
11959 ///
11960 /// @param where_offset where we logical are, currently, in the stream
11961 /// of DIEs. If you don't know what this is, you can just set it to zero.
11962 ///
11963 /// @param existing_fn the representation of the function to update.
11964 ///
11965 /// @return the updated function representation.
11966 static function_decl_sptr
maybe_finish_function_decl_reading(read_context & ctxt,Dwarf_Die * die,size_t where_offset,const function_decl_sptr & existing_fn)11967 maybe_finish_function_decl_reading(read_context& ctxt,
11968 Dwarf_Die* die,
11969 size_t where_offset,
11970 const function_decl_sptr& existing_fn)
11971 {
11972 function_decl_sptr result = build_function_decl(ctxt, die,
11973 where_offset,
11974 existing_fn);
11975
11976 return result;
11977 }
11978
11979 /// Lookup a class or a typedef with a given qualified name in the
11980 /// corpus that a given scope belongs to.
11981 ///
11982 /// @param scope the scope to consider.
11983 ///
11984 /// @param type_name the qualified name of the type to look for.
11985 ///
11986 /// @return the typedef or class type found.
11987 static type_base_sptr
lookup_class_or_typedef_from_corpus(scope_decl * scope,const string & type_name)11988 lookup_class_or_typedef_from_corpus(scope_decl* scope, const string& type_name)
11989 {
11990 string qname = build_qualified_name(scope, type_name);
11991 corpus* corp = scope->get_corpus();
11992 type_base_sptr result = lookup_class_or_typedef_type(qname, *corp);
11993 return result;
11994 }
11995
11996 /// Lookup a class of typedef type from the current corpus being
11997 /// constructed.
11998 ///
11999 /// The type being looked for has the same name as a given DIE.
12000 ///
12001 /// @param ctxt the reading context to use.
12002 ///
12003 /// @param die the DIE which has the same name as the type we are
12004 /// looking for.
12005 ///
12006 /// @param called_for_public_decl whether this function is being
12007 /// called from a a publicly defined declaration.
12008 ///
12009 /// @param where_offset where we are logically at in the DIE stream.
12010 ///
12011 /// @return the type found.
12012 static type_base_sptr
lookup_class_or_typedef_from_corpus(read_context & ctxt,Dwarf_Die * die,bool called_for_public_decl,size_t where_offset)12013 lookup_class_or_typedef_from_corpus(read_context& ctxt,
12014 Dwarf_Die* die,
12015 bool called_for_public_decl,
12016 size_t where_offset)
12017 {
12018 if (!die)
12019 return class_decl_sptr();
12020
12021 string class_name = die_string_attribute(die, DW_AT_name);
12022 if (class_name.empty())
12023 return class_decl_sptr();
12024
12025 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
12026 called_for_public_decl,
12027 where_offset);
12028 if (scope)
12029 return lookup_class_or_typedef_from_corpus(scope.get(), class_name);
12030
12031 return type_base_sptr();
12032 }
12033
12034 /// Lookup a class, typedef or enum type with a given qualified name
12035 /// in the corpus that a given scope belongs to.
12036 ///
12037 /// @param scope the scope to consider.
12038 ///
12039 /// @param type_name the qualified name of the type to look for.
12040 ///
12041 /// @return the typedef, enum or class type found.
12042 static type_base_sptr
lookup_class_typedef_or_enum_type_from_corpus(scope_decl * scope,const string & type_name)12043 lookup_class_typedef_or_enum_type_from_corpus(scope_decl* scope,
12044 const string& type_name)
12045 {
12046 string qname = build_qualified_name(scope, type_name);
12047 corpus* corp = scope->get_corpus();
12048 type_base_sptr result = lookup_class_typedef_or_enum_type(qname, *corp);
12049 return result;
12050 }
12051
12052 /// Lookup a class, typedef or enum type in a given scope, in the
12053 /// corpus that scope belongs to.
12054 ///
12055 /// @param die the DIE of the class, typedef or enum to lookup.
12056 ///
12057 /// @param anonymous_member_type_idx if @p DIE represents an anonymous
12058 /// type, this is the index of that anonymous type in its scope, in
12059 /// case there are several anonymous types of the same kind in that
12060 /// scope.
12061 ///
12062 /// @param scope the scope in which to look the type for.
12063 ///
12064 /// @return the typedef, enum or class type found.
12065 static type_base_sptr
lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die * die,size_t anonymous_member_type_idx,scope_decl * scope)12066 lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die* die,
12067 size_t anonymous_member_type_idx,
12068 scope_decl* scope)
12069 {
12070 if (!die)
12071 return class_decl_sptr();
12072
12073 string type_name = die_string_attribute(die, DW_AT_name);
12074 if (is_anonymous_type_die(die))
12075 type_name =
12076 get_internal_anonymous_die_name(die, anonymous_member_type_idx);
12077
12078 if (type_name.empty())
12079 return class_decl_sptr();
12080
12081 return lookup_class_typedef_or_enum_type_from_corpus(scope, type_name);
12082 }
12083
12084 /// Test if a DIE represents a function that is a member of a given
12085 /// class type.
12086 ///
12087 /// @param ctxt the reading context.
12088 ///
12089 /// @param function_die the DIE of the function to consider.
12090 ///
12091 /// @param class_type the class type to consider.
12092 ///
12093 /// @param where_offset where we are logically at in the DIE stream.
12094 ///
12095 /// @return the method declaration corresponding to the member
12096 /// function of @p class_type, iff @p function_die is for a member
12097 /// function of @p class_type.
12098 static method_decl_sptr
is_function_for_die_a_member_of_class(read_context & ctxt,Dwarf_Die * function_die,const class_or_union_sptr & class_type)12099 is_function_for_die_a_member_of_class(read_context& ctxt,
12100 Dwarf_Die* function_die,
12101 const class_or_union_sptr& class_type)
12102 {
12103 type_or_decl_base_sptr artifact = ctxt.lookup_artifact_from_die(function_die);
12104
12105 if (!artifact)
12106 return method_decl_sptr();
12107
12108 method_decl_sptr method = is_method_decl(artifact);
12109 method_type_sptr method_type;
12110
12111 if (method)
12112 method_type = method->get_type();
12113 else
12114 method_type = is_method_type(artifact);
12115 ABG_ASSERT(method_type);
12116
12117 class_or_union_sptr method_class = method_type->get_class_type();
12118 ABG_ASSERT(method_class);
12119
12120 string method_class_name = method_class->get_qualified_name(),
12121 class_type_name = class_type->get_qualified_name();
12122
12123 if (method_class_name == class_type_name)
12124 {
12125 //ABG_ASSERT(class_type.get() == method_class.get());
12126 return method;
12127 }
12128
12129 return method_decl_sptr();
12130 }
12131
12132 /// If a given function DIE represents an existing member function of
12133 /// a given class, then update that member function with new
12134 /// properties present in the DIE. Otherwise, if the DIE represents a
12135 /// new member function that is not already present in the class then
12136 /// add that new member function to the class.
12137 ///
12138 /// @param ctxt the reading context.
12139 ///
12140 /// @param function_die the DIE of the potential member function to
12141 /// consider.
12142 ///
12143 /// @param class_type the class type to consider.
12144 ///
12145 /// @param called_from_public_decl is true iff this function was
12146 /// called from a publicly defined and exported declaration.
12147 ///
12148 /// @param where_offset where we are logically at in the DIE stream.
12149 ///
12150 /// @return the method decl representing the member function.
12151 static method_decl_sptr
add_or_update_member_function(read_context & ctxt,Dwarf_Die * function_die,const class_or_union_sptr & class_type,bool called_from_public_decl,size_t where_offset)12152 add_or_update_member_function(read_context& ctxt,
12153 Dwarf_Die* function_die,
12154 const class_or_union_sptr& class_type,
12155 bool called_from_public_decl,
12156 size_t where_offset)
12157 {
12158 method_decl_sptr method =
12159 is_function_for_die_a_member_of_class(ctxt, function_die, class_type);
12160
12161 if (!method)
12162 method = is_method_decl(build_ir_node_from_die(ctxt, function_die,
12163 class_type.get(),
12164 called_from_public_decl,
12165 where_offset));
12166 if (!method)
12167 return method_decl_sptr();
12168
12169 finish_member_function_reading(function_die,
12170 is_function_decl(method),
12171 class_type, ctxt);
12172 return method;
12173 }
12174
12175 /// Build a an IR node for class type from a DW_TAG_structure_type or
12176 /// DW_TAG_class_type DIE and add that node to the ABI corpus being
12177 /// currently built.
12178 ///
12179 /// If the represents class type that already exists, then update the
12180 /// existing class type with the new properties found in the DIE.
12181 ///
12182 /// It meanst that this function can also update an existing
12183 /// class_decl node with data members, member functions and other
12184 /// properties coming from the DIE.
12185 ///
12186 /// @param ctxt the read context to consider.
12187 ///
12188 /// @param die the DIE to read information from. Must be either a
12189 /// DW_TAG_structure_type or a DW_TAG_class_type.
12190 ///
12191 /// @param scope a pointer to the scope_decl* under which this class
12192 /// is to be added to.
12193 ///
12194 /// @param is_struct whether the class was declared as a struct.
12195 ///
12196 /// @param klass if non-null, this is a klass to append the members
12197 /// to. Otherwise, this function just builds the class from scratch.
12198 ///
12199 /// @param called_from_public_decl set to true if this class is being
12200 /// called from a "Public declaration like vars or public symbols".
12201 ///
12202 /// @param where_offset the offset of the DIE where we are "logically"
12203 /// positionned at, in the DIE tree. This is useful when @p die is
12204 /// e.g, DW_TAG_partial_unit that can be included in several places in
12205 /// the DIE tree.
12206 ///
12207 /// @param is_declaration_only is true if the DIE denoted by @p die is
12208 /// a declaration-only DIE.
12209 ///
12210 /// @return the resulting class_type.
12211 static class_decl_sptr
add_or_update_class_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,bool is_struct,class_decl_sptr klass,bool called_from_public_decl,size_t where_offset,bool is_declaration_only)12212 add_or_update_class_type(read_context& ctxt,
12213 Dwarf_Die* die,
12214 scope_decl* scope,
12215 bool is_struct,
12216 class_decl_sptr klass,
12217 bool called_from_public_decl,
12218 size_t where_offset,
12219 bool is_declaration_only)
12220 {
12221 class_decl_sptr result;
12222 if (!die)
12223 return result;
12224
12225 const die_source source = ctxt.get_die_source(die);
12226
12227 unsigned tag = dwarf_tag(die);
12228
12229 if (tag != DW_TAG_class_type && tag != DW_TAG_structure_type)
12230 return result;
12231
12232 {
12233 die_class_or_union_map_type::const_iterator i =
12234 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12235 if (i != ctxt.die_wip_classes_map(source).end())
12236 {
12237 class_decl_sptr class_type = is_class_type(i->second);
12238 ABG_ASSERT(class_type);
12239 return class_type;
12240 }
12241 }
12242
12243 string name, linkage_name;
12244 location loc;
12245 die_loc_and_name(ctxt, die, loc, name, linkage_name);
12246
12247 bool is_anonymous = false;
12248 if (name.empty())
12249 {
12250 // So we are looking at an anonymous struct. Let's
12251 // give it a name.
12252 name = get_internal_anonymous_die_prefix_name(die);
12253 ABG_ASSERT(!name.empty());
12254 // But we remember that the type is anonymous.
12255 is_anonymous = true;
12256
12257 if (size_t s = scope->get_num_anonymous_member_classes())
12258 name = build_internal_anonymous_die_name(name, s);
12259 }
12260
12261 if (!is_anonymous)
12262 {
12263 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
12264 {
12265 if (loc)
12266 // TODO: if there is only one class defined in the corpus
12267 // for this location, then re-use it. But if there are
12268 // more than one, then do not re-use it, for now.
12269 result = lookup_class_type_per_location(loc.expand(), *corp);
12270 else
12271 // TODO: if there is just one class for that name defined,
12272 // then re-use it. Otherwise, don't.
12273 result = lookup_class_type(name, *corp);
12274 if (result
12275 // If we are seeing a declaration of a definition we
12276 // already had, or if we are seing a type with the same
12277 // declaration-only-ness that we had before, then keep
12278 // the one we already had.
12279 && (result->get_is_declaration_only() == is_declaration_only
12280 || (!result->get_is_declaration_only()
12281 && is_declaration_only)))
12282 {
12283 ctxt.associate_die_to_type(die, result, where_offset);
12284 return result;
12285 }
12286 else
12287 // We might be seeing the definition of a declaration we
12288 // already had. In that case, keep the definition and
12289 // drop the declaration.
12290 result.reset();
12291 }
12292 }
12293
12294 // If we've already seen the same class as 'die', then let's re-use
12295 // that one, unless it's an anonymous class. We can't really safely
12296 // re-use anonymous classes as they have no name, by construction.
12297 // What we can do, rather, is to reuse the typedef that name them,
12298 // when they do have a naming typedef.
12299 if (!is_anonymous)
12300 if (class_decl_sptr pre_existing_class =
12301 is_class_type(ctxt.lookup_type_artifact_from_die(die)))
12302 klass = pre_existing_class;
12303
12304 uint64_t size = 0;
12305 die_size_in_bits(die, size);
12306 bool is_artificial = die_is_artificial(die);
12307
12308 Dwarf_Die child;
12309 bool has_child = (dwarf_child(die, &child) == 0);
12310
12311 decl_base_sptr res;
12312 if (klass)
12313 {
12314 res = result = klass;
12315 if (has_child && klass->get_is_declaration_only()
12316 && klass->get_definition_of_declaration())
12317 res = result = is_class_type(klass->get_definition_of_declaration());
12318 if (loc)
12319 result->set_location(loc);
12320 }
12321 else
12322 {
12323 result.reset(new class_decl(ctxt.env(), name, size,
12324 /*alignment=*/0, is_struct, loc,
12325 decl_base::VISIBILITY_DEFAULT,
12326 is_anonymous));
12327
12328 result->set_is_declaration_only(is_declaration_only);
12329
12330 res = add_decl_to_scope(result, scope);
12331 result = dynamic_pointer_cast<class_decl>(res);
12332 ABG_ASSERT(result);
12333 }
12334
12335 if (size != result->get_size_in_bits())
12336 result->set_size_in_bits(size);
12337
12338 if (klass)
12339 // We are amending a class that was built before. So let's check
12340 // if we need to amend its "declaration-only-ness" status.
12341 if (!!result->get_size_in_bits() == result->get_is_declaration_only())
12342 // The size of the class doesn't match its
12343 // 'declaration-only-ness". We might have a non-zero sized
12344 // class which is declaration-only, or a zero sized class that
12345 // is not declaration-only. Let's set the declaration-only-ness
12346 // according to what we are instructed to.
12347 //
12348 // Note however that there are binaries out there emitted by
12349 // compilers (Clang, in C++) emit declarations-only classes that
12350 // have non-zero size. So we must honor these too. That is why
12351 // we are not forcing the declaration-only-ness to false when a
12352 // class has non-zero size. An example of such binary is
12353 // tests/data/test-diff-filter/test41-PR21486-abg-writer.llvm.o.
12354 result->set_is_declaration_only(is_declaration_only);
12355
12356 result->set_is_artificial(is_artificial);
12357
12358 ctxt.associate_die_to_type(die, result, where_offset);
12359
12360 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12361
12362 if (!has_child)
12363 // TODO: set the access specifier for the declaration-only class
12364 // here.
12365 return result;
12366
12367 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
12368
12369 scope_decl_sptr scop =
12370 dynamic_pointer_cast<scope_decl>(res);
12371 ABG_ASSERT(scop);
12372 ctxt.scope_stack().push(scop.get());
12373
12374 if (has_child)
12375 {
12376 int anonymous_member_class_index = -1;
12377 int anonymous_member_union_index = -1;
12378 int anonymous_member_enum_index = -1;
12379
12380 do
12381 {
12382 tag = dwarf_tag(&child);
12383
12384 // Handle base classes.
12385 if (tag == DW_TAG_inheritance)
12386 {
12387 result->set_is_declaration_only(false);
12388
12389 Dwarf_Die type_die;
12390 if (!die_die_attribute(&child, DW_AT_type, type_die))
12391 continue;
12392
12393 type_base_sptr base_type;
12394 if (!(base_type =
12395 lookup_class_or_typedef_from_corpus(ctxt, &type_die,
12396 called_from_public_decl,
12397 where_offset)))
12398 {
12399 base_type =
12400 is_type(build_ir_node_from_die(ctxt, &type_die,
12401 called_from_public_decl,
12402 where_offset));
12403 }
12404 // Sometimes base_type can be a typedef. Let's make
12405 // sure that typedef is compatible with a class type.
12406 class_decl_sptr b = is_compatible_with_class_type(base_type);
12407 if (!b)
12408 continue;
12409
12410 access_specifier access =
12411 is_struct
12412 ? public_access
12413 : private_access;
12414
12415 die_access_specifier(&child, access);
12416
12417 bool is_virt= die_is_virtual(&child);
12418 int64_t offset = 0;
12419 bool is_offset_present =
12420 die_member_offset(ctxt, &child, offset);
12421
12422 class_decl::base_spec_sptr base(new class_decl::base_spec
12423 (b, access,
12424 is_offset_present ? offset : -1,
12425 is_virt));
12426 if (b->get_is_declaration_only())
12427 ABG_ASSERT(ctxt.is_decl_only_class_scheduled_for_resolution(b));
12428 if (result->find_base_class(b->get_qualified_name()))
12429 continue;
12430 result->add_base_specifier(base);
12431 }
12432 // Handle data members.
12433 else if (tag == DW_TAG_member
12434 || tag == DW_TAG_variable)
12435 {
12436 Dwarf_Die type_die;
12437 if (!die_die_attribute(&child, DW_AT_type, type_die))
12438 continue;
12439
12440 string n, m;
12441 location loc;
12442 die_loc_and_name(ctxt, &child, loc, n, m);
12443 /// For now, we skip the hidden vtable pointer.
12444 /// Currently, we're looking for a member starting with
12445 /// "_vptr[^0-9a-zA-Z_]", which is what Clang and GCC
12446 /// use as a name for the hidden vtable pointer.
12447 if (n.substr(0, 5) == "_vptr"
12448 && !std::isalnum(n.at(5))
12449 && n.at(5) != '_')
12450 continue;
12451
12452 // If the variable is already a member of this class,
12453 // move on. If it's an anonymous data member, we need
12454 // to handle it differently. We'll do that later below.
12455 if (!n.empty() && lookup_var_decl_in_scope(n, result))
12456 continue;
12457
12458 int64_t offset_in_bits = 0;
12459 bool is_laid_out = die_member_offset(ctxt, &child,
12460 offset_in_bits);
12461 // For now, is_static == !is_laid_out. When we have
12462 // templates, we'll try to be more specific. For now,
12463 // this approximation should do OK.
12464 bool is_static = !is_laid_out;
12465
12466 if (is_static && variable_is_suppressed(ctxt,
12467 result.get(),
12468 &child))
12469 continue;
12470
12471 decl_base_sptr ty = is_decl(build_ir_node_from_die(ctxt, &type_die,
12472 called_from_public_decl,
12473 where_offset));
12474 type_base_sptr t = is_type(ty);
12475 if (!t)
12476 continue;
12477
12478 // The call to build_ir_node_from_die above could have
12479 // triggered the adding of a data member named 'n' into
12480 // result. So let's check again if the variable is
12481 // already a member of this class. Here again, if it's
12482 // an anonymous data member, we need to handle it
12483 // differently. We'll do that later below.
12484 if (!n.empty() && lookup_var_decl_in_scope(n, result))
12485 continue;
12486
12487 if (!is_static)
12488 // We have a non-static data member. So this class
12489 // cannot be a declaration-only class anymore, even if
12490 // some DWARF emitters might consider it otherwise.
12491 result->set_is_declaration_only(false);
12492 access_specifier access =
12493 is_struct
12494 ? public_access
12495 : private_access;
12496
12497 die_access_specifier(&child, access);
12498
12499 var_decl_sptr dm(new var_decl(n, t, loc, m));
12500 if (n.empty() && result->find_data_member(dm))
12501 // dm is an anonymous data member that was already
12502 // present in the current class so let's not add it.
12503 continue;
12504 result->add_data_member(dm, access, is_laid_out,
12505 is_static, offset_in_bits);
12506 ABG_ASSERT(has_scope(dm));
12507 ctxt.associate_die_to_decl(&child, dm, where_offset,
12508 /*associate_by_repr=*/false);
12509 }
12510 // Handle member functions;
12511 else if (tag == DW_TAG_subprogram)
12512 {
12513 decl_base_sptr r =
12514 add_or_update_member_function(ctxt, &child, result,
12515 called_from_public_decl,
12516 where_offset);
12517 if (function_decl_sptr f = is_function_decl(r))
12518 ctxt.associate_die_to_decl(&child, f, where_offset,
12519 /*associate_by_repr=*/true);
12520 }
12521 // Handle member types
12522 else if (die_is_type(&child))
12523 {
12524 // Track the anonymous type index in the current
12525 // scope. Look for what this means by reading the
12526 // comment of the function
12527 // build_internal_anonymous_die_name.
12528 int anonymous_member_type_index = 0;
12529 if (is_anonymous_type_die(&child))
12530 {
12531 // Update the anonymous type index.
12532 if (die_is_class_type(&child))
12533 anonymous_member_type_index =
12534 ++anonymous_member_class_index;
12535 else if (dwarf_tag(&child) == DW_TAG_union_type)
12536 anonymous_member_type_index =
12537 ++anonymous_member_union_index;
12538 else if (dwarf_tag(&child) == DW_TAG_enumeration_type)
12539 anonymous_member_type_index =
12540 ++anonymous_member_enum_index;
12541 }
12542 // if the type is not already a member of this class,
12543 // then add it to the class.
12544 if (!lookup_class_typedef_or_enum_type_from_corpus
12545 (&child, anonymous_member_type_index, result.get()))
12546 build_ir_node_from_die(ctxt, &child, result.get(),
12547 called_from_public_decl,
12548 where_offset);
12549 }
12550 } while (dwarf_siblingof(&child, &child) == 0);
12551 }
12552
12553 ctxt.scope_stack().pop();
12554
12555 {
12556 die_class_or_union_map_type::const_iterator i =
12557 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12558 if (i != ctxt.die_wip_classes_map(source).end())
12559 {
12560 if (is_member_type(i->second))
12561 set_member_access_specifier(res,
12562 get_member_access_specifier(i->second));
12563 ctxt.die_wip_classes_map(source).erase(i);
12564 }
12565 }
12566
12567 ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12568 return result;
12569 }
12570
12571 /// Build an @ref union_decl from a DW_TAG_union_type DIE.
12572 ///
12573 /// @param ctxt the read context to use.
12574 ///
12575 /// @param die the DIE to read from.
12576 ///
12577 /// @param scope the scope the resulting @ref union_decl belongs to.
12578 ///
12579 /// @param union_type if this parameter is non-nil, then this function
12580 /// updates the @ref union_decl that it points to, rather than
12581 /// creating a new @ref union_decl.
12582 ///
12583 /// @param called_from_public_decl is true if this function has been
12584 /// initially called within the context of a public decl.
12585 ///
12586 /// @param where_offset the offset of the DIE where we are "logically"
12587 /// positionned at, in the DIE tree. This is useful when @p die is
12588 /// e.g, DW_TAG_partial_unit that can be included in several places in
12589 /// the DIE tree.
12590 ///
12591 /// @param is_declaration_only is true if the DIE denoted by @p die is
12592 /// a declaration-only DIE.
12593 ///
12594 /// @return the resulting @ref union_decl type.
12595 static union_decl_sptr
add_or_update_union_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,union_decl_sptr union_type,bool called_from_public_decl,size_t where_offset,bool is_declaration_only)12596 add_or_update_union_type(read_context& ctxt,
12597 Dwarf_Die* die,
12598 scope_decl* scope,
12599 union_decl_sptr union_type,
12600 bool called_from_public_decl,
12601 size_t where_offset,
12602 bool is_declaration_only)
12603 {
12604 union_decl_sptr result;
12605 if (!die)
12606 return result;
12607
12608 unsigned tag = dwarf_tag(die);
12609
12610 if (tag != DW_TAG_union_type)
12611 return result;
12612
12613 const die_source source = ctxt.get_die_source(die);
12614 {
12615 die_class_or_union_map_type::const_iterator i =
12616 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12617 if (i != ctxt.die_wip_classes_map(source).end())
12618 {
12619 union_decl_sptr u = is_union_type(i->second);
12620 ABG_ASSERT(u);
12621 return u;
12622 }
12623 }
12624
12625 string name, linkage_name;
12626 location loc;
12627 die_loc_and_name(ctxt, die, loc, name, linkage_name);
12628
12629 bool is_anonymous = false;
12630 if (name.empty())
12631 {
12632 // So we are looking at an anonymous union. Let's give it a
12633 // name.
12634 name = get_internal_anonymous_die_prefix_name(die);
12635 ABG_ASSERT(!name.empty());
12636 // But we remember that the type is anonymous.
12637 is_anonymous = true;
12638
12639 if (size_t s = scope->get_num_anonymous_member_unions())
12640 name = build_internal_anonymous_die_name(name, s);
12641 }
12642
12643 // If the type has location, then associate it to its
12644 // representation. This way, all occurences of types with the same
12645 // representation (name) and location can be later detected as being
12646 // for the same type.
12647
12648 if (!is_anonymous)
12649 {
12650 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
12651 {
12652 if (loc)
12653 result = lookup_union_type_per_location(loc.expand(), *corp);
12654 else
12655 result = lookup_union_type(name, *corp);
12656
12657 if (result)
12658 {
12659 ctxt.associate_die_to_type(die, result, where_offset);
12660 return result;
12661 }
12662 }
12663 }
12664
12665 // if we've already seen a union with the same union as 'die' then
12666 // let's re-use that one. We can't really safely re-use anonymous
12667 // unions as they have no name, by construction. What we can do,
12668 // rather, is to reuse the typedef that name them, when they do have
12669 // a naming typedef.
12670 if (!is_anonymous)
12671 if (union_decl_sptr pre_existing_union =
12672 is_union_type(ctxt.lookup_artifact_from_die(die)))
12673 union_type = pre_existing_union;
12674
12675 uint64_t size = 0;
12676 die_size_in_bits(die, size);
12677 bool is_artificial = die_is_artificial(die);
12678
12679 if (union_type)
12680 {
12681 result = union_type;
12682 result->set_location(loc);
12683 }
12684 else
12685 {
12686 result.reset(new union_decl(ctxt.env(), name, size, loc,
12687 decl_base::VISIBILITY_DEFAULT,
12688 is_anonymous));
12689 if (is_declaration_only)
12690 result->set_is_declaration_only(true);
12691 result = is_union_type(add_decl_to_scope(result, scope));
12692 ABG_ASSERT(result);
12693 }
12694
12695 if (size)
12696 {
12697 result->set_size_in_bits(size);
12698 result->set_is_declaration_only(false);
12699 }
12700
12701 result->set_is_artificial(is_artificial);
12702
12703 ctxt.associate_die_to_type(die, result, where_offset);
12704
12705 // TODO: maybe schedule declaration-only union for result like we do
12706 // for classes:
12707 // ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12708
12709 Dwarf_Die child;
12710 bool has_child = (dwarf_child(die, &child) == 0);
12711 if (!has_child)
12712 return result;
12713
12714 ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
12715
12716 scope_decl_sptr scop =
12717 dynamic_pointer_cast<scope_decl>(result);
12718 ABG_ASSERT(scop);
12719 ctxt.scope_stack().push(scop.get());
12720
12721 if (has_child)
12722 {
12723 do
12724 {
12725 tag = dwarf_tag(&child);
12726 // Handle data members.
12727 if (tag == DW_TAG_member || tag == DW_TAG_variable)
12728 {
12729 Dwarf_Die type_die;
12730 if (!die_die_attribute(&child, DW_AT_type, type_die))
12731 continue;
12732
12733 string n, m;
12734 location loc;
12735 die_loc_and_name(ctxt, &child, loc, n, m);
12736
12737 // Because we can be updating an existing union, let's
12738 // make sure we don't already have a member of the same
12739 // name. Anonymous member are handled a bit later below
12740 // so let's not consider them here.
12741 if (!n.empty() && lookup_var_decl_in_scope(n, result))
12742 continue;
12743
12744 ssize_t offset_in_bits = 0;
12745 decl_base_sptr ty =
12746 is_decl(build_ir_node_from_die(ctxt, &type_die,
12747 called_from_public_decl,
12748 where_offset));
12749 type_base_sptr t = is_type(ty);
12750 if (!t)
12751 continue;
12752
12753 // We have a non-static data member. So this union
12754 // cannot be a declaration-only union anymore, even if
12755 // some DWARF emitters might consider it otherwise.
12756 result->set_is_declaration_only(false);
12757 access_specifier access = public_access;
12758
12759 die_access_specifier(&child, access);
12760
12761 var_decl_sptr dm(new var_decl(n, t, loc, m));
12762 // If dm is an anonymous data member, let's make sure
12763 // the current union doesn't already have it as a data
12764 // member.
12765 if (n.empty() && result->find_data_member(dm))
12766 continue;
12767
12768 result->add_data_member(dm, access, /*is_laid_out=*/true,
12769 /*is_static=*/false,
12770 offset_in_bits);
12771 ABG_ASSERT(has_scope(dm));
12772 ctxt.associate_die_to_decl(&child, dm, where_offset,
12773 /*associate_by_repr=*/false);
12774 }
12775 // Handle member functions;
12776 else if (tag == DW_TAG_subprogram)
12777 {
12778 decl_base_sptr r =
12779 is_decl(build_ir_node_from_die(ctxt, &child,
12780 result.get(),
12781 called_from_public_decl,
12782 where_offset));
12783 if (!r)
12784 continue;
12785
12786 function_decl_sptr f = dynamic_pointer_cast<function_decl>(r);
12787 ABG_ASSERT(f);
12788
12789 finish_member_function_reading(&child, f, result, ctxt);
12790
12791 ctxt.associate_die_to_decl(&child, f, where_offset,
12792 /*associate_by_repr=*/false);
12793 }
12794 // Handle member types
12795 else if (die_is_type(&child))
12796 decl_base_sptr td =
12797 is_decl(build_ir_node_from_die(ctxt, &child, result.get(),
12798 called_from_public_decl,
12799 where_offset));
12800 } while (dwarf_siblingof(&child, &child) == 0);
12801 }
12802
12803 ctxt.scope_stack().pop();
12804
12805 {
12806 die_class_or_union_map_type::const_iterator i =
12807 ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12808 if (i != ctxt.die_wip_classes_map(source).end())
12809 {
12810 if (is_member_type(i->second))
12811 set_member_access_specifier(result,
12812 get_member_access_specifier(i->second));
12813 ctxt.die_wip_classes_map(source).erase(i);
12814 }
12815 }
12816
12817 return result;
12818 }
12819
12820 /// build a qualified type from a DW_TAG_const_type,
12821 /// DW_TAG_volatile_type or DW_TAG_restrict_type DIE.
12822 ///
12823 /// @param ctxt the read context to consider.
12824 ///
12825 /// @param die the input DIE to read from.
12826 ///
12827 /// @param called_from_public_decl true if this function was called
12828 /// from a context where either a public function or a public variable
12829 /// is being built.
12830 ///
12831 /// @param where_offset the offset of the DIE where we are "logically"
12832 /// positionned at, in the DIE tree. This is useful when @p die is
12833 /// e.g, DW_TAG_partial_unit that can be included in several places in
12834 /// the DIE tree.
12835 ///
12836 /// @return the resulting qualified_type_def.
12837 static type_base_sptr
build_qualified_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)12838 build_qualified_type(read_context& ctxt,
12839 Dwarf_Die* die,
12840 bool called_from_public_decl,
12841 size_t where_offset)
12842 {
12843 type_base_sptr result;
12844 if (!die)
12845 return result;
12846
12847 unsigned tag = dwarf_tag(die);
12848
12849 if (tag != DW_TAG_const_type
12850 && tag != DW_TAG_volatile_type
12851 && tag != DW_TAG_restrict_type)
12852 return result;
12853
12854 Dwarf_Die underlying_type_die;
12855 decl_base_sptr utype_decl;
12856 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
12857 // So, if no DW_AT_type is present, then this means (if we are
12858 // looking at a debug info emitted by GCC) that we are looking
12859 // at a qualified void type.
12860 utype_decl = build_ir_node_for_void_type(ctxt);
12861
12862 if (!utype_decl)
12863 utype_decl = is_decl(build_ir_node_from_die(ctxt, &underlying_type_die,
12864 called_from_public_decl,
12865 where_offset));
12866 if (!utype_decl)
12867 return result;
12868
12869 // The call to build_ir_node_from_die() could have triggered the
12870 // creation of the type for this DIE. In that case, just return it.
12871 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
12872 {
12873 result = t;
12874 ctxt.associate_die_to_type(die, result, where_offset);
12875 return result;
12876 }
12877
12878 type_base_sptr utype = is_type(utype_decl);
12879 ABG_ASSERT(utype);
12880
12881 qualified_type_def::CV qual = qualified_type_def::CV_NONE;
12882 if (tag == DW_TAG_const_type)
12883 qual |= qualified_type_def::CV_CONST;
12884 else if (tag == DW_TAG_volatile_type)
12885 qual |= qualified_type_def::CV_VOLATILE;
12886 else if (tag == DW_TAG_restrict_type)
12887 qual |= qualified_type_def::CV_RESTRICT;
12888 else
12889 ABG_ASSERT_NOT_REACHED;
12890
12891 if (!result)
12892 result.reset(new qualified_type_def(utype, qual, location()));
12893
12894 ctxt.associate_die_to_type(die, result, where_offset);
12895
12896 return result;
12897 }
12898
12899 /// Walk a tree of typedef of qualified arrays and schedule all type
12900 /// nodes for canonicalization.
12901 ///
12902 /// This is to be used after an array tree has been cloned. In that
12903 /// case, the newly cloned type nodes have to be scheduled for
12904 /// canonicalization.
12905 ///
12906 /// This is a subroutine of maybe_strip_qualification.
12907 ///
12908 /// @param t the type node to be scheduled for canonicalization.
12909 ///
12910 /// @param ctxt the contexter of the reader to use.
12911 static void
schedule_array_tree_for_late_canonicalization(const type_base_sptr & t,read_context & ctxt)12912 schedule_array_tree_for_late_canonicalization(const type_base_sptr& t,
12913 read_context &ctxt)
12914 {
12915 if (typedef_decl_sptr type = is_typedef(t))
12916 {
12917 schedule_array_tree_for_late_canonicalization(type->get_underlying_type(),
12918 ctxt);
12919 ctxt.schedule_type_for_late_canonicalization(t);
12920 }
12921 else if (qualified_type_def_sptr type = is_qualified_type(t))
12922 {
12923 schedule_array_tree_for_late_canonicalization(type->get_underlying_type(),
12924 ctxt);
12925 ctxt.schedule_type_for_late_canonicalization(t);
12926 }
12927 else if (array_type_def_sptr type = is_array_type(t))
12928 {
12929 for (vector<array_type_def::subrange_sptr>::const_iterator i =
12930 type->get_subranges().begin();
12931 i != type->get_subranges().end();
12932 ++i)
12933 {
12934 if (!(*i)->get_scope())
12935 add_decl_to_scope(*i, ctxt.cur_transl_unit()->get_global_scope());
12936 ctxt.schedule_type_for_late_canonicalization(*i);
12937
12938 }
12939 schedule_array_tree_for_late_canonicalization(type->get_element_type(),
12940 ctxt);
12941 ctxt.schedule_type_for_late_canonicalization(type);
12942 }
12943 }
12944
12945 /// Strip qualification from a qualified type, when it makes sense.
12946 ///
12947 /// DWARF constructs "const reference". This is redundant because a
12948 /// reference is always const. The issue is these redundant types then
12949 /// leak into the IR and make for bad diagnostics.
12950 ///
12951 /// This function thus strips the const qualifier from the type in
12952 /// that case. It might contain code to strip other cases like this
12953 /// in the future.
12954 ///
12955 /// @param t the type to strip const qualification from.
12956 ///
12957 /// @param ctxt the @ref read_context to use.
12958 ///
12959 /// @return the stripped type or just return @p t.
12960 static decl_base_sptr
maybe_strip_qualification(const qualified_type_def_sptr t,read_context & ctxt)12961 maybe_strip_qualification(const qualified_type_def_sptr t,
12962 read_context &ctxt)
12963 {
12964 if (!t)
12965 return t;
12966
12967 decl_base_sptr result = t;
12968 type_base_sptr u = t->get_underlying_type();
12969
12970 result = strip_useless_const_qualification(t);
12971 if (result.get() != t.get())
12972 return result;
12973
12974 if (is_array_type(u) || is_typedef_of_array(u))
12975 {
12976 array_type_def_sptr array;
12977 scope_decl * scope = 0;
12978 if ((array = is_array_type(u)))
12979 {
12980 scope = array->get_scope();
12981 ABG_ASSERT(scope);
12982 array = is_array_type(clone_array_tree(array));
12983 schedule_array_tree_for_late_canonicalization(array, ctxt);
12984 add_decl_to_scope(array, scope);
12985 t->set_underlying_type(array);
12986 u = t->get_underlying_type();
12987 }
12988 else if (is_typedef_of_array(u))
12989 {
12990 scope = is_decl(u)->get_scope();
12991 ABG_ASSERT(scope);
12992 typedef_decl_sptr typdef =
12993 is_typedef(clone_array_tree(is_typedef(u)));
12994 schedule_array_tree_for_late_canonicalization(typdef, ctxt);
12995 ABG_ASSERT(typdef);
12996 add_decl_to_scope(typdef, scope);
12997 t->set_underlying_type(typdef);
12998 u = t->get_underlying_type();
12999 array = is_typedef_of_array(u);
13000 }
13001 else
13002 ABG_ASSERT_NOT_REACHED;
13003
13004 ABG_ASSERT(array);
13005 // We should not be editing types that are already canonicalized.
13006 ABG_ASSERT(!array->get_canonical_type());
13007 type_base_sptr element_type = array->get_element_type();
13008
13009 if (qualified_type_def_sptr qualified = is_qualified_type(element_type))
13010 {
13011 // We should not be editing types that are already canonicalized.
13012 ABG_ASSERT(!qualified->get_canonical_type());
13013 qualified_type_def::CV quals = qualified->get_cv_quals();
13014 quals |= t->get_cv_quals();
13015 qualified->set_cv_quals(quals);
13016 result = is_decl(u);
13017 }
13018 else
13019 {
13020 qualified_type_def_sptr qual_type
13021 (new qualified_type_def(element_type,
13022 t->get_cv_quals(),
13023 t->get_location()));
13024 add_decl_to_scope(qual_type, is_decl(element_type)->get_scope());
13025 array->set_element_type(qual_type);
13026 ctxt.schedule_type_for_late_canonicalization(is_type(qual_type));
13027 result = is_decl(u);
13028 }
13029 }
13030
13031 return result;
13032 }
13033
13034 /// Build a pointer type from a DW_TAG_pointer_type DIE.
13035 ///
13036 /// @param ctxt the read context to consider.
13037 ///
13038 /// @param die the DIE to read information from.
13039 ///
13040 /// @param called_from_public_decl true if this function was called
13041 /// from a context where either a public function or a public variable
13042 /// is being built.
13043 ///
13044 /// @param where_offset the offset of the DIE where we are "logically"
13045 /// positionned at, in the DIE tree. This is useful when @p die is
13046 /// e.g, DW_TAG_partial_unit that can be included in several places in
13047 /// the DIE tree.
13048 ///
13049 /// @return the resulting pointer to pointer_type_def.
13050 static pointer_type_def_sptr
build_pointer_type_def(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13051 build_pointer_type_def(read_context& ctxt,
13052 Dwarf_Die* die,
13053 bool called_from_public_decl,
13054 size_t where_offset)
13055 {
13056 pointer_type_def_sptr result;
13057
13058 if (!die)
13059 return result;
13060
13061 unsigned tag = dwarf_tag(die);
13062 if (tag != DW_TAG_pointer_type)
13063 return result;
13064
13065 type_or_decl_base_sptr utype_decl;
13066 Dwarf_Die underlying_type_die;
13067 bool has_underlying_type_die = false;
13068 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
13069 // If the DW_AT_type attribute is missing, that means we are
13070 // looking at a pointer to "void".
13071 utype_decl = build_ir_node_for_void_type(ctxt);
13072 else
13073 has_underlying_type_die = true;
13074
13075 if (!utype_decl && has_underlying_type_die)
13076 utype_decl = build_ir_node_from_die(ctxt, &underlying_type_die,
13077 called_from_public_decl,
13078 where_offset);
13079 if (!utype_decl)
13080 return result;
13081
13082 // The call to build_ir_node_from_die() could have triggered the
13083 // creation of the type for this DIE. In that case, just return it.
13084 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13085 {
13086 result = is_pointer_type(t);
13087 ABG_ASSERT(result);
13088 return result;
13089 }
13090
13091 type_base_sptr utype = is_type(utype_decl);
13092 ABG_ASSERT(utype);
13093
13094 // if the DIE for the pointer type doesn't have a byte_size
13095 // attribute then we assume the size of the pointer is the address
13096 // size of the current translation unit.
13097 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
13098 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
13099 // The size as expressed by DW_AT_byte_size is in byte, so let's
13100 // convert it to bits.
13101 size *= 8;
13102
13103 // And the size of the pointer must be the same as the address size
13104 // of the current translation unit.
13105 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
13106
13107 result.reset(new pointer_type_def(utype, size, /*alignment=*/0, location()));
13108 ABG_ASSERT(result->get_pointed_to_type());
13109
13110 ctxt.associate_die_to_type(die, result, where_offset);
13111 return result;
13112 }
13113
13114 /// Build a reference type from either a DW_TAG_reference_type or
13115 /// DW_TAG_rvalue_reference_type DIE.
13116 ///
13117 /// @param ctxt the read context to consider.
13118 ///
13119 /// @param die the DIE to read from.
13120 ///
13121 /// @param called_from_public_decl true if this function was called
13122 /// from a context where either a public function or a public variable
13123 /// is being built.
13124 ///
13125 /// @param where_offset the offset of the DIE where we are "logically"
13126 /// positionned at, in the DIE tree. This is useful when @p die is
13127 /// e.g, DW_TAG_partial_unit that can be included in several places in
13128 /// the DIE tree.
13129 ///
13130 /// @return a pointer to the resulting reference_type_def.
13131 static reference_type_def_sptr
build_reference_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13132 build_reference_type(read_context& ctxt,
13133 Dwarf_Die* die,
13134 bool called_from_public_decl,
13135 size_t where_offset)
13136 {
13137 reference_type_def_sptr result;
13138
13139 if (!die)
13140 return result;
13141
13142 unsigned tag = dwarf_tag(die);
13143 if (tag != DW_TAG_reference_type
13144 && tag != DW_TAG_rvalue_reference_type)
13145 return result;
13146
13147 Dwarf_Die underlying_type_die;
13148 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
13149 return result;
13150
13151 type_or_decl_base_sptr utype_decl =
13152 build_ir_node_from_die(ctxt, &underlying_type_die,
13153 called_from_public_decl,
13154 where_offset);
13155 if (!utype_decl)
13156 return result;
13157
13158 // The call to build_ir_node_from_die() could have triggered the
13159 // creation of the type for this DIE. In that case, just return it.
13160 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13161 {
13162 result = is_reference_type(t);
13163 ABG_ASSERT(result);
13164 return result;
13165 }
13166
13167 type_base_sptr utype = is_type(utype_decl);
13168 ABG_ASSERT(utype);
13169
13170 // if the DIE for the reference type doesn't have a byte_size
13171 // attribute then we assume the size of the reference is the address
13172 // size of the current translation unit.
13173 uint64_t size = ctxt.cur_transl_unit()->get_address_size();
13174 if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
13175 size *= 8;
13176
13177 // And the size of the pointer must be the same as the address size
13178 // of the current translation unit.
13179 ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
13180
13181 bool is_lvalue = tag == DW_TAG_reference_type;
13182
13183 result.reset(new reference_type_def(utype, is_lvalue, size,
13184 /*alignment=*/0,
13185 location()));
13186 if (corpus_sptr corp = ctxt.current_corpus())
13187 if (reference_type_def_sptr t = lookup_reference_type(*result, *corp))
13188 result = t;
13189 ctxt.associate_die_to_type(die, result, where_offset);
13190 return result;
13191 }
13192
13193 /// Build a subroutine type from a DW_TAG_subroutine_type DIE.
13194 ///
13195 /// @param ctxt the read context to consider.
13196 ///
13197 /// @param die the DIE to read from.
13198 ///
13199 /// @param is_method points to a class or union declaration iff we're
13200 /// building the type for a method. This is the enclosing class or
13201 /// union of the method.
13202 ///
13203 /// @param where_offset the offset of the DIE where we are "logically"
13204 /// positioned at, in the DIE tree. This is useful when @p die is
13205 /// e.g, DW_TAG_partial_unit that can be included in several places in
13206 /// the DIE tree.
13207 ///
13208 /// @return a pointer to the resulting function_type_sptr.
13209 static function_type_sptr
build_function_type(read_context & ctxt,Dwarf_Die * die,class_or_union_sptr is_method,size_t where_offset)13210 build_function_type(read_context& ctxt,
13211 Dwarf_Die* die,
13212 class_or_union_sptr is_method,
13213 size_t where_offset)
13214 {
13215 function_type_sptr result;
13216
13217 if (!die)
13218 return result;
13219
13220 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subroutine_type
13221 || dwarf_tag(die) == DW_TAG_subprogram);
13222
13223 const die_source source = ctxt.get_die_source(die);
13224
13225 decl_base_sptr type_decl;
13226
13227 translation_unit_sptr tu = ctxt.cur_transl_unit();
13228 ABG_ASSERT(tu);
13229
13230 /// If, inside the current translation unit, we've already seen a
13231 /// function type with the same text representation, then reuse that
13232 /// one instead.
13233 if (type_base_sptr t = ctxt.lookup_fn_type_from_die_repr_per_tu(die))
13234 {
13235 result = is_function_type(t);
13236 ABG_ASSERT(result);
13237 ctxt.associate_die_to_type(die, result, where_offset);
13238 return result;
13239 }
13240
13241 bool odr_is_relevant = ctxt.odr_is_relevant(die);
13242 if (odr_is_relevant)
13243 {
13244 // So we can rely on the One Definition Rule to say that if
13245 // several different function types have the same name (or
13246 // rather, representation) across the entire binary, then they
13247 // ought to designate the same function type. So let's ensure
13248 // that if we've already seen a function type with the same
13249 // representation as the function type 'die', then it's the same
13250 // type as the one denoted by 'die'.
13251 if (function_type_sptr fn_type =
13252 is_function_type(ctxt.lookup_type_artifact_from_die(die)))
13253 {
13254 ctxt.associate_die_to_type(die, fn_type, where_offset);
13255 return fn_type;
13256 }
13257 }
13258
13259 // Let's look at the DIE to detect if it's the DIE for a method
13260 // (type). If it is, we can deduce the name of its enclosing class
13261 // and if it's a static or const.
13262 bool is_const = false;
13263 bool is_static = false;
13264 Dwarf_Die object_pointer_die;
13265 Dwarf_Die class_type_die;
13266 bool has_this_parm_die =
13267 die_function_type_is_method_type(ctxt, die, where_offset,
13268 object_pointer_die,
13269 class_type_die,
13270 is_static);
13271 if (has_this_parm_die)
13272 {
13273 // The function (type) has a "this" parameter DIE. It means it's
13274 // a member function DIE.
13275 if (!is_static)
13276 if (die_object_pointer_is_for_const_method(&object_pointer_die))
13277 is_const = true;
13278
13279 if (!is_method)
13280 {
13281 // We were initially called as if the function represented
13282 // by DIE was *NOT* a member function. But now we know it's
13283 // a member function. Let's take that into account.
13284 class_or_union_sptr klass_type =
13285 is_class_or_union_type(build_ir_node_from_die(ctxt, &class_type_die,
13286 /*called_from_pub_decl=*/true,
13287 where_offset));
13288 ABG_ASSERT(klass_type);
13289 is_method = klass_type;
13290 }
13291 }
13292
13293 // Let's create the type early and record it as being for the DIE
13294 // 'die'. This way, when building the sub-type triggers the
13295 // creation of a type matching the same 'die', then we'll reuse this
13296 // one.
13297
13298 result.reset(is_method
13299 ? new method_type(is_method, is_const,
13300 tu->get_address_size(),
13301 /*alignment=*/0)
13302 : new function_type(ctxt.env(), tu->get_address_size(),
13303 /*alignment=*/0));
13304 ctxt.associate_die_to_type(die, result, where_offset);
13305 ctxt.die_wip_function_types_map(source)[dwarf_dieoffset(die)] = result;
13306
13307 type_base_sptr return_type;
13308 Dwarf_Die ret_type_die;
13309 if (die_die_attribute(die, DW_AT_type, ret_type_die))
13310 return_type =
13311 is_type(build_ir_node_from_die(ctxt, &ret_type_die,
13312 /*called_from_public_decl=*/true,
13313 where_offset));
13314 if (!return_type)
13315 return_type = is_type(build_ir_node_for_void_type(ctxt));
13316 result->set_return_type(return_type);
13317
13318 Dwarf_Die child;
13319 function_decl::parameters function_parms;
13320
13321 if (dwarf_child(die, &child) == 0)
13322 do
13323 {
13324 int child_tag = dwarf_tag(&child);
13325 if (child_tag == DW_TAG_formal_parameter)
13326 {
13327 // This is a "normal" function parameter.
13328 string name, linkage_name;
13329 location loc;
13330 die_loc_and_name(ctxt, &child, loc, name, linkage_name);
13331 if (!tools_utils::string_is_ascii_identifier(name))
13332 // Sometimes, bogus compiler emit names that are
13333 // non-ascii garbage. Let's just ditch that for now.
13334 name.clear();
13335 bool is_artificial = die_is_artificial(&child);
13336 type_base_sptr parm_type;
13337 Dwarf_Die parm_type_die;
13338 if (die_die_attribute(&child, DW_AT_type, parm_type_die))
13339 parm_type =
13340 is_type(build_ir_node_from_die(ctxt, &parm_type_die,
13341 /*called_from_public_decl=*/true,
13342 where_offset));
13343 if (!parm_type)
13344 continue;
13345 function_decl::parameter_sptr p
13346 (new function_decl::parameter(parm_type, name, loc,
13347 /*variadic_marker=*/false,
13348 is_artificial));
13349 function_parms.push_back(p);
13350 }
13351 else if (child_tag == DW_TAG_unspecified_parameters)
13352 {
13353 // This is a variadic function parameter.
13354 bool is_artificial = die_is_artificial(&child);
13355 ir::environment* env = ctxt.env();
13356 ABG_ASSERT(env);
13357 type_base_sptr parm_type =
13358 is_type(build_ir_node_for_variadic_parameter_type(ctxt));
13359 function_decl::parameter_sptr p
13360 (new function_decl::parameter(parm_type,
13361 /*name=*/"",
13362 location(),
13363 /*variadic_marker=*/true,
13364 is_artificial));
13365 function_parms.push_back(p);
13366 // After a DW_TAG_unspecified_parameters tag, we shouldn't
13367 // keep reading for parameters. The
13368 // unspecified_parameters TAG should be the last parameter
13369 // that we record. For instance, if there are multiple
13370 // DW_TAG_unspecified_parameters DIEs then we should care
13371 // only for the first one.
13372 break;
13373 }
13374 }
13375 while (dwarf_siblingof(&child, &child) == 0);
13376
13377 result->set_parameters(function_parms);
13378
13379 tu->bind_function_type_life_time(result);
13380
13381 result->set_is_artificial(true);
13382
13383 ctxt.associate_die_repr_to_fn_type_per_tu(die, result);
13384
13385 {
13386 die_function_type_map_type::const_iterator i =
13387 ctxt.die_wip_function_types_map(source).
13388 find(dwarf_dieoffset(die));
13389 if (i != ctxt.die_wip_function_types_map(source).end())
13390 ctxt.die_wip_function_types_map(source).erase(i);
13391 }
13392
13393 maybe_canonicalize_type(result, ctxt);
13394 return result;
13395 }
13396
13397 /// Build a subrange type from a DW_TAG_subrange_type.
13398 ///
13399 /// @param ctxt the read context to consider.
13400 ///
13401 /// @param die the DIE to read from.
13402 ///
13403 /// @param where_offset the offset of the DIE where we are "logically"
13404 /// positionned at in the DIE tree. This is useful when @p die is
13405 /// e,g, DW_TAG_partial_unit that can be included in several places in
13406 /// the DIE tree.
13407 ///
13408 /// @param associate_die_to_type if this is true then the resulting
13409 /// type is associated to the @p die, so that next time when the
13410 /// system looks up the type associated to it, the current resulting
13411 /// type is returned. If false, then no association is done and the
13412 /// resulting type can be destroyed right after. This can be useful
13413 /// when the sole purpose of building the @ref
13414 /// array_type_def::subrange_type is to use some of its method like,
13415 /// e.g, its name pretty printing methods.
13416 ///
13417 /// @return the newly built instance of @ref
13418 /// array_type_def::subrange_type, or nil if no type could be built.
13419 static array_type_def::subrange_sptr
build_subrange_type(read_context & ctxt,const Dwarf_Die * die,size_t where_offset,bool associate_type_to_die)13420 build_subrange_type(read_context& ctxt,
13421 const Dwarf_Die* die,
13422 size_t where_offset,
13423 bool associate_type_to_die)
13424 {
13425 array_type_def::subrange_sptr result;
13426
13427 if (!die)
13428 return result;
13429
13430 unsigned tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
13431 if (tag != DW_TAG_subrange_type)
13432 return result;
13433
13434 string name = die_name(die);
13435
13436 // load the underlying type.
13437 Dwarf_Die underlying_type_die;
13438 type_base_sptr underlying_type;
13439 /* Unless there is an underlying type which says differently. */
13440 bool is_signed = false;
13441 if (die_die_attribute(die, DW_AT_type, underlying_type_die))
13442 underlying_type =
13443 is_type(build_ir_node_from_die(ctxt,
13444 &underlying_type_die,
13445 /*called_from_public_decl=*/true,
13446 where_offset));
13447
13448 if (underlying_type)
13449 {
13450 uint64_t ate;
13451 if (die_unsigned_constant_attribute (&underlying_type_die,
13452 DW_AT_encoding,
13453 ate))
13454 is_signed = (ate == DW_ATE_signed || ate == DW_ATE_signed_char);
13455 }
13456
13457 translation_unit::language language = ctxt.cur_transl_unit()->get_language();
13458 array_type_def::subrange_type::bound_value lower_bound =
13459 get_default_array_lower_bound(language);
13460 array_type_def::subrange_type::bound_value upper_bound;
13461 uint64_t count = 0;
13462 bool is_infinite = false;
13463
13464 // The DWARF 4 specifications says, in [5.11 Subrange
13465 // Type Entries]:
13466 //
13467 // The subrange entry may have the attributes
13468 // DW_AT_lower_bound and DW_AT_upper_bound to
13469 // specify, respectively, the lower and upper bound
13470 // values of the subrange.
13471 //
13472 // So let's look for DW_AT_lower_bound first.
13473 die_constant_attribute(die, DW_AT_lower_bound, is_signed, lower_bound);
13474
13475 // Then, DW_AT_upper_bound.
13476 if (!die_constant_attribute(die, DW_AT_upper_bound, is_signed, upper_bound))
13477 {
13478 // The DWARF 4 spec says, in [5.11 Subrange Type
13479 // Entries]:
13480 //
13481 // The DW_AT_upper_bound attribute may be replaced
13482 // by a DW_AT_count attribute, whose value
13483 // describes the number of elements in the
13484 // subrange rather than the value of the last
13485 // element."
13486 //
13487 // So, as DW_AT_upper_bound is not present in this
13488 // case, let's see if there is a DW_AT_count.
13489 die_unsigned_constant_attribute(die, DW_AT_count, count);
13490
13491 // We can deduce the upper_bound from the
13492 // lower_bound and the number of elements of the
13493 // array:
13494 if (int64_t u = lower_bound.get_signed_value() + count)
13495 upper_bound = u - 1;
13496
13497 if (upper_bound.get_unsigned_value() == 0 && count == 0)
13498 // No upper_bound nor count was present on the DIE, this means
13499 // the array is considered to have an infinite (or rather not
13500 // known) size.
13501 is_infinite = true;
13502 }
13503
13504 if (UINT64_MAX == upper_bound.get_unsigned_value())
13505 {
13506 // If the upper_bound size is the max of the integer value, then
13507 // it most certainly means infinite size.
13508 is_infinite = true;
13509 upper_bound.set_unsigned(0);
13510 }
13511
13512 result.reset
13513 (new array_type_def::subrange_type(ctxt.env(),
13514 name,
13515 lower_bound,
13516 upper_bound,
13517 location()));
13518 result->is_infinite(is_infinite);
13519
13520 if (underlying_type)
13521 result->set_underlying_type(underlying_type);
13522
13523 ABG_ASSERT(result->is_infinite()
13524 || (result->get_length() ==
13525 (uint64_t) (result->get_upper_bound()
13526 - result->get_lower_bound() + 1)));
13527
13528 if (associate_type_to_die)
13529 ctxt.associate_die_to_type(die, result, where_offset);
13530
13531 return result;
13532 }
13533
13534 /// Build the sub-ranges of an array type.
13535 ///
13536 /// This is a sub-routine of build_array_type().
13537 ///
13538 /// @param ctxt the context to read from.
13539 ///
13540 /// @param die the DIE of tag DW_TAG_array_type which contains
13541 /// children DIEs that represent the sub-ranges.
13542 ///
13543 /// @param subranges out parameter. This is set to the sub-ranges
13544 /// that are built from @p die.
13545 ///
13546 /// @param where_offset the offset of the DIE where we are "logically"
13547 /// positioned at, in the DIE tree. This is useful when @p die is
13548 /// e.g, DW_TAG_partial_unit that can be included in several places in
13549 /// the DIE tree.
13550 static void
build_subranges_from_array_type_die(read_context & ctxt,const Dwarf_Die * die,array_type_def::subranges_type & subranges,size_t where_offset,bool associate_type_to_die)13551 build_subranges_from_array_type_die(read_context& ctxt,
13552 const Dwarf_Die* die,
13553 array_type_def::subranges_type& subranges,
13554 size_t where_offset,
13555 bool associate_type_to_die)
13556 {
13557 Dwarf_Die child;
13558
13559 if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
13560 {
13561 do
13562 {
13563 int child_tag = dwarf_tag(&child);
13564 if (child_tag == DW_TAG_subrange_type)
13565 {
13566 array_type_def::subrange_sptr s;
13567 if (associate_type_to_die)
13568 {
13569 // We are being called to create the type, add it to
13570 // the current type graph and associate it to the
13571 // DIE it's been created from.
13572 type_or_decl_base_sptr t =
13573 build_ir_node_from_die(ctxt, &child,
13574 /*called_from_public_decl=*/true,
13575 where_offset);
13576 s = is_subrange_type(t);
13577 }
13578 else
13579 // We are being called to create the type but *NOT*
13580 // add it to the current tyupe tree, *NOR* associate
13581 // it to the DIE it's been created from.
13582 s = build_subrange_type(ctxt, &child,
13583 where_offset,
13584 /*associate_type_to_die=*/false);
13585 if (s)
13586 subranges.push_back(s);
13587 }
13588 }
13589 while (dwarf_siblingof(&child, &child) == 0);
13590 }
13591 }
13592
13593 /// Build an array type from a DW_TAG_array_type DIE.
13594 ///
13595 /// @param ctxt the read context to consider.
13596 ///
13597 /// @param die the DIE to read from.
13598 ///
13599 /// @param called_from_public_decl true if this function was called
13600 /// from a context where either a public function or a public variable
13601 /// is being built.
13602 ///
13603 /// @param where_offset the offset of the DIE where we are "logically"
13604 /// positioned at, in the DIE tree. This is useful when @p die is
13605 /// e.g, DW_TAG_partial_unit that can be included in several places in
13606 /// the DIE tree.
13607 ///
13608 /// @return a pointer to the resulting array_type_def.
13609 static array_type_def_sptr
build_array_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13610 build_array_type(read_context& ctxt,
13611 Dwarf_Die* die,
13612 bool called_from_public_decl,
13613 size_t where_offset)
13614 {
13615 array_type_def_sptr result;
13616
13617 if (!die)
13618 return result;
13619
13620 unsigned tag = dwarf_tag(die);
13621 if (tag != DW_TAG_array_type)
13622 return result;
13623
13624 decl_base_sptr type_decl;
13625 Dwarf_Die type_die;
13626
13627 if (die_die_attribute(die, DW_AT_type, type_die))
13628 type_decl = is_decl(build_ir_node_from_die(ctxt, &type_die,
13629 called_from_public_decl,
13630 where_offset));
13631 if (!type_decl)
13632 return result;
13633
13634 // The call to build_ir_node_from_die() could have triggered the
13635 // creation of the type for this DIE. In that case, just return it.
13636 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13637 {
13638 result = is_array_type(t);
13639 ABG_ASSERT(result);
13640 return result;
13641 }
13642
13643 type_base_sptr type = is_type(type_decl);
13644 ABG_ASSERT(type);
13645
13646 array_type_def::subranges_type subranges;
13647
13648 build_subranges_from_array_type_die(ctxt, die, subranges, where_offset);
13649
13650 result.reset(new array_type_def(type, subranges, location()));
13651
13652 return result;
13653 }
13654
13655 /// Create a typedef_decl from a DW_TAG_typedef DIE.
13656 ///
13657 /// @param ctxt the read context to consider.
13658 ///
13659 /// @param die the DIE to read from.
13660 ///
13661 /// @param called_from_public_decl true if this function was called
13662 /// from a context where either a public function or a public variable
13663 /// is being built.
13664 ///
13665 /// @param where_offset the offset of the DIE where we are "logically"
13666 /// positionned at, in the DIE tree. This is useful when @p die is
13667 /// e.g, DW_TAG_partial_unit that can be included in several places in
13668 /// the DIE tree.
13669 ///
13670 /// @return the newly created typedef_decl.
13671 static typedef_decl_sptr
build_typedef_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13672 build_typedef_type(read_context& ctxt,
13673 Dwarf_Die* die,
13674 bool called_from_public_decl,
13675 size_t where_offset)
13676 {
13677 typedef_decl_sptr result;
13678
13679 if (!die)
13680 return result;
13681
13682 unsigned tag = dwarf_tag(die);
13683 if (tag != DW_TAG_typedef)
13684 return result;
13685
13686 string name, linkage_name;
13687 location loc;
13688 die_loc_and_name(ctxt, die, loc, name, linkage_name);
13689
13690 if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
13691 if (loc)
13692 result = lookup_typedef_type_per_location(loc.expand(), *corp);
13693
13694 if (!ctxt.odr_is_relevant(die))
13695 if (typedef_decl_sptr t = is_typedef(ctxt.lookup_artifact_from_die(die)))
13696 result = t;
13697
13698 if (!result)
13699 {
13700 type_base_sptr utype;
13701 Dwarf_Die underlying_type_die;
13702 if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
13703 // A typedef DIE with no underlying type means a typedef to
13704 // void type.
13705 utype = ctxt.env()->get_void_type();
13706
13707 if (!utype)
13708 utype =
13709 is_type(build_ir_node_from_die(ctxt,
13710 &underlying_type_die,
13711 called_from_public_decl,
13712 where_offset));
13713 if (!utype)
13714 return result;
13715
13716 // The call to build_ir_node_from_die() could have triggered the
13717 // creation of the type for this DIE. In that case, just return
13718 // it.
13719 if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13720 {
13721 result = is_typedef(t);
13722 ABG_ASSERT(result);
13723 return result;
13724 }
13725
13726 ABG_ASSERT(utype);
13727 result.reset(new typedef_decl(name, utype, loc, linkage_name));
13728
13729 if ((is_class_or_union_type(utype) || is_enum_type(utype))
13730 && is_anonymous_type(utype))
13731 {
13732 // This is a naming typedef for an enum or a class. Let's
13733 // mark the underlying decl as such.
13734 decl_base_sptr decl = is_decl(utype);
13735 ABG_ASSERT(decl);
13736 decl->set_naming_typedef(result);
13737 }
13738 }
13739
13740 ctxt.associate_die_to_type(die, result, where_offset);
13741
13742 return result;
13743 }
13744
13745 /// Build a @ref var_decl out of a DW_TAG_variable DIE if the variable
13746 /// denoted by the DIE is not suppressed by a suppression
13747 /// specification associated to the current read context.
13748 ///
13749 /// Note that if a member variable declaration with the same name as
13750 /// the name of the DIE we are looking at exists, this function returns
13751 /// that existing variable declaration.
13752 ///
13753 /// @param ctxt the read context to use.
13754 ///
13755 /// @param die the DIE representing the variable we are looking at.
13756 ///
13757 /// @param where_offset the offset of the DIE where we are "logically"
13758 /// positionned at, in the DIE tree. This is useful when @p die is
13759 /// e.g, DW_TAG_partial_unit that can be included in several places in
13760 /// the DIE tree.
13761 ///
13762 /// @param result if this is set to an existing var_decl, this means
13763 /// that the function will append the new properties it sees on @p die
13764 /// to that exising var_decl. Otherwise, if this parameter is NULL, a
13765 /// new var_decl is going to be allocated and returned.
13766 ///
13767 /// @param is_required_decl_spec this is true iff the variable to
13768 /// build is referred to as being the specification of another
13769 /// variable.
13770 ///
13771 /// @return a pointer to the newly created var_decl. If the var_decl
13772 /// could not be built, this function returns NULL.
13773 static var_decl_sptr
build_or_get_var_decl_if_not_suppressed(read_context & ctxt,scope_decl * scope,Dwarf_Die * die,size_t where_offset,var_decl_sptr result,bool is_required_decl_spec)13774 build_or_get_var_decl_if_not_suppressed(read_context& ctxt,
13775 scope_decl *scope,
13776 Dwarf_Die *die,
13777 size_t where_offset,
13778 var_decl_sptr result,
13779 bool is_required_decl_spec)
13780 {
13781 var_decl_sptr var;
13782 if (variable_is_suppressed(ctxt, scope, die, is_required_decl_spec))
13783 return var;
13784
13785 if (class_decl* class_type = is_class_type(scope))
13786 {
13787 string var_name = die_name(die);
13788 if (!var_name.empty())
13789 if ((var = class_type->find_data_member(var_name)))
13790 return var;
13791 }
13792 var = build_var_decl(ctxt, die, where_offset, result);
13793 return var;
13794 }
13795
13796 /// Build a @ref var_decl out of a DW_TAG_variable DIE.
13797 ///
13798 /// @param ctxt the read context to use.
13799 ///
13800 /// @param die the DIE representing the variable we are looking at.
13801 ///
13802 /// @param where_offset the offset of the DIE where we are "logically"
13803 /// positionned at, in the DIE tree. This is useful when @p die is
13804 /// e.g, DW_TAG_partial_unit that can be included in several places in
13805 /// the DIE tree.
13806 ///
13807 /// @param result if this is set to an existing var_decl, this means
13808 /// that the function will append the new properties it sees on @p die
13809 /// to that exising var_decl. Otherwise, if this parameter is NULL, a
13810 /// new var_decl is going to be allocated and returned.
13811 ///
13812 /// @return a pointer to the newly created var_decl. If the var_decl
13813 /// could not be built, this function returns NULL.
13814 static var_decl_sptr
build_var_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset,var_decl_sptr result)13815 build_var_decl(read_context& ctxt,
13816 Dwarf_Die *die,
13817 size_t where_offset,
13818 var_decl_sptr result)
13819 {
13820 if (!die)
13821 return result;
13822
13823 int tag = dwarf_tag(die);
13824 ABG_ASSERT(tag == DW_TAG_variable || tag == DW_TAG_member);
13825
13826 if (!die_is_public_decl(die))
13827 return result;
13828
13829 type_base_sptr type;
13830 Dwarf_Die type_die;
13831 if (die_die_attribute(die, DW_AT_type, type_die))
13832 {
13833 decl_base_sptr ty =
13834 is_decl(build_ir_node_from_die(ctxt, &type_die,
13835 /*called_from_public_decl=*/true,
13836 where_offset));
13837 if (!ty)
13838 return result;
13839 type = is_type(ty);
13840 ABG_ASSERT(type);
13841 }
13842
13843 if (!type)
13844 return result;
13845
13846 string name, linkage_name;
13847 location loc;
13848 die_loc_and_name(ctxt, die, loc, name, linkage_name);
13849
13850 if (!result)
13851 result.reset(new var_decl(name, type, loc, linkage_name));
13852 else
13853 {
13854 // We were called to append properties that might have been
13855 // missing from the first version of the variable. And usually
13856 // that missing property is the mangled name.
13857 if (!linkage_name.empty())
13858 result->set_linkage_name(linkage_name);
13859 }
13860
13861 // Check if a variable symbol with this name is exported by the elf
13862 // binary. If it is, then set the symbol of the variable, if it's
13863 // not set already.
13864 if (!result->get_symbol())
13865 {
13866 elf_symbol_sptr var_sym;
13867 Dwarf_Addr var_addr;
13868 if (ctxt.get_variable_address(die, var_addr))
13869 {
13870 ctxt.symtab()->update_main_symbol(var_addr,
13871 result->get_linkage_name().empty()
13872 ? result->get_name()
13873 : result->get_linkage_name());
13874 var_sym = ctxt.variable_symbol_is_exported(var_addr);
13875 }
13876
13877 if (var_sym)
13878 {
13879 result->set_symbol(var_sym);
13880 // If the linkage name is not set or is wrong, set it to
13881 // the name of the underlying symbol.
13882 string linkage_name = result->get_linkage_name();
13883 if (linkage_name.empty()
13884 || !var_sym->get_alias_from_name(linkage_name))
13885 result->set_linkage_name(var_sym->get_name());
13886 result->set_is_in_public_symbol_table(true);
13887 }
13888 }
13889
13890 return result;
13891 }
13892
13893 /// Test if a given function denoted by its DIE and its scope is
13894 /// suppressed by any of the suppression specifications associated to
13895 /// a given context of ELF/DWARF reading.
13896 ///
13897 /// Note that a non-member function which symbol is not exported is
13898 /// also suppressed.
13899 ///
13900 /// @param ctxt the ELF/DWARF reading content of interest.
13901 ///
13902 /// @param scope of the scope of the function.
13903 ///
13904 /// @param function_die the DIE representing the function.
13905 ///
13906 /// @param is_declaration_only is true if the DIE denoted by @p die is
13907 /// a declaration-only DIE.
13908 ///
13909 /// @return true iff @p function_die is suppressed by at least one
13910 /// suppression specification attached to the @p ctxt.
13911 static bool
function_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * function_die,bool is_declaration_only)13912 function_is_suppressed(const read_context& ctxt,
13913 const scope_decl* scope,
13914 Dwarf_Die *function_die,
13915 bool is_declaration_only)
13916 {
13917 if (function_die == 0
13918 || dwarf_tag(function_die) != DW_TAG_subprogram)
13919 return false;
13920
13921 string fname = die_string_attribute(function_die, DW_AT_name);
13922 string flinkage_name = die_linkage_name(function_die);
13923 if (flinkage_name.empty() && ctxt.die_is_in_c(function_die))
13924 flinkage_name = fname;
13925 string qualified_name = build_qualified_name(scope, fname);
13926
13927 // A non-member non-static function which symbol is not exported is
13928 // suppressed.
13929 //
13930 // Note that if the non-member non-static function has an undefined
13931 // symbol, by default, it's not suppressed. Unless we are asked to
13932 // drop undefined symbols too.
13933 if (!is_class_type(scope)
13934 && (!is_declaration_only || ctxt.drop_undefined_syms()))
13935 {
13936 Dwarf_Addr fn_addr;
13937 if (!ctxt.get_function_address(function_die, fn_addr))
13938 return true;
13939
13940 elf_symbol_sptr symbol = ctxt.function_symbol_is_exported(fn_addr);
13941 if (!symbol)
13942 return true;
13943 if (!symbol->is_suppressed())
13944 return false;
13945
13946 // Since there is only one symbol in DWARF associated with an elf_symbol,
13947 // we can assume this is the main symbol then. Otherwise the main hinting
13948 // did not work as expected.
13949 ABG_ASSERT(symbol->is_main_symbol());
13950 if (symbol->has_aliases())
13951 for (elf_symbol_sptr a = symbol->get_next_alias();
13952 !a->is_main_symbol(); a = a->get_next_alias())
13953 if (!a->is_suppressed())
13954 return false;
13955 }
13956
13957 return suppr::function_is_suppressed(ctxt, qualified_name,
13958 flinkage_name,
13959 /*require_drop_property=*/true);
13960 }
13961
13962 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE if the
13963 /// function denoted by the DIE is not suppressed by a suppression
13964 /// specification associated to the current read context.
13965 ///
13966 /// Note that if a member function declaration with the same signature
13967 /// (pretty representation) as one of the DIE we are looking at
13968 /// exists, this function returns that existing function declaration.
13969 /// Similarly, if there is already a constructed member function with
13970 /// the same linkage name as the one on the DIE, this function returns
13971 /// that member function.
13972 ///
13973 /// Also note that the function_decl IR returned by this function must
13974 /// be passed to finish_member_function_reading because several
13975 /// properties from the DIE are actually read by that function, and
13976 /// the corresponding properties on the function_decl IR are updated
13977 /// accordingly. This is done to support "updating" a function_decl
13978 /// IR with properties scathered across several DIEs.
13979 ///
13980 /// @param ctxt the read context to use.
13981 ///
13982 /// @param scope the scope of the function we are looking at.
13983 ///
13984 /// @param fn_die the DIE representing the function we are looking at.
13985 ///
13986 /// @param where_offset the offset of the DIE where we are "logically"
13987 /// positionned at, in the DIE tree. This is useful when @p die is
13988 /// e.g, DW_TAG_partial_unit that can be included in several places in
13989 /// the DIE tree.
13990 ///
13991 /// @param is_declaration_only is true if the DIE denoted by @p fn_die
13992 /// is a declaration-only DIE.
13993 ///
13994 /// @param result if this is set to an existing function_decl, this
13995 /// means that the function will append the new properties it sees on
13996 /// @p fn_die to that exising function_decl. Otherwise, if this
13997 /// parameter is NULL, a new function_decl is going to be allocated
13998 /// and returned.
13999 ///
14000 /// @return a pointer to the newly created var_decl. If the var_decl
14001 /// could not be built, this function returns NULL.
14002 static function_decl_sptr
build_or_get_fn_decl_if_not_suppressed(read_context & ctxt,scope_decl * scope,Dwarf_Die * fn_die,size_t where_offset,bool is_declaration_only,function_decl_sptr result)14003 build_or_get_fn_decl_if_not_suppressed(read_context& ctxt,
14004 scope_decl *scope,
14005 Dwarf_Die *fn_die,
14006 size_t where_offset,
14007 bool is_declaration_only,
14008 function_decl_sptr result)
14009 {
14010 function_decl_sptr fn;
14011 if (function_is_suppressed(ctxt, scope, fn_die, is_declaration_only))
14012 return fn;
14013
14014 string name = die_name(fn_die);
14015 string linkage_name = die_linkage_name(fn_die);
14016 bool is_dtor = !name.empty() && name[0]== '~';
14017 bool is_virtual = false;
14018 if (is_dtor)
14019 {
14020 Dwarf_Attribute attr;
14021 if (dwarf_attr_integrate(const_cast<Dwarf_Die*>(fn_die),
14022 DW_AT_vtable_elem_location,
14023 &attr))
14024 is_virtual = true;
14025 }
14026
14027
14028 // If we've already built an IR for a function with the same
14029 // signature (from another DIE), reuse it, unless that function is a
14030 // virtual C++ destructor. Several virtual C++ destructors with the
14031 // same signature can be implemented by several different ELF
14032 // symbols. So re-using C++ destructors like that can lead to us
14033 // missing some destructors.
14034 if (!result && (!(is_dtor && is_virtual)))
14035 if ((fn = is_function_decl(ctxt.lookup_artifact_from_die(fn_die))))
14036 {
14037 fn = maybe_finish_function_decl_reading(ctxt, fn_die, where_offset, fn);
14038 ctxt.associate_die_to_decl(fn_die, fn, /*do_associate_by_repr=*/true);
14039 ctxt.associate_die_to_type(fn_die, fn->get_type(), where_offset);
14040 return fn;
14041 }
14042
14043 // If a member function with the same linkage name as the one
14044 // carried by the DIE already exists, then return it.
14045 if (class_decl* klass = is_class_type(scope))
14046 {
14047 string linkage_name = die_linkage_name(fn_die);
14048 fn = klass->find_member_function_sptr(linkage_name);
14049 }
14050
14051 if (!fn || !fn->get_symbol())
14052 // We haven't yet been able to construct a function IR, or, we
14053 // have one 'partial' function IR that doesn't have any associated
14054 // symbol yet. Note that in the later case, a function IR without
14055 // any associated symbol will be dropped on the floor by
14056 // potential_member_fn_should_be_dropped. So let's build or a new
14057 // function IR or complete the existing partial IR.
14058 fn = build_function_decl(ctxt, fn_die, where_offset, result);
14059
14060 return fn;
14061 }
14062
14063 /// Test if a given variable denoted by its DIE and its scope is
14064 /// suppressed by any of the suppression specifications associated to
14065 /// a given context of ELF/DWARF reading.
14066 ///
14067 /// @param ctxt the ELF/DWARF reading content of interest.
14068 ///
14069 /// @param scope of the scope of the variable.
14070 ///
14071 /// @param variable_die the DIE representing the variable.
14072 ///
14073 /// @param is_required_decl_spec if true, means that the @p
14074 /// variable_die being considered is for a variable decl that is a
14075 /// specification for a concrete variable being built.
14076 ///
14077 /// @return true iff @p variable_die is suppressed by at least one
14078 /// suppression specification attached to the @p ctxt.
14079 static bool
variable_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * variable_die,bool is_required_decl_spec)14080 variable_is_suppressed(const read_context& ctxt,
14081 const scope_decl* scope,
14082 Dwarf_Die *variable_die,
14083 bool is_required_decl_spec)
14084 {
14085 if (variable_die == 0
14086 || (dwarf_tag(variable_die) != DW_TAG_variable
14087 && dwarf_tag(variable_die) != DW_TAG_member))
14088 return false;
14089
14090 string name = die_string_attribute(variable_die, DW_AT_name);
14091 string linkage_name = die_linkage_name(variable_die);
14092 if (linkage_name.empty() && ctxt.die_is_in_c(variable_die))
14093 linkage_name = name;
14094 string qualified_name = build_qualified_name(scope, name);
14095
14096 // If a non member variable that is a declaration (has no defined
14097 // and exported symbol) and is not the specification of another
14098 // concrete variable, then it's suppressed. This is a size
14099 // optimization; it removes useless declaration-only variables from
14100 // the IR.
14101 if (!is_class_type(scope) && !is_required_decl_spec)
14102 {
14103 Dwarf_Addr var_addr = 0;
14104 if (!ctxt.get_variable_address(variable_die, var_addr))
14105 return true;
14106
14107 elf_symbol_sptr symbol = ctxt.variable_symbol_is_exported(var_addr);
14108 if (!symbol)
14109 return true;
14110 if (!symbol->is_suppressed())
14111 return false;
14112
14113 // Since there is only one symbol in DWARF associated with an elf_symbol,
14114 // we can assume this is the main symbol then. Otherwise the main hinting
14115 // did not work as expected.
14116 ABG_ASSERT(symbol->is_main_symbol());
14117 if (symbol->has_aliases())
14118 for (elf_symbol_sptr a = symbol->get_next_alias();
14119 !a->is_main_symbol(); a = a->get_next_alias())
14120 if (!a->is_suppressed())
14121 return false;
14122 }
14123
14124 return suppr::variable_is_suppressed(ctxt, qualified_name,
14125 linkage_name,
14126 /*require_drop_property=*/true);
14127 }
14128
14129 /// Test if a type (designated by a given DIE) in a given scope is
14130 /// suppressed by the suppression specifications that are associated
14131 /// to a given read context.
14132 ///
14133 /// @param ctxt the read context to consider.
14134 ///
14135 /// @param scope of the scope of the type DIE to consider.
14136 ///
14137 /// @param type_die the DIE that designates the type to consider.
14138 ///
14139 /// @param type_is_private out parameter. If this function returns
14140 /// true (the type @p type_die is suppressed) and if the type was
14141 /// suppressed because it's private then this parameter is set to
14142 /// true.
14143 ///
14144 /// @return true iff the type designated by the DIE @p type_die, in
14145 /// the scope @p scope is suppressed by at the suppression
14146 /// specifications associated to the current read context.
14147 static bool
type_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * type_die,bool & type_is_private)14148 type_is_suppressed(const read_context& ctxt,
14149 const scope_decl* scope,
14150 Dwarf_Die *type_die,
14151 bool &type_is_private)
14152 {
14153 if (type_die == 0
14154 || (dwarf_tag(type_die) != DW_TAG_enumeration_type
14155 && dwarf_tag(type_die) != DW_TAG_class_type
14156 && dwarf_tag(type_die) != DW_TAG_structure_type
14157 && dwarf_tag(type_die) != DW_TAG_union_type))
14158 return false;
14159
14160 string type_name, linkage_name;
14161 location type_location;
14162 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
14163 string qualified_name = build_qualified_name(scope, type_name);
14164
14165 return suppr::type_is_suppressed(ctxt, qualified_name,
14166 type_location,
14167 type_is_private,
14168 /*require_drop_property=*/true);
14169 }
14170
14171 /// Test if a type (designated by a given DIE) in a given scope is
14172 /// suppressed by the suppression specifications that are associated
14173 /// to a given read context.
14174 ///
14175 /// @param ctxt the read context to consider.
14176 ///
14177 /// @param scope of the scope of the type DIE to consider.
14178 ///
14179 /// @param type_die the DIE that designates the type to consider.
14180 ///
14181 /// @return true iff the type designated by the DIE @p type_die, in
14182 /// the scope @p scope is suppressed by at the suppression
14183 /// specifications associated to the current read context.
14184 static bool
type_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * type_die)14185 type_is_suppressed(const read_context& ctxt,
14186 const scope_decl* scope,
14187 Dwarf_Die *type_die)
14188 {
14189 bool type_is_private = false;
14190 return type_is_suppressed(ctxt, scope, type_die, type_is_private);
14191 }
14192
14193 /// Get the opaque version of a type that was suppressed because it's
14194 /// a private type.
14195 ///
14196 /// The opaque version version of the type is just a declared-only
14197 /// version of the type (class, union or enum type) denoted by @p
14198 /// type_die.
14199 ///
14200 /// @param ctxt the read context in use.
14201 ///
14202 /// @param scope the scope of the type die we are looking at.
14203 ///
14204 /// @param type_die the type DIE we are looking at.
14205 ///
14206 /// @param where_offset the offset of the DIE where we are "logically"
14207 /// positionned at, in the DIE tree. This is useful when @p die is
14208 /// e.g, DW_TAG_partial_unit that can be included in several places in
14209 /// the DIE tree.
14210 ///
14211 /// @return the opaque version of the type denoted by @p type_die or
14212 /// nil if no opaque version was found.
14213 static type_or_decl_base_sptr
get_opaque_version_of_type(read_context & ctxt,scope_decl * scope,Dwarf_Die * type_die,size_t where_offset)14214 get_opaque_version_of_type(read_context &ctxt,
14215 scope_decl *scope,
14216 Dwarf_Die *type_die,
14217 size_t where_offset)
14218 {
14219 type_or_decl_base_sptr result;
14220
14221 if (type_die == 0)
14222 return result;
14223
14224 unsigned tag = dwarf_tag(type_die);
14225 if (tag != DW_TAG_class_type
14226 && tag != DW_TAG_structure_type
14227 && tag != DW_TAG_union_type
14228 && tag != DW_TAG_enumeration_type)
14229 return result;
14230
14231 string type_name, linkage_name;
14232 location type_location;
14233 die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
14234 if (!type_location)
14235 return result;
14236
14237 string qualified_name = build_qualified_name(scope, type_name);
14238
14239 //
14240 // TODO: also handle declaration-only unions. To do that, we mostly
14241 // need to adapt add_or_update_union_type to make it schedule
14242 // declaration-only unions for resolution too.
14243 //
14244 if (tag == DW_TAG_structure_type || tag == DW_TAG_class_type)
14245 {
14246 string_classes_map::const_iterator i =
14247 ctxt.declaration_only_classes().find(qualified_name);
14248 if (i != ctxt.declaration_only_classes().end())
14249 result = i->second.back();
14250
14251 if (!result)
14252 {
14253 // So we didn't find any pre-existing forward-declared-only
14254 // class for the class definition that we could return as an
14255 // opaque type. So let's build one.
14256 //
14257 // TODO: we need to be able to do this for unions too!
14258 class_decl_sptr klass(new class_decl(ctxt.env(), type_name,
14259 /*alignment=*/0, /*size=*/0,
14260 tag == DW_TAG_structure_type,
14261 type_location,
14262 decl_base::VISIBILITY_DEFAULT));
14263 klass->set_is_declaration_only(true);
14264 klass->set_is_artificial(die_is_artificial(type_die));
14265 add_decl_to_scope(klass, scope);
14266 ctxt.associate_die_to_type(type_die, klass, where_offset);
14267 ctxt.maybe_schedule_declaration_only_class_for_resolution(klass);
14268 result = klass;
14269 }
14270 }
14271
14272 if (tag == DW_TAG_enumeration_type)
14273 {
14274 string_enums_map::const_iterator i =
14275 ctxt.declaration_only_enums().find(qualified_name);
14276 if (i != ctxt.declaration_only_enums().end())
14277 result = i->second.back();
14278
14279 if (!result)
14280 {
14281 uint64_t size = 0;
14282 if (die_unsigned_constant_attribute(type_die, DW_AT_byte_size, size))
14283 size *= 8;
14284 type_decl_sptr underlying_type =
14285 build_enum_underlying_type(ctxt, type_name, size,
14286 /*anonymous=*/true);
14287 enum_type_decl::enumerators enumeratorz;
14288 enum_type_decl_sptr enum_type (new enum_type_decl(type_name,
14289 type_location,
14290 underlying_type,
14291 enumeratorz,
14292 linkage_name));
14293 enum_type->set_is_artificial(die_is_artificial(type_die));
14294 add_decl_to_scope(enum_type, scope);
14295 result = enum_type;
14296 }
14297 }
14298
14299 return result;
14300 }
14301
14302 /// Create a function symbol with a given name.
14303 ///
14304 /// @param sym_name the name of the symbol to create.
14305 ///
14306 /// @param env the environment to create the symbol in.
14307 ///
14308 /// @return the newly created symbol.
14309 elf_symbol_sptr
create_default_fn_sym(const string & sym_name,const environment * env)14310 create_default_fn_sym(const string& sym_name, const environment *env)
14311 {
14312 elf_symbol::version ver;
14313 elf_symbol_sptr result =
14314 elf_symbol::create(env,
14315 /*symbol index=*/ 0,
14316 /*symbol size=*/ 0,
14317 sym_name,
14318 /*symbol type=*/ elf_symbol::FUNC_TYPE,
14319 /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
14320 /*symbol is defined=*/ true,
14321 /*symbol is common=*/ false,
14322 /*symbol version=*/ ver,
14323 /*symbol visibility=*/elf_symbol::DEFAULT_VISIBILITY);
14324 return result;
14325 }
14326
14327 /// Build a @ref function_decl our of a DW_TAG_subprogram DIE.
14328 ///
14329 /// @param ctxt the read context to use
14330 ///
14331 /// @param die the DW_TAG_subprogram DIE to read from.
14332 ///
14333 /// @param where_offset the offset of the DIE where we are "logically"
14334 /// positionned at, in the DIE tree. This is useful when @p die is
14335 /// e.g, DW_TAG_partial_unit that can be included in several places in
14336 /// the DIE tree.
14337 ///
14338 /// @param called_for_public_decl this is set to true if the function
14339 /// was called for a public (function) decl.
14340 static function_decl_sptr
build_function_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset,function_decl_sptr fn)14341 build_function_decl(read_context& ctxt,
14342 Dwarf_Die* die,
14343 size_t where_offset,
14344 function_decl_sptr fn)
14345 {
14346 function_decl_sptr result = fn;
14347 if (!die)
14348 return result;
14349 ABG_ASSERT(dwarf_tag(die) == DW_TAG_subprogram);
14350
14351 if (!die_is_public_decl(die))
14352 return result;
14353
14354 translation_unit_sptr tu = ctxt.cur_transl_unit();
14355 ABG_ASSERT(tu);
14356
14357 string fname, flinkage_name;
14358 location floc;
14359 die_loc_and_name(ctxt, die, floc, fname, flinkage_name);
14360
14361 size_t is_inline = die_is_declared_inline(die);
14362 class_or_union_sptr is_method =
14363 is_class_or_union_type(get_scope_for_die(ctxt, die, true, where_offset));
14364
14365 if (result)
14366 {
14367 // Add the properties that might have been missing from the
14368 // first declaration of the function. For now, it usually is
14369 // the mangled name that goes missing in the first declarations.
14370 //
14371 // Also note that if 'fn' has just been cloned, the current
14372 // linkage name (of the current DIE) might be different from the
14373 // linkage name of 'fn'. In that case, update the linkage name
14374 // of 'fn' too.
14375 if (!flinkage_name.empty()
14376 && result->get_linkage_name() != flinkage_name)
14377 result->set_linkage_name(flinkage_name);
14378 if (floc)
14379 if (!result->get_location())
14380 result->set_location(floc);
14381 }
14382 else
14383 {
14384 function_type_sptr fn_type(build_function_type(ctxt, die, is_method,
14385 where_offset));
14386 if (!fn_type)
14387 return result;
14388
14389 maybe_canonicalize_type(fn_type, ctxt);
14390
14391 result.reset(is_method
14392 ? new method_decl(fname, fn_type,
14393 is_inline, floc,
14394 flinkage_name)
14395 : new function_decl(fname, fn_type,
14396 is_inline, floc,
14397 flinkage_name));
14398 }
14399
14400 // Set the symbol of the function. If the linkage name is not set
14401 // or is wrong, set it to the name of the underlying symbol.
14402 if (!result->get_symbol())
14403 {
14404 elf_symbol_sptr fn_sym;
14405 Dwarf_Addr fn_addr;
14406 if (ctxt.get_function_address(die, fn_addr))
14407 {
14408 ctxt.symtab()->update_main_symbol(fn_addr,
14409 result->get_linkage_name().empty()
14410 ? result->get_name()
14411 : result->get_linkage_name());
14412 fn_sym = ctxt.function_symbol_is_exported(fn_addr);
14413 }
14414
14415 if (fn_sym && !ctxt.symbol_already_belongs_to_a_function(fn_sym))
14416 {
14417 result->set_symbol(fn_sym);
14418 string linkage_name = result->get_linkage_name();
14419 if (linkage_name.empty()
14420 || !fn_sym->get_alias_from_name(linkage_name))
14421 result->set_linkage_name(fn_sym->get_name());
14422 result->set_is_in_public_symbol_table(true);
14423 }
14424 }
14425
14426 ctxt.associate_die_to_type(die, result->get_type(), where_offset);
14427
14428 size_t die_offset = dwarf_dieoffset(die);
14429
14430 if (fn
14431 && is_member_function(fn)
14432 && get_member_function_is_virtual(fn)
14433 && !result->get_linkage_name().empty())
14434 // This function is a virtual member function which has its
14435 // linkage name *and* and has its underlying symbol correctly set.
14436 // It thus doesn't need any fixup related to elf symbol. So
14437 // remove it from the set of virtual member functions with linkage
14438 // names and no elf symbol that need to be fixed up.
14439 ctxt.die_function_decl_with_no_symbol_map().erase(die_offset);
14440 return result;
14441 }
14442
14443 /// Read all @ref abigail::translation_unit possible from the debug info
14444 /// accessible through a DWARF Front End Library handle, and stuff
14445 /// them into a libabigail ABI Corpus.
14446 ///
14447 /// @param ctxt the read context.
14448 ///
14449 /// @return a pointer to the resulting corpus, or NULL if the corpus
14450 /// could not be constructed.
14451 static corpus_sptr
read_debug_info_into_corpus(read_context & ctxt)14452 read_debug_info_into_corpus(read_context& ctxt)
14453 {
14454 ctxt.clear_per_corpus_data();
14455 ctxt.current_corpus(std::make_shared<corpus>(ctxt.env(), ctxt.elf_path()));
14456
14457 // First set some mundane properties of the corpus gathered from
14458 // ELF.
14459 ctxt.current_corpus()->set_path(ctxt.elf_path());
14460
14461 corpus::origin origin = corpus::DWARF_ORIGIN;
14462 if (is_linux_kernel(ctxt.elf_handle()))
14463 origin |= corpus::LINUX_KERNEL_BINARY_ORIGIN;
14464 ctxt.current_corpus()->set_origin(origin);
14465
14466 ctxt.current_corpus()->set_soname(ctxt.dt_soname());
14467 ctxt.current_corpus()->set_needed(ctxt.dt_needed());
14468 ctxt.current_corpus()->set_architecture_name(ctxt.elf_architecture());
14469 if (corpus_group_sptr group = ctxt.current_corpus_group())
14470 group->add_corpus(ctxt.current_corpus());
14471
14472 // Set symbols information to the corpus.
14473 ctxt.current_corpus()->set_symtab(ctxt.symtab());
14474
14475 // Get out now if no debug info is found.
14476 if (!ctxt.dwarf())
14477 return ctxt.current_corpus();
14478
14479 uint8_t address_size = 0;
14480 size_t header_size = 0;
14481
14482 // Set the set of exported declaration that are defined.
14483 ctxt.exported_decls_builder
14484 (ctxt.current_corpus()->get_exported_decls_builder().get());
14485
14486 #ifdef WITH_DEBUG_SELF_COMPARISON
14487 if (ctxt.env()->self_comparison_debug_is_on())
14488 ctxt.env()->set_self_comparison_debug_input(ctxt.current_corpus());
14489 #endif
14490
14491 // Walk all the DIEs of the debug info to build a DIE -> parent map
14492 // useful for get_die_parent() to work.
14493 {
14494 tools_utils::timer t;
14495 if (ctxt.do_log())
14496 {
14497 cerr << "building die -> parent maps ...";
14498 t.start();
14499 }
14500
14501 ctxt.build_die_parent_maps();
14502
14503 if (ctxt.do_log())
14504 {
14505 t.stop();
14506 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14507 << ":"
14508 << t
14509 << "\n";
14510 }
14511 }
14512
14513 ctxt.env()->canonicalization_is_done(false);
14514
14515 {
14516 tools_utils::timer t;
14517 if (ctxt.do_log())
14518 {
14519 cerr << "building the libabigail internal representation ...";
14520 t.start();
14521 }
14522 // And now walk all the DIEs again to build the libabigail IR.
14523 Dwarf_Half dwarf_version = 0;
14524 for (Dwarf_Off offset = 0, next_offset = 0;
14525 (dwarf_next_unit(ctxt.dwarf(), offset, &next_offset, &header_size,
14526 &dwarf_version, NULL, &address_size, NULL,
14527 NULL, NULL) == 0);
14528 offset = next_offset)
14529 {
14530 Dwarf_Off die_offset = offset + header_size;
14531 Dwarf_Die unit;
14532 if (!dwarf_offdie(ctxt.dwarf(), die_offset, &unit)
14533 || dwarf_tag(&unit) != DW_TAG_compile_unit)
14534 continue;
14535
14536 ctxt.dwarf_version(dwarf_version);
14537
14538 address_size *= 8;
14539
14540 // Build a translation_unit IR node from cu; note that cu must
14541 // be a DW_TAG_compile_unit die.
14542 translation_unit_sptr ir_node =
14543 build_translation_unit_and_add_to_ir(ctxt, &unit, address_size);
14544 ABG_ASSERT(ir_node);
14545 }
14546 if (ctxt.do_log())
14547 {
14548 t.stop();
14549 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14550 << ":"
14551 << t
14552 << "\n";
14553 }
14554 }
14555
14556 {
14557 tools_utils::timer t;
14558 if (ctxt.do_log())
14559 {
14560 cerr << "resolving declaration only classes ...";
14561 t.start();
14562 }
14563 ctxt.resolve_declaration_only_classes();
14564 if (ctxt.do_log())
14565 {
14566 t.stop();
14567 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14568 << ":"
14569 << t
14570 <<"\n";
14571 }
14572 }
14573
14574 {
14575 tools_utils::timer t;
14576 if (ctxt.do_log())
14577 {
14578 cerr << "resolving declaration only enums ...";
14579 t.start();
14580 }
14581 ctxt.resolve_declaration_only_enums();
14582 if (ctxt.do_log())
14583 {
14584 t.stop();
14585 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14586 << ":"
14587 << t
14588 <<"\n";
14589 }
14590 }
14591
14592 {
14593 tools_utils::timer t;
14594 if (ctxt.do_log())
14595 {
14596 cerr << "fixing up functions with linkage name but "
14597 << "no advertised underlying symbols ....";
14598 t.start();
14599 }
14600 ctxt.fixup_functions_with_no_symbols();
14601 if (ctxt.do_log())
14602 {
14603 t.stop();
14604 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14605 <<":"
14606 << t
14607 <<"\n";
14608 }
14609 }
14610
14611 /// Now, look at the types that needs to be canonicalized after the
14612 /// translation has been constructed (which is just now) and
14613 /// canonicalize them.
14614 ///
14615 /// These types need to be constructed at the end of the translation
14616 /// unit reading phase because some types are modified by some DIEs
14617 /// even after the principal DIE describing the type has been read;
14618 /// this happens for clones of virtual destructors (for instance) or
14619 /// even for some static data members. We need to do that for types
14620 /// are in the alternate debug info section and for types that in
14621 /// the main debug info section.
14622 {
14623 tools_utils::timer t;
14624 if (ctxt.do_log())
14625 {
14626 cerr << "perform late type canonicalizing ...\n";
14627 t.start();
14628 }
14629
14630 ctxt.perform_late_type_canonicalizing();
14631 if (ctxt.do_log())
14632 {
14633 t.stop();
14634 cerr << "late type canonicalizing DONE@"
14635 << ctxt.current_corpus()->get_path()
14636 << ":"
14637 << t
14638 << "\n";
14639 }
14640 }
14641
14642 ctxt.env()->canonicalization_is_done(true);
14643
14644 {
14645 tools_utils::timer t;
14646 if (ctxt.do_log())
14647 {
14648 cerr << "sort functions and variables ...";
14649 t.start();
14650 }
14651 ctxt.current_corpus()->sort_functions();
14652 ctxt.current_corpus()->sort_variables();
14653 if (ctxt.do_log())
14654 {
14655 t.stop();
14656 cerr << " DONE@" << ctxt.current_corpus()->get_path()
14657 << ":"
14658 << t
14659 <<" \n";
14660 }
14661 }
14662
14663 #ifdef WITH_DEBUG_SELF_COMPARISON
14664 if (ctxt.env()->self_comparison_debug_is_on())
14665 ctxt.env()->set_self_comparison_debug_input(ctxt.current_corpus());
14666 #endif
14667
14668 return ctxt.current_corpus();
14669 }
14670
14671 /// Canonicalize a type if it's suitable for early canonicalizing, or,
14672 /// if it's not, schedule it for late canonicalization, after the
14673 /// debug info of the current translation unit has been fully read.
14674 ///
14675 /// A (composite) type is deemed suitable for early canonicalizing iff
14676 /// all of its sub-types are canonicalized themselve. Non composite
14677 /// types are always deemed suitable for early canonicalization.
14678 ///
14679 /// Note that this function doesn't work on *ANONYMOUS* classes,
14680 /// structs, unions or enums because it first does some
14681 /// canonicalization of the DWARF DIE @p die. That canonicalization
14682 /// is done by looking up @p die by name; and because these are
14683 /// anonymous types, they don't have names! and so that
14684 /// canonicalization fails. So the type artifact associated to @p
14685 /// die often ends being *NOT* canonicalized. This later leads to
14686 /// extreme slowness of operation, especially when comparisons are
14687 /// later performed on these anonymous types.
14688 ///
14689 /// So when you have classes, structs, unions, or enums that can be
14690 /// anonymous, please use this overload instead:
14691 ///
14692 /// void
14693 /// maybe_canonicalize_type(const Dwarf_Die* die,
14694 /// const type_base_sptr& t,
14695 /// read_context& ctxt);
14696 ///
14697 /// It knows how to deal with anonymous types.
14698 ///
14699 /// @p looks up the type artifact
14700 /// associated to @p die. During that lookup, ; but then those types don't have
14701 /// names because they are anonymous.
14702 ///
14703 /// @param die the type DIE to consider for canonicalization. Note
14704 /// that this DIE must have been associated with its type using the
14705 /// function read_context::associate_die_to_type() prior to calling
14706 /// this function.
14707 ///
14708 /// @param ctxt the @ref read_context to use.
14709 static void
maybe_canonicalize_type(const Dwarf_Die * die,read_context & ctxt)14710 maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
14711 {
14712 const die_source source = ctxt.get_die_source(die);
14713
14714 size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
14715 type_base_sptr t = ctxt.lookup_type_from_die(die);
14716
14717 if (!t)
14718 return;
14719
14720 type_base_sptr peeled_type = peel_typedef_pointer_or_reference_type(t);
14721 if (is_class_type(peeled_type)
14722 || is_union_type(peeled_type)
14723 || is_function_type(peeled_type)
14724 || is_array_type(peeled_type)
14725 || is_qualified_type(peeled_type)
14726 || is_typedef(t))
14727 // We delay canonicalization of classes/unions or typedef,
14728 // pointers, references and array to classes/unions. This is
14729 // because the (underlying) class might not be finished yet and we
14730 // might not be able to able detect it here (thinking about
14731 // classes that are work-in-progress, or classes that might be
14732 // later amended by some DWARF construct). So we err on the safe
14733 // side. We also delay canonicalization for array and qualified
14734 // types because they can be edited (in particular by
14735 // maybe_strip_qualification) after they are initially built.
14736 ctxt.schedule_type_for_late_canonicalization(die);
14737 else if (is_decl(t) && is_decl(t)->get_is_anonymous())
14738 ctxt.schedule_type_for_late_canonicalization(t);
14739 else if ((is_function_type(t)
14740 && ctxt.is_wip_function_type_die_offset(die_offset, source))
14741 || type_has_non_canonicalized_subtype(t))
14742 ctxt.schedule_type_for_late_canonicalization(die);
14743 else
14744 canonicalize(t);
14745 }
14746
14747 /// Canonicalize a type if it's suitable for early canonicalizing, or,
14748 /// if it's not, schedule it for late canonicalization, after the
14749 /// debug info of the current translation unit has been fully read.
14750 ///
14751 /// A (composite) type is deemed suitable for early canonicalizing iff
14752 /// all of its sub-types are canonicalized themselve. Non composite
14753 /// types are always deemed suitable for early canonicalization.
14754 ///
14755 /// Note that this function nows how to deal with anonymous classes,
14756 /// structs and enums, unlike the overload below:
14757 ///
14758 /// void maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
14759 ///
14760 /// The problem, though is that this function is much slower that that
14761 /// overload above because of how the types that are meant for later
14762 /// canonicalization are stored. So the idea is that this function
14763 /// should be used only for the smallest possible subset of types that
14764 /// are anonymous and thus cannot be handled by the overload above.
14765 ///
14766 /// @param t the type DIE to consider for canonicalization.
14767 ///
14768 /// @param ctxt the @ref read_context to use.
14769 static void
maybe_canonicalize_type(const type_base_sptr & t,read_context & ctxt)14770 maybe_canonicalize_type(const type_base_sptr& t,
14771 read_context& ctxt)
14772 {
14773 if (!t)
14774 return;
14775
14776 type_base_sptr peeled_type = peel_typedef_pointer_or_reference_type(t);
14777 if (is_class_type(peeled_type)
14778 || is_union_type(peeled_type)
14779 || is_function_type(peeled_type)
14780 || is_array_type(peeled_type)
14781 || is_qualified_type(peeled_type)
14782 ||(is_decl(peeled_type) && is_decl(peeled_type)->get_is_anonymous()))
14783 // We delay canonicalization of classes/unions or typedef,
14784 // pointers, references and array to classes/unions. This is
14785 // because the (underlying) class might not be finished yet and we
14786 // might not be able to able detect it here (thinking about
14787 // classes that are work-in-progress, or classes that might be
14788 // later amended by some DWARF construct). So we err on the safe
14789 // side. We also delay canonicalization for array and qualified
14790 // types because they can be edited (in particular by
14791 // maybe_strip_qualification) after they are initially built.
14792 ctxt.schedule_type_for_late_canonicalization(t);
14793 else if (type_has_non_canonicalized_subtype(t))
14794 ctxt.schedule_type_for_late_canonicalization(t);
14795 else
14796 canonicalize(t);
14797 }
14798
14799 /// If a given decl is a member type declaration, set its access
14800 /// specifier from the DIE that represents it.
14801 ///
14802 /// @param member_type_declaration the member type declaration to
14803 /// consider.
14804 static void
maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,Dwarf_Die * die)14805 maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,
14806 Dwarf_Die* die)
14807 {
14808 if (is_type(member_type_declaration)
14809 && is_member_decl(member_type_declaration))
14810 {
14811 class_or_union* scope =
14812 is_class_or_union_type(member_type_declaration->get_scope());
14813 ABG_ASSERT(scope);
14814
14815 access_specifier access = public_access;
14816 if (class_decl* cl = is_class_type(scope))
14817 if (!cl->is_struct())
14818 access = private_access;
14819
14820 die_access_specifier(die, access);
14821 set_member_access_specifier(member_type_declaration, access);
14822 }
14823 }
14824
14825 /// This function tests if a given function which might be intented to
14826 /// be added to a class scope (to become a member function) should be
14827 /// dropped on the floor instead and not be added to the class.
14828 ///
14829 /// This is a subroutine of build_ir_node_from_die.
14830 ///
14831 /// @param fn the function to consider.
14832 ///
14833 /// @param scope the scope the function is intended to be added
14834 /// to. This might be of class type or not.
14835 ///
14836 /// @param fn_die the DWARF die of @p fn.
14837 ///
14838 /// @return true iff @p fn should be dropped on the floor.
14839 static bool
potential_member_fn_should_be_dropped(const function_decl_sptr & fn,Dwarf_Die * fn_die)14840 potential_member_fn_should_be_dropped(const function_decl_sptr& fn,
14841 Dwarf_Die *fn_die)
14842 {
14843 if (!fn || fn->get_scope())
14844 return false;
14845
14846 if (// A function that is not virtual ...
14847 !die_is_virtual(fn_die)
14848 // ... has a linkage name ...
14849 && !fn->get_linkage_name().empty()
14850 // .. and yet has no ELF symbol associated ...
14851 && !fn->get_symbol())
14852 // Should not be added to its class scope.
14853 //
14854 // Why would it? It's not part of the ABI anyway, as it doesn't
14855 // have any ELF symbol associated and is not a virtual member
14856 // function. It just constitutes bloat in the IR and might even
14857 // induce spurious change reports down the road.
14858 return true;
14859
14860 return false;
14861 }
14862
14863 /// Build an IR node from a given DIE and add the node to the current
14864 /// IR being build and held in the read_context. Doing that is called
14865 /// "emitting an IR node for the DIE".
14866 ///
14867 /// @param ctxt the read context.
14868 ///
14869 /// @param die the DIE to consider.
14870 ///
14871 /// @param scope the scope under which the resulting IR node has to be
14872 /// added.
14873 ///
14874 /// @param called_from_public_decl set to yes if this function is
14875 /// called from the functions used to build a public decl (functions
14876 /// and variables). In that case, this function accepts building IR
14877 /// nodes representing types. Otherwise, this function only creates
14878 /// IR nodes representing public decls (functions and variables).
14879 /// This is done to avoid emitting IR nodes for types that are not
14880 /// referenced by public functions or variables.
14881 ///
14882 /// @param where_offset the offset of the DIE where we are "logically"
14883 /// positionned at, in the DIE tree. This is useful when @p die is
14884 /// e.g, DW_TAG_partial_unit that can be included in several places in
14885 /// the DIE tree.
14886 ///
14887 /// @param is_required_decl_spec if true, it means the ir node to
14888 /// build is for a decl that is a specification for another decl that
14889 /// is concrete. If you don't know what this is, set it to false.
14890 ///
14891 /// @param is_declaration_only is true if the DIE denoted by @p die is
14892 /// a declaration-only DIE.
14893 ///
14894 /// @return the resulting IR node.
14895 static type_or_decl_base_sptr
build_ir_node_from_die(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,bool called_from_public_decl,size_t where_offset,bool is_declaration_only,bool is_required_decl_spec)14896 build_ir_node_from_die(read_context& ctxt,
14897 Dwarf_Die* die,
14898 scope_decl* scope,
14899 bool called_from_public_decl,
14900 size_t where_offset,
14901 bool is_declaration_only,
14902 bool is_required_decl_spec)
14903 {
14904 type_or_decl_base_sptr result;
14905
14906 if (!die || !scope)
14907 return result;
14908
14909 int tag = dwarf_tag(die);
14910
14911 if (!called_from_public_decl)
14912 {
14913 if (ctxt.load_all_types() && die_is_type(die))
14914 /* We were instructed to load debug info for all types,
14915 included those that are not reachable from a public
14916 declaration. So load the debug info for this type. */;
14917 else if (tag != DW_TAG_subprogram
14918 && tag != DW_TAG_variable
14919 && tag != DW_TAG_member
14920 && tag != DW_TAG_namespace)
14921 return result;
14922 }
14923
14924 const die_source source_of_die = ctxt.get_die_source(die);
14925
14926 if ((result = ctxt.lookup_decl_from_die_offset(dwarf_dieoffset(die),
14927 source_of_die)))
14928 {
14929 if (ctxt.load_all_types())
14930 if (called_from_public_decl)
14931 if (type_base_sptr t = is_type(result))
14932 if (corpus *abi_corpus = scope->get_corpus())
14933 abi_corpus->record_type_as_reachable_from_public_interfaces(*t);
14934
14935 return result;
14936 }
14937
14938 // This is *the* bit of code that ensures we have the right notion
14939 // of "declared" at any point in a DIE chain formed from
14940 // DW_AT_abstract_origin and DW_AT_specification links. There should
14941 // be no other callers of die_is_declaration_only.
14942 is_declaration_only = is_declaration_only && die_is_declaration_only(die);
14943
14944 switch (tag)
14945 {
14946 // Type DIEs we support.
14947 case DW_TAG_base_type:
14948 if (type_decl_sptr t = build_type_decl(ctxt, die, where_offset))
14949 {
14950 result =
14951 add_decl_to_scope(t, ctxt.cur_transl_unit()->get_global_scope());
14952 canonicalize(t);
14953 }
14954 break;
14955
14956 case DW_TAG_typedef:
14957 {
14958 typedef_decl_sptr t = build_typedef_type(ctxt, die,
14959 called_from_public_decl,
14960 where_offset);
14961 result = add_decl_to_scope(t, scope);
14962 if (result)
14963 {
14964 maybe_set_member_type_access_specifier(is_decl(result), die);
14965 maybe_canonicalize_type(die, ctxt);
14966 }
14967 }
14968 break;
14969
14970 case DW_TAG_pointer_type:
14971 {
14972 pointer_type_def_sptr p =
14973 build_pointer_type_def(ctxt, die,
14974 called_from_public_decl,
14975 where_offset);
14976 if (p)
14977 {
14978 result =
14979 add_decl_to_scope(p, ctxt.cur_transl_unit()->get_global_scope());
14980 ABG_ASSERT(result->get_translation_unit());
14981 maybe_canonicalize_type(die, ctxt);
14982 }
14983 }
14984 break;
14985
14986 case DW_TAG_reference_type:
14987 case DW_TAG_rvalue_reference_type:
14988 {
14989 reference_type_def_sptr r =
14990 build_reference_type(ctxt, die,
14991 called_from_public_decl,
14992 where_offset);
14993 if (r)
14994 {
14995 result =
14996 add_decl_to_scope(r, ctxt.cur_transl_unit()->get_global_scope());
14997
14998 ctxt.associate_die_to_type(die, r, where_offset);
14999 maybe_canonicalize_type(die, ctxt);
15000 }
15001 }
15002 break;
15003
15004 case DW_TAG_const_type:
15005 case DW_TAG_volatile_type:
15006 case DW_TAG_restrict_type:
15007 {
15008 type_base_sptr q =
15009 build_qualified_type(ctxt, die,
15010 called_from_public_decl,
15011 where_offset);
15012 if (q)
15013 {
15014 // Strip some potentially redundant type qualifiers from
15015 // the qualified type we just built.
15016 decl_base_sptr d = maybe_strip_qualification(is_qualified_type(q),
15017 ctxt);
15018 if (!d)
15019 d = get_type_declaration(q);
15020 ABG_ASSERT(d);
15021 type_base_sptr ty = is_type(d);
15022 // Associate the die to type ty again because 'ty'might be
15023 // different from 'q', because 'ty' is 'q' possibly
15024 // stripped from some redundant type qualifier.
15025 ctxt.associate_die_to_type(die, ty, where_offset);
15026 result =
15027 add_decl_to_scope(d, ctxt.cur_transl_unit()->get_global_scope());
15028 maybe_canonicalize_type(die, ctxt);
15029 }
15030 }
15031 break;
15032
15033 case DW_TAG_enumeration_type:
15034 {
15035 bool type_is_private = false;
15036 bool type_suppressed =
15037 type_is_suppressed(ctxt, scope, die, type_is_private);
15038 if (type_suppressed && type_is_private)
15039 {
15040 // The type is suppressed because it's private. If other
15041 // non-suppressed and declaration-only instances of this
15042 // type exist in the current corpus, then it means those
15043 // non-suppressed instances are opaque versions of the
15044 // suppressed private type. Lets return one of these opaque
15045 // types then.
15046 result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
15047 maybe_canonicalize_type(is_type(result), ctxt);
15048 }
15049 else if (!type_suppressed)
15050 {
15051 enum_type_decl_sptr e = build_enum_type(ctxt, die, scope,
15052 where_offset,
15053 is_declaration_only);
15054 result = add_decl_to_scope(e, scope);
15055 if (result)
15056 {
15057 maybe_set_member_type_access_specifier(is_decl(result), die);
15058 maybe_canonicalize_type(die, ctxt);
15059 }
15060 }
15061 }
15062 break;
15063
15064 case DW_TAG_class_type:
15065 case DW_TAG_structure_type:
15066 {
15067 bool type_is_private = false;
15068 bool type_suppressed=
15069 type_is_suppressed(ctxt, scope, die, type_is_private);
15070
15071 if (type_suppressed && type_is_private)
15072 {
15073 // The type is suppressed because it's private. If other
15074 // non-suppressed and declaration-only instances of this
15075 // type exist in the current corpus, then it means those
15076 // non-suppressed instances are opaque versions of the
15077 // suppressed private type. Lets return one of these opaque
15078 // types then.
15079 result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
15080 maybe_canonicalize_type(is_type(result), ctxt);
15081 }
15082 else if (!type_suppressed)
15083 {
15084 Dwarf_Die spec_die;
15085 scope_decl_sptr scop;
15086 class_decl_sptr klass;
15087 if (die_die_attribute(die, DW_AT_specification, spec_die))
15088 {
15089 scope_decl_sptr skope =
15090 get_scope_for_die(ctxt, &spec_die,
15091 called_from_public_decl,
15092 where_offset);
15093 ABG_ASSERT(skope);
15094 decl_base_sptr cl =
15095 is_decl(build_ir_node_from_die(ctxt, &spec_die,
15096 skope.get(),
15097 called_from_public_decl,
15098 where_offset,
15099 is_declaration_only,
15100 /*is_required_decl_spec=*/false));
15101 ABG_ASSERT(cl);
15102 klass = dynamic_pointer_cast<class_decl>(cl);
15103 ABG_ASSERT(klass);
15104
15105 klass =
15106 add_or_update_class_type(ctxt, die,
15107 skope.get(),
15108 tag == DW_TAG_structure_type,
15109 klass,
15110 called_from_public_decl,
15111 where_offset,
15112 is_declaration_only);
15113 }
15114 else
15115 klass =
15116 add_or_update_class_type(ctxt, die, scope,
15117 tag == DW_TAG_structure_type,
15118 class_decl_sptr(),
15119 called_from_public_decl,
15120 where_offset,
15121 is_declaration_only);
15122 result = klass;
15123 if (klass)
15124 {
15125 maybe_set_member_type_access_specifier(klass, die);
15126 maybe_canonicalize_type(klass, ctxt);
15127 }
15128 }
15129 }
15130 break;
15131 case DW_TAG_union_type:
15132 if (!type_is_suppressed(ctxt, scope, die))
15133 {
15134 union_decl_sptr union_type =
15135 add_or_update_union_type(ctxt, die, scope,
15136 union_decl_sptr(),
15137 called_from_public_decl,
15138 where_offset,
15139 is_declaration_only);
15140 if (union_type)
15141 {
15142 maybe_set_member_type_access_specifier(union_type, die);
15143 maybe_canonicalize_type(union_type, ctxt);
15144 }
15145 result = union_type;
15146 }
15147 break;
15148 case DW_TAG_string_type:
15149 break;
15150 case DW_TAG_subroutine_type:
15151 {
15152 function_type_sptr f = build_function_type(ctxt, die,
15153 class_decl_sptr(),
15154 where_offset);
15155 if (f)
15156 {
15157 result = f;
15158 result->set_is_artificial(false);
15159 maybe_canonicalize_type(die, ctxt);
15160 }
15161 }
15162 break;
15163 case DW_TAG_array_type:
15164 {
15165 array_type_def_sptr a = build_array_type(ctxt,
15166 die,
15167 called_from_public_decl,
15168 where_offset);
15169 if (a)
15170 {
15171 result =
15172 add_decl_to_scope(a, ctxt.cur_transl_unit()->get_global_scope());
15173 ctxt.associate_die_to_type(die, a, where_offset);
15174 maybe_canonicalize_type(die, ctxt);
15175 }
15176 break;
15177 }
15178 case DW_TAG_subrange_type:
15179 {
15180 // If we got here, this means the subrange type is a "free
15181 // form" defined in the global namespace of the current
15182 // translation unit, like what is found in Ada.
15183 array_type_def::subrange_sptr s =
15184 build_subrange_type(ctxt, die, where_offset);
15185 if (s)
15186 {
15187 result =
15188 add_decl_to_scope(s, ctxt.cur_transl_unit()->get_global_scope());
15189 ctxt.associate_die_to_type(die, s, where_offset);
15190 maybe_canonicalize_type(die, ctxt);
15191 }
15192 }
15193 break;
15194 case DW_TAG_packed_type:
15195 break;
15196 case DW_TAG_set_type:
15197 break;
15198 case DW_TAG_file_type:
15199 break;
15200 case DW_TAG_ptr_to_member_type:
15201 break;
15202 case DW_TAG_thrown_type:
15203 break;
15204 case DW_TAG_interface_type:
15205 break;
15206 case DW_TAG_unspecified_type:
15207 break;
15208 case DW_TAG_shared_type:
15209 break;
15210
15211 case DW_TAG_compile_unit:
15212 // We shouldn't reach this point b/c this should be handled by
15213 // build_translation_unit.
15214 ABG_ASSERT_NOT_REACHED;
15215
15216 case DW_TAG_namespace:
15217 case DW_TAG_module:
15218 result = build_namespace_decl_and_add_to_ir(ctxt, die, where_offset);
15219 break;
15220
15221 case DW_TAG_variable:
15222 case DW_TAG_member:
15223 {
15224 Dwarf_Die spec_die;
15225 bool var_is_cloned = false;
15226
15227 if (tag == DW_TAG_member)
15228 ABG_ASSERT(!is_c_language(ctxt.cur_transl_unit()->get_language()));
15229
15230 if (die_die_attribute(die, DW_AT_specification, spec_die, false)
15231 || (var_is_cloned = die_die_attribute(die, DW_AT_abstract_origin,
15232 spec_die, false)))
15233 {
15234 scope_decl_sptr spec_scope =
15235 get_scope_for_die(ctxt, &spec_die,
15236 /*called_from_public_decl=*/
15237 die_is_effectively_public_decl(ctxt, die),
15238 where_offset);
15239 if (spec_scope)
15240 {
15241 decl_base_sptr d =
15242 is_decl(build_ir_node_from_die(ctxt, &spec_die,
15243 spec_scope.get(),
15244 called_from_public_decl,
15245 where_offset,
15246 is_declaration_only,
15247 /*is_required_decl_spec=*/true));
15248 if (d)
15249 {
15250 var_decl_sptr m =
15251 dynamic_pointer_cast<var_decl>(d);
15252 if (var_is_cloned)
15253 m = m->clone();
15254 m = build_var_decl(ctxt, die, where_offset, m);
15255 if (is_data_member(m))
15256 {
15257 set_member_is_static(m, true);
15258 ctxt.associate_die_to_decl(die, m, where_offset,
15259 /*associate_by_repr=*/false);
15260 }
15261 else
15262 {
15263 ABG_ASSERT(has_scope(m));
15264 ctxt.var_decls_to_re_add_to_tree().push_back(m);
15265 }
15266 ABG_ASSERT(m->get_scope());
15267 ctxt.maybe_add_var_to_exported_decls(m.get());
15268 return m;
15269 }
15270 }
15271 }
15272 else if (var_decl_sptr v =
15273 build_or_get_var_decl_if_not_suppressed(ctxt, scope, die,
15274 where_offset,
15275 /*result=*/var_decl_sptr(),
15276 is_required_decl_spec))
15277 {
15278 result = add_decl_to_scope(v, scope);
15279 ABG_ASSERT(is_decl(result)->get_scope());
15280 v = dynamic_pointer_cast<var_decl>(result);
15281 ABG_ASSERT(v);
15282 ABG_ASSERT(v->get_scope());
15283 ctxt.var_decls_to_re_add_to_tree().push_back(v);
15284 ctxt.maybe_add_var_to_exported_decls(v.get());
15285 }
15286 }
15287 break;
15288
15289 case DW_TAG_subprogram:
15290 {
15291 Dwarf_Die spec_die;
15292 Dwarf_Die abstract_origin_die;
15293 Dwarf_Die *interface_die = 0, *origin_die = 0;
15294 scope_decl_sptr interface_scope;
15295 if (die_is_artificial(die))
15296 break;
15297
15298 function_decl_sptr fn;
15299 bool has_spec = die_die_attribute(die, DW_AT_specification,
15300 spec_die, true);
15301 bool has_abstract_origin =
15302 die_die_attribute(die, DW_AT_abstract_origin,
15303 abstract_origin_die, true);
15304 if (has_spec || has_abstract_origin)
15305 {
15306 interface_die =
15307 has_spec
15308 ? &spec_die
15309 : &abstract_origin_die;
15310 origin_die =
15311 has_abstract_origin
15312 ? &abstract_origin_die
15313 : &spec_die;
15314
15315 string linkage_name = die_linkage_name(die);
15316 string spec_linkage_name = die_linkage_name(interface_die);
15317
15318 interface_scope = get_scope_for_die(ctxt, interface_die,
15319 called_from_public_decl,
15320 where_offset);
15321 if (interface_scope)
15322 {
15323 decl_base_sptr d;
15324 class_decl_sptr c = is_class_type(interface_scope);
15325 if (c && !linkage_name.empty())
15326 d = c->find_member_function_sptr(linkage_name);
15327
15328 if (!d)
15329 d = is_decl(build_ir_node_from_die(ctxt,
15330 origin_die,
15331 interface_scope.get(),
15332 called_from_public_decl,
15333 where_offset,
15334 is_declaration_only,
15335 /*is_required_decl_spec=*/true));
15336 if (d)
15337 {
15338 fn = dynamic_pointer_cast<function_decl>(d);
15339 if (has_abstract_origin
15340 && (linkage_name != spec_linkage_name))
15341 // The current DIE has 'd' as abstract orign,
15342 // and has a linkage name that is different
15343 // from from the linkage name of 'd'. That
15344 // means, the current DIE represents a clone
15345 // of 'd'.
15346 fn = fn->clone();
15347 }
15348 }
15349 }
15350 ctxt.scope_stack().push(scope);
15351
15352 scope_decl* logical_scope =
15353 interface_scope
15354 ? interface_scope.get()
15355 : scope;
15356
15357 result = build_or_get_fn_decl_if_not_suppressed(ctxt, logical_scope,
15358 die, where_offset,
15359 is_declaration_only,
15360 fn);
15361
15362 if (result && !fn)
15363 {
15364 if (potential_member_fn_should_be_dropped(is_function_decl(result),
15365 die)
15366 && !is_required_decl_spec)
15367 {
15368 result.reset();
15369 break;
15370 }
15371 result = add_decl_to_scope(is_decl(result), logical_scope);
15372 }
15373
15374 fn = is_function_decl(result);
15375 if (fn && is_member_function(fn))
15376 {
15377 class_decl_sptr klass(static_cast<class_decl*>(logical_scope),
15378 sptr_utils::noop_deleter());
15379 ABG_ASSERT(klass);
15380 finish_member_function_reading(die, fn, klass, ctxt);
15381 }
15382
15383 if (fn)
15384 {
15385 ctxt.maybe_add_fn_to_exported_decls(fn.get());
15386 ctxt.associate_die_to_decl(die, fn, where_offset,
15387 /*associate_by_repr=*/false);
15388 maybe_canonicalize_type(die, ctxt);
15389 }
15390
15391 ctxt.scope_stack().pop();
15392 }
15393 break;
15394
15395 case DW_TAG_formal_parameter:
15396 // We should not read this case as it should have been dealt
15397 // with by build_function_decl above.
15398 ABG_ASSERT_NOT_REACHED;
15399
15400 case DW_TAG_constant:
15401 break;
15402 case DW_TAG_enumerator:
15403 break;
15404
15405 case DW_TAG_partial_unit:
15406 case DW_TAG_imported_unit:
15407 // For now, the DIEs under these are read lazily when they are
15408 // referenced by a public decl DIE that is under a
15409 // DW_TAG_compile_unit, so we shouldn't get here.
15410 ABG_ASSERT_NOT_REACHED;
15411
15412 // Other declaration we don't really intend to support yet.
15413 case DW_TAG_dwarf_procedure:
15414 case DW_TAG_imported_declaration:
15415 case DW_TAG_entry_point:
15416 case DW_TAG_label:
15417 case DW_TAG_lexical_block:
15418 case DW_TAG_unspecified_parameters:
15419 case DW_TAG_variant:
15420 case DW_TAG_common_block:
15421 case DW_TAG_common_inclusion:
15422 case DW_TAG_inheritance:
15423 case DW_TAG_inlined_subroutine:
15424 case DW_TAG_with_stmt:
15425 case DW_TAG_access_declaration:
15426 case DW_TAG_catch_block:
15427 case DW_TAG_friend:
15428 case DW_TAG_namelist:
15429 case DW_TAG_namelist_item:
15430 case DW_TAG_template_type_parameter:
15431 case DW_TAG_template_value_parameter:
15432 case DW_TAG_try_block:
15433 case DW_TAG_variant_part:
15434 case DW_TAG_imported_module:
15435 case DW_TAG_condition:
15436 case DW_TAG_type_unit:
15437 case DW_TAG_template_alias:
15438 case DW_TAG_lo_user:
15439 case DW_TAG_MIPS_loop:
15440 case DW_TAG_format_label:
15441 case DW_TAG_function_template:
15442 case DW_TAG_class_template:
15443 case DW_TAG_GNU_BINCL:
15444 case DW_TAG_GNU_EINCL:
15445 case DW_TAG_GNU_template_template_param:
15446 case DW_TAG_GNU_template_parameter_pack:
15447 case DW_TAG_GNU_formal_parameter_pack:
15448 case DW_TAG_GNU_call_site:
15449 case DW_TAG_GNU_call_site_parameter:
15450 case DW_TAG_hi_user:
15451 default:
15452 break;
15453 }
15454
15455 if (result && tag != DW_TAG_subroutine_type)
15456 ctxt.associate_die_to_decl(die, is_decl(result), where_offset,
15457 /*associate_by_repr=*/false);
15458
15459 if (result)
15460 if (ctxt.load_all_types())
15461 if (called_from_public_decl)
15462 if (type_base_sptr t = is_type(result))
15463 if (corpus *abi_corpus = scope->get_corpus())
15464 abi_corpus->record_type_as_reachable_from_public_interfaces(*t);
15465
15466 return result;
15467 }
15468
15469 /// Build the IR node for a void type.
15470 ///
15471 /// @param ctxt the read context to use.
15472 ///
15473 /// @return the void type node.
15474 static decl_base_sptr
build_ir_node_for_void_type(read_context & ctxt)15475 build_ir_node_for_void_type(read_context& ctxt)
15476 {
15477 ir::environment* env = ctxt.env();
15478 ABG_ASSERT(env);
15479 type_base_sptr t = env->get_void_type();
15480 decl_base_sptr type_declaration = get_type_declaration(t);
15481 if (!has_scope(type_declaration))
15482 add_decl_to_scope(type_declaration,
15483 ctxt.cur_transl_unit()->get_global_scope());
15484 canonicalize(t);
15485 return type_declaration;
15486 }
15487
15488 /// Build the IR node for a variadic parameter type.
15489 ///
15490 /// @param ctxt the read context to use.
15491 ///
15492 /// @return the variadic parameter type.
15493 static decl_base_sptr
build_ir_node_for_variadic_parameter_type(read_context & ctxt)15494 build_ir_node_for_variadic_parameter_type(read_context &ctxt)
15495 {
15496
15497 ir::environment* env = ctxt.env();
15498 ABG_ASSERT(env);
15499 type_base_sptr t = env->get_variadic_parameter_type();
15500 decl_base_sptr type_declaration = get_type_declaration(t);
15501 if (!has_scope(type_declaration))
15502 add_decl_to_scope(type_declaration,
15503 ctxt.cur_transl_unit()->get_global_scope());
15504 canonicalize(t);
15505 return type_declaration;
15506 }
15507
15508 /// Build an IR node from a given DIE and add the node to the current
15509 /// IR being build and held in the read_context. Doing that is called
15510 /// "emitting an IR node for the DIE".
15511 ///
15512 /// @param ctxt the read context.
15513 ///
15514 /// @param die the DIE to consider.
15515 ///
15516 /// @param called_from_public_decl set to yes if this function is
15517 /// called from the functions used to build a public decl (functions
15518 /// and variables). In that case, this function accepts building IR
15519 /// nodes representing types. Otherwise, this function only creates
15520 /// IR nodes representing public decls (functions and variables).
15521 /// This is done to avoid emitting IR nodes for types that are not
15522 /// referenced by public functions or variables.
15523 ///
15524 /// @param where_offset the offset of the DIE where we are "logically"
15525 /// positionned at, in the DIE tree. This is useful when @p die is
15526 /// e.g, DW_TAG_partial_unit that can be included in several places in
15527 /// the DIE tree.
15528 ///
15529 /// @return the resulting IR node.
15530 static type_or_decl_base_sptr
build_ir_node_from_die(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)15531 build_ir_node_from_die(read_context& ctxt,
15532 Dwarf_Die* die,
15533 bool called_from_public_decl,
15534 size_t where_offset)
15535 {
15536 if (!die)
15537 return decl_base_sptr();
15538
15539 if (is_c_language(ctxt.cur_transl_unit()->get_language()))
15540 {
15541 const scope_decl_sptr& scop = ctxt.global_scope();
15542 return build_ir_node_from_die(ctxt, die, scop.get(),
15543 called_from_public_decl,
15544 where_offset,
15545 true);
15546 }
15547
15548 // Normaly, a decl that is meant to be external has a DW_AT_external
15549 // set. But then some compilers fail to always emit that flag. For
15550 // instance, for static data members, some compilers won't emit the
15551 // DW_AT_external. In that case, we assume that if the variable is
15552 // at global or named namespace scope, then we can assume it's
15553 // external. If the variable doesn't have any ELF symbol associated
15554 // to it, it'll be dropped on the floor anyway. Those variable
15555 // decls are considered as being "effectively public".
15556 bool consider_as_called_from_public_decl =
15557 called_from_public_decl || die_is_effectively_public_decl(ctxt, die);
15558 scope_decl_sptr scope = get_scope_for_die(ctxt, die,
15559 consider_as_called_from_public_decl,
15560 where_offset);
15561 return build_ir_node_from_die(ctxt, die, scope.get(),
15562 called_from_public_decl,
15563 where_offset,
15564 true);
15565 }
15566
15567 /// Create a dwarf_reader::read_context.
15568 ///
15569 /// @param elf_path the path to the elf file the context is to be used for.
15570 ///
15571 /// @param debug_info_root_paths a pointer to the path to the root
15572 /// directory under which the debug info is to be found for @p
15573 /// elf_path. Leave this to NULL if the debug info is not in a split
15574 /// file.
15575 ///
15576 /// @param environment the environment used by the current context.
15577 /// This environment contains resources needed by the reader and by
15578 /// the types and declarations that are to be created later. Note
15579 /// that ABI artifacts that are to be compared all need to be created
15580 /// within the same environment.
15581 ///
15582 /// Please also note that the life time of this environment object
15583 /// must be greater than the life time of the resulting @ref
15584 /// read_context the context uses resources that are allocated in the
15585 /// environment.
15586 ///
15587 /// @param load_all_types if set to false only the types that are
15588 /// reachable from publicly exported declarations (of functions and
15589 /// variables) are read. If set to true then all types found in the
15590 /// debug information are loaded.
15591 ///
15592 /// @param linux_kernel_mode if set to true, then consider the special
15593 /// linux kernel symbol tables when determining if a symbol is
15594 /// exported or not.
15595 ///
15596 /// @return a smart pointer to the resulting dwarf_reader::read_context.
15597 read_context_sptr
create_read_context(const std::string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)15598 create_read_context(const std::string& elf_path,
15599 const vector<char**>& debug_info_root_paths,
15600 ir::environment* environment,
15601 bool load_all_types,
15602 bool linux_kernel_mode)
15603 {
15604 // Create a DWARF Front End Library handle to be used by functions
15605 // of that library.
15606 read_context_sptr result(new read_context(elf_path, debug_info_root_paths,
15607 environment, load_all_types,
15608 linux_kernel_mode));
15609 return result;
15610 }
15611
15612 /// Getter for the path to the binary this @ref read_context is for.
15613 ///
15614 /// @return the path to the binary the @ref read_context is for.
15615 const string&
read_context_get_path(const read_context & ctxt)15616 read_context_get_path(const read_context& ctxt)
15617 {return ctxt.elf_path();}
15618
15619 /// Re-initialize a read_context so that it can re-used to read
15620 /// another binary.
15621 ///
15622 /// @param ctxt the context to re-initialize.
15623 ///
15624 /// @param elf_path the path to the elf file the context is to be used
15625 /// for.
15626 ///
15627 /// @param debug_info_root_path a pointer to the path to the root
15628 /// directory under which the debug info is to be found for @p
15629 /// elf_path. Leave this to NULL if the debug info is not in a split
15630 /// file.
15631 ///
15632 /// @param environment the environment used by the current context.
15633 /// This environment contains resources needed by the reader and by
15634 /// the types and declarations that are to be created later. Note
15635 /// that ABI artifacts that are to be compared all need to be created
15636 /// within the same environment.
15637 ///
15638 /// Please also note that the life time of this environment object
15639 /// must be greater than the life time of the resulting @ref
15640 /// read_context the context uses resources that are allocated in the
15641 /// environment.
15642 ///
15643 /// @param load_all_types if set to false only the types that are
15644 /// reachable from publicly exported declarations (of functions and
15645 /// variables) are read. If set to true then all types found in the
15646 /// debug information are loaded.
15647 ///
15648 /// @param linux_kernel_mode if set to true, then consider the special
15649 /// linux kernel symbol tables when determining if a symbol is
15650 /// exported or not.
15651 ///
15652 /// @return a smart pointer to the resulting dwarf_reader::read_context.
15653 void
reset_read_context(read_context_sptr & ctxt,const std::string & elf_path,const vector<char ** > & debug_info_root_path,ir::environment * environment,bool read_all_types,bool linux_kernel_mode)15654 reset_read_context(read_context_sptr &ctxt,
15655 const std::string& elf_path,
15656 const vector<char**>& debug_info_root_path,
15657 ir::environment* environment,
15658 bool read_all_types,
15659 bool linux_kernel_mode)
15660 {
15661 if (ctxt)
15662 ctxt->initialize(elf_path, debug_info_root_path, environment,
15663 read_all_types, linux_kernel_mode);
15664 }
15665
15666 /// Add suppressions specifications to the set of suppressions to be
15667 /// used during the construction of the ABI internal representation
15668 /// (the ABI corpus) from ELF and DWARF.
15669 ///
15670 /// During the construction of the ABI corpus, ABI artifacts that
15671 /// match the a given suppression specification are dropped on the
15672 /// floor; that is, they are discarded and won't be part of the final
15673 /// ABI corpus. This is a way to reduce the amount of data held by
15674 /// the final ABI corpus.
15675 ///
15676 /// Note that the suppression specifications provided to this function
15677 /// are only considered during the construction of the ABI corpus.
15678 /// For instance, they are not taken into account during e.g
15679 /// comparisons of two ABI corpora that might happen later. If you
15680 /// want to apply suppression specificatins to the comparison (or
15681 /// reporting) of ABI corpora please refer to the documentation of the
15682 /// @ref diff_context type to learn how to set suppressions that are
15683 /// to be used in that context.
15684 ///
15685 /// @param ctxt the context that is going to be used by functions that
15686 /// read ELF and DWARF information to construct and ABI corpus.
15687 ///
15688 /// @param supprs the suppression specifications to be applied during
15689 /// the construction of the ABI corpus.
15690 void
add_read_context_suppressions(read_context & ctxt,const suppr::suppressions_type & supprs)15691 add_read_context_suppressions(read_context& ctxt,
15692 const suppr::suppressions_type& supprs)
15693 {
15694 for (suppr::suppressions_type::const_iterator i = supprs.begin();
15695 i != supprs.end();
15696 ++i)
15697 if ((*i)->get_drops_artifact_from_ir())
15698 ctxt.get_suppressions().push_back(*i);
15699 }
15700
15701 /// Set the @ref corpus_group being created to the current read context.
15702 ///
15703 /// @param ctxt the read_context to consider.
15704 ///
15705 /// @param group the @ref corpus_group to set.
15706 void
set_read_context_corpus_group(read_context & ctxt,corpus_group_sptr & group)15707 set_read_context_corpus_group(read_context& ctxt,
15708 corpus_group_sptr& group)
15709 {
15710 ctxt.cur_corpus_group_ = group;
15711 }
15712
15713 /// Read all @ref abigail::translation_unit possible from the debug info
15714 /// accessible from an elf file, stuff them into a libabigail ABI
15715 /// Corpus and return it.
15716 ///
15717 /// @param ctxt the context to use for reading the elf file.
15718 ///
15719 /// @param resulting_corp a pointer to the resulting abigail::corpus.
15720 ///
15721 /// @return the resulting status.
15722 corpus_sptr
read_corpus_from_elf(read_context & ctxt,status & status)15723 read_corpus_from_elf(read_context& ctxt, status& status)
15724 {
15725 status = STATUS_UNKNOWN;
15726
15727 // Load debug info from the elf path.
15728 if (!ctxt.load_debug_info())
15729 status |= STATUS_DEBUG_INFO_NOT_FOUND;
15730
15731 {
15732 string alt_di_path;
15733 if (refers_to_alt_debug_info(ctxt, alt_di_path) && !ctxt.alt_dwarf())
15734 status |= STATUS_ALT_DEBUG_INFO_NOT_FOUND;
15735 }
15736
15737 ctxt.load_elf_properties(); // DT_SONAME, DT_NEEDED, architecture
15738
15739 if (!ctxt.symtab() || !ctxt.symtab()->has_symbols())
15740 status |= STATUS_NO_SYMBOLS_FOUND;
15741
15742 if (// If no elf symbol was found ...
15743 status & STATUS_NO_SYMBOLS_FOUND
15744 // ... or if debug info was found but not the required alternate
15745 // debug info ...
15746 || ((status & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
15747 && !(status & STATUS_DEBUG_INFO_NOT_FOUND)))
15748 // ... then we cannot handle the binary.
15749 return corpus_sptr();
15750
15751 // Read the variable and function descriptions from the debug info
15752 // we have, through the dwfl handle.
15753 corpus_sptr corp = read_debug_info_into_corpus(ctxt);
15754
15755 status |= STATUS_OK;
15756
15757 return corp;
15758 }
15759
15760 /// Read a corpus and add it to a given @ref corpus_group.
15761 ///
15762 /// @param ctxt the reading context to consider.
15763 ///
15764 /// @param group the @ref corpus_group to add the new corpus to.
15765 ///
15766 /// @param status output parameter. The status of the read. It is set
15767 /// by this function upon its completion.
15768 corpus_sptr
read_and_add_corpus_to_group_from_elf(read_context & ctxt,corpus_group & group,status & status)15769 read_and_add_corpus_to_group_from_elf(read_context& ctxt,
15770 corpus_group& group,
15771 status& status)
15772 {
15773 corpus_sptr result;
15774 corpus_sptr corp = read_corpus_from_elf(ctxt, status);
15775 if (status & STATUS_OK)
15776 {
15777 if (!corp->get_group())
15778 group.add_corpus(corp);
15779 result = corp;
15780 }
15781
15782 return result;
15783 }
15784
15785 /// Read all @ref abigail::translation_unit possible from the debug info
15786 /// accessible from an elf file, stuff them into a libabigail ABI
15787 /// Corpus and return it.
15788 ///
15789 /// @param elf_path the path to the elf file.
15790 ///
15791 /// @param debug_info_root_paths a vector of pointers to root paths
15792 /// under which to look for the debug info of the elf files that are
15793 /// later handled by the Dwfl. This for cases where the debug info is
15794 /// split into a different file from the binary we want to inspect.
15795 /// On Red Hat compatible systems, this root path is usually
15796 /// /usr/lib/debug by default. If this argument is set to NULL, then
15797 /// "./debug" and /usr/lib/debug will be searched for sub-directories
15798 /// containing the debug info file.
15799 ///
15800 /// @param environment the environment used by the current context.
15801 /// This environment contains resources needed by the reader and by
15802 /// the types and declarations that are to be created later. Note
15803 /// that ABI artifacts that are to be compared all need to be created
15804 /// within the same environment. Also, the lifetime of the
15805 /// environment must be greater than the lifetime of the resulting
15806 /// corpus because the corpus uses resources that are allocated in the
15807 /// environment.
15808 ///
15809 /// @param load_all_types if set to false only the types that are
15810 /// reachable from publicly exported declarations (of functions and
15811 /// variables) are read. If set to true then all types found in the
15812 /// debug information are loaded.
15813 ///
15814 /// @param resulting_corp a pointer to the resulting abigail::corpus.
15815 ///
15816 /// @return the resulting status.
15817 corpus_sptr
read_corpus_from_elf(const std::string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,status & status)15818 read_corpus_from_elf(const std::string& elf_path,
15819 const vector<char**>& debug_info_root_paths,
15820 ir::environment* environment,
15821 bool load_all_types,
15822 status& status)
15823 {
15824 read_context_sptr c = create_read_context(elf_path,
15825 debug_info_root_paths,
15826 environment,
15827 load_all_types);
15828 read_context& ctxt = *c;
15829 return read_corpus_from_elf(ctxt, status);
15830 }
15831
15832 /// Look into the symbol tables of a given elf file and see if we find
15833 /// a given symbol.
15834 ///
15835 /// @param env the environment we are operating from.
15836 ///
15837 /// @param elf_path the path to the elf file to consider.
15838 ///
15839 /// @param symbol_name the name of the symbol to look for.
15840 ///
15841 /// @param demangle if true, try to demangle the symbol name found in
15842 /// the symbol table.
15843 ///
15844 /// @param syms the vector of symbols found with the name @p symbol_name.
15845 ///
15846 /// @return true iff the symbol was found among the publicly exported
15847 /// symbols of the ELF file.
15848 bool
lookup_symbol_from_elf(const environment * env,const string & elf_path,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms)15849 lookup_symbol_from_elf(const environment* env,
15850 const string& elf_path,
15851 const string& symbol_name,
15852 bool demangle,
15853 vector<elf_symbol_sptr>& syms)
15854
15855 {
15856 if (elf_version(EV_CURRENT) == EV_NONE)
15857 return false;
15858
15859 int fd = open(elf_path.c_str(), O_RDONLY);
15860 if (fd < 0)
15861 return false;
15862
15863 struct stat s;
15864 if (fstat(fd, &s))
15865 return false;
15866
15867 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
15868 if (elf == 0)
15869 return false;
15870
15871 bool value = lookup_symbol_from_elf(env, elf, symbol_name,
15872 demangle, syms);
15873 elf_end(elf);
15874 close(fd);
15875
15876 return value;
15877 }
15878
15879 /// Look into the symbol tables of an elf file to see if a public
15880 /// function of a given name is found.
15881 ///
15882 /// @param env the environment we are operating from.
15883 ///
15884 /// @param elf_path the path to the elf file to consider.
15885 ///
15886 /// @param symbol_name the name of the function to look for.
15887 ///
15888 /// @param syms the vector of public function symbols found with the
15889 /// name @p symname.
15890 ///
15891 /// @return true iff a function with symbol name @p symbol_name is
15892 /// found.
15893 bool
lookup_public_function_symbol_from_elf(const environment * env,const string & path,const string & symname,vector<elf_symbol_sptr> & syms)15894 lookup_public_function_symbol_from_elf(const environment* env,
15895 const string& path,
15896 const string& symname,
15897 vector<elf_symbol_sptr>& syms)
15898 {
15899 if (elf_version(EV_CURRENT) == EV_NONE)
15900 return false;
15901
15902 int fd = open(path.c_str(), O_RDONLY);
15903 if (fd < 0)
15904 return false;
15905
15906 struct stat s;
15907 if (fstat(fd, &s))
15908 return false;
15909
15910 Elf* elf = elf_begin(fd, ELF_C_READ, 0);
15911 if (elf == 0)
15912 return false;
15913
15914 bool value = lookup_public_function_symbol_from_elf(env, elf, symname, syms);
15915 elf_end(elf);
15916 close(fd);
15917
15918 return value;
15919 }
15920
15921 /// Check if the underlying elf file refers to an alternate debug info
15922 /// file associated to it.
15923 ///
15924 /// Note that "alternate debug info sections" is a GNU extension as
15925 /// of DWARF4 and is described at
15926 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15927 ///
15928 /// @param ctxt the context used to read the elf file.
15929 ///
15930 /// @param alt_di the path to the alternate debug info file. This is
15931 /// set iff the function returns true.
15932 ///
15933 /// @return true if the ELF file refers to an alternate debug info
15934 /// file.
15935 bool
refers_to_alt_debug_info(const read_context & ctxt,string & alt_di_path)15936 refers_to_alt_debug_info(const read_context& ctxt,
15937 string& alt_di_path)
15938 {
15939 if (!ctxt.alt_debug_info_path().empty())
15940 {
15941 alt_di_path = ctxt.alt_debug_info_path();
15942 return true;
15943 }
15944 return false;
15945 }
15946
15947 /// Check if the underlying elf file has an alternate debug info file
15948 /// associated to it.
15949 ///
15950 /// Note that "alternate debug info sections" is a GNU extension as
15951 /// of DWARF4 and is described at
15952 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15953 ///
15954 /// @param ctxt the read_context to use to handle the underlying elf file.
15955 ///
15956 /// @param has_alt_di out parameter. This is set to true upon
15957 /// succesful completion of the function iff an alternate debug info
15958 /// file was found, false otherwise. Note thas this parameter is set
15959 /// only if the function returns STATUS_OK.
15960 ///
15961 /// @param alt_debug_info_path if the function returned STATUS_OK and
15962 /// if @p has been set to true, then this parameter contains the path
15963 /// to the alternate debug info file found.
15964 ///
15965 /// return STATUS_OK upon successful completion, false otherwise.
15966 status
has_alt_debug_info(read_context & ctxt,bool & has_alt_di,string & alt_debug_info_path)15967 has_alt_debug_info(read_context& ctxt,
15968 bool& has_alt_di,
15969 string& alt_debug_info_path)
15970 {
15971 // Load debug info from the elf path.
15972 if (!ctxt.load_debug_info())
15973 return STATUS_DEBUG_INFO_NOT_FOUND;
15974
15975 if (ctxt.alt_dwarf())
15976 {
15977 has_alt_di = true;
15978 alt_debug_info_path = ctxt.alt_debug_info_path();
15979 }
15980 else
15981 has_alt_di = false;
15982
15983 return STATUS_OK;
15984 }
15985
15986 /// Check if a given elf file has an alternate debug info file
15987 /// associated to it.
15988 ///
15989 /// Note that "alternate debug info sections" is a GNU extension as
15990 /// of DWARF4 and is described at
15991 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15992 ///
15993 /// @param elf_path the path to the elf file to consider.
15994 ///
15995 /// @param a pointer to the root directory under which the split debug info
15996 /// file associated to elf_path is to be found. This has to be NULL
15997 /// if the debug info file is not in a split file.
15998 ///
15999 /// @param has_alt_di out parameter. This is set to true upon
16000 /// succesful completion of the function iff an alternate debug info
16001 /// file was found, false otherwise. Note thas this parameter is set
16002 /// only if the function returns STATUS_OK.
16003 ///
16004 /// @param alt_debug_info_path if the function returned STATUS_OK and
16005 /// if @p has been set to true, then this parameter contains the path
16006 /// to the alternate debug info file found.
16007 ///
16008 /// return STATUS_OK upon successful completion, false otherwise.
16009 status
has_alt_debug_info(const string & elf_path,char ** debug_info_root_path,bool & has_alt_di,string & alt_debug_info_path)16010 has_alt_debug_info(const string& elf_path,
16011 char** debug_info_root_path,
16012 bool& has_alt_di,
16013 string& alt_debug_info_path)
16014 {
16015 vector<char**> di_roots;
16016 di_roots.push_back(debug_info_root_path);
16017 read_context_sptr c = create_read_context(elf_path, di_roots, 0);
16018 read_context& ctxt = *c;
16019
16020 // Load debug info from the elf path.
16021 if (!ctxt.load_debug_info())
16022 return STATUS_DEBUG_INFO_NOT_FOUND;
16023
16024 if (ctxt.alt_dwarf())
16025 {
16026 has_alt_di = true;
16027 alt_debug_info_path = ctxt.alt_debug_info_path();
16028 }
16029 else
16030 has_alt_di = false;
16031
16032 return STATUS_OK;
16033 }
16034
16035 /// Fetch the SONAME ELF property from an ELF binary file.
16036 ///
16037 /// @param path The path to the elf file to consider.
16038 ///
16039 /// @param soname out parameter. Set to the SONAME property of the
16040 /// binary file, if it present in the ELF file.
16041 ///
16042 /// return false if an error occured while looking for the SONAME
16043 /// property in the binary, true otherwise.
16044 bool
get_soname_of_elf_file(const string & path,string & soname)16045 get_soname_of_elf_file(const string& path, string &soname)
16046 {
16047
16048 int fd = open(path.c_str(), O_RDONLY);
16049 if (fd == -1)
16050 return false;
16051
16052 elf_version (EV_CURRENT);
16053 Elf* elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
16054
16055 GElf_Ehdr ehdr_mem;
16056 GElf_Ehdr* ehdr = gelf_getehdr (elf, &ehdr_mem);
16057 if (ehdr == NULL)
16058 return false;
16059
16060 for (int i = 0; i < ehdr->e_phnum; ++i)
16061 {
16062 GElf_Phdr phdr_mem;
16063 GElf_Phdr* phdr = gelf_getphdr (elf, i, &phdr_mem);
16064
16065 if (phdr != NULL && phdr->p_type == PT_DYNAMIC)
16066 {
16067 Elf_Scn* scn = gelf_offscn (elf, phdr->p_offset);
16068 GElf_Shdr shdr_mem;
16069 GElf_Shdr* shdr = gelf_getshdr (scn, &shdr_mem);
16070 int maxcnt = (shdr != NULL
16071 ? shdr->sh_size / shdr->sh_entsize : INT_MAX);
16072 ABG_ASSERT (shdr == NULL || shdr->sh_type == SHT_DYNAMIC);
16073 Elf_Data* data = elf_getdata (scn, NULL);
16074 if (data == NULL)
16075 break;
16076
16077 for (int cnt = 0; cnt < maxcnt; ++cnt)
16078 {
16079 GElf_Dyn dynmem;
16080 GElf_Dyn* dyn = gelf_getdyn (data, cnt, &dynmem);
16081 if (dyn == NULL)
16082 continue;
16083
16084 if (dyn->d_tag == DT_NULL)
16085 break;
16086
16087 if (dyn->d_tag != DT_SONAME)
16088 continue;
16089
16090 soname = elf_strptr (elf, shdr->sh_link, dyn->d_un.d_val);
16091 break;
16092 }
16093 break;
16094 }
16095 }
16096
16097 elf_end(elf);
16098 close(fd);
16099
16100 return true;
16101 }
16102
16103 /// Get the type of a given elf type.
16104 ///
16105 /// @param path the absolute path to the ELF file to analyzed.
16106 ///
16107 /// @param type the kind of the ELF file designated by @p path.
16108 ///
16109 /// @param out parameter. Is set to the type of ELF file of @p path.
16110 /// This parameter is set iff the function returns true.
16111 ///
16112 /// @return true iff the file could be opened and analyzed.
16113 bool
get_type_of_elf_file(const string & path,elf_type & type)16114 get_type_of_elf_file(const string& path, elf_type& type)
16115 {
16116 int fd = open(path.c_str(), O_RDONLY);
16117 if (fd == -1)
16118 return false;
16119
16120 elf_version (EV_CURRENT);
16121 Elf *elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
16122 type = elf_file_type(elf);
16123 elf_end(elf);
16124 close(fd);
16125
16126 return true;
16127 }
16128
16129 }// end namespace dwarf_reader
16130
16131 }// end namespace abigail
16132