• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2021 Oracle, Inc.
5 //
6 // Author: Jose E. Marchesi
7 
8 /// @file
9 ///
10 /// This file contains the definitions of the entry points to
11 /// de-serialize an instance of @ref abigail::corpus from a file in
12 /// ELF format, containing CTF information.
13 
14 #include "config.h"
15 
16 #include <fcntl.h> /* For open(3) */
17 #include <iostream>
18 #include <memory>
19 #include <map>
20 #include <algorithm>
21 
22 #include "ctf-api.h"
23 
24 #include "abg-internal.h"
25 #include "abg-ir-priv.h"
26 #include "abg-elf-helpers.h"
27 
28 // <headers defining libabigail's API go under here>
29 ABG_BEGIN_EXPORT_DECLARATIONS
30 
31 #include "abg-ctf-reader.h"
32 #include "abg-libxml-utils.h"
33 #include "abg-reader.h"
34 #include "abg-corpus.h"
35 #include "abg-symtab-reader.h"
36 #include "abg-tools-utils.h"
37 
38 ABG_END_EXPORT_DECLARATIONS
39 // </headers defining libabigail's API>
40 
41 namespace abigail
42 {
43 namespace ctf_reader
44 {
45 using std::dynamic_pointer_cast;
46 
47 class read_context
48 {
49 public:
50   /// The name of the ELF file from which the CTF archive got
51   /// extracted.
52   string filename;
53 
54   /// The IR environment.
55   ir::environment *ir_env;
56 
57   /// The CTF archive read from FILENAME.  If an archive couldn't
58   /// be read from the file then this is NULL.
59   ctf_archive_t *ctfa;
60 
61   /// A map associating CTF type ids with libabigail IR types.  This
62   /// is used to reuse already generated types.
63   unordered_map<string,type_base_sptr> types_map;
64 
65   /// A set associating unknown CTF type ids
66   std::set<ctf_id_t> unknown_types_set;
67 
68   /// libelf handler for the ELF file from which we read the CTF data,
69   /// and the corresponding file descriptor.
70   Elf *elf_handler;
71   int elf_fd;
72 
73   /// libelf handler for the ELF file from which we read the CTF data,
74   /// and the corresponding file descriptor found in external .debug file
75   Elf *elf_handler_dbg;
76   int elf_fd_dbg;
77 
78   /// The symtab read from the ELF file.
79   symtab_reader::symtab_sptr symtab;
80 
81   /// Raw contents of several sections from the ELF file.  These are
82   /// used by libctf.
83   ctf_sect_t ctf_sect;
84   ctf_sect_t symtab_sect;
85   ctf_sect_t strtab_sect;
86 
87   corpus_sptr			cur_corpus_;
88   corpus_group_sptr		cur_corpus_group_;
89   corpus::exported_decls_builder* exported_decls_builder_;
90   // The set of directories under which to look for debug info.
91   vector<char**>		debug_info_root_paths_;
92 
93   /// Setter of the exported decls builder object.
94   ///
95   /// Note that this @ref read_context is not responsible for the live
96   /// time of the exported_decls_builder object.  The corpus is.
97   ///
98   /// @param b the new builder.
99   void
exported_decls_builder(corpus::exported_decls_builder * b)100   exported_decls_builder(corpus::exported_decls_builder* b)
101   {exported_decls_builder_ = b;}
102 
103   /// Getter of the exported decls builder object.
104   ///
105   /// @return the exported decls builder.
106   corpus::exported_decls_builder*
exported_decls_builder()107   exported_decls_builder()
108   {return exported_decls_builder_;}
109 
110   /// If a given function decl is suitable for the set of exported
111   /// functions of the current corpus, this function adds it to that
112   /// set.
113   ///
114   /// @param fn the function to consider for inclusion into the set of
115   /// exported functions of the current corpus.
116   void
maybe_add_fn_to_exported_decls(function_decl * fn)117   maybe_add_fn_to_exported_decls(function_decl* fn)
118   {
119     if (fn)
120       if (corpus::exported_decls_builder* b = exported_decls_builder())
121 	b->maybe_add_fn_to_exported_fns(fn);
122   }
123 
124   /// If a given variable decl is suitable for the set of exported
125   /// variables of the current corpus, this variable adds it to that
126   /// set.
127   ///
128   /// @param fn the variable to consider for inclusion into the set of
129   /// exported variables of the current corpus.
130   void
maybe_add_var_to_exported_decls(var_decl * var)131   maybe_add_var_to_exported_decls(var_decl* var)
132   {
133     if (var)
134       if (corpus::exported_decls_builder* b = exported_decls_builder())
135 	b->maybe_add_var_to_exported_vars(var);
136   }
137 
138   /// Getter of the current corpus group being constructed.
139   ///
140   /// @return current the current corpus being constructed, if any, or
141   /// nil.
142   const corpus_group_sptr
current_corpus_group() const143   current_corpus_group() const
144   {return cur_corpus_group_;}
145 
146   /// Test if there is a corpus group being built.
147   ///
148   /// @return if there is a corpus group being built, false otherwise.
149   bool
has_corpus_group() const150   has_corpus_group() const
151   {return bool(cur_corpus_group_);}
152 
153   /// Return the main corpus from the current corpus group, if any.
154   ///
155   /// @return the main corpus of the current corpus group, if any, nil
156   /// if no corpus group is being constructed.
157   corpus_sptr
main_corpus_from_current_group()158   main_corpus_from_current_group()
159   {
160     if (cur_corpus_group_)
161       return cur_corpus_group_->get_main_corpus();
162     return corpus_sptr();
163   }
164 
165   /// Test if the current corpus being built is the main corpus of the
166   /// current corpus group.
167   ///
168   /// @return return true iff the current corpus being built is the
169   /// main corpus of the current corpus group.
170   bool
current_corpus_is_main_corpus_from_current_group()171   current_corpus_is_main_corpus_from_current_group()
172   {
173     corpus_sptr main_corpus = main_corpus_from_current_group();
174 
175     if (main_corpus && main_corpus.get() == cur_corpus_.get())
176       return true;
177 
178     return false;
179   }
180 
181   /// Return true if the current corpus is part of a corpus group
182   /// being built and if it's not the main corpus of the group.
183   ///
184   /// For instance, this would return true if we are loading a linux
185   /// kernel *module* that is part of the current corpus group that is
186   /// being built.  In this case, it means we should re-use types
187   /// coming from the "vmlinux" binary that is the main corpus of the
188   /// group.
189   ///
190   /// @return the corpus group the current corpus belongs to, if the
191   /// current corpus is part of a corpus group being built. Nil otherwise.
192   corpus_sptr
should_reuse_type_from_corpus_group()193   should_reuse_type_from_corpus_group()
194   {
195     if (has_corpus_group())
196       if (corpus_sptr main_corpus = main_corpus_from_current_group())
197 	if (!current_corpus_is_main_corpus_from_current_group())
198 	  return current_corpus_group();
199 
200     return corpus_sptr();
201   }
202 
203   /// Associate a given CTF type ID with a given libabigail IR type.
204   ///
205   /// @param dic the dictionnary the type belongs to.
206   ///
207   /// @param ctf_type the type ID.
208   ///
209   /// @param type the type to associate to the ID.
210   void
add_type(ctf_dict_t * dic,ctf_id_t ctf_type,type_base_sptr type)211   add_type(ctf_dict_t *dic, ctf_id_t ctf_type, type_base_sptr type)
212   {
213     string key = dic_type_key(dic, ctf_type);
214     types_map.insert(std::make_pair(key, type));
215   }
216 
217   /// Insert a given CTF unknown type ID.
218   ///
219   /// @param ctf_type the unknown type ID to be added.
220   void
add_unknown_type(ctf_id_t ctf_type)221   add_unknown_type(ctf_id_t ctf_type)
222   {
223     unknown_types_set.insert(ctf_type);
224   }
225 
226   /// Lookup a given CTF type ID in the types map.
227   ///
228   /// @param dic the dictionnary the type belongs to.
229   ///
230   /// @param ctf_type the type ID of the type to lookup.
231   type_base_sptr
lookup_type(ctf_dict_t * dic,ctf_id_t ctf_type)232   lookup_type(ctf_dict_t *dic, ctf_id_t ctf_type)
233   {
234     type_base_sptr result;
235     std::string key = dic_type_key(dic, ctf_type);
236 
237     auto search = types_map.find(key);
238     if (search != types_map.end())
239       result = search->second;
240 
241     return result;
242   }
243 
244   /// Lookup a given CTF unknown type ID in the unknown set.
245   /// @param ctf_type the unknown type ID to lookup.
246   bool
lookup_unknown_type(ctf_id_t ctf_type)247   lookup_unknown_type(ctf_id_t ctf_type)
248   { return unknown_types_set.find(ctf_type) != unknown_types_set.end(); }
249 
250   /// Canonicalize all the types stored in the types map.
251   void
canonicalize_all_types(void)252   canonicalize_all_types(void)
253   {
254     for (auto t = types_map.begin(); t != types_map.end(); t++)
255       canonicalize (t->second);
256   }
257 
258   /// Constructor.
259   ///
260   /// @param elf_path the path to the ELF file.
261   ///
262   /// @param debug_info_root_paths vector with the paths
263   /// to directories where .debug file is located.
264   ///
265   /// @param env the environment used by the current context.
266   /// This environment contains resources needed by the reader and by
267   /// the types and declarations that are to be created later.  Note
268   /// that ABI artifacts that are to be compared all need to be
269   /// created within the same environment.
read_context(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * env)270   read_context(const string& elf_path,
271                const vector<char**>& debug_info_root_paths,
272                ir::environment *env) :
273    ctfa(NULL)
274   {
275     initialize(elf_path, debug_info_root_paths, env);
276   }
277 
278   /// Initializer of read_context.
279   ///
280   /// @param elf_path the path to the elf file the context is to be
281   /// used for.
282   ///
283   /// @param debug_info_root_paths vector with the paths
284   /// to directories where .debug file is located.
285   ///
286   /// @param environment the environment used by the current context.
287   /// This environment contains resources needed by the reader and by
288   /// the types and declarations that are to be created later.  Note
289   /// that ABI artifacts that are to be compared all need to be
290   /// created within the same environment.
291   ///
292   /// Please also note that the life time of this environment object
293   /// must be greater than the life time of the resulting @ref
294   /// read_context the context uses resources that are allocated in
295   /// the environment.
296   void
initialize(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * env)297   initialize(const string& elf_path,
298              const vector<char**>& debug_info_root_paths,
299              ir::environment *env)
300   {
301     types_map.clear();
302     filename = elf_path;
303     ir_env = env;
304     elf_handler = NULL;
305     elf_handler_dbg = NULL;
306     elf_fd = -1;
307     elf_fd_dbg = -1;
308     symtab.reset();
309     cur_corpus_group_.reset();
310     exported_decls_builder_ = 0;
311     debug_info_root_paths_ = debug_info_root_paths;
312   }
313 
~read_context()314   ~read_context()
315   {
316     ctf_close(ctfa);
317   }
318 }; // end class read_context.
319 
320 /// Forward reference, needed because several of the process_ctf_*
321 /// functions below are indirectly recursive through this call.
322 static type_base_sptr lookup_type(read_context *ctxt,
323                                   corpus_sptr corp,
324                                   translation_unit_sptr tunit,
325                                   ctf_dict_t *ctf_dictionary,
326                                   ctf_id_t ctf_type);
327 
328 /// Build and return a typedef libabigail IR.
329 ///
330 /// @param ctxt the read context.
331 /// @param corp the libabigail IR corpus being constructed.
332 /// @param tunit the current IR translation unit.
333 /// @param ctf_dictionary the CTF dictionary being read.
334 /// @param ctf_type the CTF type ID of the source type.
335 ///
336 /// @return a shared pointer to the IR node for the typedef.
337 
338 static typedef_decl_sptr
process_ctf_typedef(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)339 process_ctf_typedef(read_context *ctxt,
340                     corpus_sptr corp,
341                     translation_unit_sptr tunit,
342                     ctf_dict_t *ctf_dictionary,
343                     ctf_id_t ctf_type)
344 {
345   typedef_decl_sptr result;
346 
347   ctf_id_t ctf_utype = ctf_type_reference(ctf_dictionary, ctf_type);
348   if (ctf_utype == CTF_ERR)
349     return result;
350 
351   const char *typedef_name = ctf_type_name_raw(ctf_dictionary, ctf_type);
352   if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
353     if (result = lookup_typedef_type(typedef_name, *corp))
354       return result;
355 
356   type_base_sptr utype = lookup_type(ctxt, corp, tunit,
357                                      ctf_dictionary, ctf_utype);
358 
359   if (!utype)
360     return result;
361 
362   result = dynamic_pointer_cast<typedef_decl>(ctxt->lookup_type(ctf_dictionary,
363                                                                 ctf_type));
364   if (result)
365     return result;
366 
367   result.reset(new typedef_decl(typedef_name, utype, location(),
368                                 typedef_name /* mangled_name */));
369 
370   /* If this typedef "names" an anonymous type, reflect this fact in
371      the underlying type.  In C enum, struct and union types can be
372      anonymous.  */
373   if (is_anonymous_type(utype)
374       && (is_enum_type(utype) || is_class_or_union_type(utype)))
375     {
376       decl_base_sptr decl = is_decl(utype);
377       ABG_ASSERT(decl);
378       decl->set_naming_typedef(result);
379     }
380 
381   if (result)
382     {
383       add_decl_to_scope(result, tunit->get_global_scope());
384       ctxt->add_type(ctf_dictionary, ctf_type, result);
385     }
386 
387   return result;
388 }
389 
390 /// Build and return an integer or float type declaration libabigail
391 /// IR.
392 ///
393 /// @param ctxt the read context.
394 /// @param corp the libabigail IR corpus being constructed.
395 /// @param ctf_dictionary the CTF dictionary being read.
396 /// @param ctf_type the CTF type ID of the source type.
397 ///
398 /// @return a shared pointer to the IR node for the type.
399 
400 static type_decl_sptr
process_ctf_base_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)401 process_ctf_base_type(read_context *ctxt,
402                       corpus_sptr corp,
403                       translation_unit_sptr tunit,
404                       ctf_dict_t *ctf_dictionary,
405                       ctf_id_t ctf_type)
406 {
407   type_decl_sptr result;
408 
409   ssize_t type_alignment = ctf_type_align(ctf_dictionary, ctf_type);
410   const char *type_name = ctf_type_name_raw(ctf_dictionary, ctf_type);
411 
412   /* Get the type encoding and extract some useful properties of
413      the type from it.  In case of any error, just ignore the
414      type.  */
415   ctf_encoding_t type_encoding;
416   if (ctf_type_encoding(ctf_dictionary,
417                          ctf_type,
418                          &type_encoding))
419     return result;
420 
421   /* Create the IR type corresponding to the CTF type.  */
422   if (type_encoding.cte_bits == 0
423       && type_encoding.cte_format == CTF_INT_SIGNED)
424     {
425       /* This is the `void' type.  */
426       type_base_sptr void_type = ctxt->ir_env->get_void_type();
427       decl_base_sptr type_declaration = get_type_declaration(void_type);
428       result = is_type_decl(type_declaration);
429       canonicalize(result);
430     }
431   else
432     {
433       if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
434         {
435           string normalized_type_name = type_name;
436           integral_type int_type;
437           if (parse_integral_type(type_name, int_type))
438             normalized_type_name = int_type.to_string();
439           if (result = lookup_basic_type(normalized_type_name, *corp))
440             return result;
441         }
442 
443       result = lookup_basic_type(type_name, *corp);
444       if (!result)
445         result.reset(new type_decl(ctxt->ir_env,
446                                    type_name,
447                                    type_encoding.cte_bits,
448                                    type_alignment * 8 /* in bits */,
449                                    location(),
450                                    type_name /* mangled_name */));
451 
452     }
453 
454   if (result)
455     {
456       add_decl_to_scope(result, tunit->get_global_scope());
457       ctxt->add_type(ctf_dictionary, ctf_type, result);
458     }
459 
460   return result;
461 }
462 
463 /// Build the IR node for a variadic parameter type.
464 ///
465 /// @param ctxt the read context to use.
466 ///
467 /// @return the variadic parameter type.
468 static decl_base_sptr
build_ir_node_for_variadic_parameter_type(read_context & ctxt,translation_unit_sptr tunit)469 build_ir_node_for_variadic_parameter_type(read_context &ctxt,
470                                           translation_unit_sptr tunit)
471 {
472 
473   ir::environment* env = ctxt.ir_env;
474   ABG_ASSERT(env);
475   type_base_sptr t = env->get_variadic_parameter_type();
476   decl_base_sptr type_declaration = get_type_declaration(t);
477   if (!has_scope(type_declaration))
478     add_decl_to_scope(type_declaration, tunit->get_global_scope());
479   canonicalize(t);
480   return type_declaration;
481 }
482 
483 /// Build and return a function type libabigail IR.
484 ///
485 /// @param ctxt the read context.
486 /// @param corp the libabigail IR corpus being constructed.
487 /// @param tunit the current IR translation unit.
488 /// @param ctf_dictionary the CTF dictionary being read.
489 /// @param ctf_type the CTF type ID of the source type.
490 ///
491 /// @return a shared pointer to the IR node for the function type.
492 
493 static function_type_sptr
process_ctf_function_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)494 process_ctf_function_type(read_context *ctxt,
495                           corpus_sptr corp,
496                           translation_unit_sptr tunit,
497                           ctf_dict_t *ctf_dictionary,
498                           ctf_id_t ctf_type)
499 {
500   function_type_sptr result;
501 
502   /* Fetch the function type info from the CTF type.  */
503   ctf_funcinfo_t funcinfo;
504   ctf_func_type_info(ctf_dictionary, ctf_type, &funcinfo);
505   int vararg_p = funcinfo.ctc_flags & CTF_FUNC_VARARG;
506 
507   /* Take care first of the result type.  */
508   ctf_id_t ctf_ret_type = funcinfo.ctc_return;
509   type_base_sptr ret_type = lookup_type(ctxt, corp, tunit,
510                                         ctf_dictionary, ctf_ret_type);
511   if (!ret_type)
512     return result;
513 
514   /* Now process the argument types.  */
515   int argc = funcinfo.ctc_argc;
516   std::vector<ctf_id_t> argv(argc);
517   if (static_cast<ctf_id_t>(ctf_func_type_args(ctf_dictionary, ctf_type,
518 					       argc, argv.data())) == CTF_ERR)
519     return result;
520 
521   function_decl::parameters function_parms;
522   for (int i = 0; i < argc; i++)
523     {
524       ctf_id_t ctf_arg_type = argv[i];
525       type_base_sptr arg_type = lookup_type(ctxt, corp, tunit,
526                                             ctf_dictionary, ctf_arg_type);
527       if (!arg_type)
528         return result;
529 
530       function_decl::parameter_sptr parm
531         (new function_decl::parameter(arg_type, "",
532                                       location(),
533                                       false,
534                                       false /* is_artificial */));
535       function_parms.push_back(parm);
536     }
537 
538   if (vararg_p)
539     {
540       type_base_sptr arg_type =
541        is_type(build_ir_node_for_variadic_parameter_type(*ctxt, tunit));
542 
543       function_decl::parameter_sptr parm
544        (new function_decl::parameter(arg_type, "",
545                                      location(),
546                                      true,
547                                      false /* is_artificial */));
548       function_parms.push_back(parm);
549     }
550 
551   result = dynamic_pointer_cast<function_type>(ctxt->lookup_type(ctf_dictionary,
552                                                                  ctf_type));
553   if (result)
554     return result;
555 
556   /* Ok now the function type itself.  */
557   result.reset(new function_type(ret_type,
558                                  function_parms,
559                                  tunit->get_address_size(),
560                                  ctf_type_align(ctf_dictionary, ctf_type)));
561 
562   if (result)
563     {
564       tunit->bind_function_type_life_time(result);
565       result->set_is_artificial(true);
566       decl_base_sptr function_type_decl = get_type_declaration(result);
567       add_decl_to_scope(function_type_decl, tunit->get_global_scope());
568       ctxt->add_type(ctf_dictionary, ctf_type, result);
569     }
570 
571   return result;
572 }
573 
574 /// Add member information to a IR struct or union type.
575 ///
576 /// @param ctxt the read context.
577 /// @param corp the libabigail IR corpus being constructed.
578 /// @param tunit the current IR translation unit.
579 /// @param ctf_dictionary the CTF dictionary being read.
580 /// @param ctf_type the CTF type ID of the source type.
581 /// @param sou the IR struct or union type to which add the members.
582 
583 static void
process_ctf_sou_members(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type,class_or_union_sptr sou)584 process_ctf_sou_members(read_context *ctxt,
585                         corpus_sptr corp,
586                         translation_unit_sptr tunit,
587                         ctf_dict_t *ctf_dictionary,
588                         ctf_id_t ctf_type,
589                         class_or_union_sptr sou)
590 {
591   ssize_t member_size;
592   ctf_next_t *member_next = NULL;
593   const char *member_name = NULL;
594   ctf_id_t member_ctf_type;
595 
596   while ((member_size = ctf_member_next(ctf_dictionary, ctf_type,
597                                         &member_next, &member_name,
598                                         &member_ctf_type,
599                                         0 /* flags */)) >= 0)
600     {
601       ctf_membinfo_t membinfo;
602 
603       if (static_cast<ctf_id_t>(ctf_member_info(ctf_dictionary,
604 						ctf_type,
605 						member_name,
606 						&membinfo)) == CTF_ERR)
607         return;
608 
609       /* Build the IR for the member's type.  */
610       type_base_sptr member_type = lookup_type(ctxt, corp, tunit,
611                                                ctf_dictionary,
612                                                member_ctf_type);
613       if (!member_type)
614         /* Ignore this member.  */
615         continue;
616 
617       /* Create a declaration IR node for the member and add it to the
618          struct type.  */
619       var_decl_sptr data_member_decl(new var_decl(member_name,
620                                                   member_type,
621                                                   location(),
622                                                   member_name));
623       sou->add_data_member(data_member_decl,
624                            public_access,
625                            true /* is_laid_out */,
626                            false /* is_static */,
627                            membinfo.ctm_offset);
628     }
629   if (ctf_errno(ctf_dictionary) != ECTF_NEXT_END)
630     fprintf(stderr, "ERROR from ctf_member_next\n");
631 }
632 
633 /// Create a declaration-only union or struct type and add it to the
634 /// IR.
635 ///
636 /// @param ctxt the read context.
637 /// @param tunit the current IR translation unit.
638 /// @param ctf_dictionary the CTF dictionary being read.
639 /// @param ctf_type the CTF type ID of the source type.
640 /// @return the resulting IR node created.
641 
642 static type_base_sptr
process_ctf_forward_type(read_context * ctxt,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)643 process_ctf_forward_type(read_context *ctxt,
644                          translation_unit_sptr tunit,
645                          ctf_dict_t *ctf_dictionary,
646                          ctf_id_t ctf_type)
647 {
648   decl_base_sptr result;
649   std::string type_name = ctf_type_name_raw(ctf_dictionary,
650                                             ctf_type);
651   bool type_is_anonymous = (type_name == "");
652   uint32_t kind = ctf_type_kind_forwarded (ctf_dictionary, ctf_type);
653 
654   if (kind == CTF_K_UNION)
655     {
656       union_decl_sptr
657        union_fwd(new union_decl(ctxt->ir_env,
658                                 type_name,
659                                 /*alignment=*/0,
660                                 location(),
661                                 decl_base::VISIBILITY_DEFAULT,
662                                 type_is_anonymous));
663       union_fwd->set_is_declaration_only(true);
664       result = union_fwd;
665     }
666   else
667     {
668       if (!type_is_anonymous)
669         if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
670           if (result = lookup_class_type(type_name, *corp))
671             return is_type(result);
672 
673       class_decl_sptr
674        struct_fwd(new class_decl(ctxt->ir_env, type_name,
675                                  /*alignment=*/0, /*size=*/0,
676                                  true /* is_struct */,
677                                  location(),
678                                  decl_base::VISIBILITY_DEFAULT,
679                                  type_is_anonymous));
680       struct_fwd->set_is_declaration_only(true);
681       result = struct_fwd;
682     }
683 
684   if (!result)
685     return is_type(result);
686 
687   add_decl_to_scope(result, tunit->get_global_scope());
688   ctxt->add_type(ctf_dictionary, ctf_type, is_type(result));
689 
690   return is_type(result);
691 }
692 
693 /// Build and return a struct type libabigail IR.
694 ///
695 /// @param ctxt the read context.
696 /// @param corp the libabigail IR corpus being constructed.
697 /// @param tunit the current IR translation unit.
698 /// @param ctf_dictionary the CTF dictionary being read.
699 /// @param ctf_type the CTF type ID of the source type.
700 ///
701 /// @return a shared pointer to the IR node for the struct type.
702 
703 static class_decl_sptr
process_ctf_struct_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)704 process_ctf_struct_type(read_context *ctxt,
705                         corpus_sptr corp,
706                         translation_unit_sptr tunit,
707                         ctf_dict_t *ctf_dictionary,
708                         ctf_id_t ctf_type)
709 {
710   class_decl_sptr result;
711   std::string struct_type_name = ctf_type_name_raw(ctf_dictionary,
712                                                    ctf_type);
713   bool struct_type_is_anonymous = (struct_type_name == "");
714 
715   if (!struct_type_is_anonymous)
716     if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
717       if (result = lookup_class_type(struct_type_name, *corp))
718         return result;
719 
720   /* The libabigail IR encodes C struct types in `class' IR nodes.  */
721   result.reset(new class_decl(ctxt->ir_env,
722                               struct_type_name,
723                               ctf_type_size(ctf_dictionary, ctf_type) * 8,
724                               ctf_type_align(ctf_dictionary, ctf_type) * 8,
725                               true /* is_struct */,
726                               location(),
727                               decl_base::VISIBILITY_DEFAULT,
728                               struct_type_is_anonymous));
729   if (!result)
730     return result;
731 
732   /* The C type system indirectly supports loops by the mean of
733      pointers to structs or unions.  Since some contained type can
734      refer to this struct, we have to make it available in the cache
735      at this point even if the members haven't been added to the IR
736      node yet.  */
737   add_decl_to_scope(result, tunit->get_global_scope());
738   ctxt->add_type(ctf_dictionary, ctf_type, result);
739 
740   /* Now add the struct members as specified in the CTF type description.
741      This is C, so named types can only be defined in the global
742      scope.  */
743   process_ctf_sou_members(ctxt, corp, tunit, ctf_dictionary, ctf_type,
744                           result);
745 
746   return result;
747 }
748 
749 /// Build and return an union type libabigail IR.
750 ///
751 /// @param ctxt the read context.
752 /// @param corp the libabigail IR corpus being constructed.
753 /// @param tunit the current IR translation unit.
754 /// @param ctf_dictionary the CTF dictionary being read.
755 /// @param ctf_type the CTF type ID of the source type.
756 ///
757 /// @return a shared pointer to the IR node for the union type.
758 
759 static union_decl_sptr
process_ctf_union_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)760 process_ctf_union_type(read_context *ctxt,
761                        corpus_sptr corp,
762                        translation_unit_sptr tunit,
763                        ctf_dict_t *ctf_dictionary,
764                        ctf_id_t ctf_type)
765 {
766   union_decl_sptr result;
767   std::string union_type_name = ctf_type_name_raw(ctf_dictionary,
768                                                    ctf_type);
769   bool union_type_is_anonymous = (union_type_name == "");
770 
771   if (!union_type_is_anonymous)
772     if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
773       if (result = lookup_union_type(union_type_name, *corp))
774         return result;
775 
776   /* Create the corresponding libabigail union IR node.  */
777   result.reset(new union_decl(ctxt->ir_env,
778                                 union_type_name,
779                                 ctf_type_size(ctf_dictionary, ctf_type) * 8,
780                                 location(),
781                                 decl_base::VISIBILITY_DEFAULT,
782                                 union_type_is_anonymous));
783   if (!result)
784     return result;
785 
786   /* The C type system indirectly supports loops by the mean of
787      pointers to structs or unions.  Since some contained type can
788      refer to this union, we have to make it available in the cache
789      at this point even if the members haven't been added to the IR
790      node yet.  */
791   add_decl_to_scope(result, tunit->get_global_scope());
792   ctxt->add_type(ctf_dictionary, ctf_type, result);
793 
794   /* Now add the union members as specified in the CTF type description.
795      This is C, so named types can only be defined in the global
796      scope.  */
797   process_ctf_sou_members(ctxt, corp, tunit, ctf_dictionary, ctf_type,
798                           result);
799 
800   return result;
801 }
802 
803 /// Build and return an array type libabigail IR.
804 ///
805 /// @param ctxt the read context.
806 /// @param corp the libabigail IR corpus being constructed.
807 /// @param tunit the current IR translation unit.
808 /// @param ctf_dictionary the CTF dictionary being read.
809 /// @param ctf_type the CTF type ID of the source type.
810 ///
811 /// @return a shared pointer to the IR node for the array type.
812 
813 static array_type_def_sptr
process_ctf_array_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)814 process_ctf_array_type(read_context *ctxt,
815                        corpus_sptr corp,
816                        translation_unit_sptr tunit,
817                        ctf_dict_t *ctf_dictionary,
818                        ctf_id_t ctf_type)
819 {
820   array_type_def_sptr result;
821   ctf_arinfo_t ctf_ainfo;
822   bool is_infinite = false;
823 
824   /* First, get the information about the CTF array.  */
825   if (static_cast<ctf_id_t>(ctf_array_info(ctf_dictionary,
826 					   ctf_type,
827 					   &ctf_ainfo)) == CTF_ERR)
828     return result;
829 
830   ctf_id_t ctf_element_type = ctf_ainfo.ctr_contents;
831   ctf_id_t ctf_index_type = ctf_ainfo.ctr_index;
832   uint64_t nelems = ctf_ainfo.ctr_nelems;
833 
834   /* Make sure the element type is generated.  */
835   type_base_sptr element_type = lookup_type(ctxt, corp, tunit,
836                                             ctf_dictionary,
837                                             ctf_element_type);
838   if (!element_type)
839     return result;
840 
841   /* Ditto for the index type.  */
842   type_base_sptr index_type = lookup_type(ctxt, corp, tunit,
843                                           ctf_dictionary,
844                                           ctf_index_type);
845   if (!index_type)
846     return result;
847 
848   result = dynamic_pointer_cast<array_type_def>(ctxt->lookup_type(ctf_dictionary,
849                                                                   ctf_type));
850   if (result)
851     return result;
852 
853   /* The number of elements of the array determines the IR subranges
854      type to build.  */
855   array_type_def::subranges_type subranges;
856   array_type_def::subrange_sptr subrange;
857   array_type_def::subrange_type::bound_value lower_bound;
858   array_type_def::subrange_type::bound_value upper_bound;
859 
860   lower_bound.set_unsigned(0); /* CTF supports C only.  */
861   upper_bound.set_unsigned(nelems > 0 ? nelems - 1 : 0U);
862 
863   /* for VLAs number of array elements is 0 */
864   if (upper_bound.get_unsigned_value() == 0)
865     is_infinite = true;
866 
867   subrange.reset(new array_type_def::subrange_type(ctxt->ir_env,
868                                                    "",
869                                                    lower_bound,
870                                                    upper_bound,
871                                                    index_type,
872                                                    location(),
873                                                    translation_unit::LANG_C));
874   if (!subrange)
875     return result;
876 
877   subrange->is_infinite(is_infinite);
878   add_decl_to_scope(subrange, tunit->get_global_scope());
879   canonicalize(subrange);
880   subranges.push_back(subrange);
881 
882   /* Finally build the IR for the array type and return it.  */
883   result.reset(new array_type_def(element_type, subranges, location()));
884   if (result)
885     {
886       decl_base_sptr array_type_decl = get_type_declaration(result);
887       add_decl_to_scope(array_type_decl, tunit->get_global_scope());
888       ctxt->add_type(ctf_dictionary, ctf_type, result);
889     }
890 
891   return result;
892 }
893 
894 /// Build and return a qualified type libabigail IR.
895 ///
896 /// @param ctxt the read context.
897 /// @param corp the libabigail IR corpus being constructed.
898 /// @param tunit the current IR translation unit.
899 /// @param ctf_dictionary the CTF dictionary being read.
900 /// @param ctf_type the CTF type ID of the source type.
901 
902 static type_base_sptr
process_ctf_qualified_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)903 process_ctf_qualified_type(read_context *ctxt,
904                            corpus_sptr corp,
905                            translation_unit_sptr tunit,
906                            ctf_dict_t *ctf_dictionary,
907                            ctf_id_t ctf_type)
908 {
909   type_base_sptr result;
910   int type_kind = ctf_type_kind(ctf_dictionary, ctf_type);
911   ctf_id_t ctf_utype = ctf_type_reference(ctf_dictionary, ctf_type);
912   type_base_sptr utype = lookup_type(ctxt, corp, tunit,
913                                      ctf_dictionary, ctf_utype);
914   if (!utype)
915     return result;
916 
917   result = dynamic_pointer_cast<type_base>(ctxt->lookup_type(ctf_dictionary,
918                                                              ctf_type));
919   if (result)
920     return result;
921 
922   qualified_type_def::CV qualifiers = qualified_type_def::CV_NONE;
923   if (type_kind == CTF_K_CONST)
924     qualifiers |= qualified_type_def::CV_CONST;
925   else if (type_kind == CTF_K_VOLATILE)
926     qualifiers |= qualified_type_def::CV_VOLATILE;
927   else if (type_kind == CTF_K_RESTRICT)
928     qualifiers |= qualified_type_def::CV_RESTRICT;
929   else
930     ABG_ASSERT_NOT_REACHED;
931 
932   // qualifiers are not be use in functions
933   if (is_function_type(utype))
934     return result;
935 
936   result.reset(new qualified_type_def(utype, qualifiers, location()));
937   if (result)
938     {
939       decl_base_sptr qualified_type_decl = get_type_declaration(result);
940       add_decl_to_scope(qualified_type_decl, tunit->get_global_scope());
941       ctxt->add_type(ctf_dictionary, ctf_type, result);
942     }
943 
944   return result;
945 }
946 
947 /// Build and return a pointer type libabigail IR.
948 ///
949 /// @param ctxt the read context.
950 /// @param corp the libabigail IR corpus being constructed.
951 /// @param tunit the current IR translation unit.
952 /// @param ctf_dictionary the CTF dictionary being read.
953 /// @param ctf_type the CTF type ID of the source type.
954 ///
955 /// @return a shared pointer to the IR node for the pointer type.
956 
957 static pointer_type_def_sptr
process_ctf_pointer_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)958 process_ctf_pointer_type(read_context *ctxt,
959                          corpus_sptr corp,
960                          translation_unit_sptr tunit,
961                          ctf_dict_t *ctf_dictionary,
962                          ctf_id_t ctf_type)
963 {
964   pointer_type_def_sptr result;
965   ctf_id_t ctf_target_type = ctf_type_reference(ctf_dictionary, ctf_type);
966   if (ctf_target_type == CTF_ERR)
967     return result;
968 
969   type_base_sptr target_type = lookup_type(ctxt, corp, tunit,
970                                            ctf_dictionary,
971                                            ctf_target_type);
972   if (!target_type)
973     return result;
974 
975   result = dynamic_pointer_cast<pointer_type_def>(ctxt->lookup_type(ctf_dictionary,
976                                                                     ctf_type));
977   if (result)
978     return result;
979 
980   result.reset(new pointer_type_def(target_type,
981                                       ctf_type_size(ctf_dictionary, ctf_type) * 8,
982                                       ctf_type_align(ctf_dictionary, ctf_type) * 8,
983                                       location()));
984   if (result)
985     {
986       add_decl_to_scope(result, tunit->get_global_scope());
987       ctxt->add_type(ctf_dictionary, ctf_type, result);
988     }
989 
990   return result;
991 }
992 
993 /// Build and return an enum type libabigail IR.
994 ///
995 /// @param ctxt the read context.
996 /// @param corp the libabigail IR corpus being constructed.
997 /// @param tunit the current IR translation unit.
998 /// @param ctf_dictionary the CTF dictionary being read.
999 /// @param ctf_type the CTF type ID of the source type.
1000 ///
1001 /// @return a shared pointer to the IR node for the enum type.
1002 
1003 static enum_type_decl_sptr
process_ctf_enum_type(read_context * ctxt,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)1004 process_ctf_enum_type(read_context *ctxt,
1005                       translation_unit_sptr tunit,
1006                       ctf_dict_t *ctf_dictionary,
1007                       ctf_id_t ctf_type)
1008 {
1009   enum_type_decl_sptr result;
1010   std::string enum_name = ctf_type_name_raw(ctf_dictionary, ctf_type);
1011 
1012   if (!enum_name.empty())
1013     if (corpus_sptr corp = ctxt->should_reuse_type_from_corpus_group())
1014       if (result = lookup_enum_type(enum_name, *corp))
1015         return result;
1016 
1017   /* Build a signed integral type for the type of the enumerators, aka
1018      the underlying type.  The size of the enumerators in bytes is
1019      specified in the CTF enumeration type.  */
1020   size_t utype_size_in_bits = ctf_type_size(ctf_dictionary, ctf_type) * 8;
1021   type_decl_sptr utype;
1022 
1023   utype.reset(new type_decl(ctxt->ir_env,
1024                               "",
1025                               utype_size_in_bits,
1026                               utype_size_in_bits,
1027                               location()));
1028   utype->set_is_anonymous(true);
1029   utype->set_is_artificial(true);
1030   if (!utype)
1031     return result;
1032   add_decl_to_scope(utype, tunit->get_global_scope());
1033   canonicalize(utype);
1034 
1035   /* Iterate over the enum entries.  */
1036   enum_type_decl::enumerators enms;
1037   ctf_next_t *enum_next = NULL;
1038   const char *ename;
1039   int evalue;
1040 
1041   while ((ename = ctf_enum_next(ctf_dictionary, ctf_type, &enum_next, &evalue)))
1042     enms.push_back(enum_type_decl::enumerator(ctxt->ir_env, ename, evalue));
1043   if (ctf_errno(ctf_dictionary) != ECTF_NEXT_END)
1044     {
1045       fprintf(stderr, "ERROR from ctf_enum_next\n");
1046       return result;
1047     }
1048 
1049   result.reset(new enum_type_decl(enum_name.c_str(), location(),
1050                                   utype, enms, enum_name.c_str()));
1051   if (result)
1052     {
1053       add_decl_to_scope(result, tunit->get_global_scope());
1054       ctxt->add_type(ctf_dictionary, ctf_type, result);
1055     }
1056 
1057   return result;
1058 }
1059 
1060 /// Add a new type declaration to the given libabigail IR corpus CORP.
1061 ///
1062 /// @param ctxt the read context.
1063 /// @param corp the libabigail IR corpus being constructed.
1064 /// @param tunit the current IR translation unit.
1065 /// @param ctf_dictionary the CTF dictionary being read.
1066 /// @param ctf_type the CTF type ID of the source type.
1067 ///
1068 /// Note that if @ref ctf_type can't reliably be translated to the IR
1069 /// then it is simply ignored.
1070 ///
1071 /// @return a shared pointer to the IR node for the type.
1072 
1073 static type_base_sptr
process_ctf_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)1074 process_ctf_type(read_context *ctxt,
1075                  corpus_sptr corp,
1076                  translation_unit_sptr tunit,
1077                  ctf_dict_t *ctf_dictionary,
1078                  ctf_id_t ctf_type)
1079 {
1080   int type_kind = ctf_type_kind(ctf_dictionary, ctf_type);
1081   type_base_sptr result;
1082 
1083   if (ctxt->lookup_unknown_type(ctf_type))
1084     return nullptr;
1085 
1086   if ((result = ctxt->lookup_type(ctf_dictionary, ctf_type)))
1087     return result;
1088 
1089   switch (type_kind)
1090     {
1091     case CTF_K_INTEGER:
1092     case CTF_K_FLOAT:
1093       {
1094         type_decl_sptr type_decl
1095           = process_ctf_base_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1096         result = is_type(type_decl);
1097         break;
1098       }
1099     case CTF_K_TYPEDEF:
1100       {
1101         typedef_decl_sptr typedef_decl
1102           = process_ctf_typedef(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1103         result = is_type(typedef_decl);
1104         break;
1105       }
1106     case CTF_K_POINTER:
1107       {
1108         pointer_type_def_sptr pointer_type
1109           = process_ctf_pointer_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1110         result = pointer_type;
1111         break;
1112       }
1113     case CTF_K_CONST:
1114     case CTF_K_VOLATILE:
1115     case CTF_K_RESTRICT:
1116       {
1117         type_base_sptr qualified_type
1118           = process_ctf_qualified_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1119         result = qualified_type;
1120         break;
1121       }
1122     case CTF_K_ARRAY:
1123       {
1124         array_type_def_sptr array_type
1125           = process_ctf_array_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1126         result = array_type;
1127         break;
1128       }
1129     case CTF_K_ENUM:
1130       {
1131         enum_type_decl_sptr enum_type
1132           = process_ctf_enum_type(ctxt, tunit, ctf_dictionary, ctf_type);
1133         result = enum_type;
1134         break;
1135       }
1136     case CTF_K_FUNCTION:
1137       {
1138         function_type_sptr function_type
1139           = process_ctf_function_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1140         result = function_type;
1141         break;
1142       }
1143     case CTF_K_STRUCT:
1144       {
1145         class_decl_sptr struct_decl
1146           = process_ctf_struct_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1147         result = is_type(struct_decl);
1148         break;
1149       }
1150     case CTF_K_FORWARD:
1151       {
1152         result = process_ctf_forward_type(ctxt, tunit,
1153 					  ctf_dictionary,
1154                                           ctf_type);
1155       }
1156       break;
1157     case CTF_K_UNION:
1158       {
1159         union_decl_sptr union_decl
1160           = process_ctf_union_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1161         result = is_type(union_decl);
1162         break;
1163       }
1164     case CTF_K_UNKNOWN:
1165       /* Unknown types are simply ignored.  */
1166     default:
1167       break;
1168     }
1169 
1170   if (!result)
1171     {
1172       fprintf(stderr, "NOT PROCESSED TYPE %lu\n", ctf_type);
1173       ctxt->add_unknown_type(ctf_type);
1174     }
1175 
1176   return result;
1177 }
1178 
1179 /// Given a CTF type id, lookup the corresponding libabigail IR type.
1180 /// If the IR type hasn't been generated yet, generate it.
1181 ///
1182 /// @param ctxt the read context.
1183 /// @param corp the libabigail IR corpus being constructed.
1184 /// @param tunit the current IR translation unit.
1185 /// @param ctf_dictionary the CTF dictionary being read.
1186 /// @param ctf_type the CTF type ID of the looked type.
1187 ///
1188 /// Note that if @ref ctf_type can't reliably be translated to the IR
1189 /// then a NULL shared pointer is returned.
1190 ///
1191 /// @return a shared pointer to the IR node for the type.
1192 
1193 static type_base_sptr
lookup_type(read_context * ctxt,corpus_sptr corp,translation_unit_sptr tunit,ctf_dict_t * ctf_dictionary,ctf_id_t ctf_type)1194 lookup_type(read_context *ctxt, corpus_sptr corp,
1195             translation_unit_sptr tunit, ctf_dict_t *ctf_dictionary,
1196             ctf_id_t ctf_type)
1197 {
1198   type_base_sptr result = ctxt->lookup_type(ctf_dictionary, ctf_type);
1199 
1200   if (!result)
1201     result = process_ctf_type(ctxt, corp, tunit, ctf_dictionary, ctf_type);
1202   return result;
1203 }
1204 
1205 /// Process a CTF archive and create libabigail IR for the types,
1206 /// variables and function declarations found in the archive, iterating
1207 /// over public symbols.  The IR is added to the given corpus.
1208 ///
1209 /// @param ctxt the read context containing the CTF archive to
1210 /// process.
1211 /// @param corp the IR corpus to which add the new contents.
1212 
1213 static void
process_ctf_archive(read_context * ctxt,corpus_sptr corp)1214 process_ctf_archive(read_context *ctxt, corpus_sptr corp)
1215 {
1216   /* We only have a translation unit.  */
1217   translation_unit_sptr ir_translation_unit =
1218     std::make_shared<translation_unit>(ctxt->ir_env, "", 64);
1219   ir_translation_unit->set_language(translation_unit::LANG_C);
1220   corp->add(ir_translation_unit);
1221 
1222   int ctf_err;
1223   ctf_dict_t *ctf_dict;
1224   const auto symtab = ctxt->symtab;
1225   symtab_reader::symtab_filter filter = symtab->make_filter();
1226   filter.set_public_symbols();
1227   std::string dict_name;
1228 
1229   if (corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN)
1230     {
1231       tools_utils::base_name(ctxt->filename, dict_name);
1232 
1233       if (dict_name != "vmlinux")
1234         // remove .ko suffix
1235         dict_name.erase(dict_name.length() - 3, 3);
1236 
1237       std::replace(dict_name.begin(), dict_name.end(), '-', '_');
1238     }
1239 
1240   if ((ctf_dict = ctf_dict_open(ctxt->ctfa,
1241                                 dict_name.empty() ? NULL : dict_name.c_str(),
1242                                 &ctf_err)) == NULL)
1243     {
1244       fprintf(stderr, "ERROR dictionary not found\n");
1245       abort();
1246     }
1247 
1248   for (const auto& symbol : symtab_reader::filtered_symtab(*symtab, filter))
1249     {
1250       std::string sym_name = symbol->get_name();
1251       ctf_id_t ctf_sym_type;
1252 
1253       ctf_sym_type = ctf_lookup_variable(ctf_dict, sym_name.c_str());
1254       if (ctf_sym_type == (ctf_id_t) -1
1255           && !(corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN))
1256         // lookup in function objects
1257         ctf_sym_type = ctf_lookup_by_symbol_name(ctf_dict, sym_name.c_str());
1258 
1259       if (ctf_sym_type == (ctf_id_t) -1)
1260         continue;
1261 
1262       if (ctf_type_kind(ctf_dict, ctf_sym_type) != CTF_K_FUNCTION)
1263         {
1264           const char *var_name = sym_name.c_str();
1265           type_base_sptr var_type = lookup_type(ctxt, corp, ir_translation_unit,
1266                                                 ctf_dict, ctf_sym_type);
1267           if (!var_type)
1268             /* Ignore variable if its type can't be sorted out.  */
1269             continue;
1270 
1271           var_decl_sptr var_declaration;
1272           var_declaration.reset(new var_decl(var_name,
1273                                              var_type,
1274                                              location(),
1275                                              var_name));
1276 
1277           var_declaration->set_symbol(symbol);
1278           add_decl_to_scope(var_declaration,
1279                             ir_translation_unit->get_global_scope());
1280           var_declaration->set_is_in_public_symbol_table(true);
1281           ctxt->maybe_add_var_to_exported_decls(var_declaration.get());
1282         }
1283       else
1284         {
1285           const char *func_name = sym_name.c_str();
1286           ctf_id_t ctf_sym = ctf_sym_type;
1287           type_base_sptr func_type = lookup_type(ctxt, corp, ir_translation_unit,
1288                                                  ctf_dict, ctf_sym);
1289           if (!func_type)
1290             /* Ignore function if its type can't be sorted out.  */
1291             continue;
1292 
1293           function_decl_sptr func_declaration;
1294           func_declaration.reset(new function_decl(func_name,
1295                                                    func_type,
1296                                                    0 /* is_inline */,
1297                                                    location()));
1298 
1299           func_declaration->set_symbol(symbol);
1300           add_decl_to_scope(func_declaration,
1301                             ir_translation_unit->get_global_scope());
1302           func_declaration->set_is_in_public_symbol_table(true);
1303           ctxt->maybe_add_fn_to_exported_decls(func_declaration.get());
1304         }
1305     }
1306 
1307   ctf_dict_close(ctf_dict);
1308   /* Canonicalize all the types generated above.  This must be
1309      done "a posteriori" because the processing of types may
1310      require other related types to not be already
1311      canonicalized.  */
1312   ctxt->canonicalize_all_types();
1313 }
1314 
1315 /// Open the ELF file described by the given read context.
1316 ///
1317 /// @param ctxt the read context.
1318 /// @return 0 if the ELF file can't be opened.
1319 /// @return 1 otherwise.
1320 
1321 static int
open_elf_handler(read_context * ctxt)1322 open_elf_handler(read_context *ctxt)
1323 {
1324   /* libelf requires to negotiate/set the version of ELF.  */
1325   if (elf_version(EV_CURRENT) == EV_NONE)
1326     return 0;
1327 
1328   /* Open an ELF handler.  */
1329   ctxt->elf_fd = open(ctxt->filename.c_str(), O_RDONLY);
1330   if (ctxt->elf_fd == -1)
1331     return 0;
1332 
1333   ctxt->elf_handler = elf_begin(ctxt->elf_fd, ELF_C_READ, NULL);
1334   if (ctxt->elf_handler == NULL)
1335     {
1336       fprintf(stderr, "cannot open %s: %s\n",
1337                ctxt->filename.c_str(), elf_errmsg(elf_errno()));
1338       close(ctxt->elf_fd);
1339       return 0;
1340     }
1341 
1342   return 1;
1343 }
1344 
1345 /// Close the ELF file described by the given read context.
1346 ///
1347 /// @param ctxt the read context.
1348 
1349 static void
close_elf_handler(read_context * ctxt)1350 close_elf_handler (read_context *ctxt)
1351 {
1352   /* Finish the ELF handler and close the associated file.  */
1353   elf_end(ctxt->elf_handler);
1354   close(ctxt->elf_fd);
1355 
1356   /* Finish the ELF handler and close the associated debug file.  */
1357   elf_end(ctxt->elf_handler_dbg);
1358   close(ctxt->elf_fd_dbg);
1359 }
1360 
1361 /// Fill a CTF section description with the information in a given ELF
1362 /// section.
1363 ///
1364 /// @param elf_section the ELF section from which to get.
1365 /// @param ctf_section the CTF section to fill with the raw data.
1366 
1367 static void
fill_ctf_section(Elf_Scn * elf_section,ctf_sect_t * ctf_section)1368 fill_ctf_section(Elf_Scn *elf_section, ctf_sect_t *ctf_section)
1369 {
1370   GElf_Shdr section_header_mem, *section_header;
1371   Elf_Data *section_data;
1372 
1373   section_header = gelf_getshdr(elf_section, &section_header_mem);
1374   section_data = elf_getdata(elf_section, 0);
1375 
1376   ABG_ASSERT (section_header != NULL);
1377   ABG_ASSERT (section_data != NULL);
1378 
1379   ctf_section->cts_name = ""; /* This is not actually used by libctf.  */
1380   ctf_section->cts_data = (char *) section_data->d_buf;
1381   ctf_section->cts_size = section_data->d_size;
1382   ctf_section->cts_entsize = section_header->sh_entsize;
1383 }
1384 
1385 /// Find a CTF section and debug symbols in a given ELF using
1386 /// .gnu_debuglink section.
1387 ///
1388 /// @param ctxt the read context.
1389 /// @param ctf_dbg_section the CTF section to fill with the raw data.
1390 static void
find_alt_debuginfo(read_context * ctxt,Elf_Scn ** ctf_dbg_scn)1391 find_alt_debuginfo(read_context *ctxt, Elf_Scn **ctf_dbg_scn)
1392 {
1393   std::string name;
1394   Elf_Data *data;
1395 
1396   Elf_Scn *section = elf_helpers::find_section
1397     (ctxt->elf_handler, ".gnu_debuglink", SHT_PROGBITS);
1398 
1399   if (section
1400       && (data = elf_getdata(section, NULL))
1401       && data->d_size != 0)
1402     name = (char *) data->d_buf;
1403 
1404   int fd = -1;
1405   Elf *hdlr = NULL;
1406   *ctf_dbg_scn = NULL;
1407 
1408   if (!name.empty())
1409     for (vector<char**>::const_iterator i = ctxt->debug_info_root_paths_.begin();
1410          i != ctxt->debug_info_root_paths_.end();
1411          ++i)
1412       {
1413         std::string file_path;
1414         if (!tools_utils::find_file_under_dir(**i, name, file_path))
1415           continue;
1416 
1417         if ((fd = open(file_path.c_str(), O_RDONLY)) == -1)
1418           continue;
1419 
1420         if ((hdlr = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
1421           {
1422             close(fd);
1423             continue;
1424           }
1425 
1426         ctxt->symtab =
1427           symtab_reader::symtab::load(hdlr, ctxt->ir_env, nullptr);
1428 
1429         // unlikely .ctf was designed to be present in stripped file
1430         *ctf_dbg_scn =
1431           elf_helpers::find_section(hdlr, ".ctf", SHT_PROGBITS);
1432           break;
1433 
1434         elf_end(hdlr);
1435         close(fd);
1436       }
1437 
1438   // If we don't have a symbol table, use current one in ELF file
1439   if (!ctxt->symtab)
1440     ctxt->symtab =
1441      symtab_reader::symtab::load(ctxt->elf_handler, ctxt->ir_env, nullptr);
1442 
1443   ctxt->elf_handler_dbg = hdlr;
1444   ctxt->elf_fd_dbg = fd;
1445 }
1446 
1447 /// Slurp certain information from the ELF file described by a given
1448 /// read context and install it in a libabigail corpus.
1449 ///
1450 /// @param ctxt the read context
1451 /// @param corp the libabigail corpus in which to install the info.
1452 /// @param status the resulting status flags.
1453 static void
slurp_elf_info(read_context * ctxt,corpus_sptr corp,elf_reader::status & status)1454 slurp_elf_info(read_context *ctxt,
1455                corpus_sptr corp,
1456                elf_reader::status& status)
1457 {
1458   /* Set the ELF architecture.  */
1459   GElf_Ehdr *ehdr, eh_mem;
1460   Elf_Scn *symtab_scn;
1461   Elf_Scn *ctf_scn, *ctf_dbg_scn;
1462   Elf_Scn *strtab_scn;
1463 
1464   if (!(ehdr = gelf_getehdr(ctxt->elf_handler, &eh_mem)))
1465       return;
1466 
1467   corp->set_architecture_name(elf_helpers::e_machine_to_string(ehdr->e_machine));
1468 
1469   find_alt_debuginfo(ctxt, &ctf_dbg_scn);
1470   ABG_ASSERT(ctxt->symtab);
1471   corp->set_symtab(ctxt->symtab);
1472 
1473   if (corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN)
1474     {
1475       status |= elf_reader::STATUS_OK;
1476       return;
1477     }
1478 
1479   /* Get the raw ELF section contents for libctf.  */
1480   const char *ctf_name = ".ctf";
1481   ctf_scn = elf_helpers::find_section_by_name(ctxt->elf_handler, ctf_name);
1482   if (ctf_scn == NULL)
1483     {
1484       if (ctf_dbg_scn)
1485         ctf_scn = ctf_dbg_scn;
1486       else
1487         {
1488           status |= elf_reader::STATUS_DEBUG_INFO_NOT_FOUND;
1489           return;
1490         }
1491     }
1492 
1493   // ET_{EXEC,DYN} needs .dyn{sym,str} in ctf_arc_bufopen
1494   const char *symtab_name = ".dynsym";
1495   const char *strtab_name = ".dynstr";
1496 
1497   if (ehdr->e_type == ET_REL)
1498     {
1499       symtab_name = ".symtab";
1500       strtab_name = ".strtab";
1501     }
1502 
1503   symtab_scn = elf_helpers::find_section_by_name(ctxt->elf_handler, symtab_name);
1504   strtab_scn = elf_helpers::find_section_by_name(ctxt->elf_handler, strtab_name);
1505   if (symtab_scn == NULL || strtab_scn == NULL)
1506     {
1507       status |= elf_reader::STATUS_NO_SYMBOLS_FOUND;
1508       return;
1509     }
1510 
1511   fill_ctf_section(ctf_scn, &ctxt->ctf_sect);
1512   fill_ctf_section(symtab_scn, &ctxt->symtab_sect);
1513   fill_ctf_section(strtab_scn, &ctxt->strtab_sect);
1514 
1515   status |= elf_reader::STATUS_OK;
1516 }
1517 
1518 /// Create and return a new read context to process CTF information
1519 /// from a given ELF file.
1520 ///
1521 /// @param elf_path the patch of some ELF file.
1522 /// @param env a libabigail IR environment.
1523 
1524 read_context_sptr
create_read_context(const std::string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * env)1525 create_read_context(const std::string& elf_path,
1526                     const vector<char**>& debug_info_root_paths,
1527                     ir::environment *env)
1528 {
1529   read_context_sptr result(new read_context(elf_path,
1530                                             debug_info_root_paths,
1531                                             env));
1532   return result;
1533 }
1534 
1535 /// Read the CTF information from some source described by a given
1536 /// read context and process it to create a libabigail IR corpus.
1537 /// Store the corpus in the same read context.
1538 ///
1539 /// @param ctxt the read context to use.
1540 ///
1541 /// @param status the resulting status of the corpus read.
1542 ///
1543 /// @return a shared pointer to the read corpus.
1544 
1545 corpus_sptr
read_corpus(read_context * ctxt,elf_reader::status & status)1546 read_corpus(read_context *ctxt, elf_reader::status &status)
1547 {
1548   corpus_sptr corp
1549     = std::make_shared<corpus>(ctxt->ir_env, ctxt->filename);
1550   ctxt->cur_corpus_ = corp;
1551   status = elf_reader::STATUS_UNKNOWN;
1552 
1553   /* Open the ELF file.  */
1554   if (!open_elf_handler(ctxt))
1555       return corp;
1556 
1557   bool is_linux_kernel = elf_helpers::is_linux_kernel(ctxt->elf_handler);
1558   corpus::origin origin = corpus::CTF_ORIGIN;
1559 
1560   if (is_linux_kernel)
1561     origin |= corpus::LINUX_KERNEL_BINARY_ORIGIN;
1562   corp->set_origin(origin);
1563 
1564   if (ctxt->cur_corpus_group_)
1565     ctxt->cur_corpus_group_->add_corpus(ctxt->cur_corpus_);
1566 
1567   slurp_elf_info(ctxt, corp, status);
1568   if (!is_linux_kernel
1569       && ((status & elf_reader::STATUS_DEBUG_INFO_NOT_FOUND) |
1570           (status & elf_reader::STATUS_NO_SYMBOLS_FOUND)))
1571       return corp;
1572 
1573   // Set the set of exported declaration that are defined.
1574   ctxt->exported_decls_builder
1575    (ctxt->cur_corpus_->get_exported_decls_builder().get());
1576 
1577   int errp;
1578   if (corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN)
1579     {
1580       std::string filename;
1581       if (tools_utils::base_name(ctxt->filename, filename)
1582           && filename == "vmlinux")
1583         {
1584           std::string vmlinux_ctfa_path = ctxt->filename + ".ctfa";
1585           ctxt->ctfa = ctf_arc_open(vmlinux_ctfa_path.c_str(), &errp);
1586         }
1587     }
1588   else
1589     /* Build the ctfa from the contents of the relevant ELF sections,
1590        and process the CTF archive in the read context, if any.
1591        Information about the types, variables, functions, etc contained
1592        in the archive are added to the given corpus.  */
1593     ctxt->ctfa = ctf_arc_bufopen(&ctxt->ctf_sect, &ctxt->symtab_sect,
1594                                  &ctxt->strtab_sect, &errp);
1595 
1596   ctxt->ir_env->canonicalization_is_done(false);
1597   if (ctxt->ctfa == NULL)
1598     status |= elf_reader::STATUS_DEBUG_INFO_NOT_FOUND;
1599   else
1600     {
1601       process_ctf_archive(ctxt, corp);
1602       ctxt->cur_corpus_->sort_functions();
1603       ctxt->cur_corpus_->sort_variables();
1604     }
1605 
1606   ctxt->ir_env->canonicalization_is_done(true);
1607 
1608   /* Cleanup and return.  */
1609   close_elf_handler(ctxt);
1610   return corp;
1611 }
1612 
1613 /// Read the CTF information from some source described by a given
1614 /// read context and process it to create a libabigail IR corpus.
1615 /// Store the corpus in the same read context.
1616 ///
1617 /// @param ctxt the read context to use.
1618 ///
1619 /// @param status the resulting status of the corpus read.
1620 ///
1621 /// @return a shared pointer to the read corpus.
1622 
1623 corpus_sptr
read_corpus(const read_context_sptr & ctxt,elf_reader::status & status)1624 read_corpus(const read_context_sptr &ctxt, elf_reader::status &status)
1625 {return read_corpus(ctxt.get(), status);}
1626 
1627 /// Set the @ref corpus_group being created to the current read context.
1628 ///
1629 /// @param ctxt the read_context to consider.
1630 ///
1631 /// @param group the @ref corpus_group to set.
1632 void
set_read_context_corpus_group(read_context & ctxt,corpus_group_sptr & group)1633 set_read_context_corpus_group(read_context& ctxt,
1634                               corpus_group_sptr& group)
1635 {
1636   ctxt.cur_corpus_group_ = group;
1637 }
1638 
1639 /// Read a corpus and add it to a given @ref corpus_group.
1640 ///
1641 /// @param ctxt the reading context to consider.
1642 ///
1643 /// @param group the @ref corpus_group to add the new corpus to.
1644 ///
1645 /// @param status output parameter. The status of the read.  It is set
1646 /// by this function upon its completion.
1647 corpus_sptr
read_and_add_corpus_to_group_from_elf(read_context * ctxt,corpus_group & group,elf_reader::status & status)1648 read_and_add_corpus_to_group_from_elf(read_context* ctxt,
1649                                       corpus_group& group,
1650                                       elf_reader::status& status)
1651 {
1652   corpus_sptr result;
1653   corpus_sptr corp = read_corpus(ctxt, status);
1654   if (status & elf_reader::STATUS_OK)
1655     {
1656       if (!corp->get_group())
1657         group.add_corpus(corp);
1658       result = corp;
1659     }
1660 
1661   return result;
1662 }
1663 
1664 /// Re-initialize a read_context so that it can re-used to read
1665 /// another binary.
1666 ///
1667 /// @param ctxt the context to re-initialize.
1668 ///
1669 /// @param elf_path the path to the elf file the context is to be used
1670 /// for.
1671 ///
1672 /// @param environment the environment used by the current context.
1673 /// This environment contains resources needed by the reader and by
1674 /// the types and declarations that are to be created later.  Note
1675 /// that ABI artifacts that are to be compared all need to be created
1676 /// within the same environment.
1677 ///
1678 /// Please also note that the life time of this environment object
1679 /// must be greater than the life time of the resulting @ref
1680 /// read_context the context uses resources that are allocated in the
1681 /// environment.
1682 void
reset_read_context(read_context_sptr & ctxt,const std::string & elf_path,const vector<char ** > & debug_info_root_path,ir::environment * environment)1683 reset_read_context(read_context_sptr	&ctxt,
1684                    const std::string&	 elf_path,
1685                    const vector<char**>& debug_info_root_path,
1686                    ir::environment*	 environment)
1687 {
1688   if (ctxt)
1689     ctxt->initialize(elf_path, debug_info_root_path, environment);
1690 }
1691 
1692 /// Returns a key to be use in types_map dict conformed by
1693 /// dictionary id and the CTF type id for a given type.
1694 ///
1695 /// CTF id types are unique by child dictionary, but CTF id
1696 /// types in parent dictionary are unique across the all
1697 /// dictionaries in the CTF archive, to differentiate
1698 /// one each other this member function relies in
1699 /// ctf_type_isparent function.
1700 ///
1701 /// @param dic the pointer to CTF dictionary where the @p type
1702 /// was found.
1703 ///
1704 /// @param type the id for given CTF type.
1705 std::string
dic_type_key(ctf_dict_t * dic,ctf_id_t ctf_type)1706 dic_type_key(ctf_dict_t *dic, ctf_id_t ctf_type)
1707 {
1708   std::stringstream key;
1709 
1710   if (ctf_type_isparent (dic, ctf_type))
1711     key << std::hex << ctf_type;
1712   else
1713     key << std::hex << ctf_type << '-' << ctf_cuname(dic);
1714   return key.str();
1715 }
1716 
1717 } // End of namespace ctf_reader
1718 } // End of namespace abigail
1719