• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2020-2023 Google, Inc.
5 //
6 // Author: Matthias Maennich
7 
8 /// @file
9 ///
10 /// This contains the declarations for the symtab reader.
11 
12 #ifndef __ABG_SYMTAB_READER_H__
13 #define __ABG_SYMTAB_READER_H__
14 
15 #include <gelf.h>
16 
17 #include <functional>
18 #include <iterator>
19 #include <memory>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "abg-cxx-compat.h"  // for abg_compat::optional
24 #include "abg-ir.h"
25 
26 namespace abigail
27 {
28 namespace symtab_reader
29 {
30 
31 /// The symtab filter is the object passed to the symtab object in order to
32 /// iterate over the symbols in the symtab while applying filters.
33 ///
34 /// The general idea is that it consists of a set of optionally enforced flags,
35 /// such as 'functions' or 'variables'. If not set, those are not filtered for,
36 /// neither inclusive nor exclusive. If set they are all ANDed together.
37 class symtab_filter
38 {
39 public:
40   // Default constructor disabling all features.
symtab_filter()41   symtab_filter() {}
42 
43   bool
44   matches(const elf_symbol& symbol) const;
45 
46   /// Enable or disable function filtering
47   ///
48   /// @param new_value whether to filter for functions
49   void
50   set_functions(bool new_value = true)
51   {functions_ = new_value;};
52 
53   /// Enable or disable variable filtering
54   ///
55   /// @param new_value whether to filter for variables
56   void
57   set_variables(bool new_value = true)
58   {variables_ = new_value;};
59 
60   /// Enable or disable public symbol filtering
61   ///
62   /// @param new_value whether to filter for public symbols
63   void
64   set_public_symbols(bool new_value = true)
65   {public_symbols_ = new_value;};
66 
67   /// Enable or disable undefined symbol filtering
68   ///
69   /// @param new_value whether to filter for undefined symbols
70   void
71   set_undefined_symbols(bool new_value = true)
72   {undefined_symbols_ = new_value;};
73 
74   /// Enable or disable kernel symbol filtering
75   ///
76   /// @param new_value whether to filter for kernel symbols
77   void
78   set_kernel_symbols(bool new_value = true)
79   {kernel_symbols_ = new_value;};
80 
81 private:
82   // The symbol is a function (FUNC)
83   abg_compat::optional<bool> functions_;
84 
85   // The symbol is a variables (OBJECT)
86   abg_compat::optional<bool> variables_;
87 
88   // The symbol is publicly accessible (global/weak with default/protected
89   // visibility)
90   abg_compat::optional<bool> public_symbols_;
91 
92   // The symbols is not defined (declared)
93   abg_compat::optional<bool> undefined_symbols_;
94 
95   // The symbol is listed in the ksymtab (for Linux Kernel binaries).
96   abg_compat::optional<bool> kernel_symbols_;
97 };
98 
99 /// Base iterator for our custom iterator based on whatever the const_iterator
100 /// is for a vector of symbols.
101 /// As of writing this, std::vector<elf_symbol_sptr>::const_iterator.
102 using base_iterator = elf_symbols::const_iterator;
103 
104 /// An iterator to walk a vector of elf_symbols filtered by symtab_filter.
105 ///
106 /// The implementation inherits all properties from the vector's
107 /// const_iterator, but intercepts where necessary to allow effective
108 /// filtering. This makes it a STL compatible iterator for general purpose
109 /// usage.
110 class symtab_iterator : public base_iterator
111 {
112 public:
113   using value_type = base_iterator::value_type;
114   using reference = base_iterator::reference;
115   using pointer = base_iterator::pointer;
116   using difference_type = base_iterator::difference_type;
117   using iterator_category = std::forward_iterator_tag;
118   using iterator_concept = std::forward_iterator_tag;
119 
120   /// Construct the iterator based on a pair of underlying iterators and a
121   /// symtab_filter object. Immediately fast forward to the next element that
122   /// matches the criteria (if any).
123   ///
124   /// @param begin the underlying begin iterator
125   ///
126   /// @param begin the underlying end iterator
127   ///
128   /// @param filter the symtab_filter to apply
129   symtab_iterator(base_iterator	       begin,
130 		  base_iterator	       end,
131 		  const symtab_filter& filter = symtab_filter())
base_iterator(begin)132     : base_iterator(begin), end_(end), filter_(filter)
133   {skip_to_next();}
134 
135   /// Pre-increment operator to advance to the next matching element.
136   ///
137   /// @return itself after incrementing
138   symtab_iterator&
139   operator++()
140   {
141     base_iterator::operator++();
142     skip_to_next();
143     return *this;
144   }
145 
146   /// Post-increment operator to advance to the next matching element.
147   ///
148   /// @return a copy of the iterator before incrementing
149   symtab_iterator
150   operator++(int)
151   {
152     symtab_iterator result(*this);
153     ++(*this);
154     return result;
155   }
156 
157 private:
158   /// The end of the underlying iterator.
159   const base_iterator end_;
160 
161   /// The symtab_filter used to determine when to advance.
162   const symtab_filter& filter_;
163 
164   /// Skip to the next element that matches the filter criteria (if any). Hold
165   /// off when reaching the end of the underlying iterator.
166   void
skip_to_next()167   skip_to_next()
168   {
169     while (*this != end_ && !filter_.matches(***this))
170       ++(*this);
171   }
172 };
173 
174 /// Convenience declaration of a unique_ptr<symtab>
175 class symtab;
176 using symtab_ptr = std::unique_ptr<symtab>;
177 
178 /// symtab is the actual data container of the symtab_reader implementation.
179 ///
180 /// The symtab is instantiated either via an Elf handle (from binary) or from a
181 /// set of existing symbol maps (usually when instantiated from XML). It will
182 /// then discover the symtab, possibly the ksymtab (for Linux Kernel binaries)
183 /// and setup the data containers and lookup maps for later perusal.
184 ///
185 /// The symtab is supposed to be used in a const context as all information is
186 /// already computed at construction time. Symbols are stored sorted to allow
187 /// deterministic reading of the entries.
188 ///
189 /// An example use of the symtab class is
190 ///
191 /// const auto symtab    = symtab::load(elf_handle, env);
192 /// symtab_filter filter = symtab->make_filter();
193 /// filter.set_public_symbols();
194 /// filter.set_functions();
195 ///
196 /// for (const auto& symbol : filtered_symtab(*symtab, filter))
197 ///   {
198 ///     std::cout << symbol->get_name() << "\n";
199 ///   }
200 ///
201 /// This uses the filtered_symtab proxy object to capture the filter.
202 class symtab
203 {
204 public:
205   using symbol_predicate = std::function<bool(const elf_symbol_sptr&)>;
206 
207   /// Indicate whether any (kernel) symbols have been seen at construction.
208   ///
209   /// @return true if there are symbols detected earlier.
210   bool
has_symbols()211   has_symbols() const
212   {return is_kernel_binary_ ? has_ksymtab_entries_ : !symbols_.empty();}
213 
214   symtab_filter
215   make_filter() const;
216 
217   /// The (only) iterator type we offer is a const_iterator implemented by the
218   /// symtab_iterator.
219   using const_iterator = symtab_iterator;
220 
221   /// Obtain an iterator to the beginning of the symtab according to the filter
222   /// criteria. Whenever this iterator advances, it skips elements that do not
223   /// match the filter criteria.
224   ///
225   /// @param filter the symtab_filter to match symbols against
226   ///
227   /// @return a filtering const_iterator of the underlying type
228   const_iterator
begin(const symtab_filter & filter)229   begin(const symtab_filter& filter) const
230   {return symtab_iterator(symbols_.begin(), symbols_.end(), filter);}
231 
232   /// Obtain an iterator to the end of the symtab.
233   ///
234   /// @return an end iterator
235   const_iterator
end()236   end() const
237   {return symtab_iterator(symbols_.end(), symbols_.end());}
238 
239   const elf_symbols&
240   lookup_symbol(const std::string& name) const;
241 
242   const elf_symbol_sptr&
243   lookup_symbol(GElf_Addr symbol_addr) const;
244 
245   static symtab_ptr
246   load(Elf*		elf_handle,
247        const ir::environment& env,
248        symbol_predicate is_suppressed = NULL);
249 
250   static symtab_ptr
251   load(string_elf_symbols_map_sptr function_symbol_map,
252        string_elf_symbols_map_sptr variables_symbol_map);
253 
254   void
255   update_main_symbol(GElf_Addr addr, const std::string& name);
256 
257 private:
258   /// Default constructor. Private to enforce creation by factory methods.
259   symtab();
260 
261   /// The vector of symbols we discovered.
262   elf_symbols symbols_;
263 
264   /// Whether this is a Linux Kernel binary
265   bool is_kernel_binary_;
266 
267   /// Whether this kernel_binary has ksymtab entries
268   ///
269   /// A kernel module might not have a ksymtab if it does not export any
270   /// symbols. In order to quickly decide whether the symbol table is empty, we
271   /// remember whether we ever saw ksymtab entries.
272   bool has_ksymtab_entries_;
273 
274   /// Lookup map name->symbol(s)
275   using name_symbol_map_type =
276       std::unordered_map<std::string, std::vector<elf_symbol_sptr>>;
277   name_symbol_map_type name_symbol_map_;
278 
279   /// Lookup map addr->symbol
280   using addr_symbol_map_type = std::unordered_map<GElf_Addr, elf_symbol_sptr>;
281   addr_symbol_map_type addr_symbol_map_;
282 
283   /// Lookup map function entry address -> symbol
284   addr_symbol_map_type entry_addr_symbol_map_;
285 
286   bool
287   load_(Elf* elf_handle,
288 	const ir::environment& env,
289 	symbol_predicate is_suppressed);
290 
291   bool
292   load_(string_elf_symbols_map_sptr function_symbol_map,
293        string_elf_symbols_map_sptr variables_symbol_map);
294 
295   GElf_Addr
296   setup_symbol_lookup_tables(Elf* elf_handle,
297 			     GElf_Sym* elf_symbol,
298 			     const elf_symbol_sptr& symbol_sptr);
299 
300   void
301   update_function_entry_address_symbol_map(Elf*	     elf_handle,
302 					   GElf_Sym* native_symbol,
303 					   const elf_symbol_sptr& symbol_sptr);
304 
305   void
306   add_alternative_address_lookups(Elf* elf_handle);
307 };
308 
309 /// Helper class to allow range-for loops on symtabs for C++11 and later code.
310 /// It serves as a proxy for the symtab iterator and provides a begin() method
311 /// without arguments, as required for range-for loops (and possibly other
312 /// iterator based transformations).
313 ///
314 /// Example usage:
315 ///
316 ///   for (const auto& symbol : filtered_symtab(tab, filter))
317 ///     {
318 ///       std::cout << symbol->get_name() << "\n";
319 ///     }
320 ///
321 class filtered_symtab
322 {
323   const symtab&	      tab_;
324   const symtab_filter filter_;
325 
326 public:
327   /// Construct the proxy object keeping references to the underlying symtab
328   /// and the filter object.
filtered_symtab(const symtab & tab,const symtab_filter & filter)329   filtered_symtab(const symtab& tab, const symtab_filter& filter)
330     : tab_(tab), filter_(filter)
331   {}
332 
333   /// Pass through symtab.begin(), but also pass on the filter.
334   symtab::const_iterator
begin()335   begin() const
336   {return tab_.begin(filter_);}
337 
338   /// Pass through symtab.end().
339   symtab::const_iterator
end()340   end() const
341   {return tab_.end();}
342 };
343 
344 } // end namespace symtab_reader
345 } // end namespace abigail
346 
347 #endif // __ABG_SYMTAB_READER_H__
348