• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Ignes Simeonova
21 // Author: Aleksei Vetrov
22 
23 #include "btf_reader.h"
24 
25 #include <fcntl.h>
26 #include <libelf.h>
27 
28 #include <algorithm>
29 #include <array>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <memory>
34 #include <optional>
35 #include <sstream>
36 #include <string>
37 #include <string_view>
38 #include <utility>
39 #include <vector>
40 
41 #include <linux/btf.h>
42 #include "elf_loader.h"
43 #include "error.h"
44 #include "file_descriptor.h"
45 #include "graph.h"
46 #include "reader_options.h"
47 
48 namespace stg {
49 
50 namespace btf {
51 
Empty() const52 bool Structs::MemoryRange::Empty() const {
53   return start == limit;
54 }
55 
56 template <typename T>
Pull(size_t count)57 const T* Structs::MemoryRange::Pull(size_t count) {
58   const char* saved = start;
59   start += sizeof(T) * count;
60   Check(start <= limit) << "type data extends past end of type section";
61   return reinterpret_cast<const T*>(saved);
62 }
63 
Structs(Graph & graph)64 Structs::Structs(Graph& graph)
65     : graph_(graph) {}
66 
67 // Get the index of the void type, creating one if needed.
GetVoid()68 Id Structs::GetVoid() {
69   if (!void_) {
70     void_ = {graph_.Add<Special>(Special::Kind::VOID)};
71   }
72   return *void_;
73 }
74 
75 // Get the index of the variadic parameter type, creating one if needed.
GetVariadic()76 Id Structs::GetVariadic() {
77   if (!variadic_) {
78     variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)};
79   }
80   return *variadic_;
81 }
82 
83 // Map BTF type index to own index.
84 //
85 // If there is no existing mapping for a BTF type, create one pointing to a new
86 // slot at the end of the array.
GetIdRaw(uint32_t btf_index)87 Id Structs::GetIdRaw(uint32_t btf_index) {
88   auto [it, inserted] = btf_type_ids_.insert({btf_index, Id(0)});
89   if (inserted) {
90     it->second = graph_.Allocate();
91   }
92   return it->second;
93 }
94 
95 // Translate BTF type id to own type id, for non-parameters.
GetId(uint32_t btf_index)96 Id Structs::GetId(uint32_t btf_index) {
97   return btf_index ? GetIdRaw(btf_index) : GetVoid();
98 }
99 
100 // Translate BTF type id to own type id, for parameters.
GetParameterId(uint32_t btf_index)101 Id Structs::GetParameterId(uint32_t btf_index) {
102   return btf_index ? GetIdRaw(btf_index) : GetVariadic();
103 }
104 
Process(std::string_view btf_data)105 Id Structs::Process(std::string_view btf_data) {
106   Check(sizeof(btf_header) <= btf_data.size())
107       << "BTF section too small for header";
108   const btf_header* header =
109       reinterpret_cast<const btf_header*>(btf_data.data());
110   Check(reinterpret_cast<uintptr_t>(header) % alignof(btf_header) == 0)
111       << "misaligned BTF data";
112   Check(header->magic == 0xEB9F) << "Magic field must be 0xEB9F for BTF";
113 
114   const char* header_limit = btf_data.begin() + header->hdr_len;
115   const char* type_start = header_limit + header->type_off;
116   const char* type_limit = type_start + header->type_len;
117   const char* string_start = header_limit + header->str_off;
118   const char* string_limit = string_start + header->str_len;
119 
120   Check(btf_data.begin() + sizeof(btf_header) <= header_limit)
121       << "header exceeds length";
122   Check(header_limit <= type_start) << "type section overlaps header";
123   Check(type_start <= type_limit) << "type section ill-formed";
124   Check(reinterpret_cast<uintptr_t>(type_start) % alignof(btf_type) == 0)
125       << "misaligned type section";
126   Check(type_limit <= string_start)
127       << "string section does not follow type section";
128   Check(string_start <= string_limit) << "string section ill-formed";
129   Check(string_limit <= btf_data.end())
130       << "string section extends beyond end of BTF data";
131 
132   const MemoryRange type_section{type_start, type_limit};
133   string_section_ = MemoryRange{string_start, string_limit};
134   return BuildTypes(type_section);
135 }
136 
137 // vlen: vector length, the number of struct/union members
BuildMembers(bool kflag,const btf_member * members,size_t vlen)138 std::vector<Id> Structs::BuildMembers(
139     bool kflag, const btf_member* members, size_t vlen) {
140   std::vector<Id> result;
141   for (size_t i = 0; i < vlen; ++i) {
142     const auto& raw_member = members[i];
143     const auto name = GetName(raw_member.name_off);
144     const auto raw_offset = raw_member.offset;
145     const auto offset = kflag ? BTF_MEMBER_BIT_OFFSET(raw_offset) : raw_offset;
146     const auto bitfield_size = kflag ? BTF_MEMBER_BITFIELD_SIZE(raw_offset) : 0;
147     result.push_back(
148         graph_.Add<Member>(name, GetId(raw_member.type),
149                            static_cast<uint64_t>(offset), bitfield_size));
150   }
151   return result;
152 }
153 
154 // vlen: vector length, the number of enum values
BuildEnums(bool is_signed,const struct btf_enum * enums,size_t vlen)155 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums(
156     bool is_signed, const struct btf_enum* enums, size_t vlen) {
157   std::vector<std::pair<std::string, int64_t>> result;
158   for (size_t i = 0; i < vlen; ++i) {
159     const auto name = GetName(enums[i].name_off);
160     const uint32_t unsigned_value = enums[i].val;
161     if (is_signed) {
162       const int32_t signed_value = unsigned_value;
163       result.emplace_back(name, static_cast<int64_t>(signed_value));
164     } else {
165       result.emplace_back(name, static_cast<int64_t>(unsigned_value));
166     }
167   }
168   return result;
169 }
170 
BuildEnums64(bool is_signed,const struct btf_enum64 * enums,size_t vlen)171 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums64(
172     bool is_signed, const struct btf_enum64* enums, size_t vlen) {
173   std::vector<std::pair<std::string, int64_t>> result;
174   for (size_t i = 0; i < vlen; ++i) {
175     const auto name = GetName(enums[i].name_off);
176     const uint32_t low = enums[i].val_lo32;
177     const uint32_t high = enums[i].val_hi32;
178     const uint64_t unsigned_value = (static_cast<uint64_t>(high) << 32) | low;
179     if (is_signed) {
180       const int64_t signed_value = unsigned_value;
181       result.emplace_back(name, signed_value);
182     } else {
183       // TODO: very large unsigned values are stored as negative numbers
184       result.emplace_back(name, static_cast<int64_t>(unsigned_value));
185     }
186   }
187   return result;
188 }
189 
190 // vlen: vector length, the number of parameters
BuildParams(const struct btf_param * params,size_t vlen)191 std::vector<Id> Structs::BuildParams(const struct btf_param* params,
192                                      size_t vlen) {
193   std::vector<Id> result;
194   result.reserve(vlen);
195   for (size_t i = 0; i < vlen; ++i) {
196     const auto name = GetName(params[i].name_off);
197     const auto type = params[i].type;
198     result.push_back(GetParameterId(type));
199   }
200   return result;
201 }
202 
BuildEnumUnderlyingType(size_t size,bool is_signed)203 Id Structs::BuildEnumUnderlyingType(size_t size, bool is_signed) {
204   std::ostringstream os;
205   os << (is_signed ? "enum-underlying-signed-" : "enum-underlying-unsigned-")
206      << (8 * size);
207   const auto encoding = is_signed ? Primitive::Encoding::SIGNED_INTEGER
208                                   : Primitive::Encoding::UNSIGNED_INTEGER;
209   return graph_.Add<Primitive>(os.str(), encoding, size);
210 }
211 
BuildTypes(MemoryRange memory)212 Id Structs::BuildTypes(MemoryRange memory) {
213   // Alas, BTF overloads type id 0 to mean both void (for everything but
214   // function parameters) and variadic (for function parameters). We determine
215   // which is intended and create void and variadic types on demand.
216 
217   // The type section is parsed sequentially and each type's index is its id.
218   uint32_t btf_index = 1;
219   while (!memory.Empty()) {
220     const auto* t = memory.Pull<struct btf_type>();
221     BuildOneType(t, btf_index, memory);
222     ++btf_index;
223   }
224 
225   return BuildSymbols();
226 }
227 
BuildOneType(const btf_type * t,uint32_t btf_index,MemoryRange & memory)228 void Structs::BuildOneType(const btf_type* t, uint32_t btf_index,
229                            MemoryRange& memory) {
230   const auto kind = BTF_INFO_KIND(t->info);
231   const auto vlen = BTF_INFO_VLEN(t->info);
232   Check(kind < NR_BTF_KINDS) << "Unknown BTF kind: " << static_cast<int>(kind);
233 
234   // delay allocation of node id as some BTF nodes are skipped
235   auto id = [&]() {
236     return GetIdRaw(btf_index);
237   };
238 
239   switch (kind) {
240     case BTF_KIND_INT: {
241       const auto info = *memory.Pull<uint32_t>();
242       const auto name = GetName(t->name_off);
243       const auto raw_encoding = BTF_INT_ENCODING(info);
244       const auto offset = BTF_INT_OFFSET(info);
245       const auto bits = BTF_INT_BITS(info);
246       const auto is_bool = raw_encoding & BTF_INT_BOOL;
247       const auto is_signed = raw_encoding & BTF_INT_SIGNED;
248       const auto is_char = raw_encoding & BTF_INT_CHAR;
249       Primitive::Encoding encoding =
250           is_bool ? Primitive::Encoding::BOOLEAN
251                 : is_char ? is_signed ? Primitive::Encoding::SIGNED_CHARACTER
252                                       : Primitive::Encoding::UNSIGNED_CHARACTER
253                           : is_signed ? Primitive::Encoding::SIGNED_INTEGER
254                                       : Primitive::Encoding::UNSIGNED_INTEGER;
255       if (offset) {
256         Die() << "BTF INT non-zero offset " << offset;
257       }
258       if (bits != 8 * t->size) {
259         Die() << "BTF INT bits != 8 * size";
260       }
261       graph_.Set<Primitive>(id(), name, encoding, t->size);
262       break;
263     }
264     case BTF_KIND_FLOAT: {
265       const auto name = GetName(t->name_off);
266       const auto encoding = Primitive::Encoding::REAL_NUMBER;
267       graph_.Set<Primitive>(id(), name, encoding, t->size);
268       break;
269     }
270     case BTF_KIND_PTR: {
271       graph_.Set<PointerReference>(id(), PointerReference::Kind::POINTER,
272                                    GetId(t->type));
273       break;
274     }
275     case BTF_KIND_TYPEDEF: {
276       const auto name = GetName(t->name_off);
277       graph_.Set<Typedef>(id(), name, GetId(t->type));
278       break;
279     }
280     case BTF_KIND_VOLATILE:
281     case BTF_KIND_CONST:
282     case BTF_KIND_RESTRICT: {
283       const auto qualifier = kind == BTF_KIND_CONST
284                              ? Qualifier::CONST
285                              : kind == BTF_KIND_VOLATILE
286                              ? Qualifier::VOLATILE
287                              : Qualifier::RESTRICT;
288       graph_.Set<Qualified>(id(), qualifier, GetId(t->type));
289       break;
290     }
291     case BTF_KIND_ARRAY: {
292       const auto* array = memory.Pull<struct btf_array>();
293       graph_.Set<Array>(id(), array->nelems, GetId(array->type));
294       break;
295     }
296     case BTF_KIND_STRUCT:
297     case BTF_KIND_UNION: {
298       const auto struct_union_kind = kind == BTF_KIND_STRUCT
299                                      ? StructUnion::Kind::STRUCT
300                                      : StructUnion::Kind::UNION;
301       const auto name = GetName(t->name_off);
302       const bool kflag = BTF_INFO_KFLAG(t->info);
303       const auto* btf_members = memory.Pull<struct btf_member>(vlen);
304       const auto members = BuildMembers(kflag, btf_members, vlen);
305       graph_.Set<StructUnion>(id(), struct_union_kind, name, t->size,
306                               std::vector<Id>(), std::vector<Id>(), members);
307       break;
308     }
309     case BTF_KIND_ENUM: {
310       const auto name = GetName(t->name_off);
311       const bool is_signed = BTF_INFO_KFLAG(t->info);
312       const auto* enums = memory.Pull<struct btf_enum>(vlen);
313       const auto enumerators = BuildEnums(is_signed, enums, vlen);
314       // BTF only considers structs and unions as forward-declared types, and
315       // does not include forward-declared enums. They are treated as
316       // BTF_KIND_ENUMs with vlen set to zero.
317       if (vlen) {
318         // create a synthetic underlying type
319         const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
320         graph_.Set<Enumeration>(id(), name, underlying, enumerators);
321       } else {
322         // BTF actually provides size (4), but it's meaningless.
323         graph_.Set<Enumeration>(id(), name);
324       }
325       break;
326     }
327     case BTF_KIND_ENUM64: {
328       const auto name = GetName(t->name_off);
329       const bool is_signed = BTF_INFO_KFLAG(t->info);
330       const auto* enums = memory.Pull<struct btf_enum64>(vlen);
331       const auto enumerators = BuildEnums64(is_signed, enums, vlen);
332       // create a synthetic underlying type
333       const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
334       graph_.Set<Enumeration>(id(), name, underlying, enumerators);
335       break;
336     }
337     case BTF_KIND_FWD: {
338       const auto name = GetName(t->name_off);
339       const auto struct_union_kind = BTF_INFO_KFLAG(t->info)
340                                      ? StructUnion::Kind::UNION
341                                      : StructUnion::Kind::STRUCT;
342       graph_.Set<StructUnion>(id(), struct_union_kind, name);
343       break;
344     }
345     case BTF_KIND_FUNC: {
346       const auto name = GetName(t->name_off);
347       // TODO: map linkage (vlen) to symbol properties
348       graph_.Set<ElfSymbol>(id(), name, std::nullopt, true,
349                             ElfSymbol::SymbolType::FUNCTION,
350                             ElfSymbol::Binding::GLOBAL,
351                             ElfSymbol::Visibility::DEFAULT,
352                             std::nullopt,
353                             std::nullopt,
354                             GetId(t->type),
355                             std::nullopt);
356       const bool inserted =
357           btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
358       Check(inserted) << "duplicate symbol " << name;
359       break;
360     }
361     case BTF_KIND_FUNC_PROTO: {
362       const auto* params = memory.Pull<struct btf_param>(vlen);
363       const auto parameters = BuildParams(params, vlen);
364       graph_.Set<Function>(id(), GetId(t->type), parameters);
365       break;
366     }
367     case BTF_KIND_VAR: {
368       // NOTE: global variables are not yet emitted by pahole -J
369       const auto* variable = memory.Pull<struct btf_var>();
370       const auto name = GetName(t->name_off);
371       // TODO: map variable->linkage to symbol properties
372       (void) variable;
373       graph_.Set<ElfSymbol>(id(), name, std::nullopt, true,
374                             ElfSymbol::SymbolType::OBJECT,
375                             ElfSymbol::Binding::GLOBAL,
376                             ElfSymbol::Visibility::DEFAULT,
377                             std::nullopt,
378                             std::nullopt,
379                             GetId(t->type),
380                             std::nullopt);
381       const bool inserted =
382           btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
383       Check(inserted) << "duplicate symbol " << name;
384       break;
385     }
386     case BTF_KIND_DATASEC: {
387       // Just skip BTF DATASEC entries. They partially duplicate ELF symbol
388       // table information, if they exist at all.
389       memory.Pull<struct btf_var_secinfo>(vlen);
390       break;
391     }
392     default: {
393       Die() << "Unhandled BTF kind: " << static_cast<int>(kind);
394       break;
395     }
396   }
397 }
398 
GetName(uint32_t name_off)399 std::string Structs::GetName(uint32_t name_off) {
400   const char* name_begin = string_section_.start + name_off;
401   const char* const limit = string_section_.limit;
402   Check(name_begin < limit) << "name offset exceeds string section length";
403   const char* name_end = std::find(name_begin, limit, '\0');
404   Check(name_end < limit) << "name continues past the string section limit";
405   return {name_begin, static_cast<size_t>(name_end - name_begin)};
406 }
407 
BuildSymbols()408 Id Structs::BuildSymbols() {
409   return graph_.Add<Interface>(btf_symbols_);
410 }
411 
ReadFile(Graph & graph,const std::string & path,ReadOptions)412 Id ReadFile(Graph& graph, const std::string& path, ReadOptions) {
413   Check(elf_version(EV_CURRENT) != EV_NONE) << "ELF version mismatch";
414   struct ElfDeleter {
415     void operator()(Elf* elf) {
416       elf_end(elf);
417     }
418   };
419   const FileDescriptor fd(path.c_str(), O_RDONLY);
420   const std::unique_ptr<Elf, ElfDeleter> elf(
421       elf_begin(fd.Value(), ELF_C_READ, nullptr));
422   if (!elf) {
423     const int error_code = elf_errno();
424     const char* error = elf_errmsg(error_code);
425     if (error != nullptr) {
426       Die() << "elf_begin returned error: " << error;
427     } else {
428       Die() << "elf_begin returned error: " << error_code;
429     }
430   }
431   const elf::ElfLoader loader(elf.get());
432   return Structs(graph).Process(loader.GetBtfRawData());
433 }
434 
435 }  // namespace btf
436 
437 }  // namespace stg
438