1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Ignes Simeonova
21 // Author: Aleksei Vetrov
22
23 #include "btf_reader.h"
24
25 #include <fcntl.h>
26 #include <libelf.h>
27
28 #include <algorithm>
29 #include <array>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <memory>
34 #include <optional>
35 #include <sstream>
36 #include <string>
37 #include <string_view>
38 #include <utility>
39 #include <vector>
40
41 #include <linux/btf.h>
42 #include "elf_loader.h"
43 #include "error.h"
44 #include "file_descriptor.h"
45 #include "graph.h"
46 #include "reader_options.h"
47
48 namespace stg {
49
50 namespace btf {
51
Empty() const52 bool Structs::MemoryRange::Empty() const {
53 return start == limit;
54 }
55
56 template <typename T>
Pull(size_t count)57 const T* Structs::MemoryRange::Pull(size_t count) {
58 const char* saved = start;
59 start += sizeof(T) * count;
60 Check(start <= limit) << "type data extends past end of type section";
61 return reinterpret_cast<const T*>(saved);
62 }
63
Structs(Graph & graph)64 Structs::Structs(Graph& graph)
65 : graph_(graph) {}
66
67 // Get the index of the void type, creating one if needed.
GetVoid()68 Id Structs::GetVoid() {
69 if (!void_) {
70 void_ = {graph_.Add<Special>(Special::Kind::VOID)};
71 }
72 return *void_;
73 }
74
75 // Get the index of the variadic parameter type, creating one if needed.
GetVariadic()76 Id Structs::GetVariadic() {
77 if (!variadic_) {
78 variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)};
79 }
80 return *variadic_;
81 }
82
83 // Map BTF type index to own index.
84 //
85 // If there is no existing mapping for a BTF type, create one pointing to a new
86 // slot at the end of the array.
GetIdRaw(uint32_t btf_index)87 Id Structs::GetIdRaw(uint32_t btf_index) {
88 auto [it, inserted] = btf_type_ids_.insert({btf_index, Id(0)});
89 if (inserted) {
90 it->second = graph_.Allocate();
91 }
92 return it->second;
93 }
94
95 // Translate BTF type id to own type id, for non-parameters.
GetId(uint32_t btf_index)96 Id Structs::GetId(uint32_t btf_index) {
97 return btf_index ? GetIdRaw(btf_index) : GetVoid();
98 }
99
100 // Translate BTF type id to own type id, for parameters.
GetParameterId(uint32_t btf_index)101 Id Structs::GetParameterId(uint32_t btf_index) {
102 return btf_index ? GetIdRaw(btf_index) : GetVariadic();
103 }
104
Process(std::string_view btf_data)105 Id Structs::Process(std::string_view btf_data) {
106 Check(sizeof(btf_header) <= btf_data.size())
107 << "BTF section too small for header";
108 const btf_header* header =
109 reinterpret_cast<const btf_header*>(btf_data.data());
110 Check(reinterpret_cast<uintptr_t>(header) % alignof(btf_header) == 0)
111 << "misaligned BTF data";
112 Check(header->magic == 0xEB9F) << "Magic field must be 0xEB9F for BTF";
113
114 const char* header_limit = btf_data.begin() + header->hdr_len;
115 const char* type_start = header_limit + header->type_off;
116 const char* type_limit = type_start + header->type_len;
117 const char* string_start = header_limit + header->str_off;
118 const char* string_limit = string_start + header->str_len;
119
120 Check(btf_data.begin() + sizeof(btf_header) <= header_limit)
121 << "header exceeds length";
122 Check(header_limit <= type_start) << "type section overlaps header";
123 Check(type_start <= type_limit) << "type section ill-formed";
124 Check(reinterpret_cast<uintptr_t>(type_start) % alignof(btf_type) == 0)
125 << "misaligned type section";
126 Check(type_limit <= string_start)
127 << "string section does not follow type section";
128 Check(string_start <= string_limit) << "string section ill-formed";
129 Check(string_limit <= btf_data.end())
130 << "string section extends beyond end of BTF data";
131
132 const MemoryRange type_section{type_start, type_limit};
133 string_section_ = MemoryRange{string_start, string_limit};
134 return BuildTypes(type_section);
135 }
136
137 // vlen: vector length, the number of struct/union members
BuildMembers(bool kflag,const btf_member * members,size_t vlen)138 std::vector<Id> Structs::BuildMembers(
139 bool kflag, const btf_member* members, size_t vlen) {
140 std::vector<Id> result;
141 for (size_t i = 0; i < vlen; ++i) {
142 const auto& raw_member = members[i];
143 const auto name = GetName(raw_member.name_off);
144 const auto raw_offset = raw_member.offset;
145 const auto offset = kflag ? BTF_MEMBER_BIT_OFFSET(raw_offset) : raw_offset;
146 const auto bitfield_size = kflag ? BTF_MEMBER_BITFIELD_SIZE(raw_offset) : 0;
147 result.push_back(
148 graph_.Add<Member>(name, GetId(raw_member.type),
149 static_cast<uint64_t>(offset), bitfield_size));
150 }
151 return result;
152 }
153
154 // vlen: vector length, the number of enum values
BuildEnums(bool is_signed,const struct btf_enum * enums,size_t vlen)155 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums(
156 bool is_signed, const struct btf_enum* enums, size_t vlen) {
157 std::vector<std::pair<std::string, int64_t>> result;
158 for (size_t i = 0; i < vlen; ++i) {
159 const auto name = GetName(enums[i].name_off);
160 const uint32_t unsigned_value = enums[i].val;
161 if (is_signed) {
162 const int32_t signed_value = unsigned_value;
163 result.emplace_back(name, static_cast<int64_t>(signed_value));
164 } else {
165 result.emplace_back(name, static_cast<int64_t>(unsigned_value));
166 }
167 }
168 return result;
169 }
170
BuildEnums64(bool is_signed,const struct btf_enum64 * enums,size_t vlen)171 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums64(
172 bool is_signed, const struct btf_enum64* enums, size_t vlen) {
173 std::vector<std::pair<std::string, int64_t>> result;
174 for (size_t i = 0; i < vlen; ++i) {
175 const auto name = GetName(enums[i].name_off);
176 const uint32_t low = enums[i].val_lo32;
177 const uint32_t high = enums[i].val_hi32;
178 const uint64_t unsigned_value = (static_cast<uint64_t>(high) << 32) | low;
179 if (is_signed) {
180 const int64_t signed_value = unsigned_value;
181 result.emplace_back(name, signed_value);
182 } else {
183 // TODO: very large unsigned values are stored as negative numbers
184 result.emplace_back(name, static_cast<int64_t>(unsigned_value));
185 }
186 }
187 return result;
188 }
189
190 // vlen: vector length, the number of parameters
BuildParams(const struct btf_param * params,size_t vlen)191 std::vector<Id> Structs::BuildParams(const struct btf_param* params,
192 size_t vlen) {
193 std::vector<Id> result;
194 result.reserve(vlen);
195 for (size_t i = 0; i < vlen; ++i) {
196 const auto name = GetName(params[i].name_off);
197 const auto type = params[i].type;
198 result.push_back(GetParameterId(type));
199 }
200 return result;
201 }
202
BuildEnumUnderlyingType(size_t size,bool is_signed)203 Id Structs::BuildEnumUnderlyingType(size_t size, bool is_signed) {
204 std::ostringstream os;
205 os << (is_signed ? "enum-underlying-signed-" : "enum-underlying-unsigned-")
206 << (8 * size);
207 const auto encoding = is_signed ? Primitive::Encoding::SIGNED_INTEGER
208 : Primitive::Encoding::UNSIGNED_INTEGER;
209 return graph_.Add<Primitive>(os.str(), encoding, size);
210 }
211
BuildTypes(MemoryRange memory)212 Id Structs::BuildTypes(MemoryRange memory) {
213 // Alas, BTF overloads type id 0 to mean both void (for everything but
214 // function parameters) and variadic (for function parameters). We determine
215 // which is intended and create void and variadic types on demand.
216
217 // The type section is parsed sequentially and each type's index is its id.
218 uint32_t btf_index = 1;
219 while (!memory.Empty()) {
220 const auto* t = memory.Pull<struct btf_type>();
221 BuildOneType(t, btf_index, memory);
222 ++btf_index;
223 }
224
225 return BuildSymbols();
226 }
227
BuildOneType(const btf_type * t,uint32_t btf_index,MemoryRange & memory)228 void Structs::BuildOneType(const btf_type* t, uint32_t btf_index,
229 MemoryRange& memory) {
230 const auto kind = BTF_INFO_KIND(t->info);
231 const auto vlen = BTF_INFO_VLEN(t->info);
232 Check(kind < NR_BTF_KINDS) << "Unknown BTF kind: " << static_cast<int>(kind);
233
234 // delay allocation of node id as some BTF nodes are skipped
235 auto id = [&]() {
236 return GetIdRaw(btf_index);
237 };
238
239 switch (kind) {
240 case BTF_KIND_INT: {
241 const auto info = *memory.Pull<uint32_t>();
242 const auto name = GetName(t->name_off);
243 const auto raw_encoding = BTF_INT_ENCODING(info);
244 const auto offset = BTF_INT_OFFSET(info);
245 const auto bits = BTF_INT_BITS(info);
246 const auto is_bool = raw_encoding & BTF_INT_BOOL;
247 const auto is_signed = raw_encoding & BTF_INT_SIGNED;
248 const auto is_char = raw_encoding & BTF_INT_CHAR;
249 Primitive::Encoding encoding =
250 is_bool ? Primitive::Encoding::BOOLEAN
251 : is_char ? is_signed ? Primitive::Encoding::SIGNED_CHARACTER
252 : Primitive::Encoding::UNSIGNED_CHARACTER
253 : is_signed ? Primitive::Encoding::SIGNED_INTEGER
254 : Primitive::Encoding::UNSIGNED_INTEGER;
255 if (offset) {
256 Die() << "BTF INT non-zero offset " << offset;
257 }
258 if (bits != 8 * t->size) {
259 Die() << "BTF INT bits != 8 * size";
260 }
261 graph_.Set<Primitive>(id(), name, encoding, t->size);
262 break;
263 }
264 case BTF_KIND_FLOAT: {
265 const auto name = GetName(t->name_off);
266 const auto encoding = Primitive::Encoding::REAL_NUMBER;
267 graph_.Set<Primitive>(id(), name, encoding, t->size);
268 break;
269 }
270 case BTF_KIND_PTR: {
271 graph_.Set<PointerReference>(id(), PointerReference::Kind::POINTER,
272 GetId(t->type));
273 break;
274 }
275 case BTF_KIND_TYPEDEF: {
276 const auto name = GetName(t->name_off);
277 graph_.Set<Typedef>(id(), name, GetId(t->type));
278 break;
279 }
280 case BTF_KIND_VOLATILE:
281 case BTF_KIND_CONST:
282 case BTF_KIND_RESTRICT: {
283 const auto qualifier = kind == BTF_KIND_CONST
284 ? Qualifier::CONST
285 : kind == BTF_KIND_VOLATILE
286 ? Qualifier::VOLATILE
287 : Qualifier::RESTRICT;
288 graph_.Set<Qualified>(id(), qualifier, GetId(t->type));
289 break;
290 }
291 case BTF_KIND_ARRAY: {
292 const auto* array = memory.Pull<struct btf_array>();
293 graph_.Set<Array>(id(), array->nelems, GetId(array->type));
294 break;
295 }
296 case BTF_KIND_STRUCT:
297 case BTF_KIND_UNION: {
298 const auto struct_union_kind = kind == BTF_KIND_STRUCT
299 ? StructUnion::Kind::STRUCT
300 : StructUnion::Kind::UNION;
301 const auto name = GetName(t->name_off);
302 const bool kflag = BTF_INFO_KFLAG(t->info);
303 const auto* btf_members = memory.Pull<struct btf_member>(vlen);
304 const auto members = BuildMembers(kflag, btf_members, vlen);
305 graph_.Set<StructUnion>(id(), struct_union_kind, name, t->size,
306 std::vector<Id>(), std::vector<Id>(), members);
307 break;
308 }
309 case BTF_KIND_ENUM: {
310 const auto name = GetName(t->name_off);
311 const bool is_signed = BTF_INFO_KFLAG(t->info);
312 const auto* enums = memory.Pull<struct btf_enum>(vlen);
313 const auto enumerators = BuildEnums(is_signed, enums, vlen);
314 // BTF only considers structs and unions as forward-declared types, and
315 // does not include forward-declared enums. They are treated as
316 // BTF_KIND_ENUMs with vlen set to zero.
317 if (vlen) {
318 // create a synthetic underlying type
319 const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
320 graph_.Set<Enumeration>(id(), name, underlying, enumerators);
321 } else {
322 // BTF actually provides size (4), but it's meaningless.
323 graph_.Set<Enumeration>(id(), name);
324 }
325 break;
326 }
327 case BTF_KIND_ENUM64: {
328 const auto name = GetName(t->name_off);
329 const bool is_signed = BTF_INFO_KFLAG(t->info);
330 const auto* enums = memory.Pull<struct btf_enum64>(vlen);
331 const auto enumerators = BuildEnums64(is_signed, enums, vlen);
332 // create a synthetic underlying type
333 const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
334 graph_.Set<Enumeration>(id(), name, underlying, enumerators);
335 break;
336 }
337 case BTF_KIND_FWD: {
338 const auto name = GetName(t->name_off);
339 const auto struct_union_kind = BTF_INFO_KFLAG(t->info)
340 ? StructUnion::Kind::UNION
341 : StructUnion::Kind::STRUCT;
342 graph_.Set<StructUnion>(id(), struct_union_kind, name);
343 break;
344 }
345 case BTF_KIND_FUNC: {
346 const auto name = GetName(t->name_off);
347 // TODO: map linkage (vlen) to symbol properties
348 graph_.Set<ElfSymbol>(id(), name, std::nullopt, true,
349 ElfSymbol::SymbolType::FUNCTION,
350 ElfSymbol::Binding::GLOBAL,
351 ElfSymbol::Visibility::DEFAULT,
352 std::nullopt,
353 std::nullopt,
354 GetId(t->type),
355 std::nullopt);
356 const bool inserted =
357 btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
358 Check(inserted) << "duplicate symbol " << name;
359 break;
360 }
361 case BTF_KIND_FUNC_PROTO: {
362 const auto* params = memory.Pull<struct btf_param>(vlen);
363 const auto parameters = BuildParams(params, vlen);
364 graph_.Set<Function>(id(), GetId(t->type), parameters);
365 break;
366 }
367 case BTF_KIND_VAR: {
368 // NOTE: global variables are not yet emitted by pahole -J
369 const auto* variable = memory.Pull<struct btf_var>();
370 const auto name = GetName(t->name_off);
371 // TODO: map variable->linkage to symbol properties
372 (void) variable;
373 graph_.Set<ElfSymbol>(id(), name, std::nullopt, true,
374 ElfSymbol::SymbolType::OBJECT,
375 ElfSymbol::Binding::GLOBAL,
376 ElfSymbol::Visibility::DEFAULT,
377 std::nullopt,
378 std::nullopt,
379 GetId(t->type),
380 std::nullopt);
381 const bool inserted =
382 btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
383 Check(inserted) << "duplicate symbol " << name;
384 break;
385 }
386 case BTF_KIND_DATASEC: {
387 // Just skip BTF DATASEC entries. They partially duplicate ELF symbol
388 // table information, if they exist at all.
389 memory.Pull<struct btf_var_secinfo>(vlen);
390 break;
391 }
392 default: {
393 Die() << "Unhandled BTF kind: " << static_cast<int>(kind);
394 break;
395 }
396 }
397 }
398
GetName(uint32_t name_off)399 std::string Structs::GetName(uint32_t name_off) {
400 const char* name_begin = string_section_.start + name_off;
401 const char* const limit = string_section_.limit;
402 Check(name_begin < limit) << "name offset exceeds string section length";
403 const char* name_end = std::find(name_begin, limit, '\0');
404 Check(name_end < limit) << "name continues past the string section limit";
405 return {name_begin, static_cast<size_t>(name_end - name_begin)};
406 }
407
BuildSymbols()408 Id Structs::BuildSymbols() {
409 return graph_.Add<Interface>(btf_symbols_);
410 }
411
ReadFile(Graph & graph,const std::string & path,ReadOptions)412 Id ReadFile(Graph& graph, const std::string& path, ReadOptions) {
413 Check(elf_version(EV_CURRENT) != EV_NONE) << "ELF version mismatch";
414 struct ElfDeleter {
415 void operator()(Elf* elf) {
416 elf_end(elf);
417 }
418 };
419 const FileDescriptor fd(path.c_str(), O_RDONLY);
420 const std::unique_ptr<Elf, ElfDeleter> elf(
421 elf_begin(fd.Value(), ELF_C_READ, nullptr));
422 if (!elf) {
423 const int error_code = elf_errno();
424 const char* error = elf_errmsg(error_code);
425 if (error != nullptr) {
426 Die() << "elf_begin returned error: " << error;
427 } else {
428 Die() << "elf_begin returned error: " << error_code;
429 }
430 }
431 const elf::ElfLoader loader(elf.get());
432 return Structs(graph).Process(loader.GetBtfRawData());
433 }
434
435 } // namespace btf
436
437 } // namespace stg
438