1 //===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass abstracts struct/union member accesses in order to support
// compile-once run-everywhere (CO-RE). CO-RE intends to compile a program
// once so that it can run on different kernels. In particular, if a bpf
// program tries to access a particular kernel data structure member, the
// details of the intermediate member access will be remembered so the bpf
// loader can do the necessary adjustment right before program loading.
15 //
16 // For example,
17 //
18 // struct s {
19 // int a;
20 // int b;
21 // };
22 // struct t {
23 // struct s c;
24 // int d;
25 // };
26 // struct t e;
27 //
28 // For the member access e.c.b, the compiler will generate code
29 // &e + 4
30 //
31 // The compile-once run-everywhere instead generates the following code
32 // r = 4
33 // &e + r
34 // The "4" in "r = 4" can be changed based on a particular kernel version.
35 // For example, on a particular kernel version, if struct s is changed to
36 //
37 // struct s {
38 // int new_field;
39 // int a;
40 // int b;
41 // }
42 //
43 // By repeating the member access on the host, the bpf loader can
44 // adjust "r = 4" as "r = 8".
45 //
46 // This feature relies on the following three intrinsic calls:
47 // addr = preserve_array_access_index(base, dimension, index)
48 // addr = preserve_union_access_index(base, di_index)
49 // !llvm.preserve.access.index <union_ditype>
50 // addr = preserve_struct_access_index(base, gep_index, di_index)
51 // !llvm.preserve.access.index <struct_ditype>
52 //
// Bitfield member access needs special attention. A user cannot take the
// address of a bitfield access. To help the kernel verifier
// optimize bitfield code easily, a new clang intrinsic is introduced:
56 // uint32_t __builtin_preserve_field_info(member_access, info_kind)
57 // In IR, a chain with two (or more) intrinsic calls will be generated:
58 // ...
59 // addr = preserve_struct_access_index(base, 1, 1) !struct s
60 // uint32_t result = bpf_preserve_field_info(addr, info_kind)
61 //
// Suppose the info_kind is FIELD_SIGNEDNESS.
// The above two IR intrinsics will be replaced with
// a relocatable insn:
// signedness = /* signedness of member_access */
// and signedness can be changed by the bpf loader based on the
// types on the host.
68 //
// A user can also test whether a field exists or not with
// uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE)
// The field will always be available (result = 1) during initial
// compilation, but the bpf loader can patch in the correct value
// on the target host, where the member_access may or may not be available.
74 //
75 //===----------------------------------------------------------------------===//
76
77 #include "BPF.h"
78 #include "BPFCORE.h"
79 #include "BPFTargetMachine.h"
80 #include "llvm/IR/DebugInfoMetadata.h"
81 #include "llvm/IR/GlobalVariable.h"
82 #include "llvm/IR/Instruction.h"
83 #include "llvm/IR/Instructions.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/Type.h"
86 #include "llvm/IR/User.h"
87 #include "llvm/IR/Value.h"
88 #include "llvm/Pass.h"
89 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
90 #include <stack>
91
92 #define DEBUG_TYPE "bpf-abstract-member-access"
93
94 namespace llvm {
95 const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama";
96 } // namespace llvm
97
98 using namespace llvm;
99
100 namespace {
101
102 class BPFAbstractMemberAccess final : public ModulePass {
getPassName() const103 StringRef getPassName() const override {
104 return "BPF Abstract Member Access";
105 }
106
107 bool runOnModule(Module &M) override;
108
109 public:
110 static char ID;
111 TargetMachine *TM;
112 // Add optional BPFTargetMachine parameter so that BPF backend can add the phase
113 // with target machine to find out the endianness. The default constructor (without
114 // parameters) is used by the pass manager for managing purposes.
BPFAbstractMemberAccess(BPFTargetMachine * TM=nullptr)115 BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {}
116
117 struct CallInfo {
118 uint32_t Kind;
119 uint32_t AccessIndex;
120 uint32_t RecordAlignment;
121 MDNode *Metadata;
122 Value *Base;
123 };
124 typedef std::stack<std::pair<CallInst *, CallInfo>> CallInfoStack;
125
126 private:
127 enum : uint32_t {
128 BPFPreserveArrayAI = 1,
129 BPFPreserveUnionAI = 2,
130 BPFPreserveStructAI = 3,
131 BPFPreserveFieldInfoAI = 4,
132 };
133
134 const DataLayout *DL = nullptr;
135
136 std::map<std::string, GlobalVariable *> GEPGlobals;
137 // A map to link preserve_*_access_index instrinsic calls.
138 std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
139 // A map to hold all the base preserve_*_access_index instrinsic calls.
140 // The base call is not an input of any other preserve_*
141 // intrinsics.
142 std::map<CallInst *, CallInfo> BaseAICalls;
143
144 bool doTransformation(Module &M);
145
146 void traceAICall(CallInst *Call, CallInfo &ParentInfo);
147 void traceBitCast(BitCastInst *BitCast, CallInst *Parent,
148 CallInfo &ParentInfo);
149 void traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
150 CallInfo &ParentInfo);
151 void collectAICallChains(Module &M, Function &F);
152
153 bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo);
154 bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI,
155 const MDNode *ChildMeta);
156 bool removePreserveAccessIndexIntrinsic(Module &M);
157 void replaceWithGEP(std::vector<CallInst *> &CallList,
158 uint32_t NumOfZerosIndex, uint32_t DIIndex);
159 bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
160 void GetStorageBitRange(DIDerivedType *MemberTy, uint32_t RecordAlignment,
161 uint32_t &StartBitOffset, uint32_t &EndBitOffset);
162 uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
163 uint32_t AccessIndex, uint32_t PatchImm,
164 uint32_t RecordAlignment);
165
166 Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
167 std::string &AccessKey, MDNode *&BaseMeta);
168 uint64_t getConstant(const Value *IndexValue);
169 bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo);
170 };
171 } // End anonymous namespace
172
// Definition of the static pass ID declared in the class; it is what the
// constructor hands to ModulePass.
char BPFAbstractMemberAccess::ID = 0;
// Pass registration, keyed by DEBUG_TYPE ("bpf-abstract-member-access").
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
                "abstracting struct/union member accessees", false, false)
176
createBPFAbstractMemberAccess(BPFTargetMachine * TM)177 ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
178 return new BPFAbstractMemberAccess(TM);
179 }
180
runOnModule(Module & M)181 bool BPFAbstractMemberAccess::runOnModule(Module &M) {
182 LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
183
184 // Bail out if no debug info.
185 if (M.debug_compile_units().empty())
186 return false;
187
188 DL = &M.getDataLayout();
189 return doTransformation(M);
190 }
191
SkipDIDerivedTag(unsigned Tag)192 static bool SkipDIDerivedTag(unsigned Tag) {
193 if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
194 Tag != dwarf::DW_TAG_volatile_type &&
195 Tag != dwarf::DW_TAG_restrict_type &&
196 Tag != dwarf::DW_TAG_member)
197 return false;
198 return true;
199 }
200
stripQualifiers(DIType * Ty)201 static DIType * stripQualifiers(DIType *Ty) {
202 while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
203 if (!SkipDIDerivedTag(DTy->getTag()))
204 break;
205 Ty = DTy->getBaseType();
206 }
207 return Ty;
208 }
209
/// Const overload: follow base types from \p Ty for as long as the current
/// type is a derived type whose tag is accepted by SkipDIDerivedTag(), and
/// return the first type that is not.
static const DIType *stripQualifiers(const DIType *Ty) {
  for (;;) {
    auto *Derived = dyn_cast<DIDerivedType>(Ty);
    if (!Derived || !SkipDIDerivedTag(Derived->getTag()))
      return Ty;
    Ty = Derived->getBaseType();
  }
}
218
calcArraySize(const DICompositeType * CTy,uint32_t StartDim)219 static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) {
220 DINodeArray Elements = CTy->getElements();
221 uint32_t DimSize = 1;
222 for (uint32_t I = StartDim; I < Elements.size(); ++I) {
223 if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
224 if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
225 const DISubrange *SR = cast<DISubrange>(Element);
226 auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
227 DimSize *= CI->getSExtValue();
228 }
229 }
230
231 return DimSize;
232 }
233
234 /// Check whether a call is a preserve_*_access_index intrinsic call or not.
IsPreserveDIAccessIndexCall(const CallInst * Call,CallInfo & CInfo)235 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
236 CallInfo &CInfo) {
237 if (!Call)
238 return false;
239
240 const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue());
241 if (!GV)
242 return false;
243 if (GV->getName().startswith("llvm.preserve.array.access.index")) {
244 CInfo.Kind = BPFPreserveArrayAI;
245 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
246 if (!CInfo.Metadata)
247 report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic");
248 CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
249 CInfo.Base = Call->getArgOperand(0);
250 CInfo.RecordAlignment =
251 DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
252 return true;
253 }
254 if (GV->getName().startswith("llvm.preserve.union.access.index")) {
255 CInfo.Kind = BPFPreserveUnionAI;
256 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
257 if (!CInfo.Metadata)
258 report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
259 CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
260 CInfo.Base = Call->getArgOperand(0);
261 CInfo.RecordAlignment =
262 DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
263 return true;
264 }
265 if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
266 CInfo.Kind = BPFPreserveStructAI;
267 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
268 if (!CInfo.Metadata)
269 report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic");
270 CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
271 CInfo.Base = Call->getArgOperand(0);
272 CInfo.RecordAlignment =
273 DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
274 return true;
275 }
276 if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
277 CInfo.Kind = BPFPreserveFieldInfoAI;
278 CInfo.Metadata = nullptr;
279 // Check validity of info_kind as clang did not check this.
280 uint64_t InfoKind = getConstant(Call->getArgOperand(1));
281 if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND)
282 report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic");
283 CInfo.AccessIndex = InfoKind;
284 return true;
285 }
286
287 return false;
288 }
289
replaceWithGEP(std::vector<CallInst * > & CallList,uint32_t DimensionIndex,uint32_t GEPIndex)290 void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
291 uint32_t DimensionIndex,
292 uint32_t GEPIndex) {
293 for (auto Call : CallList) {
294 uint32_t Dimension = 1;
295 if (DimensionIndex > 0)
296 Dimension = getConstant(Call->getArgOperand(DimensionIndex));
297
298 Constant *Zero =
299 ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
300 SmallVector<Value *, 4> IdxList;
301 for (unsigned I = 0; I < Dimension; ++I)
302 IdxList.push_back(Zero);
303 IdxList.push_back(Call->getArgOperand(GEPIndex));
304
305 auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0),
306 IdxList, "", Call);
307 Call->replaceAllUsesWith(GEP);
308 Call->eraseFromParent();
309 }
310 }
311
removePreserveAccessIndexIntrinsic(Module & M)312 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
313 std::vector<CallInst *> PreserveArrayIndexCalls;
314 std::vector<CallInst *> PreserveUnionIndexCalls;
315 std::vector<CallInst *> PreserveStructIndexCalls;
316 bool Found = false;
317
318 for (Function &F : M)
319 for (auto &BB : F)
320 for (auto &I : BB) {
321 auto *Call = dyn_cast<CallInst>(&I);
322 CallInfo CInfo;
323 if (!IsPreserveDIAccessIndexCall(Call, CInfo))
324 continue;
325
326 Found = true;
327 if (CInfo.Kind == BPFPreserveArrayAI)
328 PreserveArrayIndexCalls.push_back(Call);
329 else if (CInfo.Kind == BPFPreserveUnionAI)
330 PreserveUnionIndexCalls.push_back(Call);
331 else
332 PreserveStructIndexCalls.push_back(Call);
333 }
334
335 // do the following transformation:
336 // . addr = preserve_array_access_index(base, dimension, index)
337 // is transformed to
338 // addr = GEP(base, dimenion's zero's, index)
339 // . addr = preserve_union_access_index(base, di_index)
340 // is transformed to
341 // addr = base, i.e., all usages of "addr" are replaced by "base".
342 // . addr = preserve_struct_access_index(base, gep_index, di_index)
343 // is transformed to
344 // addr = GEP(base, 0, gep_index)
345 replaceWithGEP(PreserveArrayIndexCalls, 1, 2);
346 replaceWithGEP(PreserveStructIndexCalls, 0, 1);
347 for (auto Call : PreserveUnionIndexCalls) {
348 Call->replaceAllUsesWith(Call->getArgOperand(0));
349 Call->eraseFromParent();
350 }
351
352 return Found;
353 }
354
355 /// Check whether the access index chain is valid. We check
356 /// here because there may be type casts between two
357 /// access indexes. We want to ensure memory access still valid.
IsValidAIChain(const MDNode * ParentType,uint32_t ParentAI,const MDNode * ChildType)358 bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType,
359 uint32_t ParentAI,
360 const MDNode *ChildType) {
361 if (!ChildType)
362 return true; // preserve_field_info, no type comparison needed.
363
364 const DIType *PType = stripQualifiers(cast<DIType>(ParentType));
365 const DIType *CType = stripQualifiers(cast<DIType>(ChildType));
366
367 // Child is a derived/pointer type, which is due to type casting.
368 // Pointer type cannot be in the middle of chain.
369 if (isa<DIDerivedType>(CType))
370 return false;
371
372 // Parent is a pointer type.
373 if (const auto *PtrTy = dyn_cast<DIDerivedType>(PType)) {
374 if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type)
375 return false;
376 return stripQualifiers(PtrTy->getBaseType()) == CType;
377 }
378
379 // Otherwise, struct/union/array types
380 const auto *PTy = dyn_cast<DICompositeType>(PType);
381 const auto *CTy = dyn_cast<DICompositeType>(CType);
382 assert(PTy && CTy && "ParentType or ChildType is null or not composite");
383
384 uint32_t PTyTag = PTy->getTag();
385 assert(PTyTag == dwarf::DW_TAG_array_type ||
386 PTyTag == dwarf::DW_TAG_structure_type ||
387 PTyTag == dwarf::DW_TAG_union_type);
388
389 uint32_t CTyTag = CTy->getTag();
390 assert(CTyTag == dwarf::DW_TAG_array_type ||
391 CTyTag == dwarf::DW_TAG_structure_type ||
392 CTyTag == dwarf::DW_TAG_union_type);
393
394 // Multi dimensional arrays, base element should be the same
395 if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag)
396 return PTy->getBaseType() == CTy->getBaseType();
397
398 DIType *Ty;
399 if (PTyTag == dwarf::DW_TAG_array_type)
400 Ty = PTy->getBaseType();
401 else
402 Ty = dyn_cast<DIType>(PTy->getElements()[ParentAI]);
403
404 return dyn_cast<DICompositeType>(stripQualifiers(Ty)) == CTy;
405 }
406
traceAICall(CallInst * Call,CallInfo & ParentInfo)407 void BPFAbstractMemberAccess::traceAICall(CallInst *Call,
408 CallInfo &ParentInfo) {
409 for (User *U : Call->users()) {
410 Instruction *Inst = dyn_cast<Instruction>(U);
411 if (!Inst)
412 continue;
413
414 if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
415 traceBitCast(BI, Call, ParentInfo);
416 } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
417 CallInfo ChildInfo;
418
419 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
420 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
421 ChildInfo.Metadata)) {
422 AIChain[CI] = std::make_pair(Call, ParentInfo);
423 traceAICall(CI, ChildInfo);
424 } else {
425 BaseAICalls[Call] = ParentInfo;
426 }
427 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
428 if (GI->hasAllZeroIndices())
429 traceGEP(GI, Call, ParentInfo);
430 else
431 BaseAICalls[Call] = ParentInfo;
432 } else {
433 BaseAICalls[Call] = ParentInfo;
434 }
435 }
436 }
437
traceBitCast(BitCastInst * BitCast,CallInst * Parent,CallInfo & ParentInfo)438 void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast,
439 CallInst *Parent,
440 CallInfo &ParentInfo) {
441 for (User *U : BitCast->users()) {
442 Instruction *Inst = dyn_cast<Instruction>(U);
443 if (!Inst)
444 continue;
445
446 if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
447 traceBitCast(BI, Parent, ParentInfo);
448 } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
449 CallInfo ChildInfo;
450 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
451 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
452 ChildInfo.Metadata)) {
453 AIChain[CI] = std::make_pair(Parent, ParentInfo);
454 traceAICall(CI, ChildInfo);
455 } else {
456 BaseAICalls[Parent] = ParentInfo;
457 }
458 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
459 if (GI->hasAllZeroIndices())
460 traceGEP(GI, Parent, ParentInfo);
461 else
462 BaseAICalls[Parent] = ParentInfo;
463 } else {
464 BaseAICalls[Parent] = ParentInfo;
465 }
466 }
467 }
468
traceGEP(GetElementPtrInst * GEP,CallInst * Parent,CallInfo & ParentInfo)469 void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
470 CallInfo &ParentInfo) {
471 for (User *U : GEP->users()) {
472 Instruction *Inst = dyn_cast<Instruction>(U);
473 if (!Inst)
474 continue;
475
476 if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
477 traceBitCast(BI, Parent, ParentInfo);
478 } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
479 CallInfo ChildInfo;
480 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) &&
481 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex,
482 ChildInfo.Metadata)) {
483 AIChain[CI] = std::make_pair(Parent, ParentInfo);
484 traceAICall(CI, ChildInfo);
485 } else {
486 BaseAICalls[Parent] = ParentInfo;
487 }
488 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
489 if (GI->hasAllZeroIndices())
490 traceGEP(GI, Parent, ParentInfo);
491 else
492 BaseAICalls[Parent] = ParentInfo;
493 } else {
494 BaseAICalls[Parent] = ParentInfo;
495 }
496 }
497 }
498
collectAICallChains(Module & M,Function & F)499 void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) {
500 AIChain.clear();
501 BaseAICalls.clear();
502
503 for (auto &BB : F)
504 for (auto &I : BB) {
505 CallInfo CInfo;
506 auto *Call = dyn_cast<CallInst>(&I);
507 if (!IsPreserveDIAccessIndexCall(Call, CInfo) ||
508 AIChain.find(Call) != AIChain.end())
509 continue;
510
511 traceAICall(Call, CInfo);
512 }
513 }
514
getConstant(const Value * IndexValue)515 uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
516 const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
517 assert(CV);
518 return CV->getValue().getZExtValue();
519 }
520
521 /// Get the start and the end of storage offset for \p MemberTy.
GetStorageBitRange(DIDerivedType * MemberTy,uint32_t RecordAlignment,uint32_t & StartBitOffset,uint32_t & EndBitOffset)522 void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
523 uint32_t RecordAlignment,
524 uint32_t &StartBitOffset,
525 uint32_t &EndBitOffset) {
526 uint32_t MemberBitSize = MemberTy->getSizeInBits();
527 uint32_t MemberBitOffset = MemberTy->getOffsetInBits();
528 uint32_t AlignBits = RecordAlignment * 8;
529 if (RecordAlignment > 8 || MemberBitSize > AlignBits)
530 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
531 "requiring too big alignment");
532
533 StartBitOffset = MemberBitOffset & ~(AlignBits - 1);
534 if ((StartBitOffset + AlignBits) < (MemberBitOffset + MemberBitSize))
535 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
536 "cross alignment boundary");
537 EndBitOffset = StartBitOffset + AlignBits;
538 }
539
/// Compute the relocation value of kind \p InfoKind for one access step into
/// the composite type \p CTy.
///
/// \param InfoKind        one of the BPFCoreSharedInfo::FIELD_* kinds.
/// \param CTy             array, struct or union type being indexed.
/// \param AccessIndex     array element index, or index into
///                        CTy->getElements() for struct/union members.
/// \param PatchImm        value accumulated so far; only FIELD_BYTE_OFFSET
///                        adds to it, the other kinds do not read it.
/// \param RecordAlignment alignment in bytes, forwarded to
///                        GetStorageBitRange() for bitfield members.
/// \returns the new patch immediate. Unsupported expressions are reported
/// with report_fatal_error().
uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
                                               DICompositeType *CTy,
                                               uint32_t AccessIndex,
                                               uint32_t PatchImm,
                                               uint32_t RecordAlignment) {
  // At compile time the field is always present.
  if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
    return 1;

  uint32_t Tag = CTy->getTag();
  if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) {
    if (Tag == dwarf::DW_TAG_array_type) {
      // Offset of element AccessIndex: index * (elements in the inner
      // dimensions) * element size in bytes.
      auto *EltTy = stripQualifiers(CTy->getBaseType());
      PatchImm += AccessIndex * calcArraySize(CTy, 1) *
                  (EltTy->getSizeInBits() >> 3);
    } else if (Tag == dwarf::DW_TAG_structure_type) {
      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
      if (!MemberTy->isBitField()) {
        PatchImm += MemberTy->getOffsetInBits() >> 3;
      } else {
        // For a bitfield, use the byte offset of its storage unit.
        unsigned SBitOffset, NextSBitOffset;
        GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset,
                           NextSBitOffset);
        PatchImm += SBitOffset >> 3;
      }
    }
    // Any other tag (e.g. a union) adds nothing to the offset.
    return PatchImm;
  }

  if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) {
    if (Tag == dwarf::DW_TAG_array_type) {
      auto *EltTy = stripQualifiers(CTy->getBaseType());
      return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3);
    } else {
      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
      uint32_t SizeInBits = MemberTy->getSizeInBits();
      if (!MemberTy->isBitField())
        return SizeInBits >> 3;

      // For a bitfield, report the size of its storage unit, which must be
      // a power of two number of bits.
      unsigned SBitOffset, NextSBitOffset;
      GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
      SizeInBits = NextSBitOffset - SBitOffset;
      if (SizeInBits & (SizeInBits - 1))
        report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
      return SizeInBits >> 3;
    }
  }

  if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) {
    const DIType *BaseTy;
    if (Tag == dwarf::DW_TAG_array_type) {
      // Signedness only checked when final array elements are accessed.
      if (CTy->getElements().size() != 1)
        report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info");
      BaseTy = stripQualifiers(CTy->getBaseType());
    } else {
      auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
      BaseTy = stripQualifiers(MemberTy->getBaseType());
    }

    // Only basic types and enum types have signedness.
    const auto *BTy = dyn_cast<DIBasicType>(BaseTy);
    // Look through enumeration types to their underlying basic type.
    while (!BTy) {
      const auto *CompTy = dyn_cast<DICompositeType>(BaseTy);
      // Report an error if the field expression does not have signedness.
      if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type)
        report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info");
      BaseTy = stripQualifiers(CompTy->getBaseType());
      BTy = dyn_cast<DIBasicType>(BaseTy);
    }
    uint32_t Encoding = BTy->getEncoding();
    return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char);
  }

  if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) {
    // The value is loaded into a value with FIELD_BYTE_SIZE size,
    // and then zero or sign extended to U64.
    // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations
    // to extract the original value.
    // NOTE(review): TM is dereferenced unconditionally here, although the
    // constructor defaults it to nullptr -- confirm that every pipeline using
    // this relocation kind creates the pass with a target machine.
    const Triple &Triple = TM->getTargetTriple();
    DIDerivedType *MemberTy = nullptr;
    bool IsBitField = false;
    uint32_t SizeInBits;

    if (Tag == dwarf::DW_TAG_array_type) {
      auto *EltTy = stripQualifiers(CTy->getBaseType());
      SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
    } else {
      MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
      SizeInBits = MemberTy->getSizeInBits();
      IsBitField = MemberTy->isBitField();
    }

    if (!IsBitField) {
      if (SizeInBits > 64)
        report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
      return 64 - SizeInBits;
    }

    // MemberTy is non-null here: IsBitField is only set on the member path.
    unsigned SBitOffset, NextSBitOffset;
    GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
    if (NextSBitOffset - SBitOffset > 64)
      report_fatal_error("too big field size for llvm.bpf.preserve.field.info");

    // The shift amount depends on the target: bpfel uses the first formula,
    // every other arch the second.
    unsigned OffsetInBits = MemberTy->getOffsetInBits();
    if (Triple.getArch() == Triple::bpfel)
      return SBitOffset + 64 - OffsetInBits - SizeInBits;
    else
      return OffsetInBits + 64 - NextSBitOffset;
  }

  if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) {
    DIDerivedType *MemberTy = nullptr;
    bool IsBitField = false;
    uint32_t SizeInBits;
    if (Tag == dwarf::DW_TAG_array_type) {
      auto *EltTy = stripQualifiers(CTy->getBaseType());
      SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits();
    } else {
      MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]);
      SizeInBits = MemberTy->getSizeInBits();
      IsBitField = MemberTy->isBitField();
    }

    if (!IsBitField) {
      if (SizeInBits > 64)
        report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
      return 64 - SizeInBits;
    }

    // For bitfields the storage range is computed only to validate it; the
    // right shift itself depends solely on the field width.
    unsigned SBitOffset, NextSBitOffset;
    GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
    if (NextSBitOffset - SBitOffset > 64)
      report_fatal_error("too big field size for llvm.bpf.preserve.field.info");

    return 64 - SizeInBits;
  }

  llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind");
}
679
HasPreserveFieldInfoCall(CallInfoStack & CallStack)680 bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) {
681 // This is called in error return path, no need to maintain CallStack.
682 while (CallStack.size()) {
683 auto StackElem = CallStack.top();
684 if (StackElem.second.Kind == BPFPreserveFieldInfoAI)
685 return true;
686 CallStack.pop();
687 }
688 return false;
689 }
690
/// Compute the base of the whole preserve_* intrinsics chains, i.e., the base
/// pointer of the first preserve_*_access_index call, and construct the access
/// string, which will be the name of a global variable.
///
/// \param Call      last call of the chain; its parents are found via AIChain.
/// \param CInfo     in/out: info of \p Call on entry. It is overwritten while
///                  the chain is walked, so callers must not assume it is
///                  unchanged, in particular when nullptr is returned.
/// \param AccessKey out: receives the full global-variable name.
/// \param TypeMeta  out: receives the struct/union debug type the relocation
///                  is based on; only written when a record type is found.
/// \returns the base pointer of the head of the chain, or nullptr when the
/// chain does not lead to a struct/union type (such accesses are skipped).
/// In that case a fatal error is raised instead if the chain contains a
/// preserve_field_info call.
Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
                                                        CallInfo &CInfo,
                                                        std::string &AccessKey,
                                                        MDNode *&TypeMeta) {
  Value *Base = nullptr;
  std::string TypeName;
  CallInfoStack CallStack;

  // Put the access chain into a stack with the top as the head of the chain.
  // Note that AIChain[Call] default-inserts an entry with a null parent for
  // the head of the chain, which is what terminates this loop.
  while (Call) {
    CallStack.push(std::make_pair(Call, CInfo));
    CInfo = AIChain[Call].second;
    Call = AIChain[Call].first;
  }

  // The access offset from the base of the head of chain is also
  // calculated here as all debuginfo types are available.

  // Get type name and calculate the first index.
  // We only want to get type name from structure or union.
  // If user wants a relocation like
  //    int *p; ... __builtin_preserve_access_index(&p[4]) ...
  // or
  //    int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ...
  // we will skip them.
  uint32_t FirstIndex = 0;
  uint32_t PatchImm = 0; // AccessOffset or the requested field info
  uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET;
  while (CallStack.size()) {
    auto StackElem = CallStack.top();
    Call = StackElem.first;
    CInfo = StackElem.second;

    // The base of the whole chain is the base of its head.
    if (!Base)
      Base = CInfo.Base;

    DIType *Ty = stripQualifiers(cast<DIType>(CInfo.Metadata));
    if (CInfo.Kind == BPFPreserveUnionAI ||
        CInfo.Kind == BPFPreserveStructAI) {
      // struct or union type
      // The entry stays on the stack; the loop below processes it as the
      // first member access.
      TypeName = Ty->getName();
      TypeMeta = Ty;
      PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3);
      break;
    }

    assert(CInfo.Kind == BPFPreserveArrayAI);

    // Array entries will always be consumed for accumulative initial index.
    CallStack.pop();

    // BPFPreserveArrayAI
    uint64_t AccessIndex = CInfo.AccessIndex;

    DIType *BaseTy = nullptr;
    bool CheckElemType = false;
    if (const auto *CTy = dyn_cast<DICompositeType>(Ty)) {
      // array type
      assert(CTy->getTag() == dwarf::DW_TAG_array_type);


      FirstIndex += AccessIndex * calcArraySize(CTy, 1);
      BaseTy = stripQualifiers(CTy->getBaseType());
      // Only the innermost dimension yields actual elements.
      CheckElemType = CTy->getElements().size() == 1;
    } else {
      // pointer type
      auto *DTy = cast<DIDerivedType>(Ty);
      assert(DTy->getTag() == dwarf::DW_TAG_pointer_type);

      BaseTy = stripQualifiers(DTy->getBaseType());
      CTy = dyn_cast<DICompositeType>(BaseTy);
      if (!CTy) {
        // Pointer to a non-composite type.
        CheckElemType = true;
      } else if (CTy->getTag() != dwarf::DW_TAG_array_type) {
        // Pointer to a non-array composite type: index in units of it.
        FirstIndex += AccessIndex;
        CheckElemType = true;
      } else {
        // Pointer to an array: index in units of the whole array.
        FirstIndex += AccessIndex * calcArraySize(CTy, 0);
      }
    }

    if (CheckElemType) {
      auto *CTy = dyn_cast<DICompositeType>(BaseTy);
      if (!CTy) {
        // Element is not a composite type: no relocation is generated.
        if (HasPreserveFieldInfoCall(CallStack))
          report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
        return nullptr;
      }

      unsigned CTag = CTy->getTag();
      if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) {
        TypeName = CTy->getName();
      } else {
        if (HasPreserveFieldInfoCall(CallStack))
          report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
        return nullptr;
      }
      TypeMeta = CTy;
      PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3);
      break;
    }
  }
  assert(TypeName.size());
  AccessKey += std::to_string(FirstIndex);

  // Traverse the rest of access chain to complete offset calculation
  // and access key construction.
  while (CallStack.size()) {
    auto StackElem = CallStack.top();
    CInfo = StackElem.second;
    CallStack.pop();

    // The field-info call itself contributes nothing; its info kind was
    // already picked up while processing the entry before it.
    if (CInfo.Kind == BPFPreserveFieldInfoAI)
      break;

    // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI,
    // the action will be extracting field info.
    if (CallStack.size()) {
      auto StackElem2 = CallStack.top();
      CallInfo CInfo2 = StackElem2.second;
      if (CInfo2.Kind == BPFPreserveFieldInfoAI) {
        InfoKind = CInfo2.AccessIndex;
        assert(CallStack.size() == 1);
      }
    }

    // Access Index
    uint64_t AccessIndex = CInfo.AccessIndex;
    AccessKey += ":" + std::to_string(AccessIndex);

    MDNode *MDN = CInfo.Metadata;
    uint32_t RecordAlignment = CInfo.RecordAlignment;
    // At this stage, it cannot be pointer type.
    auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN)));
    PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm,
                            RecordAlignment);
  }

  // Access key is the
  //   "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" +
  //   access string,
  // uniquely identifying one relocation.
  // The prefix "llvm." indicates this is a temporary global, which should
  // not be emitted to ELF file.
  AccessKey = "llvm." + TypeName + ":" + std::to_string(InfoKind) + ":" +
              std::to_string(PatchImm) + "$" + AccessKey;

  return Base;
}
843
844 /// Call/Kind is the base preserve_*_access_index() call. Attempts to do
845 /// transformation to a chain of relocable GEPs.
transformGEPChain(Module & M,CallInst * Call,CallInfo & CInfo)846 bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
847 CallInfo &CInfo) {
848 std::string AccessKey;
849 MDNode *TypeMeta;
850 Value *Base =
851 computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta);
852 if (!Base)
853 return false;
854
855 BasicBlock *BB = Call->getParent();
856 GlobalVariable *GV;
857
858 if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
859 IntegerType *VarType;
860 if (CInfo.Kind == BPFPreserveFieldInfoAI)
861 VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value
862 else
863 VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith
864
865 GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
866 NULL, AccessKey);
867 GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
868 GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
869 GEPGlobals[AccessKey] = GV;
870 } else {
871 GV = GEPGlobals[AccessKey];
872 }
873
874 if (CInfo.Kind == BPFPreserveFieldInfoAI) {
875 // Load the global variable which represents the returned field info.
876 auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV);
877 BB->getInstList().insert(Call->getIterator(), LDInst);
878 Call->replaceAllUsesWith(LDInst);
879 Call->eraseFromParent();
880 return true;
881 }
882
883 // For any original GEP Call and Base %2 like
884 // %4 = bitcast %struct.net_device** %dev1 to i64*
885 // it is transformed to:
886 // %6 = load sk_buff:50:$0:0:0:2:0
887 // %7 = bitcast %struct.sk_buff* %2 to i8*
888 // %8 = getelementptr i8, i8* %7, %6
889 // %9 = bitcast i8* %8 to i64*
890 // using %9 instead of %4
891 // The original Call inst is removed.
892
893 // Load the global variable.
894 auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
895 BB->getInstList().insert(Call->getIterator(), LDInst);
896
897 // Generate a BitCast
898 auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext()));
899 BB->getInstList().insert(Call->getIterator(), BCInst);
900
901 // Generate a GetElementPtr
902 auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()),
903 BCInst, LDInst);
904 BB->getInstList().insert(Call->getIterator(), GEP);
905
906 // Generate a BitCast
907 auto *BCInst2 = new BitCastInst(GEP, Call->getType());
908 BB->getInstList().insert(Call->getIterator(), BCInst2);
909
910 Call->replaceAllUsesWith(BCInst2);
911 Call->eraseFromParent();
912
913 return true;
914 }
915
doTransformation(Module & M)916 bool BPFAbstractMemberAccess::doTransformation(Module &M) {
917 bool Transformed = false;
918
919 for (Function &F : M) {
920 // Collect PreserveDIAccessIndex Intrinsic call chains.
921 // The call chains will be used to generate the access
922 // patterns similar to GEP.
923 collectAICallChains(M, F);
924
925 for (auto &C : BaseAICalls)
926 Transformed = transformGEPChain(M, C.first, C.second) || Transformed;
927 }
928
929 return removePreserveAccessIndexIntrinsic(M) || Transformed;
930 }
931