1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for a specific
12 /// ABI, yet will be compiled correctly if the proper attributes are propagated
13 /// from the caller.
14 ///
15 /// The pass analyzes the call graph and propagates ABI target features through
16 /// the call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABIs. If a function is cloned, all call sites will
21 /// be updated to use the correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/Cloning.h"
39 #include <string>
40
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42
43 using namespace llvm;
44
45 namespace llvm {
46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47 }
48
49 namespace {
50
51 class AMDGPUPropagateAttributes {
52 const FeatureBitset TargetFeatures = {
53 AMDGPU::FeatureWavefrontSize16,
54 AMDGPU::FeatureWavefrontSize32,
55 AMDGPU::FeatureWavefrontSize64
56 };
57
58 class Clone{
59 public:
Clone(FeatureBitset FeatureMask,Function * OrigF,Function * NewF)60 Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61 FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62
63 FeatureBitset FeatureMask;
64 Function *OrigF;
65 Function *NewF;
66 };
67
68 const TargetMachine *TM;
69
70 // Clone functions as needed or just set attributes.
71 bool AllowClone;
72
73 // Option propagation roots.
74 SmallSet<Function *, 32> Roots;
75
76 // Clones of functions with their attributes.
77 SmallVector<Clone, 32> Clones;
78
79 // Find a clone with required features.
80 Function *findFunction(const FeatureBitset &FeaturesNeeded,
81 Function *OrigF);
82
83 // Clone function F and set NewFeatures on the clone.
84 // Cole takes the name of original function.
85 Function *cloneWithFeatures(Function &F,
86 const FeatureBitset &NewFeatures);
87
88 // Set new function's features in place.
89 void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90
91 std::string getFeatureString(const FeatureBitset &Features) const;
92
93 // Propagate attributes from Roots.
94 bool process();
95
96 public:
AMDGPUPropagateAttributes(const TargetMachine * TM,bool AllowClone)97 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98 TM(TM), AllowClone(AllowClone) {}
99
100 // Use F as a root and propagate its attributes.
101 bool process(Function &F);
102
103 // Propagate attributes starting from kernel functions.
104 bool process(Module &M);
105 };
106
107 // Allows to propagate attributes early, but no clonning is allowed as it must
108 // be a function pass to run before any optimizations.
109 // TODO: We shall only need a one instance of module pass, but that needs to be
110 // in the linker pipeline which is currently not possible.
111 class AMDGPUPropagateAttributesEarly : public FunctionPass {
112 const TargetMachine *TM;
113
114 public:
115 static char ID; // Pass identification
116
AMDGPUPropagateAttributesEarly(const TargetMachine * TM=nullptr)117 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118 FunctionPass(ID), TM(TM) {
119 initializeAMDGPUPropagateAttributesEarlyPass(
120 *PassRegistry::getPassRegistry());
121 }
122
123 bool runOnFunction(Function &F) override;
124 };
125
126 // Allows to propagate attributes with clonning but does that late in the
127 // pipeline.
128 class AMDGPUPropagateAttributesLate : public ModulePass {
129 const TargetMachine *TM;
130
131 public:
132 static char ID; // Pass identification
133
AMDGPUPropagateAttributesLate(const TargetMachine * TM=nullptr)134 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135 ModulePass(ID), TM(TM) {
136 initializeAMDGPUPropagateAttributesLatePass(
137 *PassRegistry::getPassRegistry());
138 }
139
140 bool runOnModule(Module &M) override;
141 };
142
143 } // end anonymous namespace.
144
145 char AMDGPUPropagateAttributesEarly::ID = 0;
146 char AMDGPUPropagateAttributesLate::ID = 0;
147
148 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
149 "amdgpu-propagate-attributes-early",
150 "Early propagate attributes from kernels to functions",
151 false, false)
152 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
153 "amdgpu-propagate-attributes-late",
154 "Late propagate attributes from kernels to functions",
155 false, false)
156
157 Function *
findFunction(const FeatureBitset & FeaturesNeeded,Function * OrigF)158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159 Function *OrigF) {
160 // TODO: search for clone's clones.
161 for (Clone &C : Clones)
162 if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
163 return C.NewF;
164
165 return nullptr;
166 }
167
process(Module & M)168 bool AMDGPUPropagateAttributes::process(Module &M) {
169 for (auto &F : M.functions())
170 if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171 Roots.insert(&F);
172
173 return process();
174 }
175
process(Function & F)176 bool AMDGPUPropagateAttributes::process(Function &F) {
177 Roots.insert(&F);
178 return process();
179 }
180
process()181 bool AMDGPUPropagateAttributes::process() {
182 bool Changed = false;
183 SmallSet<Function *, 32> NewRoots;
184 SmallSet<Function *, 32> Replaced;
185
186 if (Roots.empty())
187 return false;
188 Module &M = *(*Roots.begin())->getParent();
189
190 do {
191 Roots.insert(NewRoots.begin(), NewRoots.end());
192 NewRoots.clear();
193
194 for (auto &F : M.functions()) {
195 if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
196 continue;
197
198 const FeatureBitset &CalleeBits =
199 TM->getSubtargetImpl(F)->getFeatureBits();
200 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
201
202 for (User *U : F.users()) {
203 Instruction *I = dyn_cast<Instruction>(U);
204 if (!I)
205 continue;
206 CallBase *CI = dyn_cast<CallBase>(I);
207 if (!CI)
208 continue;
209 Function *Caller = CI->getCaller();
210 if (!Caller)
211 continue;
212 if (!Roots.count(Caller))
213 continue;
214
215 const FeatureBitset &CallerBits =
216 TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
217
218 if (CallerBits == (CalleeBits & TargetFeatures)) {
219 NewRoots.insert(&F);
220 continue;
221 }
222
223 Function *NewF = findFunction(CallerBits, &F);
224 if (!NewF) {
225 FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
226 CallerBits);
227 if (!AllowClone) {
228 // This may set different features on different iteartions if
229 // there is a contradiction in callers' attributes. In this case
230 // we rely on a second pass running on Module, which is allowed
231 // to clone.
232 setFeatures(F, NewFeatures);
233 NewRoots.insert(&F);
234 Changed = true;
235 break;
236 }
237
238 NewF = cloneWithFeatures(F, NewFeatures);
239 Clones.push_back(Clone(CallerBits, &F, NewF));
240 NewRoots.insert(NewF);
241 }
242
243 ToReplace.push_back(std::make_pair(CI, NewF));
244 Replaced.insert(&F);
245
246 Changed = true;
247 }
248
249 while (!ToReplace.empty()) {
250 auto R = ToReplace.pop_back_val();
251 R.first->setCalledFunction(R.second);
252 }
253 }
254 } while (!NewRoots.empty());
255
256 for (Function *F : Replaced) {
257 if (F->use_empty())
258 F->eraseFromParent();
259 }
260
261 return Changed;
262 }
263
264 Function *
cloneWithFeatures(Function & F,const FeatureBitset & NewFeatures)265 AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
266 const FeatureBitset &NewFeatures) {
267 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
268
269 ValueToValueMapTy dummy;
270 Function *NewF = CloneFunction(&F, dummy);
271 setFeatures(*NewF, NewFeatures);
272
273 // Swap names. If that is the only clone it will retain the name of now
274 // dead value.
275 if (F.hasName()) {
276 std::string NewName = NewF->getName();
277 NewF->takeName(&F);
278 F.setName(NewName);
279
280 // Name has changed, it does not need an external symbol.
281 F.setVisibility(GlobalValue::DefaultVisibility);
282 F.setLinkage(GlobalValue::InternalLinkage);
283 }
284
285 return NewF;
286 }
287
setFeatures(Function & F,const FeatureBitset & NewFeatures)288 void AMDGPUPropagateAttributes::setFeatures(Function &F,
289 const FeatureBitset &NewFeatures) {
290 std::string NewFeatureStr = getFeatureString(NewFeatures);
291
292 LLVM_DEBUG(dbgs() << "Set features "
293 << getFeatureString(NewFeatures & TargetFeatures)
294 << " on " << F.getName() << '\n');
295
296 F.removeFnAttr("target-features");
297 F.addFnAttr("target-features", NewFeatureStr);
298 }
299
300 std::string
getFeatureString(const FeatureBitset & Features) const301 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
302 {
303 std::string Ret;
304 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
305 if (Features[KV.Value])
306 Ret += (StringRef("+") + KV.Key + ",").str();
307 else if (TargetFeatures[KV.Value])
308 Ret += (StringRef("-") + KV.Key + ",").str();
309 }
310 Ret.pop_back(); // Remove last comma.
311 return Ret;
312 }
313
runOnFunction(Function & F)314 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
315 if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
316 return false;
317
318 return AMDGPUPropagateAttributes(TM, false).process(F);
319 }
320
runOnModule(Module & M)321 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
322 if (!TM)
323 return false;
324
325 return AMDGPUPropagateAttributes(TM, true).process(M);
326 }
327
328 FunctionPass
createAMDGPUPropagateAttributesEarlyPass(const TargetMachine * TM)329 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
330 return new AMDGPUPropagateAttributesEarly(TM);
331 }
332
333 ModulePass
createAMDGPUPropagateAttributesLatePass(const TargetMachine * TM)334 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
335 return new AMDGPUPropagateAttributesLate(TM);
336 }
337