1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10
11 #include "polly/CodeGen/PerfMonitor.h"
12 #include "polly/CodeGen/RuntimeDebugBuilder.h"
13 #include "polly/ScopInfo.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/IR/IntrinsicsX86.h"
17
18 using namespace llvm;
19 using namespace polly;
20
getAtExit()21 Function *PerfMonitor::getAtExit() {
22 const char *Name = "atexit";
23 Function *F = M->getFunction(Name);
24
25 if (!F) {
26 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
27 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
28 {Builder.getInt8PtrTy()}, false);
29 F = Function::Create(Ty, Linkage, Name, M);
30 }
31
32 return F;
33 }
34
addToGlobalConstructors(Function * Fn)35 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
36 const char *Name = "llvm.global_ctors";
37 GlobalVariable *GV = M->getGlobalVariable(Name);
38 std::vector<Constant *> V;
39
40 if (GV) {
41 Constant *Array = GV->getInitializer();
42 for (Value *X : Array->operand_values())
43 V.push_back(cast<Constant>(X));
44 GV->eraseFromParent();
45 }
46
47 StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
48 Builder.getInt8PtrTy());
49
50 V.push_back(
51 ConstantStruct::get(ST, Builder.getInt32(10), Fn,
52 ConstantPointerNull::get(Builder.getInt8PtrTy())));
53 ArrayType *Ty = ArrayType::get(ST, V.size());
54
55 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
56 ConstantArray::get(Ty, V), Name, nullptr,
57 GlobalVariable::NotThreadLocal);
58 }
59
getRDTSCP()60 Function *PerfMonitor::getRDTSCP() {
61 return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
62 }
63
PerfMonitor(const Scop & S,Module * M)64 PerfMonitor::PerfMonitor(const Scop &S, Module *M)
65 : M(M), Builder(M->getContext()), S(S) {
66 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
67 Supported = true;
68 else
69 Supported = false;
70 }
71
TryRegisterGlobal(Module * M,const char * Name,Constant * InitialValue,Value ** Location)72 static void TryRegisterGlobal(Module *M, const char *Name,
73 Constant *InitialValue, Value **Location) {
74 *Location = M->getGlobalVariable(Name);
75
76 if (!*Location)
77 *Location = new GlobalVariable(
78 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
79 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
80 }
81
82 // Generate a unique name that is usable as a LLVM name for a scop to name its
83 // performance counter.
GetScopUniqueVarname(const Scop & S)84 static std::string GetScopUniqueVarname(const Scop &S) {
85 std::string EntryString, ExitString;
86 std::tie(EntryString, ExitString) = S.getEntryExitStr();
87
88 return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
89 EntryString + "__to__" + ExitString)
90 .str();
91 }
92
addScopCounter()93 void PerfMonitor::addScopCounter() {
94 const std::string varname = GetScopUniqueVarname(S);
95 TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
96 &CyclesInCurrentScopPtr);
97
98 TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
99 &TripCountForCurrentScopPtr);
100 }
101
addGlobalVariables()102 void PerfMonitor::addGlobalVariables() {
103 TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
104 &CyclesTotalStartPtr);
105
106 TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
107 &AlreadyInitializedPtr);
108
109 TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
110 &CyclesInScopsPtr);
111
112 TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
113 &CyclesInScopStartPtr);
114 }
115
116 static const char *InitFunctionName = "__polly_perf_init";
117 static const char *FinalReportingFunctionName = "__polly_perf_final";
118
119 static BasicBlock *FinalStartBB = nullptr;
120 static ReturnInst *ReturnFromFinal = nullptr;
121
insertFinalReporting()122 Function *PerfMonitor::insertFinalReporting() {
123 // Create new function.
124 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
125 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
126 Function *ExitFn =
127 Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
128 FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
129 Builder.SetInsertPoint(FinalStartBB);
130
131 if (!Supported) {
132 RuntimeDebugBuilder::createCPUPrinter(
133 Builder, "Polly runtime information generation not supported\n");
134 Builder.CreateRetVoid();
135 return ExitFn;
136 }
137
138 // Measure current cycles and compute final timings.
139 Function *RDTSCPFn = getRDTSCP();
140
141 Value *CurrentCycles =
142 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
143 Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
144 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
145 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
146
147 // Print the runtime information.
148 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
149 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
150 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
151 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
152 "\n");
153
154 // Print the preamble for per-scop information.
155 RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
156 RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
157 RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
158
159 RuntimeDebugBuilder::createCPUPrinter(
160 Builder, "scop function, "
161 "entry block name, exit block name, total time, trip count\n");
162 ReturnFromFinal = Builder.CreateRetVoid();
163 return ExitFn;
164 }
165
AppendScopReporting()166 void PerfMonitor::AppendScopReporting() {
167 if (!Supported)
168 return;
169
170 assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
171 "PerfMonitor::insertFinalReporting.");
172 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
173 "PerfMonitor::insertFinalReporting.");
174
175 Builder.SetInsertPoint(FinalStartBB);
176 ReturnFromFinal->eraseFromParent();
177
178 Value *CyclesInCurrentScop =
179 Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
180
181 Value *TripCountForCurrentScop =
182 Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
183
184 std::string EntryName, ExitName;
185 std::tie(EntryName, ExitName) = S.getEntryExitStr();
186
187 // print in CSV for easy parsing with other tools.
188 RuntimeDebugBuilder::createCPUPrinter(
189 Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
190 CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
191
192 ReturnFromFinal = Builder.CreateRetVoid();
193 }
194
195 static Function *FinalReporting = nullptr;
196
initialize()197 void PerfMonitor::initialize() {
198 addGlobalVariables();
199 addScopCounter();
200
201 // Ensure that we only add the final reporting function once.
202 // On later invocations, append to the reporting function.
203 if (!FinalReporting) {
204 FinalReporting = insertFinalReporting();
205
206 Function *InitFn = insertInitFunction(FinalReporting);
207 addToGlobalConstructors(InitFn);
208 }
209
210 AppendScopReporting();
211 }
212
insertInitFunction(Function * FinalReporting)213 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
214 // Insert function definition and BBs.
215 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
216 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
217 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
218 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
219 BasicBlock *EarlyReturn =
220 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
221 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
222
223 Builder.SetInsertPoint(Start);
224
225 // Check if this function was already run. If yes, return.
226 //
227 // In case profiling has been enabled in multiple translation units, the
228 // initializer function will be added to the global constructors list of
229 // each translation unit. When merging translation units, the global
230 // constructor lists are just appended, such that the initializer will appear
231 // multiple times. To avoid initializations being run multiple times (and
232 // especially to avoid that atExitFn is called more than once), we bail
233 // out if the initializer is run more than once.
234 Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
235 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
236 Builder.SetInsertPoint(EarlyReturn);
237 Builder.CreateRetVoid();
238
239 // Keep track that this function has been run once.
240 Builder.SetInsertPoint(InitBB);
241 Value *True = Builder.getInt1(true);
242 Builder.CreateStore(True, AlreadyInitializedPtr);
243
244 // Register the final reporting function with atexit().
245 Value *FinalReportingPtr =
246 Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
247 Function *AtExitFn = getAtExit();
248 Builder.CreateCall(AtExitFn, {FinalReportingPtr});
249
250 if (Supported) {
251 // Read the currently cycle counter and store the result for later.
252 Function *RDTSCPFn = getRDTSCP();
253 Value *CurrentCycles =
254 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
255 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
256 }
257 Builder.CreateRetVoid();
258
259 return InitFn;
260 }
261
insertRegionStart(Instruction * InsertBefore)262 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
263 if (!Supported)
264 return;
265
266 Builder.SetInsertPoint(InsertBefore);
267 Function *RDTSCPFn = getRDTSCP();
268 Value *CurrentCycles =
269 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
270 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
271 }
272
insertRegionEnd(Instruction * InsertBefore)273 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
274 if (!Supported)
275 return;
276
277 Builder.SetInsertPoint(InsertBefore);
278 Function *RDTSCPFn = getRDTSCP();
279 LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
280 Value *CurrentCycles =
281 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
282 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
283 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
284 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
285 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
286
287 Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
288 CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
289 Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
290
291 Value *TripCountForCurrentScop =
292 Builder.CreateLoad(TripCountForCurrentScopPtr, true);
293 TripCountForCurrentScop =
294 Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
295 Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
296 true);
297 }
298