1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Instrumentation-based profile-guided optimization
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CodeGenPGO.h"
15 #include "CodeGenFunction.h"
16 #include "CoverageMappingGen.h"
17 #include "clang/AST/RecursiveASTVisitor.h"
18 #include "clang/AST/StmtVisitor.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/IR/MDBuilder.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/MD5.h"
24
25 static llvm::cl::opt<bool> EnableValueProfiling(
26 "enable-value-profiling", llvm::cl::ZeroOrMore,
27 llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
28
29 using namespace clang;
30 using namespace CodeGen;
31
setFuncName(StringRef Name,llvm::GlobalValue::LinkageTypes Linkage)32 void CodeGenPGO::setFuncName(StringRef Name,
33 llvm::GlobalValue::LinkageTypes Linkage) {
34 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
35 FuncName = llvm::getPGOFuncName(
36 Name, Linkage, CGM.getCodeGenOpts().MainFileName,
37 PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
38
39 // If we're generating a profile, create a variable for the name.
40 if (CGM.getCodeGenOpts().hasProfileClangInstr())
41 FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
42 }
43
setFuncName(llvm::Function * Fn)44 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
45 setFuncName(Fn->getName(), Fn->getLinkage());
46 // Create PGOFuncName meta data.
47 llvm::createPGOFuncNameMetadata(*Fn, FuncName);
48 }
49
50 namespace {
51 /// \brief Stable hasher for PGO region counters.
52 ///
53 /// PGOHash produces a stable hash of a given function's control flow.
54 ///
55 /// Changing the output of this hash will invalidate all previously generated
56 /// profiles -- i.e., don't do it.
57 ///
58 /// \note When this hash does eventually change (years?), we still need to
59 /// support old hashes. We'll need to pull in the version number from the
60 /// profile data format and use the matching hash function.
61 class PGOHash {
62 uint64_t Working;
63 unsigned Count;
64 llvm::MD5 MD5;
65
66 static const int NumBitsPerType = 6;
67 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
68 static const unsigned TooBig = 1u << NumBitsPerType;
69
70 public:
71 /// \brief Hash values for AST nodes.
72 ///
73 /// Distinct values for AST nodes that have region counters attached.
74 ///
75 /// These values must be stable. All new members must be added at the end,
76 /// and no members should be removed. Changing the enumeration value for an
77 /// AST node will affect the hash of every function that contains that node.
78 enum HashType : unsigned char {
79 None = 0,
80 LabelStmt = 1,
81 WhileStmt,
82 DoStmt,
83 ForStmt,
84 CXXForRangeStmt,
85 ObjCForCollectionStmt,
86 SwitchStmt,
87 CaseStmt,
88 DefaultStmt,
89 IfStmt,
90 CXXTryStmt,
91 CXXCatchStmt,
92 ConditionalOperator,
93 BinaryOperatorLAnd,
94 BinaryOperatorLOr,
95 BinaryConditionalOperator,
96
97 // Keep this last. It's for the static assert that follows.
98 LastHashType
99 };
100 static_assert(LastHashType <= TooBig, "Too many types in HashType");
101
102 // TODO: When this format changes, take in a version number here, and use the
103 // old hash calculation for file formats that used the old hash.
PGOHash()104 PGOHash() : Working(0), Count(0) {}
105 void combine(HashType Type);
106 uint64_t finalize();
107 };
108 const int PGOHash::NumBitsPerType;
109 const unsigned PGOHash::NumTypesPerWord;
110 const unsigned PGOHash::TooBig;
111
112 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
113 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
114 /// The next counter value to assign.
115 unsigned NextCounter;
116 /// The function hash.
117 PGOHash Hash;
118 /// The map of statements to counters.
119 llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
120
MapRegionCounters__anon042c96440111::MapRegionCounters121 MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
122 : NextCounter(0), CounterMap(CounterMap) {}
123
124 // Blocks and lambdas are handled as separate functions, so we need not
125 // traverse them in the parent context.
TraverseBlockExpr__anon042c96440111::MapRegionCounters126 bool TraverseBlockExpr(BlockExpr *BE) { return true; }
TraverseLambdaBody__anon042c96440111::MapRegionCounters127 bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
TraverseCapturedStmt__anon042c96440111::MapRegionCounters128 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
129
VisitDecl__anon042c96440111::MapRegionCounters130 bool VisitDecl(const Decl *D) {
131 switch (D->getKind()) {
132 default:
133 break;
134 case Decl::Function:
135 case Decl::CXXMethod:
136 case Decl::CXXConstructor:
137 case Decl::CXXDestructor:
138 case Decl::CXXConversion:
139 case Decl::ObjCMethod:
140 case Decl::Block:
141 case Decl::Captured:
142 CounterMap[D->getBody()] = NextCounter++;
143 break;
144 }
145 return true;
146 }
147
VisitStmt__anon042c96440111::MapRegionCounters148 bool VisitStmt(const Stmt *S) {
149 auto Type = getHashType(S);
150 if (Type == PGOHash::None)
151 return true;
152
153 CounterMap[S] = NextCounter++;
154 Hash.combine(Type);
155 return true;
156 }
getHashType__anon042c96440111::MapRegionCounters157 PGOHash::HashType getHashType(const Stmt *S) {
158 switch (S->getStmtClass()) {
159 default:
160 break;
161 case Stmt::LabelStmtClass:
162 return PGOHash::LabelStmt;
163 case Stmt::WhileStmtClass:
164 return PGOHash::WhileStmt;
165 case Stmt::DoStmtClass:
166 return PGOHash::DoStmt;
167 case Stmt::ForStmtClass:
168 return PGOHash::ForStmt;
169 case Stmt::CXXForRangeStmtClass:
170 return PGOHash::CXXForRangeStmt;
171 case Stmt::ObjCForCollectionStmtClass:
172 return PGOHash::ObjCForCollectionStmt;
173 case Stmt::SwitchStmtClass:
174 return PGOHash::SwitchStmt;
175 case Stmt::CaseStmtClass:
176 return PGOHash::CaseStmt;
177 case Stmt::DefaultStmtClass:
178 return PGOHash::DefaultStmt;
179 case Stmt::IfStmtClass:
180 return PGOHash::IfStmt;
181 case Stmt::CXXTryStmtClass:
182 return PGOHash::CXXTryStmt;
183 case Stmt::CXXCatchStmtClass:
184 return PGOHash::CXXCatchStmt;
185 case Stmt::ConditionalOperatorClass:
186 return PGOHash::ConditionalOperator;
187 case Stmt::BinaryConditionalOperatorClass:
188 return PGOHash::BinaryConditionalOperator;
189 case Stmt::BinaryOperatorClass: {
190 const BinaryOperator *BO = cast<BinaryOperator>(S);
191 if (BO->getOpcode() == BO_LAnd)
192 return PGOHash::BinaryOperatorLAnd;
193 if (BO->getOpcode() == BO_LOr)
194 return PGOHash::BinaryOperatorLOr;
195 break;
196 }
197 }
198 return PGOHash::None;
199 }
200 };
201
202 /// A StmtVisitor that propagates the raw counts through the AST and
203 /// records the count at statements where the value may change.
204 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
205 /// PGO state.
206 CodeGenPGO &PGO;
207
208 /// A flag that is set when the current count should be recorded on the
209 /// next statement, such as at the exit of a loop.
210 bool RecordNextStmtCount;
211
212 /// The count at the current location in the traversal.
213 uint64_t CurrentCount;
214
215 /// The map of statements to count values.
216 llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
217
218 /// BreakContinueStack - Keep counts of breaks and continues inside loops.
219 struct BreakContinue {
220 uint64_t BreakCount;
221 uint64_t ContinueCount;
BreakContinue__anon042c96440111::ComputeRegionCounts::BreakContinue222 BreakContinue() : BreakCount(0), ContinueCount(0) {}
223 };
224 SmallVector<BreakContinue, 8> BreakContinueStack;
225
ComputeRegionCounts__anon042c96440111::ComputeRegionCounts226 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
227 CodeGenPGO &PGO)
228 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
229
RecordStmtCount__anon042c96440111::ComputeRegionCounts230 void RecordStmtCount(const Stmt *S) {
231 if (RecordNextStmtCount) {
232 CountMap[S] = CurrentCount;
233 RecordNextStmtCount = false;
234 }
235 }
236
237 /// Set and return the current count.
setCount__anon042c96440111::ComputeRegionCounts238 uint64_t setCount(uint64_t Count) {
239 CurrentCount = Count;
240 return Count;
241 }
242
VisitStmt__anon042c96440111::ComputeRegionCounts243 void VisitStmt(const Stmt *S) {
244 RecordStmtCount(S);
245 for (const Stmt *Child : S->children())
246 if (Child)
247 this->Visit(Child);
248 }
249
VisitFunctionDecl__anon042c96440111::ComputeRegionCounts250 void VisitFunctionDecl(const FunctionDecl *D) {
251 // Counter tracks entry to the function body.
252 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
253 CountMap[D->getBody()] = BodyCount;
254 Visit(D->getBody());
255 }
256
257 // Skip lambda expressions. We visit these as FunctionDecls when we're
258 // generating them and aren't interested in the body when generating a
259 // parent context.
VisitLambdaExpr__anon042c96440111::ComputeRegionCounts260 void VisitLambdaExpr(const LambdaExpr *LE) {}
261
VisitCapturedDecl__anon042c96440111::ComputeRegionCounts262 void VisitCapturedDecl(const CapturedDecl *D) {
263 // Counter tracks entry to the capture body.
264 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
265 CountMap[D->getBody()] = BodyCount;
266 Visit(D->getBody());
267 }
268
VisitObjCMethodDecl__anon042c96440111::ComputeRegionCounts269 void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
270 // Counter tracks entry to the method body.
271 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
272 CountMap[D->getBody()] = BodyCount;
273 Visit(D->getBody());
274 }
275
VisitBlockDecl__anon042c96440111::ComputeRegionCounts276 void VisitBlockDecl(const BlockDecl *D) {
277 // Counter tracks entry to the block body.
278 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
279 CountMap[D->getBody()] = BodyCount;
280 Visit(D->getBody());
281 }
282
VisitReturnStmt__anon042c96440111::ComputeRegionCounts283 void VisitReturnStmt(const ReturnStmt *S) {
284 RecordStmtCount(S);
285 if (S->getRetValue())
286 Visit(S->getRetValue());
287 CurrentCount = 0;
288 RecordNextStmtCount = true;
289 }
290
VisitCXXThrowExpr__anon042c96440111::ComputeRegionCounts291 void VisitCXXThrowExpr(const CXXThrowExpr *E) {
292 RecordStmtCount(E);
293 if (E->getSubExpr())
294 Visit(E->getSubExpr());
295 CurrentCount = 0;
296 RecordNextStmtCount = true;
297 }
298
VisitGotoStmt__anon042c96440111::ComputeRegionCounts299 void VisitGotoStmt(const GotoStmt *S) {
300 RecordStmtCount(S);
301 CurrentCount = 0;
302 RecordNextStmtCount = true;
303 }
304
VisitLabelStmt__anon042c96440111::ComputeRegionCounts305 void VisitLabelStmt(const LabelStmt *S) {
306 RecordNextStmtCount = false;
307 // Counter tracks the block following the label.
308 uint64_t BlockCount = setCount(PGO.getRegionCount(S));
309 CountMap[S] = BlockCount;
310 Visit(S->getSubStmt());
311 }
312
VisitBreakStmt__anon042c96440111::ComputeRegionCounts313 void VisitBreakStmt(const BreakStmt *S) {
314 RecordStmtCount(S);
315 assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
316 BreakContinueStack.back().BreakCount += CurrentCount;
317 CurrentCount = 0;
318 RecordNextStmtCount = true;
319 }
320
VisitContinueStmt__anon042c96440111::ComputeRegionCounts321 void VisitContinueStmt(const ContinueStmt *S) {
322 RecordStmtCount(S);
323 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
324 BreakContinueStack.back().ContinueCount += CurrentCount;
325 CurrentCount = 0;
326 RecordNextStmtCount = true;
327 }
328
VisitWhileStmt__anon042c96440111::ComputeRegionCounts329 void VisitWhileStmt(const WhileStmt *S) {
330 RecordStmtCount(S);
331 uint64_t ParentCount = CurrentCount;
332
333 BreakContinueStack.push_back(BreakContinue());
334 // Visit the body region first so the break/continue adjustments can be
335 // included when visiting the condition.
336 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
337 CountMap[S->getBody()] = CurrentCount;
338 Visit(S->getBody());
339 uint64_t BackedgeCount = CurrentCount;
340
341 // ...then go back and propagate counts through the condition. The count
342 // at the start of the condition is the sum of the incoming edges,
343 // the backedge from the end of the loop body, and the edges from
344 // continue statements.
345 BreakContinue BC = BreakContinueStack.pop_back_val();
346 uint64_t CondCount =
347 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
348 CountMap[S->getCond()] = CondCount;
349 Visit(S->getCond());
350 setCount(BC.BreakCount + CondCount - BodyCount);
351 RecordNextStmtCount = true;
352 }
353
VisitDoStmt__anon042c96440111::ComputeRegionCounts354 void VisitDoStmt(const DoStmt *S) {
355 RecordStmtCount(S);
356 uint64_t LoopCount = PGO.getRegionCount(S);
357
358 BreakContinueStack.push_back(BreakContinue());
359 // The count doesn't include the fallthrough from the parent scope. Add it.
360 uint64_t BodyCount = setCount(LoopCount + CurrentCount);
361 CountMap[S->getBody()] = BodyCount;
362 Visit(S->getBody());
363 uint64_t BackedgeCount = CurrentCount;
364
365 BreakContinue BC = BreakContinueStack.pop_back_val();
366 // The count at the start of the condition is equal to the count at the
367 // end of the body, plus any continues.
368 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
369 CountMap[S->getCond()] = CondCount;
370 Visit(S->getCond());
371 setCount(BC.BreakCount + CondCount - LoopCount);
372 RecordNextStmtCount = true;
373 }
374
VisitForStmt__anon042c96440111::ComputeRegionCounts375 void VisitForStmt(const ForStmt *S) {
376 RecordStmtCount(S);
377 if (S->getInit())
378 Visit(S->getInit());
379
380 uint64_t ParentCount = CurrentCount;
381
382 BreakContinueStack.push_back(BreakContinue());
383 // Visit the body region first. (This is basically the same as a while
384 // loop; see further comments in VisitWhileStmt.)
385 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
386 CountMap[S->getBody()] = BodyCount;
387 Visit(S->getBody());
388 uint64_t BackedgeCount = CurrentCount;
389 BreakContinue BC = BreakContinueStack.pop_back_val();
390
391 // The increment is essentially part of the body but it needs to include
392 // the count for all the continue statements.
393 if (S->getInc()) {
394 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
395 CountMap[S->getInc()] = IncCount;
396 Visit(S->getInc());
397 }
398
399 // ...then go back and propagate counts through the condition.
400 uint64_t CondCount =
401 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
402 if (S->getCond()) {
403 CountMap[S->getCond()] = CondCount;
404 Visit(S->getCond());
405 }
406 setCount(BC.BreakCount + CondCount - BodyCount);
407 RecordNextStmtCount = true;
408 }
409
VisitCXXForRangeStmt__anon042c96440111::ComputeRegionCounts410 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
411 RecordStmtCount(S);
412 Visit(S->getLoopVarStmt());
413 Visit(S->getRangeStmt());
414 Visit(S->getBeginStmt());
415 Visit(S->getEndStmt());
416
417 uint64_t ParentCount = CurrentCount;
418 BreakContinueStack.push_back(BreakContinue());
419 // Visit the body region first. (This is basically the same as a while
420 // loop; see further comments in VisitWhileStmt.)
421 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
422 CountMap[S->getBody()] = BodyCount;
423 Visit(S->getBody());
424 uint64_t BackedgeCount = CurrentCount;
425 BreakContinue BC = BreakContinueStack.pop_back_val();
426
427 // The increment is essentially part of the body but it needs to include
428 // the count for all the continue statements.
429 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
430 CountMap[S->getInc()] = IncCount;
431 Visit(S->getInc());
432
433 // ...then go back and propagate counts through the condition.
434 uint64_t CondCount =
435 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
436 CountMap[S->getCond()] = CondCount;
437 Visit(S->getCond());
438 setCount(BC.BreakCount + CondCount - BodyCount);
439 RecordNextStmtCount = true;
440 }
441
VisitObjCForCollectionStmt__anon042c96440111::ComputeRegionCounts442 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
443 RecordStmtCount(S);
444 Visit(S->getElement());
445 uint64_t ParentCount = CurrentCount;
446 BreakContinueStack.push_back(BreakContinue());
447 // Counter tracks the body of the loop.
448 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
449 CountMap[S->getBody()] = BodyCount;
450 Visit(S->getBody());
451 uint64_t BackedgeCount = CurrentCount;
452 BreakContinue BC = BreakContinueStack.pop_back_val();
453
454 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
455 BodyCount);
456 RecordNextStmtCount = true;
457 }
458
VisitSwitchStmt__anon042c96440111::ComputeRegionCounts459 void VisitSwitchStmt(const SwitchStmt *S) {
460 RecordStmtCount(S);
461 Visit(S->getCond());
462 CurrentCount = 0;
463 BreakContinueStack.push_back(BreakContinue());
464 Visit(S->getBody());
465 // If the switch is inside a loop, add the continue counts.
466 BreakContinue BC = BreakContinueStack.pop_back_val();
467 if (!BreakContinueStack.empty())
468 BreakContinueStack.back().ContinueCount += BC.ContinueCount;
469 // Counter tracks the exit block of the switch.
470 setCount(PGO.getRegionCount(S));
471 RecordNextStmtCount = true;
472 }
473
VisitSwitchCase__anon042c96440111::ComputeRegionCounts474 void VisitSwitchCase(const SwitchCase *S) {
475 RecordNextStmtCount = false;
476 // Counter for this particular case. This counts only jumps from the
477 // switch header and does not include fallthrough from the case before
478 // this one.
479 uint64_t CaseCount = PGO.getRegionCount(S);
480 setCount(CurrentCount + CaseCount);
481 // We need the count without fallthrough in the mapping, so it's more useful
482 // for branch probabilities.
483 CountMap[S] = CaseCount;
484 RecordNextStmtCount = true;
485 Visit(S->getSubStmt());
486 }
487
VisitIfStmt__anon042c96440111::ComputeRegionCounts488 void VisitIfStmt(const IfStmt *S) {
489 RecordStmtCount(S);
490 uint64_t ParentCount = CurrentCount;
491 Visit(S->getCond());
492
493 // Counter tracks the "then" part of an if statement. The count for
494 // the "else" part, if it exists, will be calculated from this counter.
495 uint64_t ThenCount = setCount(PGO.getRegionCount(S));
496 CountMap[S->getThen()] = ThenCount;
497 Visit(S->getThen());
498 uint64_t OutCount = CurrentCount;
499
500 uint64_t ElseCount = ParentCount - ThenCount;
501 if (S->getElse()) {
502 setCount(ElseCount);
503 CountMap[S->getElse()] = ElseCount;
504 Visit(S->getElse());
505 OutCount += CurrentCount;
506 } else
507 OutCount += ElseCount;
508 setCount(OutCount);
509 RecordNextStmtCount = true;
510 }
511
VisitCXXTryStmt__anon042c96440111::ComputeRegionCounts512 void VisitCXXTryStmt(const CXXTryStmt *S) {
513 RecordStmtCount(S);
514 Visit(S->getTryBlock());
515 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
516 Visit(S->getHandler(I));
517 // Counter tracks the continuation block of the try statement.
518 setCount(PGO.getRegionCount(S));
519 RecordNextStmtCount = true;
520 }
521
VisitCXXCatchStmt__anon042c96440111::ComputeRegionCounts522 void VisitCXXCatchStmt(const CXXCatchStmt *S) {
523 RecordNextStmtCount = false;
524 // Counter tracks the catch statement's handler block.
525 uint64_t CatchCount = setCount(PGO.getRegionCount(S));
526 CountMap[S] = CatchCount;
527 Visit(S->getHandlerBlock());
528 }
529
VisitAbstractConditionalOperator__anon042c96440111::ComputeRegionCounts530 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
531 RecordStmtCount(E);
532 uint64_t ParentCount = CurrentCount;
533 Visit(E->getCond());
534
535 // Counter tracks the "true" part of a conditional operator. The
536 // count in the "false" part will be calculated from this counter.
537 uint64_t TrueCount = setCount(PGO.getRegionCount(E));
538 CountMap[E->getTrueExpr()] = TrueCount;
539 Visit(E->getTrueExpr());
540 uint64_t OutCount = CurrentCount;
541
542 uint64_t FalseCount = setCount(ParentCount - TrueCount);
543 CountMap[E->getFalseExpr()] = FalseCount;
544 Visit(E->getFalseExpr());
545 OutCount += CurrentCount;
546
547 setCount(OutCount);
548 RecordNextStmtCount = true;
549 }
550
VisitBinLAnd__anon042c96440111::ComputeRegionCounts551 void VisitBinLAnd(const BinaryOperator *E) {
552 RecordStmtCount(E);
553 uint64_t ParentCount = CurrentCount;
554 Visit(E->getLHS());
555 // Counter tracks the right hand side of a logical and operator.
556 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
557 CountMap[E->getRHS()] = RHSCount;
558 Visit(E->getRHS());
559 setCount(ParentCount + RHSCount - CurrentCount);
560 RecordNextStmtCount = true;
561 }
562
VisitBinLOr__anon042c96440111::ComputeRegionCounts563 void VisitBinLOr(const BinaryOperator *E) {
564 RecordStmtCount(E);
565 uint64_t ParentCount = CurrentCount;
566 Visit(E->getLHS());
567 // Counter tracks the right hand side of a logical or operator.
568 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
569 CountMap[E->getRHS()] = RHSCount;
570 Visit(E->getRHS());
571 setCount(ParentCount + RHSCount - CurrentCount);
572 RecordNextStmtCount = true;
573 }
574 };
575 } // end anonymous namespace
576
combine(HashType Type)577 void PGOHash::combine(HashType Type) {
578 // Check that we never combine 0 and only have six bits.
579 assert(Type && "Hash is invalid: unexpected type 0");
580 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
581
582 // Pass through MD5 if enough work has built up.
583 if (Count && Count % NumTypesPerWord == 0) {
584 using namespace llvm::support;
585 uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
586 MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
587 Working = 0;
588 }
589
590 // Accumulate the current type.
591 ++Count;
592 Working = Working << NumBitsPerType | Type;
593 }
594
finalize()595 uint64_t PGOHash::finalize() {
596 // Use Working as the hash directly if we never used MD5.
597 if (Count <= NumTypesPerWord)
598 // No need to byte swap here, since none of the math was endian-dependent.
599 // This number will be byte-swapped as required on endianness transitions,
600 // so we will see the same value on the other side.
601 return Working;
602
603 // Check for remaining work in Working.
604 if (Working)
605 MD5.update(Working);
606
607 // Finalize the MD5 and return the hash.
608 llvm::MD5::MD5Result Result;
609 MD5.final(Result);
610 using namespace llvm::support;
611 return endian::read<uint64_t, little, unaligned>(Result);
612 }
613
assignRegionCounters(GlobalDecl GD,llvm::Function * Fn)614 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
615 const Decl *D = GD.getDecl();
616 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
617 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
618 if (!InstrumentRegions && !PGOReader)
619 return;
620 if (D->isImplicit())
621 return;
622 // Constructors and destructors may be represented by several functions in IR.
623 // If so, instrument only base variant, others are implemented by delegation
624 // to the base one, it would be counted twice otherwise.
625 if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
626 ((isa<CXXConstructorDecl>(GD.getDecl()) &&
627 GD.getCtorType() != Ctor_Base) ||
628 (isa<CXXDestructorDecl>(GD.getDecl()) &&
629 GD.getDtorType() != Dtor_Base))) {
630 return;
631 }
632 CGM.ClearUnusedCoverageMapping(D);
633 setFuncName(Fn);
634
635 mapRegionCounters(D);
636 if (CGM.getCodeGenOpts().CoverageMapping)
637 emitCounterRegionMapping(D);
638 if (PGOReader) {
639 SourceManager &SM = CGM.getContext().getSourceManager();
640 loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
641 computeRegionCounts(D);
642 applyFunctionAttributes(PGOReader, Fn);
643 }
644 }
645
mapRegionCounters(const Decl * D)646 void CodeGenPGO::mapRegionCounters(const Decl *D) {
647 RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
648 MapRegionCounters Walker(*RegionCounterMap);
649 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
650 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
651 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
652 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
653 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
654 Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
655 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
656 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
657 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
658 NumRegionCounters = Walker.NextCounter;
659 FunctionHash = Walker.Hash.finalize();
660 }
661
skipRegionMappingForDecl(const Decl * D)662 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
663 if (SkipCoverageMapping)
664 return true;
665
666 // Don't map the functions in system headers.
667 const auto &SM = CGM.getContext().getSourceManager();
668 auto Loc = D->getBody()->getLocStart();
669 return SM.isInSystemHeader(Loc);
670 }
671
emitCounterRegionMapping(const Decl * D)672 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
673 if (skipRegionMappingForDecl(D))
674 return;
675
676 std::string CoverageMapping;
677 llvm::raw_string_ostream OS(CoverageMapping);
678 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
679 CGM.getContext().getSourceManager(),
680 CGM.getLangOpts(), RegionCounterMap.get());
681 MappingGen.emitCounterMapping(D, OS);
682 OS.flush();
683
684 if (CoverageMapping.empty())
685 return;
686
687 CGM.getCoverageMapping()->addFunctionMappingRecord(
688 FuncNameVar, FuncName, FunctionHash, CoverageMapping);
689 }
690
691 void
emitEmptyCounterMapping(const Decl * D,StringRef Name,llvm::GlobalValue::LinkageTypes Linkage)692 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
693 llvm::GlobalValue::LinkageTypes Linkage) {
694 if (skipRegionMappingForDecl(D))
695 return;
696
697 std::string CoverageMapping;
698 llvm::raw_string_ostream OS(CoverageMapping);
699 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
700 CGM.getContext().getSourceManager(),
701 CGM.getLangOpts());
702 MappingGen.emitEmptyMapping(D, OS);
703 OS.flush();
704
705 if (CoverageMapping.empty())
706 return;
707
708 setFuncName(Name, Linkage);
709 CGM.getCoverageMapping()->addFunctionMappingRecord(
710 FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
711 }
712
computeRegionCounts(const Decl * D)713 void CodeGenPGO::computeRegionCounts(const Decl *D) {
714 StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
715 ComputeRegionCounts Walker(*StmtCountMap, *this);
716 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
717 Walker.VisitFunctionDecl(FD);
718 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
719 Walker.VisitObjCMethodDecl(MD);
720 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
721 Walker.VisitBlockDecl(BD);
722 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
723 Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
724 }
725
726 void
applyFunctionAttributes(llvm::IndexedInstrProfReader * PGOReader,llvm::Function * Fn)727 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
728 llvm::Function *Fn) {
729 if (!haveRegionCounts())
730 return;
731
732 uint64_t FunctionCount = getRegionCount(nullptr);
733 Fn->setEntryCount(FunctionCount);
734 }
735
emitCounterIncrement(CGBuilderTy & Builder,const Stmt * S)736 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
737 if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
738 return;
739 if (!Builder.GetInsertBlock())
740 return;
741
742 unsigned Counter = (*RegionCounterMap)[S];
743 auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
744 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
745 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
746 Builder.getInt64(FunctionHash),
747 Builder.getInt32(NumRegionCounters),
748 Builder.getInt32(Counter)});
749 }
750
751 // This method either inserts a call to the profile run-time during
752 // instrumentation or puts profile data into metadata for PGO use.
valueProfile(CGBuilderTy & Builder,uint32_t ValueKind,llvm::Instruction * ValueSite,llvm::Value * ValuePtr)753 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
754 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
755
756 if (!EnableValueProfiling)
757 return;
758
759 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
760 return;
761
762 if (isa<llvm::Constant>(ValuePtr))
763 return;
764
765 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
766 if (InstrumentValueSites && RegionCounterMap) {
767 auto BuilderInsertPoint = Builder.saveIP();
768 Builder.SetInsertPoint(ValueSite);
769 llvm::Value *Args[5] = {
770 llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
771 Builder.getInt64(FunctionHash),
772 Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
773 Builder.getInt32(ValueKind),
774 Builder.getInt32(NumValueSites[ValueKind]++)
775 };
776 Builder.CreateCall(
777 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
778 Builder.restoreIP(BuilderInsertPoint);
779 return;
780 }
781
782 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
783 if (PGOReader && haveRegionCounts()) {
784 // We record the top most called three functions at each call site.
785 // Profile metadata contains "VP" string identifying this metadata
786 // as value profiling data, then a uint32_t value for the value profiling
787 // kind, a uint64_t value for the total number of times the call is
788 // executed, followed by the function hash and execution count (uint64_t)
789 // pairs for each function.
790 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
791 return;
792
793 llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
794 (llvm::InstrProfValueKind)ValueKind,
795 NumValueSites[ValueKind]);
796
797 NumValueSites[ValueKind]++;
798 }
799 }
800
loadRegionCounts(llvm::IndexedInstrProfReader * PGOReader,bool IsInMainFile)801 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
802 bool IsInMainFile) {
803 CGM.getPGOStats().addVisited(IsInMainFile);
804 RegionCounts.clear();
805 llvm::Expected<llvm::InstrProfRecord> RecordExpected =
806 PGOReader->getInstrProfRecord(FuncName, FunctionHash);
807 if (auto E = RecordExpected.takeError()) {
808 auto IPE = llvm::InstrProfError::take(std::move(E));
809 if (IPE == llvm::instrprof_error::unknown_function)
810 CGM.getPGOStats().addMissing(IsInMainFile);
811 else if (IPE == llvm::instrprof_error::hash_mismatch)
812 CGM.getPGOStats().addMismatched(IsInMainFile);
813 else if (IPE == llvm::instrprof_error::malformed)
814 // TODO: Consider a more specific warning for this case.
815 CGM.getPGOStats().addMismatched(IsInMainFile);
816 return;
817 }
818 ProfRecord =
819 llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
820 RegionCounts = ProfRecord->Counts;
821 }
822
823 /// \brief Calculate what to divide by to scale weights.
824 ///
825 /// Given the maximum weight, calculate a divisor that will scale all the
826 /// weights to strictly less than UINT32_MAX.
calculateWeightScale(uint64_t MaxWeight)827 static uint64_t calculateWeightScale(uint64_t MaxWeight) {
828 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
829 }
830
831 /// \brief Scale an individual branch weight (and add 1).
832 ///
833 /// Scale a 64-bit weight down to 32-bits using \c Scale.
834 ///
835 /// According to Laplace's Rule of Succession, it is better to compute the
836 /// weight based on the count plus 1, so universally add 1 to the value.
837 ///
838 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
839 /// greater than \c Weight.
scaleBranchWeight(uint64_t Weight,uint64_t Scale)840 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
841 assert(Scale && "scale by 0?");
842 uint64_t Scaled = Weight / Scale + 1;
843 assert(Scaled <= UINT32_MAX && "overflow 32-bits");
844 return Scaled;
845 }
846
createProfileWeights(uint64_t TrueCount,uint64_t FalseCount)847 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
848 uint64_t FalseCount) {
849 // Check for empty weights.
850 if (!TrueCount && !FalseCount)
851 return nullptr;
852
853 // Calculate how to scale down to 32-bits.
854 uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
855
856 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
857 return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
858 scaleBranchWeight(FalseCount, Scale));
859 }
860
861 llvm::MDNode *
createProfileWeights(ArrayRef<uint64_t> Weights)862 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
863 // We need at least two elements to create meaningful weights.
864 if (Weights.size() < 2)
865 return nullptr;
866
867 // Check for empty weights.
868 uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
869 if (MaxWeight == 0)
870 return nullptr;
871
872 // Calculate how to scale down to 32-bits.
873 uint64_t Scale = calculateWeightScale(MaxWeight);
874
875 SmallVector<uint32_t, 16> ScaledWeights;
876 ScaledWeights.reserve(Weights.size());
877 for (uint64_t W : Weights)
878 ScaledWeights.push_back(scaleBranchWeight(W, Scale));
879
880 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
881 return MDHelper.createBranchWeights(ScaledWeights);
882 }
883
createProfileWeightsForLoop(const Stmt * Cond,uint64_t LoopCount)884 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
885 uint64_t LoopCount) {
886 if (!PGO.haveRegionCounts())
887 return nullptr;
888 Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
889 assert(CondCount.hasValue() && "missing expected loop condition count");
890 if (*CondCount == 0)
891 return nullptr;
892 return createProfileWeights(LoopCount,
893 std::max(*CondCount, LoopCount) - LoopCount);
894 }
895