1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/RegionInfo.h"
24 #include "llvm/Analysis/RegionIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/BranchProbability.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Transforms/Utils.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/Cloning.h"
37 #include "llvm/Transforms/Utils/ValueMapper.h"
38
39 #include <set>
40 #include <sstream>
41
42 using namespace llvm;
43
44 #define DEBUG_TYPE "chr"
45
46 #define CHR_DEBUG(X) LLVM_DEBUG(X)
47
48 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
49 cl::desc("Apply CHR for all functions"));
50
51 static cl::opt<double> CHRBiasThreshold(
52 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
53 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
54
55 static cl::opt<unsigned> CHRMergeThreshold(
56 "chr-merge-threshold", cl::init(2), cl::Hidden,
57 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
58
59 static cl::opt<std::string> CHRModuleList(
60 "chr-module-list", cl::init(""), cl::Hidden,
61 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
62
63 static cl::opt<std::string> CHRFunctionList(
64 "chr-function-list", cl::init(""), cl::Hidden,
65 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
66
67 static StringSet<> CHRModules;
68 static StringSet<> CHRFunctions;
69
parseCHRFilterFiles()70 static void parseCHRFilterFiles() {
71 if (!CHRModuleList.empty()) {
72 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
73 if (!FileOrErr) {
74 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
75 std::exit(1);
76 }
77 StringRef Buf = FileOrErr->get()->getBuffer();
78 SmallVector<StringRef, 0> Lines;
79 Buf.split(Lines, '\n');
80 for (StringRef Line : Lines) {
81 Line = Line.trim();
82 if (!Line.empty())
83 CHRModules.insert(Line);
84 }
85 }
86 if (!CHRFunctionList.empty()) {
87 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
88 if (!FileOrErr) {
89 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
90 std::exit(1);
91 }
92 StringRef Buf = FileOrErr->get()->getBuffer();
93 SmallVector<StringRef, 0> Lines;
94 Buf.split(Lines, '\n');
95 for (StringRef Line : Lines) {
96 Line = Line.trim();
97 if (!Line.empty())
98 CHRFunctions.insert(Line);
99 }
100 }
101 }
102
103 namespace {
104 class ControlHeightReductionLegacyPass : public FunctionPass {
105 public:
106 static char ID;
107
ControlHeightReductionLegacyPass()108 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
109 initializeControlHeightReductionLegacyPassPass(
110 *PassRegistry::getPassRegistry());
111 parseCHRFilterFiles();
112 }
113
114 bool runOnFunction(Function &F) override;
getAnalysisUsage(AnalysisUsage & AU) const115 void getAnalysisUsage(AnalysisUsage &AU) const override {
116 AU.addRequired<BlockFrequencyInfoWrapperPass>();
117 AU.addRequired<DominatorTreeWrapperPass>();
118 AU.addRequired<ProfileSummaryInfoWrapperPass>();
119 AU.addRequired<RegionInfoPass>();
120 AU.addPreserved<GlobalsAAWrapperPass>();
121 }
122 };
123 } // end anonymous namespace
124
125 char ControlHeightReductionLegacyPass::ID = 0;
126
127 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
128 "chr",
129 "Reduce control height in the hot paths",
130 false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)131 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
132 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
133 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
134 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
135 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
136 "chr",
137 "Reduce control height in the hot paths",
138 false, false)
139
140 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
141 return new ControlHeightReductionLegacyPass();
142 }
143
144 namespace {
145
146 struct CHRStats {
CHRStats__anon26c64e210211::CHRStats147 CHRStats() : NumBranches(0), NumBranchesDelta(0),
148 WeightedNumBranchesDelta(0) {}
print__anon26c64e210211::CHRStats149 void print(raw_ostream &OS) const {
150 OS << "CHRStats: NumBranches " << NumBranches
151 << " NumBranchesDelta " << NumBranchesDelta
152 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
153 }
154 uint64_t NumBranches; // The original number of conditional branches /
155 // selects
156 uint64_t NumBranchesDelta; // The decrease of the number of conditional
157 // branches / selects in the hot paths due to CHR.
158 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
159 // count at the scope entry.
160 };
161
162 // RegInfo - some properties of a Region.
163 struct RegInfo {
RegInfo__anon26c64e210211::RegInfo164 RegInfo() : R(nullptr), HasBranch(false) {}
RegInfo__anon26c64e210211::RegInfo165 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
166 Region *R;
167 bool HasBranch;
168 SmallVector<SelectInst *, 8> Selects;
169 };
170
171 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
172
173 // CHRScope - a sequence of regions to CHR together. It corresponds to a
174 // sequence of conditional blocks. It can have subscopes which correspond to
175 // nested conditional blocks. Nested CHRScopes form a tree.
176 class CHRScope {
177 public:
CHRScope(RegInfo RI)178 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
179 assert(RI.R && "Null RegionIn");
180 RegInfos.push_back(RI);
181 }
182
getParentRegion()183 Region *getParentRegion() {
184 assert(RegInfos.size() > 0 && "Empty CHRScope");
185 Region *Parent = RegInfos[0].R->getParent();
186 assert(Parent && "Unexpected to call this on the top-level region");
187 return Parent;
188 }
189
getEntryBlock()190 BasicBlock *getEntryBlock() {
191 assert(RegInfos.size() > 0 && "Empty CHRScope");
192 return RegInfos.front().R->getEntry();
193 }
194
getExitBlock()195 BasicBlock *getExitBlock() {
196 assert(RegInfos.size() > 0 && "Empty CHRScope");
197 return RegInfos.back().R->getExit();
198 }
199
appendable(CHRScope * Next)200 bool appendable(CHRScope *Next) {
201 // The next scope is appendable only if this scope is directly connected to
202 // it (which implies it post-dominates this scope) and this scope dominates
203 // it (no edge to the next scope outside this scope).
204 BasicBlock *NextEntry = Next->getEntryBlock();
205 if (getExitBlock() != NextEntry)
206 // Not directly connected.
207 return false;
208 Region *LastRegion = RegInfos.back().R;
209 for (BasicBlock *Pred : predecessors(NextEntry))
210 if (!LastRegion->contains(Pred))
211 // There's an edge going into the entry of the next scope from outside
212 // of this scope.
213 return false;
214 return true;
215 }
216
append(CHRScope * Next)217 void append(CHRScope *Next) {
218 assert(RegInfos.size() > 0 && "Empty CHRScope");
219 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
220 assert(getParentRegion() == Next->getParentRegion() &&
221 "Must be siblings");
222 assert(getExitBlock() == Next->getEntryBlock() &&
223 "Must be adjacent");
224 for (RegInfo &RI : Next->RegInfos)
225 RegInfos.push_back(RI);
226 for (CHRScope *Sub : Next->Subs)
227 Subs.push_back(Sub);
228 }
229
addSub(CHRScope * SubIn)230 void addSub(CHRScope *SubIn) {
231 #ifndef NDEBUG
232 bool IsChild = false;
233 for (RegInfo &RI : RegInfos)
234 if (RI.R == SubIn->getParentRegion()) {
235 IsChild = true;
236 break;
237 }
238 assert(IsChild && "Must be a child");
239 #endif
240 Subs.push_back(SubIn);
241 }
242
243 // Split this scope at the boundary region into two, which will belong to the
244 // tail and returns the tail.
split(Region * Boundary)245 CHRScope *split(Region *Boundary) {
246 assert(Boundary && "Boundary null");
247 assert(RegInfos.begin()->R != Boundary &&
248 "Can't be split at beginning");
249 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
250 [&Boundary](const RegInfo& RI) {
251 return Boundary == RI.R;
252 });
253 if (BoundaryIt == RegInfos.end())
254 return nullptr;
255 SmallVector<RegInfo, 8> TailRegInfos;
256 SmallVector<CHRScope *, 8> TailSubs;
257 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
258 RegInfos.resize(BoundaryIt - RegInfos.begin());
259 DenseSet<Region *> TailRegionSet;
260 for (RegInfo &RI : TailRegInfos)
261 TailRegionSet.insert(RI.R);
262 for (auto It = Subs.begin(); It != Subs.end(); ) {
263 CHRScope *Sub = *It;
264 assert(Sub && "null Sub");
265 Region *Parent = Sub->getParentRegion();
266 if (TailRegionSet.count(Parent)) {
267 TailSubs.push_back(Sub);
268 It = Subs.erase(It);
269 } else {
270 assert(std::find_if(RegInfos.begin(), RegInfos.end(),
271 [&Parent](const RegInfo& RI) {
272 return Parent == RI.R;
273 }) != RegInfos.end() &&
274 "Must be in head");
275 ++It;
276 }
277 }
278 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
279 return new CHRScope(TailRegInfos, TailSubs);
280 }
281
contains(Instruction * I) const282 bool contains(Instruction *I) const {
283 BasicBlock *Parent = I->getParent();
284 for (const RegInfo &RI : RegInfos)
285 if (RI.R->contains(Parent))
286 return true;
287 return false;
288 }
289
290 void print(raw_ostream &OS) const;
291
292 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
293 SmallVector<CHRScope *, 8> Subs; // Subscopes.
294
295 // The instruction at which to insert the CHR conditional branch (and hoist
296 // the dependent condition values).
297 Instruction *BranchInsertPoint;
298
299 // True-biased and false-biased regions (conditional blocks),
300 // respectively. Used only for the outermost scope and includes regions in
301 // subscopes. The rest are unbiased.
302 DenseSet<Region *> TrueBiasedRegions;
303 DenseSet<Region *> FalseBiasedRegions;
304 // Among the biased regions, the regions that get CHRed.
305 SmallVector<RegInfo, 8> CHRRegions;
306
307 // True-biased and false-biased selects, respectively. Used only for the
308 // outermost scope and includes ones in subscopes.
309 DenseSet<SelectInst *> TrueBiasedSelects;
310 DenseSet<SelectInst *> FalseBiasedSelects;
311
312 // Map from one of the above regions to the instructions to stop
313 // hoisting instructions at through use-def chains.
314 HoistStopMapTy HoistStopMap;
315
316 private:
CHRScope(SmallVector<RegInfo,8> & RegInfosIn,SmallVector<CHRScope *,8> & SubsIn)317 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
318 SmallVector<CHRScope *, 8> &SubsIn)
319 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
320 };
321
322 class CHR {
323 public:
CHR(Function & Fin,BlockFrequencyInfo & BFIin,DominatorTree & DTin,ProfileSummaryInfo & PSIin,RegionInfo & RIin,OptimizationRemarkEmitter & OREin)324 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
325 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
326 OptimizationRemarkEmitter &OREin)
327 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
328
~CHR()329 ~CHR() {
330 for (CHRScope *Scope : Scopes) {
331 delete Scope;
332 }
333 }
334
335 bool run();
336
337 private:
338 // See the comments in CHR::run() for the high level flow of the algorithm and
339 // what the following functions do.
340
findScopes(SmallVectorImpl<CHRScope * > & Output)341 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
342 Region *R = RI.getTopLevelRegion();
343 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
344 if (Scope) {
345 Output.push_back(Scope);
346 }
347 }
348 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
349 SmallVectorImpl<CHRScope *> &Scopes);
350 CHRScope *findScope(Region *R);
351 void checkScopeHoistable(CHRScope *Scope);
352
353 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
354 SmallVectorImpl<CHRScope *> &Output);
355 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
356 CHRScope *Outer,
357 DenseSet<Value *> *OuterConditionValues,
358 Instruction *OuterInsertPoint,
359 SmallVectorImpl<CHRScope *> &Output,
360 DenseSet<Instruction *> &Unhoistables);
361
362 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
363 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
364
365 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
366 SmallVectorImpl<CHRScope *> &Output);
367
368 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
369 SmallVectorImpl<CHRScope *> &Output);
370 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
371
372 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
373 SmallVectorImpl<CHRScope *> &Output);
374
375 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
376 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
377 void cloneScopeBlocks(CHRScope *Scope,
378 BasicBlock *PreEntryBlock,
379 BasicBlock *ExitBlock,
380 Region *LastRegion,
381 ValueToValueMapTy &VMap);
382 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
383 BasicBlock *EntryBlock,
384 BasicBlock *NewEntryBlock,
385 ValueToValueMapTy &VMap);
386 void fixupBranchesAndSelects(CHRScope *Scope,
387 BasicBlock *PreEntryBlock,
388 BranchInst *MergedBR,
389 uint64_t ProfileCount);
390 void fixupBranch(Region *R,
391 CHRScope *Scope,
392 IRBuilder<> &IRB,
393 Value *&MergedCondition, BranchProbability &CHRBranchBias);
394 void fixupSelect(SelectInst* SI,
395 CHRScope *Scope,
396 IRBuilder<> &IRB,
397 Value *&MergedCondition, BranchProbability &CHRBranchBias);
398 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
399 Instruction *BranchOrSelect,
400 CHRScope *Scope,
401 IRBuilder<> &IRB,
402 Value *&MergedCondition);
403
404 Function &F;
405 BlockFrequencyInfo &BFI;
406 DominatorTree &DT;
407 ProfileSummaryInfo &PSI;
408 RegionInfo &RI;
409 OptimizationRemarkEmitter &ORE;
410 CHRStats Stats;
411
412 // All the true-biased regions in the function
413 DenseSet<Region *> TrueBiasedRegionsGlobal;
414 // All the false-biased regions in the function
415 DenseSet<Region *> FalseBiasedRegionsGlobal;
416 // All the true-biased selects in the function
417 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
418 // All the false-biased selects in the function
419 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
420 // A map from biased regions to their branch bias
421 DenseMap<Region *, BranchProbability> BranchBiasMap;
422 // A map from biased selects to their branch bias
423 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
424 // All the scopes.
425 DenseSet<CHRScope *> Scopes;
426 };
427
428 } // end anonymous namespace
429
430 static inline
operator <<(raw_ostream & OS,const CHRStats & Stats)431 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
432 const CHRStats &Stats) {
433 Stats.print(OS);
434 return OS;
435 }
436
437 static inline
operator <<(raw_ostream & OS,const CHRScope & Scope)438 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
439 Scope.print(OS);
440 return OS;
441 }
442
shouldApply(Function & F,ProfileSummaryInfo & PSI)443 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
444 if (ForceCHR)
445 return true;
446
447 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
448 if (CHRModules.count(F.getParent()->getName()))
449 return true;
450 return CHRFunctions.count(F.getName());
451 }
452
453 assert(PSI.hasProfileSummary() && "Empty PSI?");
454 return PSI.isFunctionEntryHot(&F);
455 }
456
dumpIR(Function & F,const char * Label,CHRStats * Stats)457 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
458 CHRStats *Stats) {
459 StringRef FuncName = F.getName();
460 StringRef ModuleName = F.getParent()->getName();
461 (void)(FuncName); // Unused in release build.
462 (void)(ModuleName); // Unused in release build.
463 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
464 << FuncName);
465 if (Stats)
466 CHR_DEBUG(dbgs() << " " << *Stats);
467 CHR_DEBUG(dbgs() << "\n");
468 CHR_DEBUG(F.dump());
469 }
470
print(raw_ostream & OS) const471 void CHRScope::print(raw_ostream &OS) const {
472 assert(RegInfos.size() > 0 && "Empty CHRScope");
473 OS << "CHRScope[";
474 OS << RegInfos.size() << ", Regions[";
475 for (const RegInfo &RI : RegInfos) {
476 OS << RI.R->getNameStr();
477 if (RI.HasBranch)
478 OS << " B";
479 if (RI.Selects.size() > 0)
480 OS << " S" << RI.Selects.size();
481 OS << ", ";
482 }
483 if (RegInfos[0].R->getParent()) {
484 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
485 } else {
486 // top level region
487 OS << "]";
488 }
489 OS << ", Subs[";
490 for (CHRScope *Sub : Subs) {
491 OS << *Sub << ", ";
492 }
493 OS << "]]";
494 }
495
496 // Return true if the given instruction type can be hoisted by CHR.
isHoistableInstructionType(Instruction * I)497 static bool isHoistableInstructionType(Instruction *I) {
498 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
499 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
500 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
501 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
502 isa<InsertValueInst>(I);
503 }
504
505 // Return true if the given instruction can be hoisted by CHR.
isHoistable(Instruction * I,DominatorTree & DT)506 static bool isHoistable(Instruction *I, DominatorTree &DT) {
507 if (!isHoistableInstructionType(I))
508 return false;
509 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
510 }
511
512 // Recursively traverse the use-def chains of the given value and return a set
513 // of the unhoistable base values defined within the scope (excluding the
514 // first-region entry block) or the (hoistable or unhoistable) base values that
515 // are defined outside (including the first-region entry block) of the
516 // scope. The returned set doesn't include constants.
getBaseValues(Value * V,DominatorTree & DT,DenseMap<Value *,std::set<Value * >> & Visited)517 static std::set<Value *> getBaseValues(
518 Value *V, DominatorTree &DT,
519 DenseMap<Value *, std::set<Value *>> &Visited) {
520 if (Visited.count(V)) {
521 return Visited[V];
522 }
523 std::set<Value *> Result;
524 if (auto *I = dyn_cast<Instruction>(V)) {
525 // We don't stop at a block that's not in the Scope because we would miss some
526 // instructions that are based on the same base values if we stop there.
527 if (!isHoistable(I, DT)) {
528 Result.insert(I);
529 Visited.insert(std::make_pair(V, Result));
530 return Result;
531 }
532 // I is hoistable above the Scope.
533 for (Value *Op : I->operands()) {
534 std::set<Value *> OpResult = getBaseValues(Op, DT, Visited);
535 Result.insert(OpResult.begin(), OpResult.end());
536 }
537 Visited.insert(std::make_pair(V, Result));
538 return Result;
539 }
540 if (isa<Argument>(V)) {
541 Result.insert(V);
542 Visited.insert(std::make_pair(V, Result));
543 return Result;
544 }
545 // We don't include others like constants because those won't lead to any
546 // chance of folding of conditions (eg two bit checks merged into one check)
547 // after CHR.
548 Visited.insert(std::make_pair(V, Result));
549 return Result; // empty
550 }
551
552 // Return true if V is already hoisted or can be hoisted (along with its
553 // operands) above the insert point. When it returns true and HoistStops is
554 // non-null, the instructions to stop hoisting at through the use-def chains are
555 // inserted into HoistStops.
556 static bool
checkHoistValue(Value * V,Instruction * InsertPoint,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables,DenseSet<Instruction * > * HoistStops,DenseMap<Instruction *,bool> & Visited)557 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
558 DenseSet<Instruction *> &Unhoistables,
559 DenseSet<Instruction *> *HoistStops,
560 DenseMap<Instruction *, bool> &Visited) {
561 assert(InsertPoint && "Null InsertPoint");
562 if (auto *I = dyn_cast<Instruction>(V)) {
563 if (Visited.count(I)) {
564 return Visited[I];
565 }
566 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
567 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
568 if (Unhoistables.count(I)) {
569 // Don't hoist if they are not to be hoisted.
570 Visited[I] = false;
571 return false;
572 }
573 if (DT.dominates(I, InsertPoint)) {
574 // We are already above the insert point. Stop here.
575 if (HoistStops)
576 HoistStops->insert(I);
577 Visited[I] = true;
578 return true;
579 }
580 // We aren't not above the insert point, check if we can hoist it above the
581 // insert point.
582 if (isHoistable(I, DT)) {
583 // Check operands first.
584 DenseSet<Instruction *> OpsHoistStops;
585 bool AllOpsHoisted = true;
586 for (Value *Op : I->operands()) {
587 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
588 Visited)) {
589 AllOpsHoisted = false;
590 break;
591 }
592 }
593 if (AllOpsHoisted) {
594 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
595 if (HoistStops)
596 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
597 Visited[I] = true;
598 return true;
599 }
600 }
601 Visited[I] = false;
602 return false;
603 }
604 // Non-instructions are considered hoistable.
605 return true;
606 }
607
608 // Returns true and sets the true probability and false probability of an
609 // MD_prof metadata if it's well-formed.
checkMDProf(MDNode * MD,BranchProbability & TrueProb,BranchProbability & FalseProb)610 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
611 BranchProbability &FalseProb) {
612 if (!MD) return false;
613 MDString *MDName = cast<MDString>(MD->getOperand(0));
614 if (MDName->getString() != "branch_weights" ||
615 MD->getNumOperands() != 3)
616 return false;
617 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
618 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
619 if (!TrueWeight || !FalseWeight)
620 return false;
621 uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
622 uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
623 uint64_t SumWt = TrueWt + FalseWt;
624
625 assert(SumWt >= TrueWt && SumWt >= FalseWt &&
626 "Overflow calculating branch probabilities.");
627
628 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
629 if (SumWt == 0)
630 return false;
631
632 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
633 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
634 return true;
635 }
636
getCHRBiasThreshold()637 static BranchProbability getCHRBiasThreshold() {
638 return BranchProbability::getBranchProbability(
639 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
640 }
641
642 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
643 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
644 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
645 // false.
646 template <typename K, typename S, typename M>
checkBias(K * Key,BranchProbability TrueProb,BranchProbability FalseProb,S & TrueSet,S & FalseSet,M & BiasMap)647 static bool checkBias(K *Key, BranchProbability TrueProb,
648 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
649 M &BiasMap) {
650 BranchProbability Threshold = getCHRBiasThreshold();
651 if (TrueProb >= Threshold) {
652 TrueSet.insert(Key);
653 BiasMap[Key] = TrueProb;
654 return true;
655 } else if (FalseProb >= Threshold) {
656 FalseSet.insert(Key);
657 BiasMap[Key] = FalseProb;
658 return true;
659 }
660 return false;
661 }
662
663 // Returns true and insert a region into the right biased set and the map if the
664 // branch of the region is biased.
checkBiasedBranch(BranchInst * BI,Region * R,DenseSet<Region * > & TrueBiasedRegionsGlobal,DenseSet<Region * > & FalseBiasedRegionsGlobal,DenseMap<Region *,BranchProbability> & BranchBiasMap)665 static bool checkBiasedBranch(BranchInst *BI, Region *R,
666 DenseSet<Region *> &TrueBiasedRegionsGlobal,
667 DenseSet<Region *> &FalseBiasedRegionsGlobal,
668 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
669 if (!BI->isConditional())
670 return false;
671 BranchProbability ThenProb, ElseProb;
672 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
673 ThenProb, ElseProb))
674 return false;
675 BasicBlock *IfThen = BI->getSuccessor(0);
676 BasicBlock *IfElse = BI->getSuccessor(1);
677 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
678 IfThen != IfElse &&
679 "Invariant from findScopes");
680 if (IfThen == R->getExit()) {
681 // Swap them so that IfThen/ThenProb means going into the conditional code
682 // and IfElse/ElseProb means skipping it.
683 std::swap(IfThen, IfElse);
684 std::swap(ThenProb, ElseProb);
685 }
686 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
687 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
688 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
689 return checkBias(R, ThenProb, ElseProb,
690 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
691 BranchBiasMap);
692 }
693
694 // Returns true and insert a select into the right biased set and the map if the
695 // select is biased.
checkBiasedSelect(SelectInst * SI,Region * R,DenseSet<SelectInst * > & TrueBiasedSelectsGlobal,DenseSet<SelectInst * > & FalseBiasedSelectsGlobal,DenseMap<SelectInst *,BranchProbability> & SelectBiasMap)696 static bool checkBiasedSelect(
697 SelectInst *SI, Region *R,
698 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
699 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
700 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
701 BranchProbability TrueProb, FalseProb;
702 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
703 TrueProb, FalseProb))
704 return false;
705 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
706 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
707 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
708 return checkBias(SI, TrueProb, FalseProb,
709 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
710 SelectBiasMap);
711 }
712
713 // Returns the instruction at which to hoist the dependent condition values and
714 // insert the CHR branch for a region. This is the terminator branch in the
715 // entry block or the first select in the entry block, if any.
getBranchInsertPoint(RegInfo & RI)716 static Instruction* getBranchInsertPoint(RegInfo &RI) {
717 Region *R = RI.R;
718 BasicBlock *EntryBB = R->getEntry();
719 // The hoist point is by default the terminator of the entry block, which is
720 // the same as the branch instruction if RI.HasBranch is true.
721 Instruction *HoistPoint = EntryBB->getTerminator();
722 for (SelectInst *SI : RI.Selects) {
723 if (SI->getParent() == EntryBB) {
724 // Pick the first select in Selects in the entry block. Note Selects is
725 // sorted in the instruction order within a block (asserted below).
726 HoistPoint = SI;
727 break;
728 }
729 }
730 assert(HoistPoint && "Null HoistPoint");
731 #ifndef NDEBUG
732 // Check that HoistPoint is the first one in Selects in the entry block,
733 // if any.
734 DenseSet<Instruction *> EntryBlockSelectSet;
735 for (SelectInst *SI : RI.Selects) {
736 if (SI->getParent() == EntryBB) {
737 EntryBlockSelectSet.insert(SI);
738 }
739 }
740 for (Instruction &I : *EntryBB) {
741 if (EntryBlockSelectSet.count(&I) > 0) {
742 assert(&I == HoistPoint &&
743 "HoistPoint must be the first one in Selects");
744 break;
745 }
746 }
747 #endif
748 return HoistPoint;
749 }
750
751 // Find a CHR scope in the given region.
findScope(Region * R)752 CHRScope * CHR::findScope(Region *R) {
753 CHRScope *Result = nullptr;
754 BasicBlock *Entry = R->getEntry();
755 BasicBlock *Exit = R->getExit(); // null if top level.
756 assert(Entry && "Entry must not be null");
757 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
758 "Only top level region has a null exit");
759 if (Entry)
760 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
761 else
762 CHR_DEBUG(dbgs() << "Entry null\n");
763 if (Exit)
764 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
765 else
766 CHR_DEBUG(dbgs() << "Exit null\n");
767 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
768 // to this region).
769 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
770 if (EntryInSubregion)
771 return nullptr;
772 // Exclude loops
773 for (BasicBlock *Pred : predecessors(Entry))
774 if (R->contains(Pred))
775 return nullptr;
776 if (Exit) {
777 // Try to find an if-then block (check if R is an if-then).
778 // if (cond) {
779 // ...
780 // }
781 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
782 if (BI)
783 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
784 else
785 CHR_DEBUG(dbgs() << "BI null\n");
786 if (BI && BI->isConditional()) {
787 BasicBlock *S0 = BI->getSuccessor(0);
788 BasicBlock *S1 = BI->getSuccessor(1);
789 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
790 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
791 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
792 RegInfo RI(R);
793 RI.HasBranch = checkBiasedBranch(
794 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
795 BranchBiasMap);
796 Result = new CHRScope(RI);
797 Scopes.insert(Result);
798 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
799 ++Stats.NumBranches;
800 if (!RI.HasBranch) {
801 ORE.emit([&]() {
802 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
803 << "Branch not biased";
804 });
805 }
806 }
807 }
808 }
809 {
810 // Try to look for selects in the direct child blocks (as opposed to in
811 // subregions) of R.
812 // ...
813 // if (..) { // Some subregion
814 // ...
815 // }
816 // if (..) { // Some subregion
817 // ...
818 // }
819 // ...
820 // a = cond ? b : c;
821 // ...
822 SmallVector<SelectInst *, 8> Selects;
823 for (RegionNode *E : R->elements()) {
824 if (E->isSubRegion())
825 continue;
826 // This returns the basic block of E if E is a direct child of R (not a
827 // subregion.)
828 BasicBlock *BB = E->getEntry();
829 // Need to push in the order to make it easier to find the first Select
830 // later.
831 for (Instruction &I : *BB) {
832 if (auto *SI = dyn_cast<SelectInst>(&I)) {
833 Selects.push_back(SI);
834 ++Stats.NumBranches;
835 }
836 }
837 }
838 if (Selects.size() > 0) {
839 auto AddSelects = [&](RegInfo &RI) {
840 for (auto *SI : Selects)
841 if (checkBiasedSelect(SI, RI.R,
842 TrueBiasedSelectsGlobal,
843 FalseBiasedSelectsGlobal,
844 SelectBiasMap))
845 RI.Selects.push_back(SI);
846 else
847 ORE.emit([&]() {
848 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
849 << "Select not biased";
850 });
851 };
852 if (!Result) {
853 CHR_DEBUG(dbgs() << "Found a select-only region\n");
854 RegInfo RI(R);
855 AddSelects(RI);
856 Result = new CHRScope(RI);
857 Scopes.insert(Result);
858 } else {
859 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
860 AddSelects(Result->RegInfos[0]);
861 }
862 }
863 }
864
865 if (Result) {
866 checkScopeHoistable(Result);
867 }
868 return Result;
869 }
870
871 // Check that any of the branch and the selects in the region could be
872 // hoisted above the the CHR branch insert point (the most dominating of
873 // them, either the branch (at the end of the first block) or the first
874 // select in the first block). If the branch can't be hoisted, drop the
875 // selects in the first blocks.
876 //
877 // For example, for the following scope/region with selects, we want to insert
878 // the merged branch right before the first select in the first/entry block by
879 // hoisting c1, c2, c3, and c4.
880 //
881 // // Branch insert point here.
882 // a = c1 ? b : c; // Select 1
883 // d = c2 ? e : f; // Select 2
884 // if (c3) { // Branch
885 // ...
886 // c4 = foo() // A call.
887 // g = c4 ? h : i; // Select 3
888 // }
889 //
890 // But suppose we can't hoist c4 because it's dependent on the preceding
891 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
892 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
checkScopeHoistable(CHRScope * Scope)893 void CHR::checkScopeHoistable(CHRScope *Scope) {
894 RegInfo &RI = Scope->RegInfos[0];
895 Region *R = RI.R;
896 BasicBlock *EntryBB = R->getEntry();
897 auto *Branch = RI.HasBranch ?
898 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
899 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
900 if (RI.HasBranch || !Selects.empty()) {
901 Instruction *InsertPoint = getBranchInsertPoint(RI);
902 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
903 // Avoid a data dependence from a select or a branch to a(nother)
904 // select. Note no instruction can't data-depend on a branch (a branch
905 // instruction doesn't produce a value).
906 DenseSet<Instruction *> Unhoistables;
907 // Initialize Unhoistables with the selects.
908 for (SelectInst *SI : Selects) {
909 Unhoistables.insert(SI);
910 }
911 // Remove Selects that can't be hoisted.
912 for (auto it = Selects.begin(); it != Selects.end(); ) {
913 SelectInst *SI = *it;
914 if (SI == InsertPoint) {
915 ++it;
916 continue;
917 }
918 DenseMap<Instruction *, bool> Visited;
919 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
920 DT, Unhoistables, nullptr, Visited);
921 if (!IsHoistable) {
922 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
923 ORE.emit([&]() {
924 return OptimizationRemarkMissed(DEBUG_TYPE,
925 "DropUnhoistableSelect", SI)
926 << "Dropped unhoistable select";
927 });
928 it = Selects.erase(it);
929 // Since we are dropping the select here, we also drop it from
930 // Unhoistables.
931 Unhoistables.erase(SI);
932 } else
933 ++it;
934 }
935 // Update InsertPoint after potentially removing selects.
936 InsertPoint = getBranchInsertPoint(RI);
937 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
938 if (RI.HasBranch && InsertPoint != Branch) {
939 DenseMap<Instruction *, bool> Visited;
940 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
941 DT, Unhoistables, nullptr, Visited);
942 if (!IsHoistable) {
943 // If the branch isn't hoistable, drop the selects in the entry
944 // block, preferring the branch, which makes the branch the hoist
945 // point.
946 assert(InsertPoint != Branch && "Branch must not be the hoist point");
947 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
948 CHR_DEBUG(
949 for (SelectInst *SI : Selects) {
950 dbgs() << "SI " << *SI << "\n";
951 });
952 for (SelectInst *SI : Selects) {
953 ORE.emit([&]() {
954 return OptimizationRemarkMissed(DEBUG_TYPE,
955 "DropSelectUnhoistableBranch", SI)
956 << "Dropped select due to unhoistable branch";
957 });
958 }
959 Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
960 [EntryBB](SelectInst *SI) {
961 return SI->getParent() == EntryBB;
962 }), Selects.end());
963 Unhoistables.clear();
964 InsertPoint = Branch;
965 }
966 }
967 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
968 #ifndef NDEBUG
969 if (RI.HasBranch) {
970 assert(!DT.dominates(Branch, InsertPoint) &&
971 "Branch can't be already above the hoist point");
972 DenseMap<Instruction *, bool> Visited;
973 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
974 DT, Unhoistables, nullptr, Visited) &&
975 "checkHoistValue for branch");
976 }
977 for (auto *SI : Selects) {
978 assert(!DT.dominates(SI, InsertPoint) &&
979 "SI can't be already above the hoist point");
980 DenseMap<Instruction *, bool> Visited;
981 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
982 Unhoistables, nullptr, Visited) &&
983 "checkHoistValue for selects");
984 }
985 CHR_DEBUG(dbgs() << "Result\n");
986 if (RI.HasBranch) {
987 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
988 }
989 for (auto *SI : Selects) {
990 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
991 }
992 #endif
993 }
994 }
995
996 // Traverse the region tree, find all nested scopes and merge them if possible.
findScopes(Region * R,Region * NextRegion,Region * ParentRegion,SmallVectorImpl<CHRScope * > & Scopes)997 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
998 SmallVectorImpl<CHRScope *> &Scopes) {
999 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
1000 CHRScope *Result = findScope(R);
1001 // Visit subscopes.
1002 CHRScope *ConsecutiveSubscope = nullptr;
1003 SmallVector<CHRScope *, 8> Subscopes;
1004 for (auto It = R->begin(); It != R->end(); ++It) {
1005 const std::unique_ptr<Region> &SubR = *It;
1006 auto NextIt = std::next(It);
1007 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1008 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
1009 << "\n");
1010 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1011 if (SubCHRScope) {
1012 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
1013 } else {
1014 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1015 }
1016 if (SubCHRScope) {
1017 if (!ConsecutiveSubscope)
1018 ConsecutiveSubscope = SubCHRScope;
1019 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1020 Subscopes.push_back(ConsecutiveSubscope);
1021 ConsecutiveSubscope = SubCHRScope;
1022 } else
1023 ConsecutiveSubscope->append(SubCHRScope);
1024 } else {
1025 if (ConsecutiveSubscope) {
1026 Subscopes.push_back(ConsecutiveSubscope);
1027 }
1028 ConsecutiveSubscope = nullptr;
1029 }
1030 }
1031 if (ConsecutiveSubscope) {
1032 Subscopes.push_back(ConsecutiveSubscope);
1033 }
1034 for (CHRScope *Sub : Subscopes) {
1035 if (Result) {
1036 // Combine it with the parent.
1037 Result->addSub(Sub);
1038 } else {
1039 // Push Subscopes as they won't be combined with the parent.
1040 Scopes.push_back(Sub);
1041 }
1042 }
1043 return Result;
1044 }
1045
getCHRConditionValuesForRegion(RegInfo & RI)1046 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1047 DenseSet<Value *> ConditionValues;
1048 if (RI.HasBranch) {
1049 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1050 ConditionValues.insert(BI->getCondition());
1051 }
1052 for (SelectInst *SI : RI.Selects) {
1053 ConditionValues.insert(SI->getCondition());
1054 }
1055 return ConditionValues;
1056 }
1057
1058
1059 // Determine whether to split a scope depending on the sets of the branch
1060 // condition values of the previous region and the current region. We split
1061 // (return true) it if 1) the condition values of the inner/lower scope can't be
1062 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1063 // values have an empty intersection (because the combined branch conditions
1064 // won't probably lead to a simpler combined condition).
shouldSplit(Instruction * InsertPoint,DenseSet<Value * > & PrevConditionValues,DenseSet<Value * > & ConditionValues,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables)1065 static bool shouldSplit(Instruction *InsertPoint,
1066 DenseSet<Value *> &PrevConditionValues,
1067 DenseSet<Value *> &ConditionValues,
1068 DominatorTree &DT,
1069 DenseSet<Instruction *> &Unhoistables) {
1070 assert(InsertPoint && "Null InsertPoint");
1071 CHR_DEBUG(
1072 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1073 for (Value *V : PrevConditionValues) {
1074 dbgs() << *V << ", ";
1075 }
1076 dbgs() << " ConditionValues ";
1077 for (Value *V : ConditionValues) {
1078 dbgs() << *V << ", ";
1079 }
1080 dbgs() << "\n");
1081 // If any of Bases isn't hoistable to the hoist point, split.
1082 for (Value *V : ConditionValues) {
1083 DenseMap<Instruction *, bool> Visited;
1084 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1085 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1086 return true; // Not hoistable, split.
1087 }
1088 }
1089 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1090 // unnecessary splits at scopes with no branch/selects. If
1091 // PrevConditionValues and ConditionValues don't intersect at all, split.
1092 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1093 // Use std::set as DenseSet doesn't work with set_intersection.
1094 std::set<Value *> PrevBases, Bases;
1095 DenseMap<Value *, std::set<Value *>> Visited;
1096 for (Value *V : PrevConditionValues) {
1097 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
1098 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1099 }
1100 for (Value *V : ConditionValues) {
1101 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
1102 Bases.insert(BaseValues.begin(), BaseValues.end());
1103 }
1104 CHR_DEBUG(
1105 dbgs() << "PrevBases ";
1106 for (Value *V : PrevBases) {
1107 dbgs() << *V << ", ";
1108 }
1109 dbgs() << " Bases ";
1110 for (Value *V : Bases) {
1111 dbgs() << *V << ", ";
1112 }
1113 dbgs() << "\n");
1114 std::set<Value *> Intersection;
1115 std::set_intersection(PrevBases.begin(), PrevBases.end(),
1116 Bases.begin(), Bases.end(),
1117 std::inserter(Intersection, Intersection.begin()));
1118 if (Intersection.empty()) {
1119 // Empty intersection, split.
1120 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1121 return true;
1122 }
1123 }
1124 CHR_DEBUG(dbgs() << "No split\n");
1125 return false; // Don't split.
1126 }
1127
getSelectsInScope(CHRScope * Scope,DenseSet<Instruction * > & Output)1128 static void getSelectsInScope(CHRScope *Scope,
1129 DenseSet<Instruction *> &Output) {
1130 for (RegInfo &RI : Scope->RegInfos)
1131 for (SelectInst *SI : RI.Selects)
1132 Output.insert(SI);
1133 for (CHRScope *Sub : Scope->Subs)
1134 getSelectsInScope(Sub, Output);
1135 }
1136
splitScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1137 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1138 SmallVectorImpl<CHRScope *> &Output) {
1139 for (CHRScope *Scope : Input) {
1140 assert(!Scope->BranchInsertPoint &&
1141 "BranchInsertPoint must not be set");
1142 DenseSet<Instruction *> Unhoistables;
1143 getSelectsInScope(Scope, Unhoistables);
1144 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1145 }
1146 #ifndef NDEBUG
1147 for (CHRScope *Scope : Output) {
1148 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1149 }
1150 #endif
1151 }
1152
// Split Scope (and, recursively, its sub-scopes) wherever shouldSplit() says
// that adjacent regions, or the scope and its Outer scope, should not share a
// merged branch.
//
// Scope:                the scope to process.
// Outer:                the enclosing scope, or null at the top level.
// OuterConditionValues: condition values accumulated for Outer; must be
//                       non-null when Outer is non-null.
// OuterInsertPoint:     branch insert point of Outer; must be non-null when
//                       Outer is non-null.
// Output:               receives every resulting scope that is split from its
//                       outer scope; BranchInsertPoint is set on each of them.
// Unhoistables:         instructions passed to shouldSplit() as unhoistable.
//
// Returns the resulting scopes that stay connected to Outer (always empty when
// Outer is null).
SmallVector<CHRScope *, 8> CHR::splitScope(
    CHRScope *Scope,
    CHRScope *Outer,
    DenseSet<Value *> *OuterConditionValues,
    Instruction *OuterInsertPoint,
    SmallVectorImpl<CHRScope *> &Output,
    DenseSet<Instruction *> &Unhoistables) {
  if (Outer) {
    assert(OuterConditionValues && "Null OuterConditionValues");
    assert(OuterInsertPoint && "Null OuterInsertPoint");
  }
  // State of the split currently being built: whether it is separated from
  // the outer scope, its accumulated condition values, and its insert point.
  bool PrevSplitFromOuter = true;
  DenseSet<Value *> PrevConditionValues;
  Instruction *PrevInsertPoint = nullptr;
  // Parallel vectors, one entry per finished split.
  SmallVector<CHRScope *, 8> Splits;
  SmallVector<bool, 8> SplitsSplitFromOuter;
  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
  SmallVector<Instruction *, 8> SplitsInsertPoints;
  // Iterate over a copy: Scope is reassigned to the tail returned by
  // Scope->split() inside the loop (split() presumably also mutates
  // Scope->RegInfos -- confirm against CHRScope::split).
  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
  for (RegInfo &RI : RegInfos) {
    Instruction *InsertPoint = getBranchInsertPoint(RI);
    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
    CHR_DEBUG(
        dbgs() << "ConditionValues ";
        for (Value *V : ConditionValues) {
          dbgs() << *V << ", ";
        }
        dbgs() << "\n");
    if (RI.R == RegInfos[0].R) {
      // First iteration. Check to see if we should split from the outer.
      if (Outer) {
        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
        CHR_DEBUG(dbgs() << "Should split from outer at "
                  << RI.R->getNameStr() << "\n");
        if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
                        ConditionValues, DT, Unhoistables)) {
          // Splitting from the outer: start over with this region's own
          // condition values and insert point. PrevSplitFromOuter stays true.
          PrevConditionValues = ConditionValues;
          PrevInsertPoint = InsertPoint;
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE,
                                            "SplitScopeFromOuter",
                                            RI.R->getEntry()->getTerminator())
                << "Split scope from outer due to unhoistable branch/select "
                << "and/or lack of common condition values";
          });
        } else {
          // Not splitting from the outer. Use the outer bases and insert
          // point. Union the bases.
          PrevSplitFromOuter = false;
          PrevConditionValues = *OuterConditionValues;
          PrevConditionValues.insert(ConditionValues.begin(),
                                     ConditionValues.end());
          PrevInsertPoint = OuterInsertPoint;
        }
      } else {
        // Top level: there is no outer scope to merge with.
        CHR_DEBUG(dbgs() << "Outer null\n");
        PrevConditionValues = ConditionValues;
        PrevInsertPoint = InsertPoint;
      }
    } else {
      // Subsequent regions: decide whether to split from the regions
      // accumulated so far.
      CHR_DEBUG(dbgs() << "Should split from prev at "
                << RI.R->getNameStr() << "\n");
      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
                      DT, Unhoistables)) {
        // Cut the scope at this region: record the head as a finished split
        // and continue with the tail. Note that Scopes here is the CHR member
        // (the output parameter of this function is named Output).
        CHRScope *Tail = Scope->split(RI.R);
        Scopes.insert(Tail);
        Splits.push_back(Scope);
        SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
        SplitsConditionValues.push_back(PrevConditionValues);
        SplitsInsertPoints.push_back(PrevInsertPoint);
        Scope = Tail;
        PrevConditionValues = ConditionValues;
        PrevInsertPoint = InsertPoint;
        // A tail is always marked as split from the outer scope.
        PrevSplitFromOuter = true;
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE,
                                          "SplitScopeFromPrev",
                                          RI.R->getEntry()->getTerminator())
              << "Split scope from previous due to unhoistable branch/select "
              << "and/or lack of common condition values";
        });
      } else {
        // Not splitting. Union the bases. Keep the hoist point.
        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
      }
    }
  }
  // Record the last (or only) split.
  Splits.push_back(Scope);
  SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  SplitsConditionValues.push_back(PrevConditionValues);
  assert(PrevInsertPoint && "Null PrevInsertPoint");
  SplitsInsertPoints.push_back(PrevInsertPoint);
  assert(Splits.size() == SplitsConditionValues.size() &&
         Splits.size() == SplitsSplitFromOuter.size() &&
         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
  // Recurse into the sub-scopes of each split, with the split acting as their
  // outer scope. Subs that stay connected replace the split's sub list; subs
  // that are split off are emitted to Output by the recursive call.
  for (size_t I = 0; I < Splits.size(); ++I) {
    CHRScope *Split = Splits[I];
    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
    Instruction *SplitInsertPoint = SplitsInsertPoints[I];
    SmallVector<CHRScope *, 8> NewSubs;
    DenseSet<Instruction *> SplitUnhoistables;
    getSelectsInScope(Split, SplitUnhoistables);
    for (CHRScope *Sub : Split->Subs) {
      SmallVector<CHRScope *, 8> SubSplits = splitScope(
          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
          SplitUnhoistables);
      NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
    }
    Split->Subs = NewSubs;
  }
  // Partition the splits: the ones separated from the outer scope are emitted
  // to Output with their insert point; the rest are returned to the caller.
  SmallVector<CHRScope *, 8> Result;
  for (size_t I = 0; I < Splits.size(); ++I) {
    CHRScope *Split = Splits[I];
    if (SplitsSplitFromOuter[I]) {
      // Split from the outer.
      Output.push_back(Split);
      Split->BranchInsertPoint = SplitsInsertPoints[I];
      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
                << "\n");
    } else {
      // Connected to the outer.
      Result.push_back(Split);
    }
  }
  if (!Outer)
    assert(Result.empty() &&
           "If no outer (top-level), must return no nested ones");
  return Result;
}
1282
classifyBiasedScopes(SmallVectorImpl<CHRScope * > & Scopes)1283 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1284 for (CHRScope *Scope : Scopes) {
1285 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1286 classifyBiasedScopes(Scope, Scope);
1287 CHR_DEBUG(
1288 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1289 dbgs() << "TrueBiasedRegions ";
1290 for (Region *R : Scope->TrueBiasedRegions) {
1291 dbgs() << R->getNameStr() << ", ";
1292 }
1293 dbgs() << "\n";
1294 dbgs() << "FalseBiasedRegions ";
1295 for (Region *R : Scope->FalseBiasedRegions) {
1296 dbgs() << R->getNameStr() << ", ";
1297 }
1298 dbgs() << "\n";
1299 dbgs() << "TrueBiasedSelects ";
1300 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1301 dbgs() << *SI << ", ";
1302 }
1303 dbgs() << "\n";
1304 dbgs() << "FalseBiasedSelects ";
1305 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1306 dbgs() << *SI << ", ";
1307 }
1308 dbgs() << "\n";);
1309 }
1310 }
1311
classifyBiasedScopes(CHRScope * Scope,CHRScope * OutermostScope)1312 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1313 for (RegInfo &RI : Scope->RegInfos) {
1314 if (RI.HasBranch) {
1315 Region *R = RI.R;
1316 if (TrueBiasedRegionsGlobal.count(R) > 0)
1317 OutermostScope->TrueBiasedRegions.insert(R);
1318 else if (FalseBiasedRegionsGlobal.count(R) > 0)
1319 OutermostScope->FalseBiasedRegions.insert(R);
1320 else
1321 llvm_unreachable("Must be biased");
1322 }
1323 for (SelectInst *SI : RI.Selects) {
1324 if (TrueBiasedSelectsGlobal.count(SI) > 0)
1325 OutermostScope->TrueBiasedSelects.insert(SI);
1326 else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1327 OutermostScope->FalseBiasedSelects.insert(SI);
1328 else
1329 llvm_unreachable("Must be biased");
1330 }
1331 }
1332 for (CHRScope *Sub : Scope->Subs) {
1333 classifyBiasedScopes(Sub, OutermostScope);
1334 }
1335 }
1336
hasAtLeastTwoBiasedBranches(CHRScope * Scope)1337 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1338 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1339 Scope->FalseBiasedRegions.size() +
1340 Scope->TrueBiasedSelects.size() +
1341 Scope->FalseBiasedSelects.size();
1342 return NumBiased >= CHRMergeThreshold;
1343 }
1344
filterScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1345 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1346 SmallVectorImpl<CHRScope *> &Output) {
1347 for (CHRScope *Scope : Input) {
1348 // Filter out the ones with only one region and no subs.
1349 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1350 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1351 << Scope->TrueBiasedRegions.size()
1352 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1353 << " true-selects " << Scope->TrueBiasedSelects.size()
1354 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1355 ORE.emit([&]() {
1356 return OptimizationRemarkMissed(
1357 DEBUG_TYPE,
1358 "DropScopeWithOneBranchOrSelect",
1359 Scope->RegInfos[0].R->getEntry()->getTerminator())
1360 << "Drop scope with < "
1361 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1362 << " biased branch(es) or select(s)";
1363 });
1364 continue;
1365 }
1366 Output.push_back(Scope);
1367 }
1368 }
1369
setCHRRegions(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1370 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1371 SmallVectorImpl<CHRScope *> &Output) {
1372 for (CHRScope *Scope : Input) {
1373 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1374 "Empty");
1375 setCHRRegions(Scope, Scope);
1376 Output.push_back(Scope);
1377 CHR_DEBUG(
1378 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1379 for (auto pair : Scope->HoistStopMap) {
1380 Region *R = pair.first;
1381 dbgs() << "Region " << R->getNameStr() << "\n";
1382 for (Instruction *I : pair.second) {
1383 dbgs() << "HoistStop " << *I << "\n";
1384 }
1385 }
1386 dbgs() << "CHRRegions" << "\n";
1387 for (RegInfo &RI : Scope->CHRRegions) {
1388 dbgs() << RI.R->getNameStr() << "\n";
1389 });
1390 }
1391 }
1392
setCHRRegions(CHRScope * Scope,CHRScope * OutermostScope)1393 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1394 DenseSet<Instruction *> Unhoistables;
1395 // Put the biased selects in Unhoistables because they should stay where they
1396 // are and constant-folded after CHR (in case one biased select or a branch
1397 // can depend on another biased select.)
1398 for (RegInfo &RI : Scope->RegInfos) {
1399 for (SelectInst *SI : RI.Selects) {
1400 Unhoistables.insert(SI);
1401 }
1402 }
1403 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1404 for (RegInfo &RI : Scope->RegInfos) {
1405 Region *R = RI.R;
1406 DenseSet<Instruction *> HoistStops;
1407 bool IsHoisted = false;
1408 if (RI.HasBranch) {
1409 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1410 OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1411 "Must be truthy or falsy");
1412 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1413 // Note checkHoistValue fills in HoistStops.
1414 DenseMap<Instruction *, bool> Visited;
1415 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1416 Unhoistables, &HoistStops, Visited);
1417 assert(IsHoistable && "Must be hoistable");
1418 (void)(IsHoistable); // Unused in release build
1419 IsHoisted = true;
1420 }
1421 for (SelectInst *SI : RI.Selects) {
1422 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1423 OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1424 "Must be true or false biased");
1425 // Note checkHoistValue fills in HoistStops.
1426 DenseMap<Instruction *, bool> Visited;
1427 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1428 Unhoistables, &HoistStops, Visited);
1429 assert(IsHoistable && "Must be hoistable");
1430 (void)(IsHoistable); // Unused in release build
1431 IsHoisted = true;
1432 }
1433 if (IsHoisted) {
1434 OutermostScope->CHRRegions.push_back(RI);
1435 OutermostScope->HoistStopMap[R] = HoistStops;
1436 }
1437 }
1438 for (CHRScope *Sub : Scope->Subs)
1439 setCHRRegions(Sub, OutermostScope);
1440 }
1441
CHRScopeSorter(CHRScope * Scope1,CHRScope * Scope2)1442 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1443 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1444 }
1445
sortScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1446 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1447 SmallVectorImpl<CHRScope *> &Output) {
1448 Output.resize(Input.size());
1449 llvm::copy(Input, Output.begin());
1450 llvm::stable_sort(Output, CHRScopeSorter);
1451 }
1452
1453 // Return true if V is already hoisted or was hoisted (along with its operands)
1454 // to the insert point.
hoistValue(Value * V,Instruction * HoistPoint,Region * R,HoistStopMapTy & HoistStopMap,DenseSet<Instruction * > & HoistedSet,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1455 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1456 HoistStopMapTy &HoistStopMap,
1457 DenseSet<Instruction *> &HoistedSet,
1458 DenseSet<PHINode *> &TrivialPHIs,
1459 DominatorTree &DT) {
1460 auto IT = HoistStopMap.find(R);
1461 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1462 DenseSet<Instruction *> &HoistStops = IT->second;
1463 if (auto *I = dyn_cast<Instruction>(V)) {
1464 if (I == HoistPoint)
1465 return;
1466 if (HoistStops.count(I))
1467 return;
1468 if (auto *PN = dyn_cast<PHINode>(I))
1469 if (TrivialPHIs.count(PN))
1470 // The trivial phi inserted by the previous CHR scope could replace a
1471 // non-phi in HoistStops. Note that since this phi is at the exit of a
1472 // previous CHR scope, which dominates this scope, it's safe to stop
1473 // hoisting there.
1474 return;
1475 if (HoistedSet.count(I))
1476 // Already hoisted, return.
1477 return;
1478 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1479 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1480 assert(DT.getNode(HoistPoint->getParent()) &&
1481 "DT must contain HoistPoint block");
1482 if (DT.dominates(I, HoistPoint))
1483 // We are already above the hoist point. Stop here. This may be necessary
1484 // when multiple scopes would independently hoist the same
1485 // instruction. Since an outer (dominating) scope would hoist it to its
1486 // entry before an inner (dominated) scope would to its entry, the inner
1487 // scope may see the instruction already hoisted, in which case it
1488 // potentially wrong for the inner scope to hoist it and could cause bad
1489 // IR (non-dominating def), but safe to skip hoisting it instead because
1490 // it's already in a block that dominates the inner scope.
1491 return;
1492 for (Value *Op : I->operands()) {
1493 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1494 }
1495 I->moveBefore(HoistPoint);
1496 HoistedSet.insert(I);
1497 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1498 }
1499 }
1500
1501 // Hoist the dependent condition values of the branches and the selects in the
1502 // scope to the insert point.
hoistScopeConditions(CHRScope * Scope,Instruction * HoistPoint,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1503 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1504 DenseSet<PHINode *> &TrivialPHIs,
1505 DominatorTree &DT) {
1506 DenseSet<Instruction *> HoistedSet;
1507 for (const RegInfo &RI : Scope->CHRRegions) {
1508 Region *R = RI.R;
1509 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1510 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1511 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1512 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1513 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1514 HoistedSet, TrivialPHIs, DT);
1515 }
1516 for (SelectInst *SI : RI.Selects) {
1517 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1518 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1519 if (!(IsTrueBiased || IsFalseBiased))
1520 continue;
1521 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1522 HoistedSet, TrivialPHIs, DT);
1523 }
1524 }
1525 }
1526
1527 // Negate the predicate if an ICmp if it's used only by branches or selects by
1528 // swapping the operands of the branches or the selects. Returns true if success.
negateICmpIfUsedByBranchOrSelectOnly(ICmpInst * ICmp,Instruction * ExcludedUser,CHRScope * Scope)1529 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1530 Instruction *ExcludedUser,
1531 CHRScope *Scope) {
1532 for (User *U : ICmp->users()) {
1533 if (U == ExcludedUser)
1534 continue;
1535 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1536 continue;
1537 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1538 continue;
1539 return false;
1540 }
1541 for (User *U : ICmp->users()) {
1542 if (U == ExcludedUser)
1543 continue;
1544 if (auto *BI = dyn_cast<BranchInst>(U)) {
1545 assert(BI->isConditional() && "Must be conditional");
1546 BI->swapSuccessors();
1547 // Don't need to swap this in terms of
1548 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1549 // mean whehter the branch is likely go into the if-then rather than
1550 // successor0/successor1 and because we can tell which edge is the then or
1551 // the else one by comparing the destination to the region exit block.
1552 continue;
1553 }
1554 if (auto *SI = dyn_cast<SelectInst>(U)) {
1555 // Swap operands
1556 SI->swapValues();
1557 SI->swapProfMetadata();
1558 if (Scope->TrueBiasedSelects.count(SI)) {
1559 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1560 "Must not be already in");
1561 Scope->FalseBiasedSelects.insert(SI);
1562 } else if (Scope->FalseBiasedSelects.count(SI)) {
1563 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1564 "Must not be already in");
1565 Scope->TrueBiasedSelects.insert(SI);
1566 }
1567 continue;
1568 }
1569 llvm_unreachable("Must be a branch or a select");
1570 }
1571 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1572 return true;
1573 }
1574
1575 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1576 // for a value that's defined in the scope but used outside it (meaning it's
1577 // alive at the exit block).
insertTrivialPHIs(CHRScope * Scope,BasicBlock * EntryBlock,BasicBlock * ExitBlock,DenseSet<PHINode * > & TrivialPHIs)1578 static void insertTrivialPHIs(CHRScope *Scope,
1579 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1580 DenseSet<PHINode *> &TrivialPHIs) {
1581 DenseSet<BasicBlock *> BlocksInScopeSet;
1582 SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1583 for (RegInfo &RI : Scope->RegInfos) {
1584 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1585 // sub-Scopes.
1586 BlocksInScopeSet.insert(BB);
1587 BlocksInScopeVec.push_back(BB);
1588 }
1589 }
1590 CHR_DEBUG(
1591 dbgs() << "Inserting redudant phis\n";
1592 for (BasicBlock *BB : BlocksInScopeVec) {
1593 dbgs() << "BlockInScope " << BB->getName() << "\n";
1594 });
1595 for (BasicBlock *BB : BlocksInScopeVec) {
1596 for (Instruction &I : *BB) {
1597 SmallVector<Instruction *, 8> Users;
1598 for (User *U : I.users()) {
1599 if (auto *UI = dyn_cast<Instruction>(U)) {
1600 if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1601 // Unless there's already a phi for I at the exit block.
1602 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1603 CHR_DEBUG(dbgs() << "V " << I << "\n");
1604 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1605 Users.push_back(UI);
1606 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1607 // There's a loop backedge from a block that's dominated by this
1608 // scope to the entry block.
1609 CHR_DEBUG(dbgs() << "V " << I << "\n");
1610 CHR_DEBUG(dbgs()
1611 << "Used at entry block (for a back edge) by a phi user "
1612 << *UI << "\n");
1613 Users.push_back(UI);
1614 }
1615 }
1616 }
1617 if (Users.size() > 0) {
1618 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1619 // ExitBlock. Replace I with the new phi in UI unless UI is another
1620 // phi at ExitBlock.
1621 unsigned PredCount = std::distance(pred_begin(ExitBlock),
1622 pred_end(ExitBlock));
1623 PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1624 &ExitBlock->front());
1625 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1626 PN->addIncoming(&I, Pred);
1627 }
1628 TrivialPHIs.insert(PN);
1629 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1630 for (Instruction *UI : Users) {
1631 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1632 if (UI->getOperand(J) == &I) {
1633 UI->setOperand(J, PN);
1634 }
1635 }
1636 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1637 }
1638 }
1639 }
1640 }
1641 }
1642
1643 // Assert that all the CHR regions of the scope have a biased branch or select.
1644 static void LLVM_ATTRIBUTE_UNUSED
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope * Scope)1645 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1646 #ifndef NDEBUG
1647 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1648 if (Scope->TrueBiasedRegions.count(RI.R) ||
1649 Scope->FalseBiasedRegions.count(RI.R))
1650 return true;
1651 for (SelectInst *SI : RI.Selects)
1652 if (Scope->TrueBiasedSelects.count(SI) ||
1653 Scope->FalseBiasedSelects.count(SI))
1654 return true;
1655 return false;
1656 };
1657 for (RegInfo &RI : Scope->CHRRegions) {
1658 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1659 "Must have biased branch or select");
1660 }
1661 #endif
1662 }
1663
1664 // Assert that all the condition values of the biased branches and selects have
1665 // been hoisted to the pre-entry block or outside of the scope.
assertBranchOrSelectConditionHoisted(CHRScope * Scope,BasicBlock * PreEntryBlock)1666 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1667 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1668 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1669 for (RegInfo &RI : Scope->CHRRegions) {
1670 Region *R = RI.R;
1671 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1672 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1673 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1674 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1675 Value *V = BI->getCondition();
1676 CHR_DEBUG(dbgs() << *V << "\n");
1677 if (auto *I = dyn_cast<Instruction>(V)) {
1678 (void)(I); // Unused in release build.
1679 assert((I->getParent() == PreEntryBlock ||
1680 !Scope->contains(I)) &&
1681 "Must have been hoisted to PreEntryBlock or outside the scope");
1682 }
1683 }
1684 for (SelectInst *SI : RI.Selects) {
1685 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1686 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1687 if (!(IsTrueBiased || IsFalseBiased))
1688 continue;
1689 Value *V = SI->getCondition();
1690 CHR_DEBUG(dbgs() << *V << "\n");
1691 if (auto *I = dyn_cast<Instruction>(V)) {
1692 (void)(I); // Unused in release build.
1693 assert((I->getParent() == PreEntryBlock ||
1694 !Scope->contains(I)) &&
1695 "Must have been hoisted to PreEntryBlock or outside the scope");
1696 }
1697 }
1698 }
1699 }
1700
transformScopes(CHRScope * Scope,DenseSet<PHINode * > & TrivialPHIs)1701 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1702 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1703
1704 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1705 Region *FirstRegion = Scope->RegInfos[0].R;
1706 BasicBlock *EntryBlock = FirstRegion->getEntry();
1707 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1708 BasicBlock *ExitBlock = LastRegion->getExit();
1709 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1710
1711 if (ExitBlock) {
1712 // Insert a trivial phi at the exit block (where the CHR hot path and the
1713 // cold path merges) for a value that's defined in the scope but used
1714 // outside it (meaning it's alive at the exit block). We will add the
1715 // incoming values for the CHR cold paths to it below. Without this, we'd
1716 // miss updating phi's for such values unless there happens to already be a
1717 // phi for that value there.
1718 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1719 }
1720
1721 // Split the entry block of the first region. The new block becomes the new
1722 // entry block of the first region. The old entry block becomes the block to
1723 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1724 // through the split, we update the entry of the first region after the split,
1725 // and Region only points to the entry and the exit blocks, rather than
1726 // keeping everything in a list or set, the blocks membership and the
1727 // entry/exit blocks of the region are still valid after the split.
1728 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1729 << " at " << *Scope->BranchInsertPoint << "\n");
1730 BasicBlock *NewEntryBlock =
1731 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1732 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1733 "NewEntryBlock's only pred must be EntryBlock");
1734 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1735 BasicBlock *PreEntryBlock = EntryBlock;
1736
1737 ValueToValueMapTy VMap;
1738 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1739 // hot path (originals) and a cold path (clones) and update the PHIs at the
1740 // exit block.
1741 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1742
1743 // Replace the old (placeholder) branch with the new (merged) conditional
1744 // branch.
1745 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1746 NewEntryBlock, VMap);
1747
1748 #ifndef NDEBUG
1749 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1750 #endif
1751
1752 // Hoist the conditional values of the branches/selects.
1753 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1754
1755 #ifndef NDEBUG
1756 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1757 #endif
1758
1759 // Create the combined branch condition and constant-fold the branches/selects
1760 // in the hot path.
1761 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1762 ProfileCount ? ProfileCount.getValue() : 0);
1763 }
1764
1765 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1766 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1767 // at the exit block.
cloneScopeBlocks(CHRScope * Scope,BasicBlock * PreEntryBlock,BasicBlock * ExitBlock,Region * LastRegion,ValueToValueMapTy & VMap)1768 void CHR::cloneScopeBlocks(CHRScope *Scope,
1769 BasicBlock *PreEntryBlock,
1770 BasicBlock *ExitBlock,
1771 Region *LastRegion,
1772 ValueToValueMapTy &VMap) {
1773 // Clone all the blocks. The original blocks will be the hot-path
1774 // CHR-optimized code and the cloned blocks will be the original unoptimized
1775 // code. This is so that the block pointers from the
1776 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1777 // which CHR should apply to.
1778 SmallVector<BasicBlock*, 8> NewBlocks;
1779 for (RegInfo &RI : Scope->RegInfos)
1780 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1781 // sub-Scopes.
1782 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1783 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1784 NewBlocks.push_back(NewBB);
1785 VMap[BB] = NewBB;
1786 }
1787
1788 // Place the cloned blocks right after the original blocks (right before the
1789 // exit block of.)
1790 if (ExitBlock)
1791 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1792 F.getBasicBlockList(),
1793 NewBlocks[0]->getIterator(), F.end());
1794
1795 // Update the cloned blocks/instructions to refer to themselves.
1796 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1797 for (Instruction &I : *NewBlocks[i])
1798 RemapInstruction(&I, VMap,
1799 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1800
1801 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1802 // the top-level region but we don't need to add PHIs. The trivial PHIs
1803 // inserted above will be updated here.
1804 if (ExitBlock)
1805 for (PHINode &PN : ExitBlock->phis())
1806 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1807 ++I) {
1808 BasicBlock *Pred = PN.getIncomingBlock(I);
1809 if (LastRegion->contains(Pred)) {
1810 Value *V = PN.getIncomingValue(I);
1811 auto It = VMap.find(V);
1812 if (It != VMap.end()) V = It->second;
1813 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1814 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1815 }
1816 }
1817 }
1818
1819 // A helper for transformScope. Replace the old (placeholder) branch with the
1820 // new (merged) conditional branch.
createMergedBranch(BasicBlock * PreEntryBlock,BasicBlock * EntryBlock,BasicBlock * NewEntryBlock,ValueToValueMapTy & VMap)1821 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1822 BasicBlock *EntryBlock,
1823 BasicBlock *NewEntryBlock,
1824 ValueToValueMapTy &VMap) {
1825 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1826 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1827 "SplitBlock did not work correctly!");
1828 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1829 "NewEntryBlock's only pred must be EntryBlock");
1830 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1831 "NewEntryBlock must have been copied");
1832 OldBR->dropAllReferences();
1833 OldBR->eraseFromParent();
1834 // The true predicate is a placeholder. It will be replaced later in
1835 // fixupBranchesAndSelects().
1836 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1837 cast<BasicBlock>(VMap[NewEntryBlock]),
1838 ConstantInt::getTrue(F.getContext()));
1839 PreEntryBlock->getInstList().push_back(NewBR);
1840 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1841 "NewEntryBlock's only pred must be EntryBlock");
1842 return NewBR;
1843 }
1844
1845 // A helper for transformScopes. Create the combined branch condition and
1846 // constant-fold the branches/selects in the hot path.
fixupBranchesAndSelects(CHRScope * Scope,BasicBlock * PreEntryBlock,BranchInst * MergedBR,uint64_t ProfileCount)1847 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1848 BasicBlock *PreEntryBlock,
1849 BranchInst *MergedBR,
1850 uint64_t ProfileCount) {
1851 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1852 BranchProbability CHRBranchBias(1, 1);
1853 uint64_t NumCHRedBranches = 0;
1854 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1855 for (RegInfo &RI : Scope->CHRRegions) {
1856 Region *R = RI.R;
1857 if (RI.HasBranch) {
1858 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1859 ++NumCHRedBranches;
1860 }
1861 for (SelectInst *SI : RI.Selects) {
1862 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1863 ++NumCHRedBranches;
1864 }
1865 }
1866 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1867 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1868 ORE.emit([&]() {
1869 return OptimizationRemark(DEBUG_TYPE,
1870 "CHR",
1871 // Refer to the hot (original) path
1872 MergedBR->getSuccessor(0)->getTerminator())
1873 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1874 << " branches or selects";
1875 });
1876 MergedBR->setCondition(MergedCondition);
1877 SmallVector<uint32_t, 2> Weights;
1878 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1879 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1880 MDBuilder MDB(F.getContext());
1881 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1882 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1883 << "\n");
1884 }
1885
1886 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1887 // and constant-fold a branch in the hot path.
fixupBranch(Region * R,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1888 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1889 IRBuilder<> &IRB,
1890 Value *&MergedCondition,
1891 BranchProbability &CHRBranchBias) {
1892 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1893 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1894 "Must be truthy or falsy");
1895 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1896 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1897 "Must be in the bias map");
1898 BranchProbability Bias = BranchBiasMap[R];
1899 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1900 // Take the min.
1901 if (CHRBranchBias > Bias)
1902 CHRBranchBias = Bias;
1903 BasicBlock *IfThen = BI->getSuccessor(1);
1904 BasicBlock *IfElse = BI->getSuccessor(0);
1905 BasicBlock *RegionExitBlock = R->getExit();
1906 assert(RegionExitBlock && "Null ExitBlock");
1907 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1908 IfThen != IfElse && "Invariant from findScopes");
1909 if (IfThen == RegionExitBlock) {
1910 // Swap them so that IfThen means going into it and IfElse means skipping
1911 // it.
1912 std::swap(IfThen, IfElse);
1913 }
1914 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1915 << " IfElse " << IfElse->getName() << "\n");
1916 Value *Cond = BI->getCondition();
1917 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1918 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1919 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1920 MergedCondition);
1921 // Constant-fold the branch at ClonedEntryBlock.
1922 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1923 "The successor shouldn't change");
1924 Value *NewCondition = ConditionTrue ?
1925 ConstantInt::getTrue(F.getContext()) :
1926 ConstantInt::getFalse(F.getContext());
1927 BI->setCondition(NewCondition);
1928 }
1929
1930 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1931 // and constant-fold a select in the hot path.
fixupSelect(SelectInst * SI,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1932 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1933 IRBuilder<> &IRB,
1934 Value *&MergedCondition,
1935 BranchProbability &CHRBranchBias) {
1936 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1937 assert((IsTrueBiased ||
1938 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1939 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1940 "Must be in the bias map");
1941 BranchProbability Bias = SelectBiasMap[SI];
1942 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1943 // Take the min.
1944 if (CHRBranchBias > Bias)
1945 CHRBranchBias = Bias;
1946 Value *Cond = SI->getCondition();
1947 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1948 MergedCondition);
1949 Value *NewCondition = IsTrueBiased ?
1950 ConstantInt::getTrue(F.getContext()) :
1951 ConstantInt::getFalse(F.getContext());
1952 SI->setCondition(NewCondition);
1953 }
1954
1955 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1956 // condition.
addToMergedCondition(bool IsTrueBiased,Value * Cond,Instruction * BranchOrSelect,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition)1957 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1958 Instruction *BranchOrSelect,
1959 CHRScope *Scope,
1960 IRBuilder<> &IRB,
1961 Value *&MergedCondition) {
1962 if (IsTrueBiased) {
1963 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1964 } else {
1965 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1966 // branch, negate the icmp predicate and swap the branch targets and avoid
1967 // inserting an Xor to negate Cond.
1968 bool Done = false;
1969 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1970 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1971 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1972 Done = true;
1973 }
1974 if (!Done) {
1975 Value *Negate = IRB.CreateXor(
1976 ConstantInt::getTrue(F.getContext()), Cond);
1977 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1978 }
1979 }
1980 }
1981
transformScopes(SmallVectorImpl<CHRScope * > & CHRScopes)1982 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1983 unsigned I = 0;
1984 DenseSet<PHINode *> TrivialPHIs;
1985 for (CHRScope *Scope : CHRScopes) {
1986 transformScopes(Scope, TrivialPHIs);
1987 CHR_DEBUG(
1988 std::ostringstream oss;
1989 oss << " after transformScopes " << I++;
1990 dumpIR(F, oss.str().c_str(), nullptr));
1991 (void)I;
1992 }
1993 }
1994
1995 static void LLVM_ATTRIBUTE_UNUSED
dumpScopes(SmallVectorImpl<CHRScope * > & Scopes,const char * Label)1996 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1997 dbgs() << Label << " " << Scopes.size() << "\n";
1998 for (CHRScope *Scope : Scopes) {
1999 dbgs() << *Scope << "\n";
2000 }
2001 }
2002
run()2003 bool CHR::run() {
2004 if (!shouldApply(F, PSI))
2005 return false;
2006
2007 CHR_DEBUG(dumpIR(F, "before", nullptr));
2008
2009 bool Changed = false;
2010 {
2011 CHR_DEBUG(
2012 dbgs() << "RegionInfo:\n";
2013 RI.print(dbgs()));
2014
2015 // Recursively traverse the region tree and find regions that have biased
2016 // branches and/or selects and create scopes.
2017 SmallVector<CHRScope *, 8> AllScopes;
2018 findScopes(AllScopes);
2019 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2020
2021 // Split the scopes if 1) the conditiona values of the biased
2022 // branches/selects of the inner/lower scope can't be hoisted up to the
2023 // outermost/uppermost scope entry, or 2) the condition values of the biased
2024 // branches/selects in a scope (including subscopes) don't share at least
2025 // one common value.
2026 SmallVector<CHRScope *, 8> SplitScopes;
2027 splitScopes(AllScopes, SplitScopes);
2028 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2029
2030 // After splitting, set the biased regions and selects of a scope (a tree
2031 // root) that include those of the subscopes.
2032 classifyBiasedScopes(SplitScopes);
2033 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2034
2035 // Filter out the scopes that has only one biased region or select (CHR
2036 // isn't useful in such a case).
2037 SmallVector<CHRScope *, 8> FilteredScopes;
2038 filterScopes(SplitScopes, FilteredScopes);
2039 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2040
2041 // Set the regions to be CHR'ed and their hoist stops for each scope.
2042 SmallVector<CHRScope *, 8> SetScopes;
2043 setCHRRegions(FilteredScopes, SetScopes);
2044 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2045
2046 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2047 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2048 // to the hot path, rather than both hot and cold paths.
2049 SmallVector<CHRScope *, 8> SortedScopes;
2050 sortScopes(SetScopes, SortedScopes);
2051 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2052
2053 CHR_DEBUG(
2054 dbgs() << "RegionInfo:\n";
2055 RI.print(dbgs()));
2056
2057 // Apply the CHR transformation.
2058 if (!SortedScopes.empty()) {
2059 transformScopes(SortedScopes);
2060 Changed = true;
2061 }
2062 }
2063
2064 if (Changed) {
2065 CHR_DEBUG(dumpIR(F, "after", &Stats));
2066 ORE.emit([&]() {
2067 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2068 << ore::NV("Function", &F) << " "
2069 << "Reduced the number of branches in hot paths by "
2070 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2071 << " (static) and "
2072 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2073 << " (weighted by PGO count)";
2074 });
2075 }
2076
2077 return Changed;
2078 }
2079
runOnFunction(Function & F)2080 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2081 BlockFrequencyInfo &BFI =
2082 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2083 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2084 ProfileSummaryInfo &PSI =
2085 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2086 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2087 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2088 std::make_unique<OptimizationRemarkEmitter>(&F);
2089 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2090 }
2091
2092 namespace llvm {
2093
ControlHeightReductionPass()2094 ControlHeightReductionPass::ControlHeightReductionPass() {
2095 parseCHRFilterFiles();
2096 }
2097
run(Function & F,FunctionAnalysisManager & FAM)2098 PreservedAnalyses ControlHeightReductionPass::run(
2099 Function &F,
2100 FunctionAnalysisManager &FAM) {
2101 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2102 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2103 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2104 auto &MAM = MAMProxy.getManager();
2105 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2106 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2107 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2108 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2109 if (!Changed)
2110 return PreservedAnalyses::all();
2111 auto PA = PreservedAnalyses();
2112 PA.preserve<GlobalsAA>();
2113 return PA;
2114 }
2115
2116 } // namespace llvm
2117