1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #define DEBUG_TYPE "bypass-slow-division"
19 #include "llvm/Instructions.h"
20 #include "llvm/Function.h"
21 #include "llvm/IRBuilder.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
24
25 using namespace llvm;
26
27 namespace {
28 struct DivOpInfo {
29 bool SignedOp;
30 Value *Dividend;
31 Value *Divisor;
32
DivOpInfo__anon48b5b4120111::DivOpInfo33 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35 };
36
37 struct DivPhiNodes {
38 PHINode *Quotient;
39 PHINode *Remainder;
40
DivPhiNodes__anon48b5b4120111::DivPhiNodes41 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42 : Quotient(InQuotient), Remainder(InRemainder) {}
43 };
44 }
45
46 namespace llvm {
47 template<>
48 struct DenseMapInfo<DivOpInfo> {
isEqualllvm::DenseMapInfo49 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
50 return Val1.SignedOp == Val2.SignedOp &&
51 Val1.Dividend == Val2.Dividend &&
52 Val1.Divisor == Val2.Divisor;
53 }
54
getEmptyKeyllvm::DenseMapInfo55 static DivOpInfo getEmptyKey() {
56 return DivOpInfo(false, 0, 0);
57 }
58
getTombstoneKeyllvm::DenseMapInfo59 static DivOpInfo getTombstoneKey() {
60 return DivOpInfo(true, 0, 0);
61 }
62
getHashValuellvm::DenseMapInfo63 static unsigned getHashValue(const DivOpInfo &Val) {
64 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
65 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
66 (unsigned)Val.SignedOp;
67 }
68 };
69
70 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
71 }
72
73 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
74 // value of the operands and uses a shorter-faster div/rem instruction when
75 // possible and the longer-slower div/rem instruction otherwise.
insertFastDiv(Function & F,Function::iterator & I,BasicBlock::iterator & J,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)76 static bool insertFastDiv(Function &F,
77 Function::iterator &I,
78 BasicBlock::iterator &J,
79 IntegerType *BypassType,
80 bool UseDivOp,
81 bool UseSignedOp,
82 DivCacheTy &PerBBDivCache) {
83 // Get instruction operands
84 Instruction *Instr = J;
85 Value *Dividend = Instr->getOperand(0);
86 Value *Divisor = Instr->getOperand(1);
87
88 if (isa<ConstantInt>(Divisor) ||
89 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
90 // Operations with immediate values should have
91 // been solved and replaced during compile time.
92 return false;
93 }
94
95 // Basic Block is split before divide
96 BasicBlock *MainBB = I;
97 BasicBlock *SuccessorBB = I->splitBasicBlock(J);
98 ++I; //advance iterator I to successorBB
99
100 // Add new basic block for slow divide operation
101 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
102 MainBB->getParent(), SuccessorBB);
103 SlowBB->moveBefore(SuccessorBB);
104 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
105 Value *SlowQuotientV;
106 Value *SlowRemainderV;
107 if (UseSignedOp) {
108 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
109 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
110 } else {
111 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
112 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
113 }
114 SlowBuilder.CreateBr(SuccessorBB);
115
116 // Add new basic block for fast divide operation
117 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
118 MainBB->getParent(), SuccessorBB);
119 FastBB->moveBefore(SlowBB);
120 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
121 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
122 BypassType);
123 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
124 BypassType);
125
126 // udiv/urem because optimization only handles positive numbers
127 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
128 ShortDivisorV);
129 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130 ShortDivisorV);
131 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132 ShortQuotientV,
133 Dividend->getType());
134 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135 ShortRemainderV,
136 Dividend->getType());
137 FastBuilder.CreateBr(SuccessorBB);
138
139 // Phi nodes for result of div and rem
140 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
142 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143 QuoPhi->addIncoming(FastQuotientV, FastBB);
144 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
145 RemPhi->addIncoming(SlowRemainderV, SlowBB);
146 RemPhi->addIncoming(FastRemainderV, FastBB);
147
148 // Replace Instr with appropriate phi node
149 if (UseDivOp)
150 Instr->replaceAllUsesWith(QuoPhi);
151 else
152 Instr->replaceAllUsesWith(RemPhi);
153 Instr->eraseFromParent();
154
155 // Combine operands into a single value with OR for value testing below
156 MainBB->getInstList().back().eraseFromParent();
157 IRBuilder<> MainBuilder(MainBB, MainBB->end());
158 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
159
160 // BitMask is inverted to check if the operands are
161 // larger than the bypass type
162 uint64_t BitMask = ~BypassType->getBitMask();
163 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
164
165 // Compare operand values and branch
166 Value *ZeroV = MainBuilder.getInt32(0);
167 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
168 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
169
170 // point iterator J at first instruction of successorBB
171 J = I->begin();
172
173 // Cache phi nodes to be used later in place of other instances
174 // of div or rem with the same sign, dividend, and divisor
175 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
176 DivPhiNodes Value(QuoPhi, RemPhi);
177 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
178 return true;
179 }
180
181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
182 // operands and operation are identical. Otherwise call insertFastDiv to perform
183 // the optimization and cache the resulting dividend and remainder.
reuseOrInsertFastDiv(Function & F,Function::iterator & I,BasicBlock::iterator & J,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)184 static bool reuseOrInsertFastDiv(Function &F,
185 Function::iterator &I,
186 BasicBlock::iterator &J,
187 IntegerType *BypassType,
188 bool UseDivOp,
189 bool UseSignedOp,
190 DivCacheTy &PerBBDivCache) {
191 // Get instruction operands
192 Instruction *Instr = J;
193 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
194 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
195
196 if (CacheI == PerBBDivCache.end()) {
197 // If previous instance does not exist, insert fast div
198 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
199 PerBBDivCache);
200 }
201
202 // Replace operation value with previously generated phi node
203 DivPhiNodes &Value = CacheI->second;
204 if (UseDivOp) {
205 // Replace all uses of div instruction with quotient phi node
206 J->replaceAllUsesWith(Value.Quotient);
207 } else {
208 // Replace all uses of rem instruction with remainder phi node
209 J->replaceAllUsesWith(Value.Remainder);
210 }
211
212 // Advance to next operation
213 ++J;
214
215 // Remove redundant operation
216 Instr->eraseFromParent();
217 return true;
218 }
219
220 // bypassSlowDivision - This optimization identifies DIV instructions that can
221 // be profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(Function & F,Function::iterator & I,const DenseMap<Type *,Type * > & BypassTypeMap)222 bool llvm::bypassSlowDivision(Function &F,
223 Function::iterator &I,
224 const DenseMap<Type *, Type *> &BypassTypeMap) {
225 DivCacheTy DivCache;
226
227 bool MadeChange = false;
228 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
229
230 // Get instruction details
231 unsigned Opcode = J->getOpcode();
232 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
233 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
234 bool UseSignedOp = Opcode == Instruction::SDiv ||
235 Opcode == Instruction::SRem;
236
237 // Only optimize div or rem ops
238 if (!UseDivOp && !UseRemOp)
239 continue;
240
241 // Continue if div/rem type is not bypassed
242 DenseMap<Type *, Type *>::const_iterator BT =
243 BypassTypeMap.find(J->getType());
244 if (BT == BypassTypeMap.end())
245 continue;
246
247 IntegerType *BypassType = cast<IntegerType>(BT->second);
248 MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
249 UseSignedOp, DivCache);
250 }
251
252 return MadeChange;
253 }
254