1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create scalar loops and orchestrate the
10 // creation of parallel loops as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/Options.h"
16 #include "polly/ScopDetection.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/DataLayout.h"
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23
24 using namespace llvm;
25 using namespace polly;
26
27 int polly::PollyNumThreads;
28 OMPGeneralSchedulingType polly::PollyScheduling;
29 int polly::PollyChunkSize;
30
31 static cl::opt<int, true>
32 XPollyNumThreads("polly-num-threads",
33 cl::desc("Number of threads to use (0 = auto)"),
34 cl::Hidden, cl::location(polly::PollyNumThreads),
35 cl::init(0), cl::cat(PollyCategory));
36
37 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
38 "polly-scheduling",
39 cl::desc("Scheduling type of parallel OpenMP for loops"),
40 cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
41 "Static scheduling"),
42 clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
43 "Dynamic scheduling"),
44 clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
45 "Guided scheduling"),
46 clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
47 "Runtime determined (OMP_SCHEDULE)")),
48 cl::Hidden, cl::location(polly::PollyScheduling),
49 cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
50 cl::cat(PollyCategory));
51
52 static cl::opt<int, true>
53 XPollyChunkSize("polly-scheduling-chunksize",
54 cl::desc("Chunksize to use by the OpenMP runtime calls"),
55 cl::Hidden, cl::location(polly::PollyChunkSize),
56 cl::init(0), cl::Optional, cl::cat(PollyCategory));
57
58 // We generate a loop of either of the following structures:
59 //
60 // BeforeBB BeforeBB
61 // | |
62 // v v
63 // GuardBB PreHeaderBB
64 // / | | _____
65 // __ PreHeaderBB | v \/ |
66 // / \ / | HeaderBB latch
67 // latch HeaderBB | |\ |
68 // \ / \ / | \------/
69 // < \ / |
70 // \ / v
71 // ExitBB ExitBB
72 //
73 // depending on whether or not we know that it is executed at least once. If
74 // not, GuardBB checks if the loop is executed at least once. If this is the
75 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
76 // contains the loop iv 'polly.indvar', the incremented loop iv
77 // 'polly.indvar_next' as well as the condition to check if we execute another
78 // iteration of the loop. After the loop has finished, we branch to ExitBB.
79 // We expect the type of UB, LB, UB+Stride to be large enough for values that
80 // UB may take throughout the execution of the loop, including the computation
81 // of indvar + Stride before the final abort.
createLoop(Value * LB,Value * UB,Value * Stride,PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,BasicBlock * & ExitBB,ICmpInst::Predicate Predicate,ScopAnnotator * Annotator,bool Parallel,bool UseGuard,bool LoopVectDisabled)82 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
83 PollyIRBuilder &Builder, LoopInfo &LI,
84 DominatorTree &DT, BasicBlock *&ExitBB,
85 ICmpInst::Predicate Predicate,
86 ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
87 bool LoopVectDisabled) {
88 Function *F = Builder.GetInsertBlock()->getParent();
89 LLVMContext &Context = F->getContext();
90
91 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
92 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
93 assert(LoopIVType && "UB is not integer?");
94
95 BasicBlock *BeforeBB = Builder.GetInsertBlock();
96 BasicBlock *GuardBB =
97 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
98 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
99 BasicBlock *PreHeaderBB =
100 BasicBlock::Create(Context, "polly.loop_preheader", F);
101
102 // Update LoopInfo
103 Loop *OuterLoop = LI.getLoopFor(BeforeBB);
104 Loop *NewLoop = LI.AllocateLoop();
105
106 if (OuterLoop)
107 OuterLoop->addChildLoop(NewLoop);
108 else
109 LI.addTopLevelLoop(NewLoop);
110
111 if (OuterLoop) {
112 if (GuardBB)
113 OuterLoop->addBasicBlockToLoop(GuardBB, LI);
114 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
115 }
116
117 NewLoop->addBasicBlockToLoop(HeaderBB, LI);
118
119 // Notify the annotator (if present) that we have a new loop, but only
120 // after the header block is set.
121 if (Annotator)
122 Annotator->pushLoop(NewLoop, Parallel);
123
124 // ExitBB
125 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
126 ExitBB->setName("polly.loop_exit");
127
128 // BeforeBB
129 if (GuardBB) {
130 BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
131 DT.addNewBlock(GuardBB, BeforeBB);
132
133 // GuardBB
134 Builder.SetInsertPoint(GuardBB);
135 Value *LoopGuard;
136 LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
137 LoopGuard->setName("polly.loop_guard");
138 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
139 DT.addNewBlock(PreHeaderBB, GuardBB);
140 } else {
141 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
142 DT.addNewBlock(PreHeaderBB, BeforeBB);
143 }
144
145 // PreHeaderBB
146 Builder.SetInsertPoint(PreHeaderBB);
147 Builder.CreateBr(HeaderBB);
148
149 // HeaderBB
150 DT.addNewBlock(HeaderBB, PreHeaderBB);
151 Builder.SetInsertPoint(HeaderBB);
152 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
153 IV->addIncoming(LB, PreHeaderBB);
154 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
155 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
156 Value *LoopCondition =
157 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
158
159 // Create the loop latch and annotate it as such.
160 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
161 if (Annotator)
162 Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
163
164 IV->addIncoming(IncrementedIV, HeaderBB);
165 if (GuardBB)
166 DT.changeImmediateDominator(ExitBB, GuardBB);
167 else
168 DT.changeImmediateDominator(ExitBB, HeaderBB);
169
170 // The loop body should be added here.
171 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
172 return IV;
173 }
174
createParallelLoop(Value * LB,Value * UB,Value * Stride,SetVector<Value * > & UsedValues,ValueMapT & Map,BasicBlock::iterator * LoopBody)175 Value *ParallelLoopGenerator::createParallelLoop(
176 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
177 ValueMapT &Map, BasicBlock::iterator *LoopBody) {
178
179 AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
180 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
181
182 Value *IV;
183 Function *SubFn;
184 std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
185 *LoopBody = Builder.GetInsertPoint();
186 Builder.SetInsertPoint(&*BeforeLoop);
187
188 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
189 "polly.par.userContext");
190
191 // Add one as the upper bound provided by OpenMP is a < comparison
192 // whereas the codegenForSequential function creates a <= comparison.
193 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
194
195 // Execute the prepared subfunction in parallel.
196 deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);
197
198 return IV;
199 }
200
createSubFnDefinition()201 Function *ParallelLoopGenerator::createSubFnDefinition() {
202 Function *F = Builder.GetInsertBlock()->getParent();
203 Function *SubFn = prepareSubFnDefinition(F);
204
205 // Certain backends (e.g., NVPTX) do not support '.'s in function names.
206 // Hence, we ensure that all '.'s are replaced by '_'s.
207 std::string FunctionName = SubFn->getName().str();
208 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
209 SubFn->setName(FunctionName);
210
211 // Do not run any polly pass on the new function.
212 SubFn->addFnAttr(PollySkipFnAttr);
213
214 return SubFn;
215 }
216
217 AllocaInst *
storeValuesIntoStruct(SetVector<Value * > & Values)218 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
219 SmallVector<Type *, 8> Members;
220
221 for (Value *V : Values)
222 Members.push_back(V->getType());
223
224 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
225
226 // We do not want to allocate the alloca inside any loop, thus we allocate it
227 // in the entry block of the function and use annotations to denote the actual
228 // live span (similar to clang).
229 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
230 Instruction *IP = &*EntryBB.getFirstInsertionPt();
231 StructType *Ty = StructType::get(Builder.getContext(), Members);
232 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
233 "polly.par.userContext", IP);
234
235 for (unsigned i = 0; i < Values.size(); i++) {
236 Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
237 Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
238 Builder.CreateStore(Values[i], Address);
239 }
240
241 return Struct;
242 }
243
extractValuesFromStruct(SetVector<Value * > OldValues,Type * Ty,Value * Struct,ValueMapT & Map)244 void ParallelLoopGenerator::extractValuesFromStruct(
245 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
246 for (unsigned i = 0; i < OldValues.size(); i++) {
247 Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
248 Value *NewValue = Builder.CreateLoad(Address);
249 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
250 Map[OldValues[i]] = NewValue;
251 }
252 }
253