1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create parallel loops as LLVM-IR.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
14 #include "llvm/IR/Dominators.h"
15 #include "llvm/IR/Module.h"
16
17 using namespace llvm;
18 using namespace polly;
19
createCallSpawnThreads(Value * SubFn,Value * SubFnParam,Value * LB,Value * UB,Value * Stride)20 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
21 Value *SubFnParam,
22 Value *LB, Value *UB,
23 Value *Stride) {
24 const std::string Name = "GOMP_parallel_loop_runtime_start";
25
26 Function *F = M->getFunction(Name);
27
28 // If F is not available, declare it.
29 if (!F) {
30 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
31
32 Type *Params[] = {PointerType::getUnqual(FunctionType::get(
33 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
34 Builder.getInt8PtrTy(),
35 Builder.getInt32Ty(),
36 LongType,
37 LongType,
38 LongType};
39
40 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
41 F = Function::Create(Ty, Linkage, Name, M);
42 }
43
44 Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
45 LB, UB, Stride};
46
47 Builder.CreateCall(F, Args);
48 }
49
deployParallelExecution(Function * SubFn,Value * SubFnParam,Value * LB,Value * UB,Value * Stride)50 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
51 Value *SubFnParam,
52 Value *LB, Value *UB,
53 Value *Stride) {
54 // Tell the runtime we start a parallel loop
55 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
56 Builder.CreateCall(SubFn, SubFnParam);
57 createCallJoinThreads();
58 }
59
prepareSubFnDefinition(Function * F) const60 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const {
61 FunctionType *FT =
62 FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false);
63 Function *SubFn = Function::Create(FT, Function::InternalLinkage,
64 F->getName() + "_polly_subfn", M);
65 // Name the function's arguments
66 SubFn->arg_begin()->setName("polly.par.userContext");
67 return SubFn;
68 }
69
70 // Create a subfunction of the following (preliminary) structure:
71 //
72 // PrevBB
73 // |
74 // v
75 // HeaderBB
76 // | _____
77 // v v |
78 // CheckNextBB PreHeaderBB
79 // |\ |
80 // | \______/
81 // |
82 // v
83 // ExitBB
84 //
85 // HeaderBB will hold allocations and loading of variables.
86 // CheckNextBB will check for more work.
87 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
88 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
89 // ExitBB marks the end of the parallel execution.
90 std::tuple<Value *, Function *>
createSubFn(Value * Stride,AllocaInst * StructData,SetVector<Value * > Data,ValueMapT & Map)91 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
92 SetVector<Value *> Data,
93 ValueMapT &Map) {
94 if (PollyScheduling != OMPGeneralSchedulingType::Runtime) {
95 // User tried to influence the scheduling type (currently not supported)
96 errs() << "warning: Polly's GNU OpenMP backend solely "
97 "supports the scheduling type 'runtime'.\n";
98 }
99
100 if (PollyChunkSize != 0) {
101 // User tried to influence the chunk size (currently not supported)
102 errs() << "warning: Polly's GNU OpenMP backend solely "
103 "supports the default chunk size.\n";
104 }
105
106 Function *SubFn = createSubFnDefinition();
107 LLVMContext &Context = SubFn->getContext();
108
109 // Store the previous basic block.
110 BasicBlock *PrevBB = Builder.GetInsertBlock();
111
112 // Create basic blocks.
113 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
114 BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
115 BasicBlock *CheckNextBB =
116 BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
117 BasicBlock *PreHeaderBB =
118 BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
119
120 DT.addNewBlock(HeaderBB, PrevBB);
121 DT.addNewBlock(ExitBB, HeaderBB);
122 DT.addNewBlock(CheckNextBB, HeaderBB);
123 DT.addNewBlock(PreHeaderBB, HeaderBB);
124
125 // Fill up basic block HeaderBB.
126 Builder.SetInsertPoint(HeaderBB);
127 Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
128 Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
129 Value *UserContext = Builder.CreateBitCast(
130 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
131
132 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
133 Map);
134 Builder.CreateBr(CheckNextBB);
135
136 // Add code to check if another set of iterations will be executed.
137 Builder.SetInsertPoint(CheckNextBB);
138 Value *Next = createCallGetWorkItem(LBPtr, UBPtr);
139 Value *HasNextSchedule = Builder.CreateTrunc(
140 Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock");
141 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
142
143 // Add code to load the iv bounds for this set of iterations.
144 Builder.SetInsertPoint(PreHeaderBB);
145 Value *LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
146 Value *UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
147
148 // Subtract one as the upper bound provided by OpenMP is a < comparison
149 // whereas the codegenForSequential function creates a <= comparison.
150 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
151 "polly.par.UBAdjusted");
152
153 Builder.CreateBr(CheckNextBB);
154 Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
155 BasicBlock *AfterBB;
156 Value *IV =
157 createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
158 nullptr, true, /* UseGuard */ false);
159
160 BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
161
162 // Add code to terminate this subfunction.
163 Builder.SetInsertPoint(ExitBB);
164 createCallCleanupThread();
165 Builder.CreateRetVoid();
166
167 Builder.SetInsertPoint(&*LoopBody);
168
169 return std::make_tuple(IV, SubFn);
170 }
171
createCallGetWorkItem(Value * LBPtr,Value * UBPtr)172 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
173 Value *UBPtr) {
174 const std::string Name = "GOMP_loop_runtime_next";
175
176 Function *F = M->getFunction(Name);
177
178 // If F is not available, declare it.
179 if (!F) {
180 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
181 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
182 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
183 F = Function::Create(Ty, Linkage, Name, M);
184 }
185
186 Value *Args[] = {LBPtr, UBPtr};
187 Value *Return = Builder.CreateCall(F, Args);
188 Return = Builder.CreateICmpNE(
189 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
190 return Return;
191 }
192
createCallJoinThreads()193 void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
194 const std::string Name = "GOMP_parallel_end";
195
196 Function *F = M->getFunction(Name);
197
198 // If F is not available, declare it.
199 if (!F) {
200 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
201
202 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
203 F = Function::Create(Ty, Linkage, Name, M);
204 }
205
206 Builder.CreateCall(F, {});
207 }
208
createCallCleanupThread()209 void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
210 const std::string Name = "GOMP_loop_end_nowait";
211
212 Function *F = M->getFunction(Name);
213
214 // If F is not available, declare it.
215 if (!F) {
216 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
217
218 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
219 F = Function::Create(Ty, Linkage, Name, M);
220 }
221
222 Builder.CreateCall(F, {});
223 }
224