1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/MDBuilder.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
26 #include "llvm/Transforms/Utils/CodeExtractor.h"
27
28 #include <sstream>
29
30 #define DEBUG_TYPE "openmp-ir-builder"
31
32 using namespace llvm;
33 using namespace omp;
34 using namespace types;
35
36 static cl::opt<bool>
37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
38 cl::desc("Use optimistic attributes describing "
39 "'as-if' properties of runtime calls."),
40 cl::init(false));
41
addAttributes(omp::RuntimeFunction FnID,Function & Fn)42 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
43 LLVMContext &Ctx = Fn.getContext();
44
45 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
46 #include "llvm/Frontend/OpenMP/OMPKinds.def"
47
48 // Add attributes to the new declaration.
49 switch (FnID) {
50 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
51 case Enum: \
52 Fn.setAttributes( \
53 AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \
54 break;
55 #include "llvm/Frontend/OpenMP/OMPKinds.def"
56 default:
57 // Attributes are optional.
58 break;
59 }
60 }
61
getOrCreateRuntimeFunction(RuntimeFunction FnID)62 Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
63 Function *Fn = nullptr;
64
65 // Try to find the declation in the module first.
66 switch (FnID) {
67 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
68 case Enum: \
69 Fn = M.getFunction(Str); \
70 break;
71 #include "llvm/Frontend/OpenMP/OMPKinds.def"
72 }
73
74 if (!Fn) {
75 // Create a new declaration if we need one.
76 switch (FnID) {
77 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
78 case Enum: \
79 Fn = Function::Create(FunctionType::get(ReturnType, \
80 ArrayRef<Type *>{__VA_ARGS__}, \
81 IsVarArg), \
82 GlobalValue::ExternalLinkage, Str, M); \
83 break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85 }
86
87 addAttributes(FnID, *Fn);
88 }
89
90 assert(Fn && "Failed to create OpenMP runtime function");
91 return Fn;
92 }
93
initialize()94 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
95
getOrCreateIdent(Constant * SrcLocStr,IdentFlag LocFlags)96 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
97 IdentFlag LocFlags) {
98 // Enable "C-mode".
99 LocFlags |= OMP_IDENT_FLAG_KMPC;
100
101 GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}];
102 if (!DefaultIdent) {
103 Constant *I32Null = ConstantInt::getNullValue(Int32);
104 Constant *IdentData[] = {I32Null,
105 ConstantInt::get(Int32, uint64_t(LocFlags)),
106 I32Null, I32Null, SrcLocStr};
107 Constant *Initializer = ConstantStruct::get(
108 cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
109
110 // Look for existing encoding of the location + flags, not needed but
111 // minimizes the difference to the existing solution while we transition.
112 for (GlobalVariable &GV : M.getGlobalList())
113 if (GV.getType() == IdentPtr && GV.hasInitializer())
114 if (GV.getInitializer() == Initializer)
115 return DefaultIdent = &GV;
116
117 DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(),
118 /* isConstant = */ false,
119 GlobalValue::PrivateLinkage, Initializer);
120 DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
121 DefaultIdent->setAlignment(Align(8));
122 }
123 return DefaultIdent;
124 }
125
getOrCreateSrcLocStr(StringRef LocStr)126 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
127 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
128 if (!SrcLocStr) {
129 Constant *Initializer =
130 ConstantDataArray::getString(M.getContext(), LocStr);
131
132 // Look for existing encoding of the location, not needed but minimizes the
133 // difference to the existing solution while we transition.
134 for (GlobalVariable &GV : M.getGlobalList())
135 if (GV.isConstant() && GV.hasInitializer() &&
136 GV.getInitializer() == Initializer)
137 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
138
139 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr);
140 }
141 return SrcLocStr;
142 }
143
getOrCreateDefaultSrcLocStr()144 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
145 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
146 }
147
148 Constant *
getOrCreateSrcLocStr(const LocationDescription & Loc)149 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
150 DILocation *DIL = Loc.DL.get();
151 if (!DIL)
152 return getOrCreateDefaultSrcLocStr();
153 StringRef Filename =
154 !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
155 StringRef Function = DIL->getScope()->getSubprogram()->getName();
156 Function =
157 !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
158 std::string LineStr = std::to_string(DIL->getLine());
159 std::string ColumnStr = std::to_string(DIL->getColumn());
160 std::stringstream SrcLocStr;
161 SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";"
162 << LineStr << ";" << ColumnStr << ";;";
163 return getOrCreateSrcLocStr(SrcLocStr.str());
164 }
165
getOrCreateThreadID(Value * Ident)166 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
167 return Builder.CreateCall(
168 getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident,
169 "omp_global_thread_num");
170 }
171
172 OpenMPIRBuilder::InsertPointTy
CreateBarrier(const LocationDescription & Loc,Directive DK,bool ForceSimpleCall,bool CheckCancelFlag)173 OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK,
174 bool ForceSimpleCall, bool CheckCancelFlag) {
175 if (!updateToLocation(Loc))
176 return Loc.IP;
177 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
178 }
179
180 OpenMPIRBuilder::InsertPointTy
emitBarrierImpl(const LocationDescription & Loc,Directive Kind,bool ForceSimpleCall,bool CheckCancelFlag)181 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
182 bool ForceSimpleCall, bool CheckCancelFlag) {
183 // Build call __kmpc_cancel_barrier(loc, thread_id) or
184 // __kmpc_barrier(loc, thread_id);
185
186 IdentFlag BarrierLocFlags;
187 switch (Kind) {
188 case OMPD_for:
189 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
190 break;
191 case OMPD_sections:
192 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
193 break;
194 case OMPD_single:
195 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
196 break;
197 case OMPD_barrier:
198 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
199 break;
200 default:
201 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
202 break;
203 }
204
205 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
206 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
207 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
208
209 // If we are in a cancellable parallel region, barriers are cancellation
210 // points.
211 // TODO: Check why we would force simple calls or to ignore the cancel flag.
212 bool UseCancelBarrier =
213 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
214
215 Value *Result = Builder.CreateCall(
216 getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
217 : OMPRTL___kmpc_barrier),
218 Args);
219
220 if (UseCancelBarrier && CheckCancelFlag)
221 emitCancelationCheckImpl(Result, OMPD_parallel);
222
223 return Builder.saveIP();
224 }
225
226 OpenMPIRBuilder::InsertPointTy
CreateCancel(const LocationDescription & Loc,Value * IfCondition,omp::Directive CanceledDirective)227 OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,
228 Value *IfCondition,
229 omp::Directive CanceledDirective) {
230 if (!updateToLocation(Loc))
231 return Loc.IP;
232
233 // LLVM utilities like blocks with terminators.
234 auto *UI = Builder.CreateUnreachable();
235
236 Instruction *ThenTI = UI, *ElseTI = nullptr;
237 if (IfCondition)
238 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
239 Builder.SetInsertPoint(ThenTI);
240
241 Value *CancelKind = nullptr;
242 switch (CanceledDirective) {
243 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
244 case DirectiveEnum: \
245 CancelKind = Builder.getInt32(Value); \
246 break;
247 #include "llvm/Frontend/OpenMP/OMPKinds.def"
248 default:
249 llvm_unreachable("Unknown cancel kind!");
250 }
251
252 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
253 Value *Ident = getOrCreateIdent(SrcLocStr);
254 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
255 Value *Result = Builder.CreateCall(
256 getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args);
257
258 // The actual cancel logic is shared with others, e.g., cancel_barriers.
259 emitCancelationCheckImpl(Result, CanceledDirective);
260
261 // Update the insertion point and remove the terminator we introduced.
262 Builder.SetInsertPoint(UI->getParent());
263 UI->eraseFromParent();
264
265 return Builder.saveIP();
266 }
267
emitCancelationCheckImpl(Value * CancelFlag,omp::Directive CanceledDirective)268 void OpenMPIRBuilder::emitCancelationCheckImpl(
269 Value *CancelFlag, omp::Directive CanceledDirective) {
270 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
271 "Unexpected cancellation!");
272
273 // For a cancel barrier we create two new blocks.
274 BasicBlock *BB = Builder.GetInsertBlock();
275 BasicBlock *NonCancellationBlock;
276 if (Builder.GetInsertPoint() == BB->end()) {
277 // TODO: This branch will not be needed once we moved to the
278 // OpenMPIRBuilder codegen completely.
279 NonCancellationBlock = BasicBlock::Create(
280 BB->getContext(), BB->getName() + ".cont", BB->getParent());
281 } else {
282 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
283 BB->getTerminator()->eraseFromParent();
284 Builder.SetInsertPoint(BB);
285 }
286 BasicBlock *CancellationBlock = BasicBlock::Create(
287 BB->getContext(), BB->getName() + ".cncl", BB->getParent());
288
289 // Jump to them based on the return value.
290 Value *Cmp = Builder.CreateIsNull(CancelFlag);
291 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
292 /* TODO weight */ nullptr, nullptr);
293
294 // From the cancellation block we finalize all variables and go to the
295 // post finalization block that is known to the FiniCB callback.
296 Builder.SetInsertPoint(CancellationBlock);
297 auto &FI = FinalizationStack.back();
298 FI.FiniCB(Builder.saveIP());
299
300 // The continuation block is where code generation continues.
301 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
302 }
303
CreateParallel(const LocationDescription & Loc,BodyGenCallbackTy BodyGenCB,PrivatizeCallbackTy PrivCB,FinalizeCallbackTy FiniCB,Value * IfCondition,Value * NumThreads,omp::ProcBindKind ProcBind,bool IsCancellable)304 IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
305 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
306 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
307 Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
308 if (!updateToLocation(Loc))
309 return Loc.IP;
310
311 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
312 Value *Ident = getOrCreateIdent(SrcLocStr);
313 Value *ThreadID = getOrCreateThreadID(Ident);
314
315 if (NumThreads) {
316 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
317 Value *Args[] = {
318 Ident, ThreadID,
319 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
320 Builder.CreateCall(
321 getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
322 }
323
324 if (ProcBind != OMP_PROC_BIND_default) {
325 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
326 Value *Args[] = {
327 Ident, ThreadID,
328 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
329 Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
330 Args);
331 }
332
333 BasicBlock *InsertBB = Builder.GetInsertBlock();
334 Function *OuterFn = InsertBB->getParent();
335
336 // Vector to remember instructions we used only during the modeling but which
337 // we want to delete at the end.
338 SmallVector<Instruction *, 4> ToBeDeleted;
339
340 Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
341 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
342 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
343
344 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
345 // program, otherwise we only need them for modeling purposes to get the
346 // associated arguments in the outlined function. In the former case,
347 // initialize the allocas properly, in the latter case, delete them later.
348 if (IfCondition) {
349 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
350 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
351 } else {
352 ToBeDeleted.push_back(TIDAddr);
353 ToBeDeleted.push_back(ZeroAddr);
354 }
355
356 // Create an artificial insertion point that will also ensure the blocks we
357 // are about to split are not degenerated.
358 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
359
360 Instruction *ThenTI = UI, *ElseTI = nullptr;
361 if (IfCondition)
362 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
363
364 BasicBlock *ThenBB = ThenTI->getParent();
365 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
366 BasicBlock *PRegBodyBB =
367 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
368 BasicBlock *PRegPreFiniBB =
369 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
370 BasicBlock *PRegExitBB =
371 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
372
373 auto FiniCBWrapper = [&](InsertPointTy IP) {
374 // Hide "open-ended" blocks from the given FiniCB by setting the right jump
375 // target to the region exit block.
376 if (IP.getBlock()->end() == IP.getPoint()) {
377 IRBuilder<>::InsertPointGuard IPG(Builder);
378 Builder.restoreIP(IP);
379 Instruction *I = Builder.CreateBr(PRegExitBB);
380 IP = InsertPointTy(I->getParent(), I->getIterator());
381 }
382 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
383 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
384 "Unexpected insertion point for finalization call!");
385 return FiniCB(IP);
386 };
387
388 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
389
390 // Generate the privatization allocas in the block that will become the entry
391 // of the outlined function.
392 InsertPointTy AllocaIP(PRegEntryBB,
393 PRegEntryBB->getTerminator()->getIterator());
394 Builder.restoreIP(AllocaIP);
395 AllocaInst *PrivTIDAddr =
396 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
397 Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
398
399 // Add some fake uses for OpenMP provided arguments.
400 ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
401 ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
402
403 // ThenBB
404 // |
405 // V
406 // PRegionEntryBB <- Privatization allocas are placed here.
407 // |
408 // V
409 // PRegionBodyBB <- BodeGen is invoked here.
410 // |
411 // V
412 // PRegPreFiniBB <- The block we will start finalization from.
413 // |
414 // V
415 // PRegionExitBB <- A common exit to simplify block collection.
416 //
417
418 LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
419
420 // Let the caller create the body.
421 assert(BodyGenCB && "Expected body generation callback!");
422 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
423 BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
424
425 LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
426
427 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
428 SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
429 ParallelRegionBlockSet.insert(PRegEntryBB);
430 ParallelRegionBlockSet.insert(PRegExitBB);
431
432 // Collect all blocks in-between PRegEntryBB and PRegExitBB.
433 Worklist.push_back(PRegEntryBB);
434 while (!Worklist.empty()) {
435 BasicBlock *BB = Worklist.pop_back_val();
436 ParallelRegionBlocks.push_back(BB);
437 for (BasicBlock *SuccBB : successors(BB))
438 if (ParallelRegionBlockSet.insert(SuccBB).second)
439 Worklist.push_back(SuccBB);
440 }
441
442 CodeExtractorAnalysisCache CEAC(*OuterFn);
443 CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
444 /* AggregateArgs */ false,
445 /* BlockFrequencyInfo */ nullptr,
446 /* BranchProbabilityInfo */ nullptr,
447 /* AssumptionCache */ nullptr,
448 /* AllowVarArgs */ true,
449 /* AllowAlloca */ true,
450 /* Suffix */ ".omp_par");
451
452 // Find inputs to, outputs from the code region.
453 BasicBlock *CommonExit = nullptr;
454 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
455 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
456 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
457
458 LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
459
460 FunctionCallee TIDRTLFn =
461 getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
462
463 auto PrivHelper = [&](Value &V) {
464 if (&V == TIDAddr || &V == ZeroAddr)
465 return;
466
467 SmallVector<Use *, 8> Uses;
468 for (Use &U : V.uses())
469 if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
470 if (ParallelRegionBlockSet.count(UserI->getParent()))
471 Uses.push_back(&U);
472
473 Value *ReplacementValue = nullptr;
474 CallInst *CI = dyn_cast<CallInst>(&V);
475 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
476 ReplacementValue = PrivTID;
477 } else {
478 Builder.restoreIP(
479 PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
480 assert(ReplacementValue &&
481 "Expected copy/create callback to set replacement value!");
482 if (ReplacementValue == &V)
483 return;
484 }
485
486 for (Use *UPtr : Uses)
487 UPtr->set(ReplacementValue);
488 };
489
490 for (Value *Input : Inputs) {
491 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
492 PrivHelper(*Input);
493 }
494 for (Value *Output : Outputs) {
495 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
496 PrivHelper(*Output);
497 }
498
499 LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
500 LLVM_DEBUG({
501 for (auto *BB : ParallelRegionBlocks)
502 dbgs() << " PBR: " << BB->getName() << "\n";
503 });
504
505 // Add some known attributes to the outlined function.
506 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
507 OutlinedFn->addParamAttr(0, Attribute::NoAlias);
508 OutlinedFn->addParamAttr(1, Attribute::NoAlias);
509 OutlinedFn->addFnAttr(Attribute::NoUnwind);
510 OutlinedFn->addFnAttr(Attribute::NoRecurse);
511
512 LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
513 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
514
515 // For compability with the clang CG we move the outlined function after the
516 // one with the parallel region.
517 OutlinedFn->removeFromParent();
518 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
519
520 // Remove the artificial entry introduced by the extractor right away, we
521 // made our own entry block after all.
522 {
523 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
524 assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
525 assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
526 PRegEntryBB->moveBefore(&ArtificialEntry);
527 ArtificialEntry.eraseFromParent();
528 }
529 LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
530 assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
531
532 assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
533 assert(OutlinedFn->arg_size() >= 2 &&
534 "Expected at least tid and bounded tid as arguments");
535 unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
536
537 CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
538 CI->getParent()->setName("omp_parallel");
539 Builder.SetInsertPoint(CI);
540
541 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
543 Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
544
545 SmallVector<Value *, 16> RealArgs;
546 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
547 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
548
549 FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
550 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
551 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
552 llvm::LLVMContext &Ctx = F->getContext();
553 MDBuilder MDB(Ctx);
554 // Annotate the callback behavior of the __kmpc_fork_call:
555 // - The callback callee is argument number 2 (microtask).
556 // - The first two arguments of the callback callee are unknown (-1).
557 // - All variadic arguments to the __kmpc_fork_call are passed to the
558 // callback callee.
559 F->addMetadata(
560 llvm::LLVMContext::MD_callback,
561 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
562 2, {-1, -1},
563 /* VarArgsArePassed */ true)}));
564 }
565 }
566
567 Builder.CreateCall(RTLFn, RealArgs);
568
569 LLVM_DEBUG(dbgs() << "With fork_call placed: "
570 << *Builder.GetInsertBlock()->getParent() << "\n");
571
572 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
573 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
574 UI->eraseFromParent();
575
576 // Initialize the local TID stack location with the argument value.
577 Builder.SetInsertPoint(PrivTID);
578 Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
579 Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
580
581 // If no "if" clause was present we do not need the call created during
582 // outlining, otherwise we reuse it in the serialized parallel region.
583 if (!ElseTI) {
584 CI->eraseFromParent();
585 } else {
586
587 // If an "if" clause was present we are now generating the serialized
588 // version into the "else" branch.
589 Builder.SetInsertPoint(ElseTI);
590
591 // Build calls __kmpc_serialized_parallel(&Ident, GTid);
592 Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593 Builder.CreateCall(
594 getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
595 SerializedParallelCallArgs);
596
597 // OutlinedFn(>id, &zero, CapturedStruct);
598 CI->removeFromParent();
599 Builder.Insert(CI);
600
601 // __kmpc_end_serialized_parallel(&Ident, GTid);
602 Value *EndArgs[] = {Ident, ThreadID};
603 Builder.CreateCall(
604 getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
605 EndArgs);
606
607 LLVM_DEBUG(dbgs() << "With serialized parallel region: "
608 << *Builder.GetInsertBlock()->getParent() << "\n");
609 }
610
611 // Adjust the finalization stack, verify the adjustment, and call the
612 // finalize function a last time to finalize values between the pre-fini block
613 // and the exit block if we left the parallel "the normal way".
614 auto FiniInfo = FinalizationStack.pop_back_val();
615 (void)FiniInfo;
616 assert(FiniInfo.DK == OMPD_parallel &&
617 "Unexpected finalization stack state!");
618
619 Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
620 assert(PreFiniTI->getNumSuccessors() == 1 &&
621 PreFiniTI->getSuccessor(0)->size() == 1 &&
622 isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
623 "Unexpected CFG structure!");
624
625 InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
626 FiniCB(PreFiniIP);
627
628 for (Instruction *I : ToBeDeleted)
629 I->eraseFromParent();
630
631 return AfterIP;
632 }
633