//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the OpenMPIRBuilder class and helpers used as a convenient
// way to create LLVM instructions for OpenMP directives.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H

#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/TargetParser/Triple.h"
#include <forward_list>
#include <map>
#include <optional>
#include <tuple>

namespace llvm {
class CanonicalLoopInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
class OpenMPIRBuilder;

/// Move the instruction after an InsertPoint to the beginning of another
/// BasicBlock.
///
/// The instructions after \p IP are moved to the beginning of \p New which must
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
/// \p New will be added such that there is no semantic change. Otherwise, the
/// \p IP insert block remains degenerate and it is up to the caller to insert a
/// terminator.
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
              bool CreateBranch);

/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
/// insert location will stick to after the instruction before the insertion
/// point (instead of moving with the instruction the InsertPoint stores
/// internally).
48 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); 49 50 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate 51 /// (missing the terminator). 52 /// 53 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed 54 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch 55 /// is true, a branch to the new successor will new created such that 56 /// semantically there is no change; otherwise the block of the insertion point 57 /// remains degenerate and it is the caller's responsibility to insert a 58 /// terminator. Returns the new successor block. 59 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, 60 llvm::Twine Name = {}); 61 62 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 63 /// degenerate (missing the terminator). Its new insert location will stick to 64 /// after the instruction before the insertion point (instead of moving with the 65 /// instruction the InsertPoint stores internally). 66 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, 67 llvm::Twine Name = {}); 68 69 /// Split a BasicBlock at \p Builder's insertion point, even if the block is 70 /// degenerate (missing the terminator). Its new insert location will stick to 71 /// after the instruction before the insertion point (instead of moving with the 72 /// instruction the InsertPoint stores internally). 73 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); 74 75 /// Like splitBB, but reuses the current block's name for the new name. 76 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, 77 llvm::Twine Suffix = ".split"); 78 79 /// Captures attributes that affect generating LLVM-IR using the 80 /// OpenMPIRBuilder and related classes. Note that not all attributes are 81 /// required for all classes or functions. 
In some use cases the configuration 82 /// is not necessary at all, because because the only functions that are called 83 /// are ones that are not dependent on the configuration. 84 class OpenMPIRBuilderConfig { 85 public: 86 /// Flag to define whether to generate code for the role of the OpenMP host 87 /// (if set to false) or device (if set to true) in an offloading context. It 88 /// is set when the -fopenmp-is-target-device compiler frontend option is 89 /// specified. 90 std::optional<bool> IsTargetDevice; 91 92 /// Flag for specifying if the compilation is done for an accelerator. It is 93 /// set according to the architecture of the target triple and currently only 94 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform 95 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true 96 /// if `IsGPU` is true. This restriction might be lifted if an accelerator- 97 /// like target with the ability to work as the OpenMP host is added, or if 98 /// the capabilities of the currently supported GPU architectures are 99 /// expanded. 100 std::optional<bool> IsGPU; 101 102 // Flag for specifying if offloading is mandatory. 103 std::optional<bool> OpenMPOffloadMandatory; 104 105 /// First separator used between the initial two parts of a name. 106 std::optional<StringRef> FirstSeparator; 107 /// Separator used between all of the rest consecutive parts of s name 108 std::optional<StringRef> Separator; 109 110 OpenMPIRBuilderConfig(); 111 OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, 112 bool OpenMPOffloadMandatory, 113 bool HasRequiresReverseOffload, 114 bool HasRequiresUnifiedAddress, 115 bool HasRequiresUnifiedSharedMemory, 116 bool HasRequiresDynamicAllocators); 117 118 // Getters functions that assert if the required values are not present. 
isTargetDevice()119 bool isTargetDevice() const { 120 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set"); 121 return *IsTargetDevice; 122 } 123 isGPU()124 bool isGPU() const { 125 assert(IsGPU.has_value() && "IsGPU is not set"); 126 return *IsGPU; 127 } 128 openMPOffloadMandatory()129 bool openMPOffloadMandatory() const { 130 assert(OpenMPOffloadMandatory.has_value() && 131 "OpenMPOffloadMandatory is not set"); 132 return *OpenMPOffloadMandatory; 133 } 134 hasRequiresFlags()135 bool hasRequiresFlags() const { return RequiresFlags; } 136 bool hasRequiresReverseOffload() const; 137 bool hasRequiresUnifiedAddress() const; 138 bool hasRequiresUnifiedSharedMemory() const; 139 bool hasRequiresDynamicAllocators() const; 140 141 /// Returns requires directive clauses as flags compatible with those expected 142 /// by libomptarget. 143 int64_t getRequiresFlags() const; 144 145 // Returns the FirstSeparator if set, otherwise use the default separator 146 // depending on isGPU firstSeparator()147 StringRef firstSeparator() const { 148 if (FirstSeparator.has_value()) 149 return *FirstSeparator; 150 if (isGPU()) 151 return "_"; 152 return "."; 153 } 154 155 // Returns the Separator if set, otherwise use the default separator depending 156 // on isGPU separator()157 StringRef separator() const { 158 if (Separator.has_value()) 159 return *Separator; 160 if (isGPU()) 161 return "$"; 162 return "."; 163 } 164 setIsTargetDevice(bool Value)165 void setIsTargetDevice(bool Value) { IsTargetDevice = Value; } setIsGPU(bool Value)166 void setIsGPU(bool Value) { IsGPU = Value; } setOpenMPOffloadMandatory(bool Value)167 void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; } setFirstSeparator(StringRef FS)168 void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } setSeparator(StringRef S)169 void setSeparator(StringRef S) { Separator = S; } 170 171 void setHasRequiresReverseOffload(bool Value); 172 void setHasRequiresUnifiedAddress(bool Value); 
173 void setHasRequiresUnifiedSharedMemory(bool Value); 174 void setHasRequiresDynamicAllocators(bool Value); 175 176 private: 177 /// Flags for specifying which requires directive clauses are present. 178 int64_t RequiresFlags; 179 }; 180 181 /// Data structure to contain the information needed to uniquely identify 182 /// a target entry. 183 struct TargetRegionEntryInfo { 184 std::string ParentName; 185 unsigned DeviceID; 186 unsigned FileID; 187 unsigned Line; 188 unsigned Count; 189 TargetRegionEntryInfoTargetRegionEntryInfo190 TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {} 191 TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, 192 unsigned FileID, unsigned Line, unsigned Count = 0) ParentNameTargetRegionEntryInfo193 : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), 194 Count(Count) {} 195 196 static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 197 StringRef ParentName, 198 unsigned DeviceID, unsigned FileID, 199 unsigned Line, unsigned Count); 200 201 bool operator<(const TargetRegionEntryInfo RHS) const { 202 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < 203 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, 204 RHS.Count); 205 } 206 }; 207 208 /// Class that manages information about offload code regions and data 209 class OffloadEntriesInfoManager { 210 /// Number of entries registered so far. 211 OpenMPIRBuilder *OMPBuilder; 212 unsigned OffloadingEntriesNum = 0; 213 214 public: 215 /// Base class of the entries info. 216 class OffloadEntryInfo { 217 public: 218 /// Kind of a given entry. 219 enum OffloadingEntryInfoKinds : unsigned { 220 /// Entry is a target region. 221 OffloadingEntryInfoTargetRegion = 0, 222 /// Entry is a declare target variable. 223 OffloadingEntryInfoDeviceGlobalVar = 1, 224 /// Invalid entry info. 
225 OffloadingEntryInfoInvalid = ~0u 226 }; 227 228 protected: 229 OffloadEntryInfo() = delete; OffloadEntryInfo(OffloadingEntryInfoKinds Kind)230 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)231 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, 232 uint32_t Flags) 233 : Flags(Flags), Order(Order), Kind(Kind) {} 234 ~OffloadEntryInfo() = default; 235 236 public: isValid()237 bool isValid() const { return Order != ~0u; } getOrder()238 unsigned getOrder() const { return Order; } getKind()239 OffloadingEntryInfoKinds getKind() const { return Kind; } getFlags()240 uint32_t getFlags() const { return Flags; } setFlags(uint32_t NewFlags)241 void setFlags(uint32_t NewFlags) { Flags = NewFlags; } getAddress()242 Constant *getAddress() const { return cast_or_null<Constant>(Addr); } setAddress(Constant * V)243 void setAddress(Constant *V) { 244 assert(!Addr.pointsToAliveValue() && "Address has been set before!"); 245 Addr = V; 246 } classof(const OffloadEntryInfo * Info)247 static bool classof(const OffloadEntryInfo *Info) { return true; } 248 249 private: 250 /// Address of the entity that has to be mapped for offloading. 251 WeakTrackingVH Addr; 252 253 /// Flags associated with the device global. 254 uint32_t Flags = 0u; 255 256 /// Order this entry was emitted. 257 unsigned Order = ~0u; 258 259 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; 260 }; 261 262 /// Return true if a there are no entries defined. 263 bool empty() const; 264 /// Return number of entries defined so far. size()265 unsigned size() const { return OffloadingEntriesNum; } 266 OffloadEntriesInfoManager(OpenMPIRBuilder * builder)267 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} 268 269 // 270 // Target region entries related. 271 // 272 273 /// Kind of the target registry entry. 
274 enum OMPTargetRegionEntryKind : uint32_t { 275 /// Mark the entry as target region. 276 OMPTargetRegionEntryTargetRegion = 0x0, 277 }; 278 279 /// Target region entries info. 280 class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { 281 /// Address that can be used as the ID of the entry. 282 Constant *ID = nullptr; 283 284 public: OffloadEntryInfoTargetRegion()285 OffloadEntryInfoTargetRegion() 286 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)287 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, 288 Constant *ID, 289 OMPTargetRegionEntryKind Flags) 290 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), 291 ID(ID) { 292 setAddress(Addr); 293 } 294 getID()295 Constant *getID() const { return ID; } setID(Constant * V)296 void setID(Constant *V) { 297 assert(!ID && "ID has been set before!"); 298 ID = V; 299 } classof(const OffloadEntryInfo * Info)300 static bool classof(const OffloadEntryInfo *Info) { 301 return Info->getKind() == OffloadingEntryInfoTargetRegion; 302 } 303 }; 304 305 /// Initialize target region entry. 306 /// This is ONLY needed for DEVICE compilation. 307 void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, 308 unsigned Order); 309 /// Register target region entry. 310 void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 311 Constant *Addr, Constant *ID, 312 OMPTargetRegionEntryKind Flags); 313 /// Return true if a target region entry with the provided information 314 /// exists. 315 bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, 316 bool IgnoreAddressId = false) const; 317 318 // Return the Name based on \a EntryInfo using the next available Count. 319 void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, 320 const TargetRegionEntryInfo &EntryInfo); 321 322 /// brief Applies action \a Action on all registered entries. 
323 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, 324 const OffloadEntryInfoTargetRegion &)> 325 OffloadTargetRegionEntryInfoActTy; 326 void 327 actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); 328 329 // 330 // Device global variable entries related. 331 // 332 333 /// Kind of the global variable entry.. 334 enum OMPTargetGlobalVarEntryKind : uint32_t { 335 /// Mark the entry as a to declare target. 336 OMPTargetGlobalVarEntryTo = 0x0, 337 /// Mark the entry as a to declare target link. 338 OMPTargetGlobalVarEntryLink = 0x1, 339 /// Mark the entry as a declare target enter. 340 OMPTargetGlobalVarEntryEnter = 0x2, 341 /// Mark the entry as having no declare target entry kind. 342 OMPTargetGlobalVarEntryNone = 0x3, 343 /// Mark the entry as a declare target indirect global. 344 OMPTargetGlobalVarEntryIndirect = 0x8, 345 /// Mark the entry as a register requires global. 346 OMPTargetGlobalRegisterRequires = 0x10, 347 }; 348 349 /// Kind of device clause for declare target variables 350 /// and functions 351 /// NOTE: Currently not used as a part of a variable entry 352 /// used for Flang and Clang to interface with the variable 353 /// related registration functions 354 enum OMPTargetDeviceClauseKind : uint32_t { 355 /// The target is marked for all devices 356 OMPTargetDeviceClauseAny = 0x0, 357 /// The target is marked for non-host devices 358 OMPTargetDeviceClauseNoHost = 0x1, 359 /// The target is marked for host devices 360 OMPTargetDeviceClauseHost = 0x2, 361 /// The target is marked as having no clause 362 OMPTargetDeviceClauseNone = 0x3 363 }; 364 365 /// Device global variable entries info. 366 class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { 367 /// Type of the global variable. 
368 int64_t VarSize; 369 GlobalValue::LinkageTypes Linkage; 370 const std::string VarName; 371 372 public: OffloadEntryInfoDeviceGlobalVar()373 OffloadEntryInfoDeviceGlobalVar() 374 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)375 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, 376 OMPTargetGlobalVarEntryKind Flags) 377 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage,const std::string & VarName)378 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, 379 int64_t VarSize, 380 OMPTargetGlobalVarEntryKind Flags, 381 GlobalValue::LinkageTypes Linkage, 382 const std::string &VarName) 383 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), 384 VarSize(VarSize), Linkage(Linkage), VarName(VarName) { 385 setAddress(Addr); 386 } 387 getVarSize()388 int64_t getVarSize() const { return VarSize; } getVarName()389 StringRef getVarName() const { return VarName; } setVarSize(int64_t Size)390 void setVarSize(int64_t Size) { VarSize = Size; } getLinkage()391 GlobalValue::LinkageTypes getLinkage() const { return Linkage; } setLinkage(GlobalValue::LinkageTypes LT)392 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } classof(const OffloadEntryInfo * Info)393 static bool classof(const OffloadEntryInfo *Info) { 394 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; 395 } 396 }; 397 398 /// Initialize device global variable entry. 399 /// This is ONLY used for DEVICE compilation. 400 void initializeDeviceGlobalVarEntryInfo(StringRef Name, 401 OMPTargetGlobalVarEntryKind Flags, 402 unsigned Order); 403 404 /// Register device global variable entry. 
405 void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, 406 int64_t VarSize, 407 OMPTargetGlobalVarEntryKind Flags, 408 GlobalValue::LinkageTypes Linkage); 409 /// Checks if the variable with the given name has been registered already. hasDeviceGlobalVarEntryInfo(StringRef VarName)410 bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { 411 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; 412 } 413 /// Applies action \a Action on all registered entries. 414 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> 415 OffloadDeviceGlobalVarEntryInfoActTy; 416 void actOnDeviceGlobalVarEntriesInfo( 417 const OffloadDeviceGlobalVarEntryInfoActTy &Action); 418 419 private: 420 /// Return the count of entries at a particular source location. 421 unsigned 422 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; 423 424 /// Update the count of entries at a particular source location. 425 void 426 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); 427 428 static TargetRegionEntryInfo getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)429 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { 430 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, 431 EntryInfo.FileID, EntryInfo.Line, 0); 432 } 433 434 // Count of entries at a location. 435 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; 436 437 // Storage for target region entries kind. 438 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> 439 OffloadEntriesTargetRegionTy; 440 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; 441 /// Storage for device global variable entries kind. The storage is to be 442 /// indexed by mangled name. 
443 typedef StringMap<OffloadEntryInfoDeviceGlobalVar> 444 OffloadEntriesDeviceGlobalVarTy; 445 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; 446 }; 447 448 /// An interface to create LLVM-IR for OpenMP directives. 449 /// 450 /// Each OpenMP directive has a corresponding public generator method. 451 class OpenMPIRBuilder { 452 public: 453 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will 454 /// not have an effect on \p M (see initialize) OpenMPIRBuilder(Module & M)455 OpenMPIRBuilder(Module &M) 456 : M(M), Builder(M.getContext()), OffloadInfoManager(this), 457 T(Triple(M.getTargetTriple())) {} 458 ~OpenMPIRBuilder(); 459 460 /// Initialize the internal state, this will put structures types and 461 /// potentially other helpers into the underlying module. Must be called 462 /// before any other method and only once! This internal state includes types 463 /// used in the OpenMPIRBuilder generated from OMPKinds.def. 464 void initialize(); 465 setConfig(OpenMPIRBuilderConfig C)466 void setConfig(OpenMPIRBuilderConfig C) { Config = C; } 467 468 /// Finalize the underlying module, e.g., by outlining regions. 469 /// \param Fn The function to be finalized. If not used, 470 /// all functions are finalized. 471 void finalize(Function *Fn = nullptr); 472 473 /// Add attributes known for \p FnID to \p Fn. 474 void addAttributes(omp::RuntimeFunction FnID, Function &Fn); 475 476 /// Type used throughout for insertion points. 477 using InsertPointTy = IRBuilder<>::InsertPoint; 478 479 /// Get the create a name using the platform specific separators. 480 /// \param Parts parts of the final name that needs separation 481 /// The created name has a first separator between the first and second part 482 /// and a second separator between all other parts. 483 /// E.g. with FirstSeparator "$" and Separator "." 
and 484 /// parts: "p1", "p2", "p3", "p4" 485 /// The resulting name is "p1$p2.p3.p4" 486 /// The separators are retrieved from the OpenMPIRBuilderConfig. 487 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; 488 489 /// Callback type for variable finalization (think destructors). 490 /// 491 /// \param CodeGenIP is the insertion point at which the finalization code 492 /// should be placed. 493 /// 494 /// A finalize callback knows about all objects that need finalization, e.g. 495 /// destruction, when the scope of the currently generated construct is left 496 /// at the time, and location, the callback is invoked. 497 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; 498 499 struct FinalizationInfo { 500 /// The finalization callback provided by the last in-flight invocation of 501 /// createXXXX for the directive of kind DK. 502 FinalizeCallbackTy FiniCB; 503 504 /// The directive kind of the innermost directive that has an associated 505 /// region which might require finalization when it is left. 506 omp::Directive DK; 507 508 /// Flag to indicate if the directive is cancellable. 509 bool IsCancellable; 510 }; 511 512 /// Push a finalization callback on the finalization stack. 513 /// 514 /// NOTE: Temporary solution until Clang CG is gone. pushFinalizationCB(const FinalizationInfo & FI)515 void pushFinalizationCB(const FinalizationInfo &FI) { 516 FinalizationStack.push_back(FI); 517 } 518 519 /// Pop the last finalization callback from the finalization stack. 520 /// 521 /// NOTE: Temporary solution until Clang CG is gone. popFinalizationCB()522 void popFinalizationCB() { FinalizationStack.pop_back(); } 523 524 /// Callback type for body (=inner region) code generation 525 /// 526 /// The callback takes code locations as arguments, each describing a 527 /// location where additional instructions can be inserted. 528 /// 529 /// The CodeGenIP may be in the middle of a basic block or point to the end of 530 /// it. 
The basic block may have a terminator or be degenerate. The callback 531 /// function may just insert instructions at that position, but also split the 532 /// block (without the Before argument of BasicBlock::splitBasicBlock such 533 /// that the identify of the split predecessor block is preserved) and insert 534 /// additional control flow, including branches that do not lead back to what 535 /// follows the CodeGenIP. Note that since the callback is allowed to split 536 /// the block, callers must assume that InsertPoints to positions in the 537 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If 538 /// such InsertPoints need to be preserved, it can split the block itself 539 /// before calling the callback. 540 /// 541 /// AllocaIP and CodeGenIP must not point to the same position. 542 /// 543 /// \param AllocaIP is the insertion point at which new alloca instructions 544 /// should be placed. The BasicBlock it is pointing to must 545 /// not be split. 546 /// \param CodeGenIP is the insertion point at which the body code should be 547 /// placed. 548 using BodyGenCallbackTy = 549 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 550 551 // This is created primarily for sections construct as llvm::function_ref 552 // (BodyGenCallbackTy) is not storable (as described in the comments of 553 // function_ref class - function_ref contains non-ownable reference 554 // to the callable. 555 using StorableBodyGenCallbackTy = 556 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; 557 558 /// Callback type for loop body code generation. 559 /// 560 /// \param CodeGenIP is the insertion point where the loop's body code must be 561 /// placed. This will be a dedicated BasicBlock with a 562 /// conditional branch from the loop condition check and 563 /// terminated with an unconditional branch to the loop 564 /// latch. 565 /// \param IndVar is the induction variable usable at the insertion point. 
566 using LoopBodyGenCallbackTy = 567 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; 568 569 /// Callback type for variable privatization (think copy & default 570 /// constructor). 571 /// 572 /// \param AllocaIP is the insertion point at which new alloca instructions 573 /// should be placed. 574 /// \param CodeGenIP is the insertion point at which the privatization code 575 /// should be placed. 576 /// \param Original The value being copied/created, should not be used in the 577 /// generated IR. 578 /// \param Inner The equivalent of \p Original that should be used in the 579 /// generated IR; this is equal to \p Original if the value is 580 /// a pointer and can thus be passed directly, otherwise it is 581 /// an equivalent but different value. 582 /// \param ReplVal The replacement value, thus a copy or new created version 583 /// of \p Inner. 584 /// 585 /// \returns The new insertion point where code generation continues and 586 /// \p ReplVal the replacement value. 587 using PrivatizeCallbackTy = function_ref<InsertPointTy( 588 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, 589 Value &Inner, Value *&ReplVal)>; 590 591 /// Description of a LLVM-IR insertion point (IP) and a debug/source location 592 /// (filename, line, column, ...). 593 struct LocationDescription { LocationDescriptionLocationDescription594 LocationDescription(const IRBuilderBase &IRB) 595 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} LocationDescriptionLocationDescription596 LocationDescription(const InsertPointTy &IP) : IP(IP) {} LocationDescriptionLocationDescription597 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) 598 : IP(IP), DL(DL) {} 599 InsertPointTy IP; 600 DebugLoc DL; 601 }; 602 603 /// Emitter methods for OpenMP directives. 604 /// 605 ///{ 606 607 /// Generator for '#omp barrier' 608 /// 609 /// \param Loc The location where the barrier directive was encountered. 
610 /// \param DK The kind of directive that caused the barrier. 611 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 612 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 613 /// should be checked and acted upon. 614 /// 615 /// \returns The insertion point after the barrier. 616 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, 617 bool ForceSimpleCall = false, 618 bool CheckCancelFlag = true); 619 620 /// Generator for '#omp cancel' 621 /// 622 /// \param Loc The location where the directive was encountered. 623 /// \param IfCondition The evaluated 'if' clause expression, if any. 624 /// \param CanceledDirective The kind of directive that is cancled. 625 /// 626 /// \returns The insertion point after the barrier. 627 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, 628 omp::Directive CanceledDirective); 629 630 /// Generator for '#omp parallel' 631 /// 632 /// \param Loc The insert and source location description. 633 /// \param AllocaIP The insertion points to be used for alloca instructions. 634 /// \param BodyGenCB Callback that will generate the region code. 635 /// \param PrivCB Callback to copy a given variable (think copy constructor). 636 /// \param FiniCB Callback to finalize variable copies. 637 /// \param IfCondition The evaluated 'if' clause expression, if any. 638 /// \param NumThreads The evaluated 'num_threads' clause expression, if any. 639 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). 640 /// \param IsCancellable Flag to indicate a cancellable parallel region. 641 /// 642 /// \returns The insertion position *after* the parallel. 
643 IRBuilder<>::InsertPoint 644 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, 645 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 646 FinalizeCallbackTy FiniCB, Value *IfCondition, 647 Value *NumThreads, omp::ProcBindKind ProcBind, 648 bool IsCancellable); 649 650 /// Generator for the control flow structure of an OpenMP canonical loop. 651 /// 652 /// This generator operates on the logical iteration space of the loop, i.e. 653 /// the caller only has to provide a loop trip count of the loop as defined by 654 /// base language semantics. The trip count is interpreted as an unsigned 655 /// integer. The induction variable passed to \p BodyGenCB will be of the same 656 /// type and run from 0 to \p TripCount - 1. It is up to the callback to 657 /// convert the logical iteration variable to the loop counter variable in the 658 /// loop body. 659 /// 660 /// \param Loc The insert and source location description. The insert 661 /// location can be between two instructions or the end of a 662 /// degenerate block (e.g. a BB under construction). 663 /// \param BodyGenCB Callback that will generate the loop body code. 664 /// \param TripCount Number of iterations the loop body is executed. 665 /// \param Name Base name used to derive BB and instruction names. 666 /// 667 /// \returns An object representing the created control flow structure which 668 /// can be used for loop-associated directives. 669 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 670 LoopBodyGenCallbackTy BodyGenCB, 671 Value *TripCount, 672 const Twine &Name = "loop"); 673 674 /// Generator for the control flow structure of an OpenMP canonical loop. 675 /// 676 /// Instead of a logical iteration space, this allows specifying user-defined 677 /// loop counter values using increment, upper- and lower bounds. 
To 678 /// disambiguate the terminology when counting downwards, instead of lower 679 /// bounds we use \p Start for the loop counter value in the first body 680 /// iteration. 681 /// 682 /// Consider the following limitations: 683 /// 684 /// * A loop counter space over all integer values of its bit-width cannot be 685 /// represented. E.g using uint8_t, its loop trip count of 256 cannot be 686 /// stored into an 8 bit integer): 687 /// 688 /// DO I = 0, 255, 1 689 /// 690 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. 691 /// effectively counting downwards: 692 /// 693 /// for (uint8_t i = 100u; i > 0; i += 127u) 694 /// 695 /// 696 /// TODO: May need to add additional parameters to represent: 697 /// 698 /// * Allow representing downcounting with unsigned integers. 699 /// 700 /// * Sign of the step and the comparison operator might disagree: 701 /// 702 /// for (int i = 0; i < 42; i -= 1u) 703 /// 704 // 705 /// \param Loc The insert and source location description. 706 /// \param BodyGenCB Callback that will generate the loop body code. 707 /// \param Start Value of the loop counter for the first iterations. 708 /// \param Stop Loop counter values past this will stop the loop. 709 /// \param Step Loop counter increment after each iteration; negative 710 /// means counting down. 711 /// \param IsSigned Whether Start, Stop and Step are signed integers. 712 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop 713 /// counter. 714 /// \param ComputeIP Insertion point for instructions computing the trip 715 /// count. Can be used to ensure the trip count is available 716 /// at the outermost loop of a loop nest. If not set, 717 /// defaults to the preheader of the generated loop. 718 /// \param Name Base name used to derive BB and instruction names. 719 /// 720 /// \returns An object representing the created control flow structure which 721 /// can be used for loop-associated directives. 
722 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, 723 LoopBodyGenCallbackTy BodyGenCB, 724 Value *Start, Value *Stop, Value *Step, 725 bool IsSigned, bool InclusiveStop, 726 InsertPointTy ComputeIP = {}, 727 const Twine &Name = "loop"); 728 729 /// Collapse a loop nest into a single loop. 730 /// 731 /// Merges loops of a loop nest into a single CanonicalLoopNest representation 732 /// that has the same number of innermost loop iterations as the origin loop 733 /// nest. The induction variables of the input loops are derived from the 734 /// collapsed loop's induction variable. This is intended to be used to 735 /// implement OpenMP's collapse clause. Before applying a directive, 736 /// collapseLoops normalizes a loop nest to contain only a single loop and the 737 /// directive's implementation does not need to handle multiple loops itself. 738 /// This does not remove the need to handle all loop nest handling by 739 /// directives, such as the ordered(<n>) clause or the simd schedule-clause 740 /// modifier of the worksharing-loop directive. 741 /// 742 /// Example: 743 /// \code 744 /// for (int i = 0; i < 7; ++i) // Canonical loop "i" 745 /// for (int j = 0; j < 9; ++j) // Canonical loop "j" 746 /// body(i, j); 747 /// \endcode 748 /// 749 /// After collapsing with Loops={i,j}, the loop is changed to 750 /// \code 751 /// for (int ij = 0; ij < 63; ++ij) { 752 /// int i = ij / 9; 753 /// int j = ij % 9; 754 /// body(i, j); 755 /// } 756 /// \endcode 757 /// 758 /// In the current implementation, the following limitations apply: 759 /// 760 /// * All input loops have an induction variable of the same type. 761 /// 762 /// * The collapsed loop will have the same trip count integer type as the 763 /// input loops. Therefore it is possible that the collapsed loop cannot 764 /// represent all iterations of the input loops. 
For instance, assuming a 765 /// 32 bit integer type, and two input loops both iterating 2^16 times, the 766 /// theoretical trip count of the collapsed loop would be 2^32 iteration, 767 /// which cannot be represented in an 32-bit integer. Behavior is undefined 768 /// in this case. 769 /// 770 /// * The trip counts of every input loop must be available at \p ComputeIP. 771 /// Non-rectangular loops are not yet supported. 772 /// 773 /// * At each nest level, code between a surrounding loop and its nested loop 774 /// is hoisted into the loop body, and such code will be executed more 775 /// often than before collapsing (or not at all if any inner loop iteration 776 /// has a trip count of 0). This is permitted by the OpenMP specification. 777 /// 778 /// \param DL Debug location for instructions added for collapsing, 779 /// such as instructions to compute/derive the input loop's 780 /// induction variables. 781 /// \param Loops Loops in the loop nest to collapse. Loops are specified 782 /// from outermost-to-innermost and every control flow of a 783 /// loop's body must pass through its directly nested loop. 784 /// \param ComputeIP Where additional instruction that compute the collapsed 785 /// trip count. If not set, defaults to before the generated 786 /// loop. 787 /// 788 /// \returns The CanonicalLoopInfo object representing the collapsed loop. 789 CanonicalLoopInfo *collapseLoops(DebugLoc DL, 790 ArrayRef<CanonicalLoopInfo *> Loops, 791 InsertPointTy ComputeIP); 792 793 /// Get the default alignment value for given target 794 /// 795 /// \param TargetTriple Target triple 796 /// \param Features StringMap which describes extra CPU features 797 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, 798 const StringMap<bool> &Features); 799 800 /// Retrieve (or create if non-existent) the address of a declare 801 /// target variable, used in conjunction with registerTargetGlobalVariable 802 /// to create declare target global variables. 
///
/// \param CaptureClause - enumerator corresponding to the OpenMP capture
/// clause used in conjunction with the variable being registered (link,
/// to, enter).
/// \param DeviceClause - enumerator corresponding to the OpenMP device_type
/// clause used in conjunction with the variable being registered (nohost,
/// host, any)
/// \param IsDeclaration - boolean stating if the variable being registered
/// is declaration-only and not a definition
/// \param IsExternallyVisible - boolean stating if the variable is externally
/// visible
/// \param EntryInfo - Unique entry information for the value generated
/// using getTargetEntryUniqueInfo, used to name generated pointer references
/// to the declare target variable
/// \param MangledName - the mangled name of the variable being registered
/// \param GeneratedRefs - references generated by invocations of
/// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
/// these are required by Clang for book keeping.
/// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
/// \param TargetTriple - The OpenMP device target triple we are compiling
/// for
/// \param LlvmPtrTy - The type of the variable we are generating or
/// retrieving an address for
/// \param GlobalInitializer - a lambda function which creates a constant
/// used for initializing a pointer reference to the variable in certain
/// cases. If a nullptr is passed, it will default to utilising the original
/// variable to initialize the pointer reference.
/// \param VariableLinkage - a lambda function which returns the variable's
/// linkage type, if unspecified and a nullptr is given, it will instead
/// utilise the linkage stored on the existing global variable in the
/// LLVMModule.
834 Constant *getAddrOfDeclareTargetVar( 835 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 836 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 837 bool IsDeclaration, bool IsExternallyVisible, 838 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 839 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 840 std::vector<Triple> TargetTriple, Type *LlvmPtrTy, 841 std::function<Constant *()> GlobalInitializer, 842 std::function<GlobalValue::LinkageTypes()> VariableLinkage); 843 844 /// Registers a target variable for device or host. 845 /// 846 /// \param CaptureClause - enumerator corresponding to the OpenMP capture 847 /// clause used in conjunction with the variable being registered (link, 848 /// to, enter). 849 /// \param DeviceClause - enumerator corresponding to the OpenMP capture 850 /// clause used in conjunction with the variable being registered (nohost, 851 /// host, any) 852 /// \param IsDeclaration - boolean stating if the variable being registered 853 /// is a declaration-only and not a definition 854 /// \param IsExternallyVisible - boolean stating if the variable is externally 855 /// visible 856 /// \param EntryInfo - Unique entry information for the value generated 857 /// using getTargetEntryUniqueInfo, used to name generated pointer references 858 /// to the declare target variable 859 /// \param MangledName - the mangled name of the variable being registered 860 /// \param GeneratedRefs - references generated by invocations of 861 /// registerTargetGlobalVariable these are required by Clang for book 862 /// keeping. 863 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled 864 /// \param TargetTriple - The OpenMP device target triple we are compiling 865 /// for 866 /// \param GlobalInitializer - a lambda function which creates a constant 867 /// used for initializing a pointer reference to the variable in certain 868 /// cases. 
If a nullptr is passed, it will default to utilising the original 869 /// variable to initialize the pointer reference. 870 /// \param VariableLinkage - a lambda function which returns the variables 871 /// linkage type, if unspecified and a nullptr is given, it will instead 872 /// utilise the linkage stored on the existing global variable in the 873 /// LLVMModule. 874 /// \param LlvmPtrTy - The type of the variable we are generating or 875 /// retrieving an address for 876 /// \param Addr - the original llvm value (addr) of the variable to be 877 /// registered 878 void registerTargetGlobalVariable( 879 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, 880 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, 881 bool IsDeclaration, bool IsExternallyVisible, 882 TargetRegionEntryInfo EntryInfo, StringRef MangledName, 883 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, 884 std::vector<Triple> TargetTriple, 885 std::function<Constant *()> GlobalInitializer, 886 std::function<GlobalValue::LinkageTypes()> VariableLinkage, 887 Type *LlvmPtrTy, Constant *Addr); 888 889 /// Get the offset of the OMP_MAP_MEMBER_OF field. 890 unsigned getFlagMemberOffset(); 891 892 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on 893 /// the position given. 894 /// \param Position - A value indicating the position of the parent 895 /// of the member in the kernel argument structure, often retrieved 896 /// by the parents position in the combined information vectors used 897 /// to generate the structure itself. Multiple children (member's of) 898 /// with the same parent will use the same returned member flag. 899 omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position); 900 901 /// Given an initial flag set, this function modifies it to contain 902 /// the passed in MemberOfFlag generated from the getMemberOfFlag 903 /// function. 
The results are dependent on the existing flag bits 904 /// set in the original flag set. 905 /// \param Flags - The original set of flags to be modified with the 906 /// passed in MemberOfFlag. 907 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted 908 /// slightly based on the getMemberOfFlag which adjusts the flag bits 909 /// based on the members position in its parent. 910 void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, 911 omp::OpenMPOffloadMappingFlags MemberOfFlag); 912 913 private: 914 /// Modifies the canonical loop to be a statically-scheduled workshare loop 915 /// which is executed on the device 916 /// 917 /// This takes a \p CLI representing a canonical loop, such as the one 918 /// created by \see createCanonicalLoop and emits additional instructions to 919 /// turn it into a workshare loop. In particular, it calls to an OpenMP 920 /// runtime function in the preheader to call OpenMP device rtl function 921 /// which handles worksharing of loop body interations. 922 /// 923 /// \param DL Debug location for instructions added for the 924 /// workshare-loop construct itself. 925 /// \param CLI A descriptor of the canonical loop to workshare. 926 /// \param AllocaIP An insertion point for Alloca instructions usable in the 927 /// preheader of the loop. 928 /// \param LoopType Information about type of loop worksharing. 929 /// It corresponds to type of loop workshare OpenMP pragma. 930 /// 931 /// \returns Point where to insert code after the workshare construct. 932 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, 933 InsertPointTy AllocaIP, 934 omp::WorksharingLoopType LoopType); 935 936 /// Modifies the canonical loop to be a statically-scheduled workshare loop. 937 /// 938 /// This takes a \p LoopInfo representing a canonical loop, such as the one 939 /// created by \p createCanonicalLoop and emits additional instructions to 940 /// turn it into a workshare loop. 
In particular, it calls to an OpenMP 941 /// runtime function in the preheader to obtain the loop bounds to be used in 942 /// the current thread, updates the relevant instructions in the canonical 943 /// loop and calls to an OpenMP runtime finalization function after the loop. 944 /// 945 /// \param DL Debug location for instructions added for the 946 /// workshare-loop construct itself. 947 /// \param CLI A descriptor of the canonical loop to workshare. 948 /// \param AllocaIP An insertion point for Alloca instructions usable in the 949 /// preheader of the loop. 950 /// \param NeedsBarrier Indicates whether a barrier must be inserted after 951 /// the loop. 952 /// 953 /// \returns Point where to insert code after the workshare construct. 954 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 955 InsertPointTy AllocaIP, 956 bool NeedsBarrier); 957 958 /// Modifies the canonical loop a statically-scheduled workshare loop with a 959 /// user-specified chunk size. 960 /// 961 /// \param DL Debug location for instructions added for the 962 /// workshare-loop construct itself. 963 /// \param CLI A descriptor of the canonical loop to workshare. 964 /// \param AllocaIP An insertion point for Alloca instructions usable in 965 /// the preheader of the loop. 966 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the 967 /// loop. 968 /// \param ChunkSize The user-specified chunk size. 969 /// 970 /// \returns Point where to insert code after the workshare construct. 971 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, 972 CanonicalLoopInfo *CLI, 973 InsertPointTy AllocaIP, 974 bool NeedsBarrier, 975 Value *ChunkSize); 976 977 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. 978 /// 979 /// This takes a \p LoopInfo representing a canonical loop, such as the one 980 /// created by \p createCanonicalLoop and emits additional instructions to 981 /// turn it into a workshare loop. 
In particular, it calls to an OpenMP 982 /// runtime function in the preheader to obtain, and then in each iteration 983 /// to update the loop counter. 984 /// 985 /// \param DL Debug location for instructions added for the 986 /// workshare-loop construct itself. 987 /// \param CLI A descriptor of the canonical loop to workshare. 988 /// \param AllocaIP An insertion point for Alloca instructions usable in the 989 /// preheader of the loop. 990 /// \param SchedType Type of scheduling to be passed to the init function. 991 /// \param NeedsBarrier Indicates whether a barrier must be insterted after 992 /// the loop. 993 /// \param Chunk The size of loop chunk considered as a unit when 994 /// scheduling. If \p nullptr, defaults to 1. 995 /// 996 /// \returns Point where to insert code after the workshare construct. 997 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, 998 InsertPointTy AllocaIP, 999 omp::OMPScheduleType SchedType, 1000 bool NeedsBarrier, 1001 Value *Chunk = nullptr); 1002 1003 /// Create alternative version of the loop to support if clause 1004 /// 1005 /// OpenMP if clause can require to generate second loop. This loop 1006 /// will be executed when if clause condition is not met. createIfVersion 1007 /// adds branch instruction to the copied loop if \p ifCond is not met. 1008 /// 1009 /// \param Loop Original loop which should be versioned. 1010 /// \param IfCond Value which corresponds to if clause condition 1011 /// \param VMap Value to value map to define relation between 1012 /// original and copied loop values and loop blocks. 1013 /// \param NamePrefix Optional name prefix for if.then if.else blocks. 1014 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, 1015 ValueToValueMapTy &VMap, const Twine &NamePrefix = ""); 1016 1017 public: 1018 /// Modifies the canonical loop to be a workshare loop. 
///
/// This takes a \p CLI representing a canonical loop, such as the one
/// created by createCanonicalLoop, and emits additional instructions to
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain the loop bounds to be used in
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
/// The concrete transformation is done by applyStaticWorkshareLoop,
/// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
/// on the value of \p SchedKind and \p ChunkSize.
///
/// \param DL       Debug location for instructions added for the
///                 workshare-loop construct itself.
/// \param CLI      A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
///                 preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
///                     the loop.
/// \param SchedKind Scheduling algorithm to use.
/// \param ChunkSize The chunk size for the inner loop.
/// \param HasSimdModifier Whether the simd modifier is present in the
///                        schedule clause.
/// \param HasMonotonicModifier Whether the monotonic modifier is present in
///                             the schedule clause.
/// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
///                                present in the schedule clause.
/// \param HasOrderedClause Whether the (parameterless) ordered clause is
///                         present.
/// \param LoopType Information about type of loop worksharing.
///                 It corresponds to type of loop workshare OpenMP pragma.
///
/// \returns Point where to insert code after the workshare construct.
1052 InsertPointTy applyWorkshareLoop( 1053 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, 1054 bool NeedsBarrier, 1055 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, 1056 Value *ChunkSize = nullptr, bool HasSimdModifier = false, 1057 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, 1058 bool HasOrderedClause = false, 1059 omp::WorksharingLoopType LoopType = 1060 omp::WorksharingLoopType::ForStaticLoop); 1061 1062 /// Tile a loop nest. 1063 /// 1064 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in 1065 /// \p/ Loops must be perfectly nested, from outermost to innermost loop 1066 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value 1067 /// of every loop and every tile sizes must be usable in the outermost 1068 /// loop's preheader. This implies that the loop nest is rectangular. 1069 /// 1070 /// Example: 1071 /// \code 1072 /// for (int i = 0; i < 15; ++i) // Canonical loop "i" 1073 /// for (int j = 0; j < 14; ++j) // Canonical loop "j" 1074 /// body(i, j); 1075 /// \endcode 1076 /// 1077 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to 1078 /// \code 1079 /// for (int i1 = 0; i1 < 3; ++i1) 1080 /// for (int j1 = 0; j1 < 2; ++j1) 1081 /// for (int i2 = 0; i2 < 5; ++i2) 1082 /// for (int j2 = 0; j2 < 7; ++j2) 1083 /// body(i1*3+i2, j1*3+j2); 1084 /// \endcode 1085 /// 1086 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are 1087 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also 1088 /// handles non-constant trip counts, non-constant tile sizes and trip counts 1089 /// that are not multiples of the tile size. In the latter case the tile loop 1090 /// of the last floor-loop iteration will have fewer iterations than specified 1091 /// as its tile size. 
1092 /// 1093 /// 1094 /// @param DL Debug location for instructions added by tiling, for 1095 /// instance the floor- and tile trip count computation. 1096 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are 1097 /// invalidated by this method, i.e. should not used after 1098 /// tiling. 1099 /// @param TileSizes For each loop in \p Loops, the tile size for that 1100 /// dimensions. 1101 /// 1102 /// \returns A list of generated loops. Contains twice as many loops as the 1103 /// input loop nest; the first half are the floor loops and the 1104 /// second half are the tile loops. 1105 std::vector<CanonicalLoopInfo *> 1106 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1107 ArrayRef<Value *> TileSizes); 1108 1109 /// Fully unroll a loop. 1110 /// 1111 /// Instead of unrolling the loop immediately (and duplicating its body 1112 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop 1113 /// metadata. 1114 /// 1115 /// \param DL Debug location for instructions added by unrolling. 1116 /// \param Loop The loop to unroll. The loop will be invalidated. 1117 void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); 1118 1119 /// Fully or partially unroll a loop. How the loop is unrolled is determined 1120 /// using LLVM's LoopUnrollPass. 1121 /// 1122 /// \param DL Debug location for instructions added by unrolling. 1123 /// \param Loop The loop to unroll. The loop will be invalidated. 1124 void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); 1125 1126 /// Partially unroll a loop. 1127 /// 1128 /// The CanonicalLoopInfo of the unrolled loop for use with chained 1129 /// loop-associated directive can be requested using \p UnrolledCLI. Not 1130 /// needing the CanonicalLoopInfo allows more efficient code generation by 1131 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. 
1132 /// A loop-associated directive applied to the unrolled loop needs to know the 1133 /// new trip count which means that if using a heuristically determined unroll 1134 /// factor (\p Factor == 0), that factor must be computed immediately. We are 1135 /// using the same logic as the LoopUnrollPass to derived the unroll factor, 1136 /// but which assumes that some canonicalization has taken place (e.g. 1137 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform 1138 /// better when the unrolled loop's CanonicalLoopInfo is not needed. 1139 /// 1140 /// \param DL Debug location for instructions added by unrolling. 1141 /// \param Loop The loop to unroll. The loop will be invalidated. 1142 /// \param Factor The factor to unroll the loop by. A factor of 0 1143 /// indicates that a heuristic should be used to determine 1144 /// the unroll-factor. 1145 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the 1146 /// partially unrolled loop. Otherwise, uses loop metadata 1147 /// to defer unrolling to the LoopUnrollPass. 1148 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, 1149 CanonicalLoopInfo **UnrolledCLI); 1150 1151 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop 1152 /// is cloned. The metadata which prevents vectorization is added to 1153 /// to the cloned loop. The cloned loop is executed when ifCond is evaluated 1154 /// to false. 1155 /// 1156 /// \param Loop The loop to simd-ize. 1157 /// \param AlignedVars The map which containts pairs of the pointer 1158 /// and its corresponding alignment. 1159 /// \param IfCond The value which corresponds to the if clause 1160 /// condition. 1161 /// \param Order The enum to map order clause. 1162 /// \param Simdlen The Simdlen length to apply to the simd loop. 1163 /// \param Safelen The Safelen length to apply to the simd loop. 
1164 void applySimd(CanonicalLoopInfo *Loop, 1165 MapVector<Value *, Value *> AlignedVars, Value *IfCond, 1166 omp::OrderKind Order, ConstantInt *Simdlen, 1167 ConstantInt *Safelen); 1168 1169 /// Generator for '#omp flush' 1170 /// 1171 /// \param Loc The location where the flush directive was encountered 1172 void createFlush(const LocationDescription &Loc); 1173 1174 /// Generator for '#omp taskwait' 1175 /// 1176 /// \param Loc The location where the taskwait directive was encountered. 1177 void createTaskwait(const LocationDescription &Loc); 1178 1179 /// Generator for '#omp taskyield' 1180 /// 1181 /// \param Loc The location where the taskyield directive was encountered. 1182 void createTaskyield(const LocationDescription &Loc); 1183 1184 /// A struct to pack the relevant information for an OpenMP depend clause. 1185 struct DependData { 1186 omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; 1187 Type *DepValueType; 1188 Value *DepVal; 1189 explicit DependData() = default; DependDataDependData1190 DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, 1191 Value *DepVal) 1192 : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} 1193 }; 1194 1195 /// Generator for `#omp task` 1196 /// 1197 /// \param Loc The location where the task construct was encountered. 1198 /// \param AllocaIP The insertion point to be used for alloca instructions. 1199 /// \param BodyGenCB Callback that will generate the region code. 1200 /// \param Tied True if the task is tied, false if the task is untied. 1201 /// \param Final i1 value which is `true` if the task is final, `false` if the 1202 /// task is not final. 1203 /// \param IfCondition i1 value. 
If it evaluates to `false`, an undeferred 1204 /// task is generated, and the encountering thread must 1205 /// suspend the current task region, for which execution 1206 /// cannot be resumed until execution of the structured 1207 /// block that is associated with the generated task is 1208 /// completed. 1209 InsertPointTy createTask(const LocationDescription &Loc, 1210 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, 1211 bool Tied = true, Value *Final = nullptr, 1212 Value *IfCondition = nullptr, 1213 SmallVector<DependData> Dependencies = {}); 1214 1215 /// Generator for the taskgroup construct 1216 /// 1217 /// \param Loc The location where the taskgroup construct was encountered. 1218 /// \param AllocaIP The insertion point to be used for alloca instructions. 1219 /// \param BodyGenCB Callback that will generate the region code. 1220 InsertPointTy createTaskgroup(const LocationDescription &Loc, 1221 InsertPointTy AllocaIP, 1222 BodyGenCallbackTy BodyGenCB); 1223 1224 using FileIdentifierInfoCallbackTy = 1225 std::function<std::tuple<std::string, uint64_t>()>; 1226 1227 /// Creates a unique info for a target entry when provided a filename and 1228 /// line number from. 1229 /// 1230 /// \param CallBack A callback function which should return filename the entry 1231 /// resides in as well as the line number for the target entry 1232 /// \param ParentName The name of the parent the target entry resides in, if 1233 /// any. 1234 static TargetRegionEntryInfo 1235 getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, 1236 StringRef ParentName = ""); 1237 1238 /// Functions used to generate reductions. Such functions take two Values 1239 /// representing LHS and RHS of the reduction, respectively, and a reference 1240 /// to the value that is updated to refer to the reduction result. 1241 using ReductionGenTy = 1242 function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; 1243 1244 /// Functions used to generate atomic reductions. 
Such functions take two 1245 /// Values representing pointers to LHS and RHS of the reduction, as well as 1246 /// the element type of these pointers. They are expected to atomically 1247 /// update the LHS to the reduced value. 1248 using AtomicReductionGenTy = 1249 function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>; 1250 1251 /// Information about an OpenMP reduction. 1252 struct ReductionInfo { ReductionInfoReductionInfo1253 ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, 1254 ReductionGenTy ReductionGen, 1255 AtomicReductionGenTy AtomicReductionGen) 1256 : ElementType(ElementType), Variable(Variable), 1257 PrivateVariable(PrivateVariable), ReductionGen(ReductionGen), 1258 AtomicReductionGen(AtomicReductionGen) {} 1259 1260 /// Reduction element type, must match pointee type of variable. 1261 Type *ElementType; 1262 1263 /// Reduction variable of pointer type. 1264 Value *Variable; 1265 1266 /// Thread-private partial reduction variable. 1267 Value *PrivateVariable; 1268 1269 /// Callback for generating the reduction body. The IR produced by this will 1270 /// be used to combine two values in a thread-safe context, e.g., under 1271 /// lock or within the same thread, and therefore need not be atomic. 1272 ReductionGenTy ReductionGen; 1273 1274 /// Callback for generating the atomic reduction body, may be null. The IR 1275 /// produced by this will be used to atomically combine two values during 1276 /// reduction. If null, the implementation will use the non-atomic version 1277 /// along with the appropriate synchronization mechanisms. 1278 AtomicReductionGenTy AtomicReductionGen; 1279 }; 1280 1281 // TODO: provide atomic and non-atomic reduction generators for reduction 1282 // operators defined by the OpenMP specification. 1283 1284 /// Generator for '#omp reduction'. 1285 /// 1286 /// Emits the IR instructing the runtime to perform the specific kind of 1287 /// reductions. 
/// Expects reduction variables to have been privatized and initialized to
/// reduction-neutral values separately. Emits the calls to runtime functions
/// as well as the reduction function and the basic blocks performing the
/// reduction atomically and non-atomically.
///
/// The code emitted for the following:
///
/// \code
///   type var_1;
///   type var_2;
///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
///   /* body */;
/// \endcode
///
/// corresponds to the following sketch.
///
/// \code
/// void _outlined_par() {
///   // N is the number of different reductions.
///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
///                        _omp_reduction_func,
///                        _gomp_critical_user.reduction.var)) {
///   case 1: {
///     var_1 = var_1 <reduction-op> privatized_var_1;
///     var_2 = var_2 <reduction-op> privatized_var_2;
///     // ...
///     __kmpc_end_reduce(...);
///     break;
///   }
///   case 2: {
///     _Atomic<ReductionOp>(var_1, privatized_var_1);
///     _Atomic<ReductionOp>(var_2, privatized_var_2);
///     // ...
///     break;
///   }
///   default: break;
///   }
/// }
///
/// void _omp_reduction_func(void **lhs, void **rhs) {
///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
///   // ...
/// }
/// \endcode
///
/// \param Loc            The location where the reduction was
///                       encountered. Must be within the associated
///                       directive and after the last local access to the
///                       reduction variables.
/// \param AllocaIP       An insertion point suitable for allocas usable
///                       in reductions.
/// \param ReductionInfos A list of info on each reduction variable.
1341 /// \param IsNoWait A flag set if the reduction is marked as nowait. 1342 /// \param IsByRef A flag set if the reduction is using reference 1343 /// or direct value. 1344 InsertPointTy createReductions(const LocationDescription &Loc, 1345 InsertPointTy AllocaIP, 1346 ArrayRef<ReductionInfo> ReductionInfos, 1347 bool IsNoWait = false, bool IsByRef = false); 1348 1349 ///} 1350 1351 /// Return the insertion point used by the underlying IRBuilder. getInsertionPoint()1352 InsertPointTy getInsertionPoint() { return Builder.saveIP(); } 1353 1354 /// Update the internal location to \p Loc. updateToLocation(const LocationDescription & Loc)1355 bool updateToLocation(const LocationDescription &Loc) { 1356 Builder.restoreIP(Loc.IP); 1357 Builder.SetCurrentDebugLocation(Loc.DL); 1358 return Loc.IP.getBlock() != nullptr; 1359 } 1360 1361 /// Return the function declaration for the runtime function with \p FnID. 1362 FunctionCallee getOrCreateRuntimeFunction(Module &M, 1363 omp::RuntimeFunction FnID); 1364 1365 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); 1366 1367 /// Return the (LLVM-IR) string describing the source location \p LocStr. 1368 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); 1369 1370 /// Return the (LLVM-IR) string describing the default source location. 1371 Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); 1372 1373 /// Return the (LLVM-IR) string describing the source location identified by 1374 /// the arguments. 1375 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, 1376 unsigned Line, unsigned Column, 1377 uint32_t &SrcLocStrSize); 1378 1379 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as 1380 /// fallback if \p DL does not specify the function name. 1381 Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, 1382 Function *F = nullptr); 1383 1384 /// Return the (LLVM-IR) string describing the source location \p Loc. 
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                               uint32_t &SrcLocStrSize);

/// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
/// TODO: Create an enum class for the Reserve2Flags
Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                           omp::IdentFlag Flags = omp::IdentFlag(0),
                           unsigned Reserve2Flags = 0);

/// Create a hidden global flag \p Name in the module with initial value \p
/// Value.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);

/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
/// \param ExitCB Extra code to be generated in the exit block.
void emitCancelationCheckImpl(Value *CancelFlag,
                              omp::Directive CanceledDirective,
                              FinalizeCallbackTy ExitCB = {});

/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param Return Return value of the created function returned by reference.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
///                 or 0 if unspecified and -1 if there is no 'teams' clause.
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
1418 InsertPointTy emitTargetKernel(const LocationDescription &Loc, 1419 InsertPointTy AllocaIP, Value *&Return, 1420 Value *Ident, Value *DeviceID, Value *NumTeams, 1421 Value *NumThreads, Value *HostPtr, 1422 ArrayRef<Value *> KernelArgs); 1423 1424 /// Generate a barrier runtime call. 1425 /// 1426 /// \param Loc The location at which the request originated and is fulfilled. 1427 /// \param DK The directive which caused the barrier 1428 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. 1429 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value 1430 /// should be checked and acted upon. 1431 /// 1432 /// \returns The insertion point after the barrier. 1433 InsertPointTy emitBarrierImpl(const LocationDescription &Loc, 1434 omp::Directive DK, bool ForceSimpleCall, 1435 bool CheckCancelFlag); 1436 1437 /// Generate a flush runtime call. 1438 /// 1439 /// \param Loc The location at which the request originated and is fulfilled. 1440 void emitFlush(const LocationDescription &Loc); 1441 1442 /// The finalization stack made up of finalize callbacks currently in-flight, 1443 /// wrapped into FinalizationInfo objects that reference also the finalization 1444 /// target block and the kind of cancellable directive. 1445 SmallVector<FinalizationInfo, 8> FinalizationStack; 1446 1447 /// Return true if the last entry in the finalization stack is of kind \p DK 1448 /// and cancellable. isLastFinalizationInfoCancellable(omp::Directive DK)1449 bool isLastFinalizationInfoCancellable(omp::Directive DK) { 1450 return !FinalizationStack.empty() && 1451 FinalizationStack.back().IsCancellable && 1452 FinalizationStack.back().DK == DK; 1453 } 1454 1455 /// Generate a taskwait runtime call. 1456 /// 1457 /// \param Loc The location at which the request originated and is fulfilled. 1458 void emitTaskwaitImpl(const LocationDescription &Loc); 1459 1460 /// Generate a taskyield runtime call. 
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskyieldImpl(const LocationDescription &Loc);

/// Return the current thread ID.
///
/// \param Ident The ident (ident_t*) describing the query origin.
Value *getOrCreateThreadID(Value *Ident);

/// The OpenMPIRBuilder Configuration
OpenMPIRBuilderConfig Config;

/// The underlying LLVM-IR module
Module &M;

/// The LLVM-IR Builder used to create IR.
IRBuilder<> Builder;

/// Map to remember source location strings
StringMap<Constant *> SrcLocStrMap;

/// Map to remember existing ident_t*.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;

/// Info manager to keep track of target regions.
OffloadEntriesInfoManager OffloadInfoManager;

/// The target triple of the underlying module.
const Triple T;

/// Helper that contains information about regions we need to outline
/// during finalization.
struct OutlineInfo {
  /// Callback type that receives a Function by reference.
  using PostOutlineCBTy = std::function<void(Function &)>;
  /// NOTE(review): presumably invoked with the outlined function once
  /// outlining is done; confirm against the finalization code.
  PostOutlineCBTy PostOutlineCB;
  /// Blocks describing the region. None of these pointers has a default
  /// initializer, so they must be set before use.
  BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
  /// NOTE(review): judging by the name, values that are not to be packed
  /// into the aggregate argument of the outlined function; confirm.
  SmallVector<Value *, 2> ExcludeArgsFromAggregate;

  /// Collect all blocks in between EntryBB and ExitBB in both the given
  /// vector and set.
  void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
                     SmallVectorImpl<BasicBlock *> &BlockVector);

  /// Return the function that contains the region to be outlined, i.e. the
  /// parent of EntryBB.
  Function *getFunction() const { return EntryBB->getParent(); }
};

/// Collection of regions that need to be outlined during finalization.
1509 SmallVector<OutlineInfo, 16> OutlineInfos; 1510 1511 /// A collection of candidate target functions that's constant allocas will 1512 /// attempt to be raised on a call of finalize after all currently enqueued 1513 /// outline info's have been processed. 1514 SmallVector<llvm::Function *, 16> ConstantAllocaRaiseCandidates; 1515 1516 /// Collection of owned canonical loop objects that eventually need to be 1517 /// free'd. 1518 std::forward_list<CanonicalLoopInfo> LoopInfos; 1519 1520 /// Add a new region that will be outlined later. addOutlineInfo(OutlineInfo && OI)1521 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } 1522 1523 /// An ordered map of auto-generated variables to their unique names. 1524 /// It stores variables with the following names: 1) ".gomp_critical_user_" + 1525 /// <critical_section_name> + ".var" for "omp critical" directives; 2) 1526 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate 1527 /// variables. 1528 StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars; 1529 1530 /// Computes the size of type in bytes. 1531 Value *getSizeInBytes(Value *BasePtr); 1532 1533 // Emit a branch from the current block to the Target block only if 1534 // the current block has a terminator. 1535 void emitBranch(BasicBlock *Target); 1536 1537 // If BB has no use then delete it and return. Else place BB after the current 1538 // block, if possible, or else at the end of the function. Also add a branch 1539 // from current block to BB if current block does not have a terminator. 
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);

/// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
/// Here is the logic:
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
                  BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});

/// Create the global variable holding the offload mappings information.
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                      std::string VarName);

/// Create the global variable holding the offload names information.
GlobalVariable *
createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                      std::string VarName);

/// The alloca instructions used for a call to a mapper function (see
/// createMapperAllocas and emitMapperCall). All default to nullptr.
struct MapperAllocas {
  AllocaInst *ArgsBase = nullptr;
  AllocaInst *Args = nullptr;
  AllocaInst *ArgSizes = nullptr;
};

/// Create the allocas instruction used in call to mapper functions.
void createMapperAllocas(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, unsigned NumOperands,
                         struct MapperAllocas &MapperAllocas);

/// Create the call for the target mapper function.
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
/// \param MaptypesArg The argument types.
/// \param MapnamesArg The argument names.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
/// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                    Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                    struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                    unsigned NumOperands);

/// Container for the arguments used to pass data to the runtime library.
1587 struct TargetDataRTArgs { 1588 /// The array of base pointer passed to the runtime library. 1589 Value *BasePointersArray = nullptr; 1590 /// The array of section pointers passed to the runtime library. 1591 Value *PointersArray = nullptr; 1592 /// The array of sizes passed to the runtime library. 1593 Value *SizesArray = nullptr; 1594 /// The array of map types passed to the runtime library for the beginning 1595 /// of the region or for the entire region if there are no separate map 1596 /// types for the region end. 1597 Value *MapTypesArray = nullptr; 1598 /// The array of map types passed to the runtime library for the end of the 1599 /// region, or nullptr if there are no separate map types for the region 1600 /// end. 1601 Value *MapTypesArrayEnd = nullptr; 1602 /// The array of user-defined mappers passed to the runtime library. 1603 Value *MappersArray = nullptr; 1604 /// The array of original declaration names of mapped pointers sent to the 1605 /// runtime library for debugging 1606 Value *MapNamesArray = nullptr; 1607 TargetDataRTArgsTargetDataRTArgs1608 explicit TargetDataRTArgs() {} TargetDataRTArgsTargetDataRTArgs1609 explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, 1610 Value *SizesArray, Value *MapTypesArray, 1611 Value *MapTypesArrayEnd, Value *MappersArray, 1612 Value *MapNamesArray) 1613 : BasePointersArray(BasePointersArray), PointersArray(PointersArray), 1614 SizesArray(SizesArray), MapTypesArray(MapTypesArray), 1615 MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray), 1616 MapNamesArray(MapNamesArray) {} 1617 }; 1618 1619 /// Data structure that contains the needed information to construct the 1620 /// kernel args vector. 1621 struct TargetKernelArgs { 1622 /// Number of arguments passed to the runtime library. 
1623 unsigned NumTargetItems; 1624 /// Arguments passed to the runtime library 1625 TargetDataRTArgs RTArgs; 1626 /// The number of iterations 1627 Value *NumIterations; 1628 /// The number of teams. 1629 Value *NumTeams; 1630 /// The number of threads. 1631 Value *NumThreads; 1632 /// The size of the dynamic shared memory. 1633 Value *DynCGGroupMem; 1634 /// True if the kernel has 'no wait' clause. 1635 bool HasNoWait; 1636 1637 /// Constructor for TargetKernelArgs TargetKernelArgsTargetKernelArgs1638 TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, 1639 Value *NumIterations, Value *NumTeams, Value *NumThreads, 1640 Value *DynCGGroupMem, bool HasNoWait) 1641 : NumTargetItems(NumTargetItems), RTArgs(RTArgs), 1642 NumIterations(NumIterations), NumTeams(NumTeams), 1643 NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), 1644 HasNoWait(HasNoWait) {} 1645 }; 1646 1647 /// Create the kernel args vector used by emitTargetKernel. This function 1648 /// creates various constant values that are used in the resulting args 1649 /// vector. 1650 static void getKernelArgsVector(TargetKernelArgs &KernelArgs, 1651 IRBuilderBase &Builder, 1652 SmallVector<Value *> &ArgsVector); 1653 1654 /// Struct that keeps the information that should be kept throughout 1655 /// a 'target data' region. 1656 class TargetDataInfo { 1657 /// Set to true if device pointer information have to be obtained. 1658 bool RequiresDevicePointerInfo = false; 1659 /// Set to true if Clang emits separate runtime calls for the beginning and 1660 /// end of the region. These calls might have separate map type arrays. 1661 bool SeparateBeginEndCalls = false; 1662 1663 public: 1664 TargetDataRTArgs RTArgs; 1665 1666 SmallMapVector<const Value *, std::pair<Value *, Value *>, 4> 1667 DevicePtrInfoMap; 1668 1669 /// Indicate whether any user-defined mapper exists. 1670 bool HasMapper = false; 1671 /// The total number of pointers passed to the runtime library. 
1672 unsigned NumberOfPtrs = 0u; 1673 TargetDataInfo()1674 explicit TargetDataInfo() {} TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1675 explicit TargetDataInfo(bool RequiresDevicePointerInfo, 1676 bool SeparateBeginEndCalls) 1677 : RequiresDevicePointerInfo(RequiresDevicePointerInfo), 1678 SeparateBeginEndCalls(SeparateBeginEndCalls) {} 1679 /// Clear information about the data arrays. clearArrayInfo()1680 void clearArrayInfo() { 1681 RTArgs = TargetDataRTArgs(); 1682 HasMapper = false; 1683 NumberOfPtrs = 0u; 1684 } 1685 /// Return true if the current target data information has valid arrays. isValid()1686 bool isValid() { 1687 return RTArgs.BasePointersArray && RTArgs.PointersArray && 1688 RTArgs.SizesArray && RTArgs.MapTypesArray && 1689 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; 1690 } requiresDevicePointerInfo()1691 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } separateBeginEndCalls()1692 bool separateBeginEndCalls() { return SeparateBeginEndCalls; } 1693 }; 1694 1695 enum class DeviceInfoTy { None, Pointer, Address }; 1696 using MapValuesArrayTy = SmallVector<Value *, 4>; 1697 using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>; 1698 using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>; 1699 using MapNamesArrayTy = SmallVector<Constant *, 4>; 1700 using MapDimArrayTy = SmallVector<uint64_t, 4>; 1701 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 1702 1703 /// This structure contains combined information generated for mappable 1704 /// clauses, including base pointers, pointers, sizes, map types, user-defined 1705 /// mappers, and non-contiguous information. 
1706 struct MapInfosTy { 1707 struct StructNonContiguousInfo { 1708 bool IsNonContiguous = false; 1709 MapDimArrayTy Dims; 1710 MapNonContiguousArrayTy Offsets; 1711 MapNonContiguousArrayTy Counts; 1712 MapNonContiguousArrayTy Strides; 1713 }; 1714 MapValuesArrayTy BasePointers; 1715 MapValuesArrayTy Pointers; 1716 MapDeviceInfoArrayTy DevicePointers; 1717 MapValuesArrayTy Sizes; 1718 MapFlagsArrayTy Types; 1719 MapNamesArrayTy Names; 1720 StructNonContiguousInfo NonContigInfo; 1721 1722 /// Append arrays in \a CurInfo. appendMapInfosTy1723 void append(MapInfosTy &CurInfo) { 1724 BasePointers.append(CurInfo.BasePointers.begin(), 1725 CurInfo.BasePointers.end()); 1726 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 1727 DevicePointers.append(CurInfo.DevicePointers.begin(), 1728 CurInfo.DevicePointers.end()); 1729 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 1730 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 1731 Names.append(CurInfo.Names.begin(), CurInfo.Names.end()); 1732 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 1733 CurInfo.NonContigInfo.Dims.end()); 1734 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 1735 CurInfo.NonContigInfo.Offsets.end()); 1736 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 1737 CurInfo.NonContigInfo.Counts.end()); 1738 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 1739 CurInfo.NonContigInfo.Strides.end()); 1740 } 1741 }; 1742 1743 /// Callback function type for functions emitting the host fallback code that 1744 /// is executed when the kernel launch fails. It takes an insertion point as 1745 /// parameter where the code should be emitted. It returns an insertion point 1746 /// that points right after after the emitted code. 1747 using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>; 1748 1749 /// Generate a target region entry call and host fallback call. 
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param OutlinedFn The outlined kernel function.
/// \param OutlinedFnID The outlined function ID.
/// \param EmitTargetCallFallbackCB Call back function to generate host
///        fallback code.
/// \param Args Data structure holding information about the kernel arguments.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param RTLoc Source location identifier
/// \param AllocaIP The insertion point to be used for alloca instructions.
InsertPointTy emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                  OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                  OpenMPIRBuilder::TargetDataInfo &Info,
                                  bool EmitDebug = false,
                                  bool ForEndCall = false);

/// Emit an array of struct descriptors to be assigned to the offload args.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                 InsertPointTy CodeGenIP,
                                 MapInfosTy &CombinedInfo,
                                 TargetDataInfo &Info);

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
void emitOffloadingArrays(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
    TargetDataInfo &Info, bool IsNonContiguous = false,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);

/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags. The fifth parameter (a
/// GlobalValue::LinkageTypes) is unnamed in this declaration.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                        int32_t Flags, GlobalValue::LinkageTypes,
                        StringRef Name = "");

/// The kind of errors that can occur when emitting the offload entries and
/// metadata.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR,
  EMIT_MD_DECLARE_TARGET_ERROR,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR
};

/// Callback function type used to report an error of the given kind for the
/// given target region entry.
using EmitMetadataErrorReportFunctionTy =
    std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;

// Emit the offloading entries and metadata so that the device codegen side
// can easily figure out what to emit. The produced metadata looks like
// this:
//
// !omp_offload.info = !{!1, ...}
//
// We only generate metadata for functions that contain target regions.
void createOffloadEntriesAndInfoMetadata(
    EmitMetadataErrorReportFunctionTy &ErrorReportFunction);

public:
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn function to call for copying data.
/// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.
InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);

/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param CPVars copyprivate variables.
/// \param CPFuncs copy functions to use for each copyprivate variable.
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           ArrayRef<llvm::Value *> CPVars = {},
                           ArrayRef<llvm::Function *> CPFuncs = {});

/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);

/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter NOTE(review): undocumented in the original; presumably the
///               value of the 'filter' clause -- confirm.
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);

/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);

/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause.
/// \param StoreValues The value will be stored in vector address.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);

/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///                  otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);

/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///                 executed before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);

/// Generator for '#omp section'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);

/// Generator for `#omp teams`
///
/// \param Loc The location where the teams construct was encountered.
/// \param BodyGenCB Callback that will generate the region code.
/// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
///        it is as if lower bound is specified as equal to upperbound. If
///        this is non-null, then upperbound must also be non-null.
/// \param NumTeamsUpper Upper bound on the number of teams.
/// \param ThreadLimit Limit on the number of threads that may participate in
///        a contention group created by each team.
/// \param IfExpr is the integer argument value of the if condition on the
///        teams clause.
InsertPointTy
createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
            Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
            Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);

/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP insertion block for copyin conditional
/// \param MasterAddr a pointer to the master variable
/// \param PrivateAddr a pointer to the threadprivate variable
/// \param IntPtrTy Pointer size type
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block
///
/// \returns The insertion point where copying operation to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);

/// Create a runtime call for kmpc_Alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_alloc
///
/// \returns CallInst to the OMP_Alloc call
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
/// \param Addr Address of memory space to be freed
/// \param Allocator Allocator information instruction
/// \param Name Name of call Instruction for OMP_Free
///
/// \returns CallInst to the OMP_Free call
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");

/// Create a runtime call for kmpc_threadprivate_cached
///
/// \param Loc The insert and source location description.
/// \param Pointer pointer to data to be cached
/// \param Size size of data to be cached
/// \param Name Name of call Instruction for callinst
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));

/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param InteropType type of interop operation
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_init call
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_destroy call
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);

/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar variable to be allocated
/// \param Device device to which offloading will occur
/// \param NumDependences number of dependence variables
/// \param DependenceAddress pointer to dependence variables
/// \param HaveNowaitClause does nowait clause exist
///
/// \returns CallInst to the __tgt_interop_use call
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);

/// The `omp target` interface
///
/// For more information about the usage of this interface,
/// \see openmp/libomptarget/deviceRTLs/common/include/target.h
///
///{

/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param MinThreadsVal Minimal number of threads, or 0.
/// \param MaxThreadsVal Maximal number of threads, or 0.
/// \param MinTeamsVal Minimal number of teams, or 0.
/// \param MaxTeamsVal Maximal number of teams, or 0.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                               int32_t MinThreadsVal = 0,
                               int32_t MaxThreadsVal = 0,
                               int32_t MinTeamsVal = 0,
                               int32_t MaxTeamsVal = 0);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param TeamsReductionDataSize The maximal size of all the reduction data
///        for teams reduction.
/// \param TeamsReductionBufferLength The number of elements (each of up to
///        \p TeamsReductionDataSize size), in the teams reduction buffer.
void createTargetDeinit(const LocationDescription &Loc,
                        int32_t TeamsReductionDataSize = 0,
                        int32_t TeamsReductionBufferLength = 1024);

///}

/// Helpers to read/write kernel annotations from the IR.
///
///{

/// Read/write bounds on threads for \p Kernel. Read will return 0 if none
/// is set.
static std::pair<int32_t, int32_t>
readThreadBoundsForKernel(const Triple &T, Function &Kernel);
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
                                       int32_t LB, int32_t UB);

/// Read/write bounds on teams for \p Kernel. Read will return 0 if none
/// is set.
static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
                                                           Function &Kernel);
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
                                int32_t UB);
///}

private:
// Sets the function attributes expected for the outlined function
void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);

// Creates the function ID/Address for the given outlined function.
// In the case of an embedded device function the address of the function is
// used, in the case of a non-offload function a constant is created.
Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                   StringRef EntryFnIDName);

// Creates the region entry address for the outlined function.
Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                      StringRef EntryFnName);

public:
/// Callback used to generate a function with the given name.
using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;

/// Create a unique name for the entry function using the source location
/// information of the current target region. The name will be something like:
///
/// __omp_offloading_DD_FFFF_PP_lBB[_CC]
///
/// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
/// mangled name of the function that encloses the target region and BB is the
/// line number of the target region. CC is a count added when more than one
/// region is located at the same location.
///
/// If this target outline function is not an offload entry, we don't need to
/// register it. This may happen if it is guarded by an if clause that is
/// false at compile time, or no target archs have been specified.
///
/// The created target region ID is used by the runtime library to identify
/// the current target region, so it only has to be unique and not
/// necessarily point to anything. It could be the pointer to the outlined
/// function that implements the target region, but we aren't using that so
/// that the compiler doesn't need to keep that, and could therefore inline
/// the host function if proven worthwhile during optimization. On the other
/// hand, if emitting code for the device, the ID has to be the function
/// address so that it can be retrieved from the offloading entry and launched
/// by the runtime library.
/// We also mark the outlined function to have
/// external linkage in case we are emitting code for the device, because
/// these functions will be entry points to the device.
///
/// \param EntryInfo The entry information about the function
/// \param GenerateFunctionCallback The callback function to generate the code
/// \param IsOffloadEntry Whether the outlined function is an offload entry
///        (see above; non-entries need not be registered)
/// \param OutlinedFn Pointer to the outlined function (passed by reference;
///        presumably set by this call -- confirm against the definition)
/// \param OutlinedFnID The target region ID (passed by reference; presumably
///        set by this call -- confirm against the definition)
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                              FunctionGenCallback &GenerateFunctionCallback,
                              bool IsOffloadEntry, Function *&OutlinedFn,
                              Constant *&OutlinedFnID);

/// Registers the given function and sets up the attributes of the function.
/// Returns the FunctionID.
///
/// \param EntryInfo The entry information about the function
/// \param OutlinedFunction Pointer to the outlined function
/// \param EntryFnName Name of the outlined function
/// \param EntryFnIDName Name of the ID to be created
Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                                       Function *OutlinedFunction,
                                       StringRef EntryFnName,
                                       StringRef EntryFnIDName);

/// Type of BodyGen to use for region codegen
///
/// Priv: If device pointer privatization is required, emit the body of the
/// region here. It will have to be duplicated: with and without
/// privatization.
/// DupNoPriv: If we need device pointer privatization, we need
/// to emit the body of the region with no privatization in the 'else' branch
/// of the conditional.
/// NoPriv: If we don't require privatization of device
/// pointers, we emit the body in between the runtime calls. This avoids
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };

/// Callback type for creating the map infos for the kernel parameters.
/// \param CodeGenIP is the insertion point where code should be generated,
///        if any.
using GenMapInfoCallbackTy =
    function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;

/// Generator for '#pragma omp target data'
///
/// \param Loc The location where the target data construct was encountered.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param CodeGenIP The insertion point at which the target directive code
/// should be placed.
/// \param DeviceID Stores the DeviceID from the device clause.
/// \param IfCond Value which corresponds to the if clause condition.
/// \param Info Stores all information related to the Target Data directive.
/// \param GenMapInfoCB Callback that populates the MapInfos and returns.
/// \param MapperFunc Optional pointer to the runtime function to use for the
/// mapper call (NOTE(review): presumably selects the begin/end/update mapper
/// entry point; confirm against the definition).
/// \param BodyGenCB Optional Callback to generate the region code.
/// \param DeviceAddrCB Optional callback to generate code related to
/// use_device_ptr and use_device_addr.
/// \param CustomMapperCB Optional callback to generate code related to
/// custom mappers.
/// \param SrcLocInfo Optional source location info value; defaults to
/// nullptr.
OpenMPIRBuilder::InsertPointTy createTargetData(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
    TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
    omp::RuntimeFunction *MapperFunc = nullptr,
    function_ref<InsertPointTy(InsertPointTy CodeGenIP,
                               BodyGenTy BodyGenType)>
        BodyGenCB = nullptr,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
    Value *SrcLocInfo = nullptr);

/// Callback type used by createTarget to generate the region code. It
/// receives the alloca and code generation insertion points and returns an
/// insertion point.
using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;

/// Callback type used by createTarget to generate accessor instructions for
/// a passed-in target argument. \p RetVal is passed by reference
/// (NOTE(review): presumably receives the value to use inside the region;
/// confirm against the definition).
using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy(
    Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP)>;

/// Generator for '#pragma omp target'
///
/// \param Loc where the target construct was encountered.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
/// \param NumTeams Number of teams specified in the num_teams clause.
/// \param NumThreads Number of threads specified in the thread_limit clause.
/// \param Inputs The input values to the region that will be passed
/// as arguments to the outlined function.
/// \param GenMapInfoCB Callback that populates the MapInfos and returns.
/// \param BodyGenCB Callback that will generate the region code.
/// \param ArgAccessorFuncCB Callback that will generate accessors
/// instructions for passed in target arguments where necessary
InsertPointTy createTarget(const LocationDescription &Loc,
                           OpenMPIRBuilder::InsertPointTy AllocaIP,
                           OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
                           int32_t NumThreads,
                           SmallVectorImpl<Value *> &Inputs,
                           GenMapInfoCallbackTy GenMapInfoCB,
                           TargetBodyGenCallbackTy BodyGenCB,
                           TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);

/// Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned. Will create a distribute call
/// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                           bool IsGPUDistribute);

/// Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);

/// Returns __kmpc_dispatch_next_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);

/// Returns __kmpc_dispatch_fini_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);

/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OMPKinds.def. Here
/// we provide the declarations, the initializeTypes function will provide the
/// values.
///
///{
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

///}

private:
/// Create all simple and struct types exposed by the runtime and remember
/// the llvm::PointerTypes of them for easy access later.
void initializeTypes(Module &M);

/// Common interface for generating entry calls for OMP Directives.
/// If the directive has a region/body, it will set the insertion
/// point to the body.
///
/// \param OMPD Directive to generate entry blocks for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitBB block where the region ends.
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                       BasicBlock *ExitBB,
                                       bool Conditional = false);

/// Common interface to finalize the region
///
/// \param OMPD Directive to generate exiting code for
/// \param FinIP Insertion point for emitting Finalization code and exit call
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param HasFinalize indicate if the directive will require finalization
///        and has a finalization callback in the stack that
///        should be called.
///
/// \return The insertion position in exit block
InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                      InsertPointTy FinIP,
                                      Instruction *ExitCall,
                                      bool HasFinalize = true);

/// Common Interface to generate OMP inlined regions
///
/// \param OMPD Directive to generate inlined region for
/// \param EntryCall Call to the entry OMP Runtime Function
/// \param ExitCall Call to the ending OMP Runtime Function
/// \param BodyGenCB Body code generation callback.
/// \param FiniCB Finalization Callback. Will be called when finalizing region
/// \param Conditional indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not.
/// \param HasFinalize indicate if the directive will require finalization
///        and has a finalization callback in the stack that
///        should be called.
/// \param IsCancellable if HasFinalize is set to true, indicate if
///        the directive should be cancellable.
/// \return The insertion point after the region
InsertPointTy
EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                     Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                     FinalizeCallbackTy FiniCB, bool Conditional = false,
                     bool HasFinalize = true, bool IsCancellable = false);

/// Get the name formed from \p Parts joined by the given (platform-specific)
/// separators.
/// \param Parts different parts of the final name that needs separation
/// \param FirstSeparator First separator used between the initial two
///        parts of the name.
/// \param Separator separator used between all of the rest consecutive
///        parts of the name
static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                         StringRef FirstSeparator,
                                         StringRef Separator);

/// Returns corresponding lock object for the specified critical region
/// name.
/// If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
///
Value *getOMPCriticalRegionLock(StringRef CriticalName);

/// Callback type for Atomic Expression update
/// ex:
/// \code{.cpp}
/// unsigned x = 0;
/// #pragma omp atomic update
/// x = Expr(x_old);  //Expr() is any legal operation
/// \endcode
///
/// \param XOld the value of the atomic memory address to use for update
/// \param IRB reference to the IRBuilder to use
///
/// \returns Value to update X to.
using AtomicUpdateCallbackTy =
    const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;

private:
/// The OpenMP atomic operation kinds (see \p AK of
/// checkAndEmitFlushAfterAtomic).
enum AtomicKind { Read, Write, Update, Capture, Compare };

/// Determine whether to emit flush or not
///
/// \param Loc    The insert and source location description.
/// \param AO     The required atomic ordering
/// \param AK     The OpenMP atomic operation kind used.
///
/// \returns      whether a flush was emitted or not
bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                  AtomicOrdering AO, AtomicKind AK);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param AllocaIP   The insertion point to be used for alloca
///                   instructions.
/// \param X          The target atomic pointer to be updated
/// \param XElemTy    The element type of the atomic pointer.
/// \param Expr       The value to update X with.
/// \param AO         Atomic ordering of the generated atomic
///                   instructions.
/// \param RMWOp      The binary operation used for update. If
///                   operation is not supported by atomicRMW,
///                   or belongs to {FADD, FSUB, BAD_BINOP},
///                   then a `cmpExch` based atomic will be generated.
/// \param UpdateOp   Code generator for complex expressions that cannot be
///                   expressed through atomicrmw instruction.
/// \param VolatileX  true if \a X is volatile.
/// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \returns A pair of the old value of X before the update, and the value
///          used for the update.
std::pair<Value *, Value *>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                 bool IsXBinopExpr);

/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
/// \returns The instruction
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                              AtomicRMWInst::BinOp RMWOp);

public:
/// A struct to pack relevant information while generating atomic Ops
struct AtomicOpValue {
  Value *Var = nullptr;
  Type *ElemTy = nullptr;
  bool IsSigned = false;
  bool IsVolatile = false;
};

/// Emit atomic Read for : V = X --- Only Scalar data types.
///
/// \param Loc    The insert and source location description.
/// \param X      The target pointer to be atomically read
/// \param V      Memory address where to store atomically read
///               value
/// \param AO     Atomic ordering of the generated atomic
///               instructions.
///
/// \return Insertion point after generated atomic read IR.
InsertPointTy createAtomicRead(const LocationDescription &Loc,
                               AtomicOpValue &X, AtomicOpValue &V,
                               AtomicOrdering AO);

/// Emit atomic write for : X = Expr --- Only Scalar data types.
///
/// \param Loc    The insert and source location description.
/// \param X      The target pointer to be atomically written to
/// \param Expr   The value to store.
/// \param AO     Atomic ordering of the generated atomic
///               instructions.
///
/// \return Insertion point after generated atomic Write IR.
InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                AtomicOpValue &X, Value *Expr,
                                AtomicOrdering AO);

/// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
/// \param Loc      The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X        The target atomic pointer to be updated
/// \param Expr     The value to update X with.
/// \param AO       Atomic ordering of the generated atomic instructions.
/// \param RMWOp    The binary operation used for update. If the operation
///                 is not supported by atomicRMW, or belongs to
///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
///                 atomic will be generated.
/// \param UpdateOp Code generator for complex expressions that cannot be
///                 expressed through atomicrmw instruction.
/// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after generated atomic update IR.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP, AtomicOpValue &X,
                                 Value *Expr, AtomicOrdering AO,
                                 AtomicRMWInst::BinOp RMWOp,
                                 AtomicUpdateCallbackTy &UpdateOp,
                                 bool IsXBinopExpr);

/// Emit atomic capture for constructs: --- Only Scalar data types
/// V = X; X = X BinOp Expr ,
/// X = X BinOp Expr; V = X,
/// V = X; X = Expr BinOp X,
/// X = Expr BinOp X; V = X,
/// V = X; X = UpdateOp(X),
/// X = UpdateOp(X); V = X,
///
/// \param Loc        The insert and source location description.
/// \param AllocaIP   The insertion point to be used for alloca instructions.
/// \param X          The target atomic pointer to be updated
/// \param V          Memory address where to store captured value
/// \param Expr       The value to update X with.
/// \param AO         Atomic ordering of the generated atomic instructions
/// \param RMWOp      The binary operation used for update. If the
///                   operation is not supported by atomicRMW, or belongs to
///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
///                   atomic will be generated.
/// \param UpdateOp   Code generator for complex expressions that cannot be
///                   expressed through atomicrmw instruction.
/// \param UpdateExpr true if X is an in place update of the form
///                   X = X BinOp Expr or X = Expr BinOp X
/// \param IsPostfixUpdate true if original value of 'x' must be stored in
///                        'v', not an updated one.
/// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after generated atomic capture IR.
InsertPointTy
createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                    AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                    AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
                    bool IsPostfixUpdate, bool IsXBinopExpr);

/// Emit atomic compare for constructs: --- Only scalar data types
/// cond-expr-stmt:
/// x = x ordop expr ? expr : x;
/// x = expr ordop x ? expr : x;
/// x = x == e ? d : x;
/// x = e == x ? d : x; (this one is not in the spec)
/// cond-update-stmt:
/// if (x ordop expr) { x = expr; }
/// if (expr ordop x) { x = expr; }
/// if (x == e) { x = d; }
/// if (e == x) { x = d; } (this one is not in the spec)
/// conditional-update-capture-atomic:
/// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
/// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
/// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                         IsFailOnly=true)
/// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
/// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                                IsFailOnly=true)
///
/// \param Loc          The insert and source location description.
/// \param X            The target atomic pointer to be updated.
/// \param V            Memory address where to store captured value (for
///                     compare capture only).
/// \param R            Memory address where to store comparison result
///                     (for compare capture with '==' only).
/// \param E            The expected value ('e') for forms that use an
///                     equality comparison or an expression ('expr') for
///                     forms that use 'ordop' (logically an atomic maximum or
///                     minimum).
/// \param D            The desired value for forms that use an equality
///                     comparison. For forms that use 'ordop', it should be
///                     \p nullptr.
/// \param AO           Atomic ordering of the generated atomic instructions.
/// \param Op           Atomic compare operation. It can only be ==, <, or >.
/// \param IsXBinopExpr True if the conditional statement is in the form where
///                     x is on LHS. It only matters for < or >.
/// \param IsPostfixUpdate  True if original value of 'x' must be stored in
///                         'v', not an updated one (for compare capture
///                         only).
/// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
///                     only when the comparison fails. This is only valid for
///                     the case the comparison is '=='.
///
/// \return Insertion point after generated atomic compare IR.
InsertPointTy
createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
                    AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
                    AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
                    bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
/// Overload of createAtomicCompare that additionally takes \p Failure
/// (NOTE(review): presumably the atomic ordering to use when the comparison
/// fails; confirm against the definition).
InsertPointTy createAtomicCompare(const LocationDescription &Loc,
                                  AtomicOpValue &X, AtomicOpValue &V,
                                  AtomicOpValue &R, Value *E, Value *D,
                                  AtomicOrdering AO,
                                  omp::OMPAtomicCompareOp Op,
                                  bool IsXBinopExpr, bool IsPostfixUpdate,
                                  bool IsFailOnly, AtomicOrdering Failure);

/// Create the control flow structure of a canonical OpenMP loop.
///
/// The emitted loop will be disconnected, i.e. no edge to the loop's
/// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
/// IRBuilder location is not preserved.
///
/// \param DL        DebugLoc used for the instructions in the skeleton.
/// \param TripCount Value to be used for the trip count.
/// \param F         Function in which to insert the BasicBlocks.
/// \param PreInsertBefore  Where to insert BBs that execute before the body,
///                         typically the body itself.
/// \param PostInsertBefore Where to insert BBs that execute after the body.
/// \param Name      Base name used to derive BB
///                  and instruction names.
///
/// \returns The CanonicalLoopInfo that represents the emitted loop.
CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
                                      Function *F,
                                      BasicBlock *PreInsertBefore,
                                      BasicBlock *PostInsertBefore,
                                      const Twine &Name = {});

/// OMP Offload Info Metadata name string
const std::string ompOffloadInfoName = "omp_offload.info";

/// Loads all the offload entries information from the host IR
/// metadata. This function is only meant to be used with device code
/// generation.
///
/// \param M         Module to load Metadata info from. The module passed may
/// be loaded from a bitcode file, i.e., different from the
/// OpenMPIRBuilder::M module.
void loadOffloadInfoMetadata(Module &M);

/// Loads all the offload entries information from the host IR
/// metadata read from the file passed in as the HostFilePath argument. This
/// function is only meant to be used with device code generation.
///
/// \param HostFilePath The path to the host IR file,
/// used to load in offload metadata for the device, allowing host and device
/// to maintain the same metadata mapping.
void loadOffloadInfoMetadata(StringRef HostFilePath);

/// Gets (if a variable with the given name already exists) or creates an
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
/// \param Ty Type of the global variable. If it already exists the type
/// must be the same.
/// \param Name Name of the variable.
/// \param AddressSpace Address space of the variable; defaults to 0.
GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
                                            unsigned AddressSpace = 0);
};

/// Class to represent the control flow structure of an OpenMP canonical loop.
///
/// The control-flow structure is standardized for easy consumption by
/// directives associated with loops. For instance, the worksharing-loop
/// construct may change this control flow such that each loop iteration is
/// executed on only one thread. The constraints of a canonical loop in brief
/// are:
///
///  * The number of loop iterations must have been computed before entering the
///    loop.
///
///  * Has an (unsigned) logical induction variable that starts at zero and
///    increments by one.
///
///  * The loop's CFG itself has no side-effects. The OpenMP specification
///    itself allows side-effects, but the order in which they happen, including
///    how often or whether at all, is unspecified. We expect that the frontend
///    will emit those side-effect instructions somewhere (e.g. before the loop)
///    such that the CanonicalLoopInfo itself can be side-effect free.
///
/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
/// execution of a loop body that satisfies these constraints. It does NOT
/// represent arbitrary SESE regions that happen to contain a loop. Do not use
/// CanonicalLoopInfo for such purposes.
///
/// The control flow can be described as follows:
///
///     Preheader
///        |
///  /-> Header
///  |     |
///  |    Cond---\
///  |     |     |
///  |    Body   |
///  |    | |    |
///  |   <...>   |
///  |    | |    |
///   \--Latch   |
///              |
///             Exit
///              |
///            After
///
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
/// including) and end at AfterIP (at the After's first instruction, excluding).
/// That is, instructions in the Preheader and After blocks (except the
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
/// side-effects.
/// Typically, the Preheader is used to compute the loop's trip
/// count. The instructions from BodyIP (at the Body block's first instruction,
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
/// control and thus can have side-effects. The body block is the single entry
/// point into the loop body, which may contain arbitrary control flow as long
/// as all control paths eventually branch to the Latch block.
///
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to requiring that the Latch has no PHINode and the Header's only
/// PHINode is for the induction variable.
///
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
/// Preheader) is CanonicalLoopInfo's responsibility and its build-up is
/// checked by assertOK(). These blocks are expected to not be modified unless
/// explicitly modifying the CanonicalLoopInfo through a method that applies an
/// OpenMP loop-associated construct such as applyWorkshareLoop, tileLoops,
/// unrollLoop, etc. These methods usually invalidate the CanonicalLoopInfo and
/// re-use its basic blocks. After invalidation, the CanonicalLoopInfo must not
/// be used anymore as its underlying control flow may not exist anymore.
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
/// may also return a new CanonicalLoopInfo that can be passed to other
/// loop-associated construct implementing methods. These loop-transforming
/// methods may either create a new CanonicalLoopInfo usually using
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
/// modify one of the input CanonicalLoopInfo and return it as representing the
/// modified loop. What is done is an implementation detail of the
/// transformation-implementing method and callers should always assume that the
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
/// A returned CanonicalLoopInfo has the same structure and guarantees as the
/// one created by createCanonicalLoop, such that transforming methods do not
/// have to special case where the CanonicalLoopInfo originated from.
///
/// Generally, methods consuming CanonicalLoopInfo do not need an
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
/// any InsertPoint in the Preheader, After or Body block can still be used
/// after calling such a method.
///
/// TODO: Provide mechanisms for exception handling and cancellation points.
///
/// Defined outside OpenMPIRBuilder because nested classes cannot be
/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
class CanonicalLoopInfo {
  friend class OpenMPIRBuilder;

private:
  // Control blocks of the loop; all are nullptr until the object is
  // initialized (isValid() tests Header).
  BasicBlock *Header = nullptr;
  BasicBlock *Cond = nullptr;
  BasicBlock *Latch = nullptr;
  BasicBlock *Exit = nullptr;

  /// Add the control blocks of this loop to \p BBs.
  ///
  /// This does not include any block from the body, including the one returned
  /// by getBody().
  ///
  /// FIXME: This currently includes the Preheader and After blocks even though
  /// their content is (mostly) not under CanonicalLoopInfo's control.
  /// Re-evaluate whether this makes sense.
  void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);

  /// Sets the number of loop iterations to the given value. This value must be
  /// valid in the condition block (i.e., defined in the preheader) and is
  /// interpreted as an unsigned integer.
  void setTripCount(Value *TripCount);

  /// Replace all uses of the canonical induction variable in the loop body with
  /// a new one.
  ///
  /// The intended use case is to update the induction variable for an updated
  /// iteration space such that it can stay normalized in the 0...tripcount-1
  /// range.
  ///
  /// The \p Updater is called with the (presumably updated) current normalized
  /// induction variable and is expected to return the value that uses of the
  /// pre-updated induction values should use instead, typically dependent on
  /// the new induction variable. This is a lambda (instead of e.g. just passing
  /// the new value) to be able to distinguish the uses of the pre-updated
  /// induction variable and uses of the induction variable to compute the
  /// updated induction variable value.
  void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);

public:
  /// Returns whether this object currently represents the IR of a loop.
If 2784 /// returning false, it may have been consumed by a loop transformation or not 2785 /// been intialized. Do not use in this case; isValid()2786 bool isValid() const { return Header; } 2787 2788 /// The preheader ensures that there is only a single edge entering the loop. 2789 /// Code that must be execute before any loop iteration can be emitted here, 2790 /// such as computing the loop trip count and begin lifetime markers. Code in 2791 /// the preheader is not considered part of the canonical loop. 2792 BasicBlock *getPreheader() const; 2793 2794 /// The header is the entry for each iteration. In the canonical control flow, 2795 /// it only contains the PHINode for the induction variable. getHeader()2796 BasicBlock *getHeader() const { 2797 assert(isValid() && "Requires a valid canonical loop"); 2798 return Header; 2799 } 2800 2801 /// The condition block computes whether there is another loop iteration. If 2802 /// yes, branches to the body; otherwise to the exit block. getCond()2803 BasicBlock *getCond() const { 2804 assert(isValid() && "Requires a valid canonical loop"); 2805 return Cond; 2806 } 2807 2808 /// The body block is the single entry for a loop iteration and not controlled 2809 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must 2810 /// eventually branch to the \p Latch block. getBody()2811 BasicBlock *getBody() const { 2812 assert(isValid() && "Requires a valid canonical loop"); 2813 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); 2814 } 2815 2816 /// Reaching the latch indicates the end of the loop body code. In the 2817 /// canonical control flow, it only contains the increment of the induction 2818 /// variable. getLatch()2819 BasicBlock *getLatch() const { 2820 assert(isValid() && "Requires a valid canonical loop"); 2821 return Latch; 2822 } 2823 2824 /// Reaching the exit indicates no more iterations are being executed. 
getExit()2825 BasicBlock *getExit() const { 2826 assert(isValid() && "Requires a valid canonical loop"); 2827 return Exit; 2828 } 2829 2830 /// The after block is intended for clean-up code such as lifetime end 2831 /// markers. It is separate from the exit block to ensure, analogous to the 2832 /// preheader, it having just a single entry edge and being free from PHI 2833 /// nodes should there be multiple loop exits (such as from break 2834 /// statements/cancellations). getAfter()2835 BasicBlock *getAfter() const { 2836 assert(isValid() && "Requires a valid canonical loop"); 2837 return Exit->getSingleSuccessor(); 2838 } 2839 2840 /// Returns the llvm::Value containing the number of loop iterations. It must 2841 /// be valid in the preheader and always interpreted as an unsigned integer of 2842 /// any bit-width. getTripCount()2843 Value *getTripCount() const { 2844 assert(isValid() && "Requires a valid canonical loop"); 2845 Instruction *CmpI = &Cond->front(); 2846 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 2847 return CmpI->getOperand(1); 2848 } 2849 2850 /// Returns the instruction representing the current logical induction 2851 /// variable. Always unsigned, always starting at 0 with an increment of one. getIndVar()2852 Instruction *getIndVar() const { 2853 assert(isValid() && "Requires a valid canonical loop"); 2854 Instruction *IndVarPHI = &Header->front(); 2855 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); 2856 return IndVarPHI; 2857 } 2858 2859 /// Return the type of the induction variable (and the trip count). getIndVarType()2860 Type *getIndVarType() const { 2861 assert(isValid() && "Requires a valid canonical loop"); 2862 return getIndVar()->getType(); 2863 } 2864 2865 /// Return the insertion point for user code before the loop. 
getPreheaderIP()2866 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { 2867 assert(isValid() && "Requires a valid canonical loop"); 2868 BasicBlock *Preheader = getPreheader(); 2869 return {Preheader, std::prev(Preheader->end())}; 2870 }; 2871 2872 /// Return the insertion point for user code in the body. getBodyIP()2873 OpenMPIRBuilder::InsertPointTy getBodyIP() const { 2874 assert(isValid() && "Requires a valid canonical loop"); 2875 BasicBlock *Body = getBody(); 2876 return {Body, Body->begin()}; 2877 }; 2878 2879 /// Return the insertion point for user code after the loop. getAfterIP()2880 OpenMPIRBuilder::InsertPointTy getAfterIP() const { 2881 assert(isValid() && "Requires a valid canonical loop"); 2882 BasicBlock *After = getAfter(); 2883 return {After, After->begin()}; 2884 }; 2885 getFunction()2886 Function *getFunction() const { 2887 assert(isValid() && "Requires a valid canonical loop"); 2888 return Header->getParent(); 2889 } 2890 2891 /// Consistency self-check. 2892 void assertOK() const; 2893 2894 /// Invalidate this loop. That is, the underlying IR does not fulfill the 2895 /// requirements of an OpenMP canonical loop anymore. 2896 void invalidate(); 2897 }; 2898 2899 } // end namespace llvm 2900 2901 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H 2902