• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
17 #include "llvm/Analysis/MemorySSAUpdater.h"
18 #include "llvm/Frontend/OpenMP/OMPConstants.h"
19 #include "llvm/IR/DebugLoc.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/Support/Allocator.h"
22 #include "llvm/TargetParser/Triple.h"
23 #include <forward_list>
24 #include <map>
25 #include <optional>
26 
27 namespace llvm {
28 class CanonicalLoopInfo;
29 struct TargetRegionEntryInfo;
30 class OffloadEntriesInfoManager;
31 class OpenMPIRBuilder;
32 
33 /// Move the instruction after an InsertPoint to the beginning of another
34 /// BasicBlock.
35 ///
36 /// The instructions after \p IP are moved to the beginning of \p New which must
37 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
38 /// \p New will be added such that there is no semantic change. Otherwise, the
39 /// \p IP insert block remains degenerate and it is up to the caller to insert a
40 /// terminator.
41 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
42               bool CreateBranch);
43 
44 /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
45 /// insert location will stick to after the instruction before the insertion
46 /// point (instead of moving with the instruction the InsertPoint stores
47 /// internally).
48 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
49 
/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
/// (missing the terminator).
///
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
/// is true, a branch to the new successor will be created such that
/// semantically there is no change; otherwise the block of the insertion point
/// remains degenerate and it is the caller's responsibility to insert a
/// terminator. Returns the new successor block.
59 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
60                     llvm::Twine Name = {});
61 
62 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
63 /// degenerate (missing the terminator).  Its new insert location will stick to
64 /// after the instruction before the insertion point (instead of moving with the
65 /// instruction the InsertPoint stores internally).
66 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
67                     llvm::Twine Name = {});
68 
69 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
70 /// degenerate (missing the terminator).  Its new insert location will stick to
71 /// after the instruction before the insertion point (instead of moving with the
72 /// instruction the InsertPoint stores internally).
73 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
74 
75 /// Like splitBB, but reuses the current block's name for the new name.
76 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
77                               llvm::Twine Suffix = ".split");
78 
79 /// Captures attributes that affect generating LLVM-IR using the
80 /// OpenMPIRBuilder and related classes. Note that not all attributes are
81 /// required for all classes or functions. In some use cases the configuration
82 /// is not necessary at all, because because the only functions that are called
83 /// are ones that are not dependent on the configuration.
84 class OpenMPIRBuilderConfig {
85 public:
86   /// Flag to define whether to generate code for the role of the OpenMP host
87   /// (if set to false) or device (if set to true) in an offloading context. It
88   /// is set when the -fopenmp-is-target-device compiler frontend option is
89   /// specified.
90   std::optional<bool> IsTargetDevice;
91 
92   /// Flag for specifying if the compilation is done for an accelerator. It is
93   /// set according to the architecture of the target triple and currently only
94   /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
95   /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
96   /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
97   /// like target with the ability to work as the OpenMP host is added, or if
98   /// the capabilities of the currently supported GPU architectures are
99   /// expanded.
100   std::optional<bool> IsGPU;
101 
102   // Flag for specifying if offloading is mandatory.
103   std::optional<bool> OpenMPOffloadMandatory;
104 
105   /// First separator used between the initial two parts of a name.
106   std::optional<StringRef> FirstSeparator;
107   /// Separator used between all of the rest consecutive parts of s name
108   std::optional<StringRef> Separator;
109 
110   OpenMPIRBuilderConfig();
111   OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
112                         bool OpenMPOffloadMandatory,
113                         bool HasRequiresReverseOffload,
114                         bool HasRequiresUnifiedAddress,
115                         bool HasRequiresUnifiedSharedMemory,
116                         bool HasRequiresDynamicAllocators);
117 
118   // Getters functions that assert if the required values are not present.
isTargetDevice()119   bool isTargetDevice() const {
120     assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
121     return *IsTargetDevice;
122   }
123 
isGPU()124   bool isGPU() const {
125     assert(IsGPU.has_value() && "IsGPU is not set");
126     return *IsGPU;
127   }
128 
openMPOffloadMandatory()129   bool openMPOffloadMandatory() const {
130     assert(OpenMPOffloadMandatory.has_value() &&
131            "OpenMPOffloadMandatory is not set");
132     return *OpenMPOffloadMandatory;
133   }
134 
hasRequiresFlags()135   bool hasRequiresFlags() const { return RequiresFlags; }
136   bool hasRequiresReverseOffload() const;
137   bool hasRequiresUnifiedAddress() const;
138   bool hasRequiresUnifiedSharedMemory() const;
139   bool hasRequiresDynamicAllocators() const;
140 
141   /// Returns requires directive clauses as flags compatible with those expected
142   /// by libomptarget.
143   int64_t getRequiresFlags() const;
144 
145   // Returns the FirstSeparator if set, otherwise use the default separator
146   // depending on isGPU
firstSeparator()147   StringRef firstSeparator() const {
148     if (FirstSeparator.has_value())
149       return *FirstSeparator;
150     if (isGPU())
151       return "_";
152     return ".";
153   }
154 
155   // Returns the Separator if set, otherwise use the default separator depending
156   // on isGPU
separator()157   StringRef separator() const {
158     if (Separator.has_value())
159       return *Separator;
160     if (isGPU())
161       return "$";
162     return ".";
163   }
164 
setIsTargetDevice(bool Value)165   void setIsTargetDevice(bool Value) { IsTargetDevice = Value; }
setIsGPU(bool Value)166   void setIsGPU(bool Value) { IsGPU = Value; }
setOpenMPOffloadMandatory(bool Value)167   void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; }
setFirstSeparator(StringRef FS)168   void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
setSeparator(StringRef S)169   void setSeparator(StringRef S) { Separator = S; }
170 
171   void setHasRequiresReverseOffload(bool Value);
172   void setHasRequiresUnifiedAddress(bool Value);
173   void setHasRequiresUnifiedSharedMemory(bool Value);
174   void setHasRequiresDynamicAllocators(bool Value);
175 
176 private:
177   /// Flags for specifying which requires directive clauses are present.
178   int64_t RequiresFlags;
179 };
180 
181 /// Data structure to contain the information needed to uniquely identify
182 /// a target entry.
183 struct TargetRegionEntryInfo {
184   std::string ParentName;
185   unsigned DeviceID;
186   unsigned FileID;
187   unsigned Line;
188   unsigned Count;
189 
TargetRegionEntryInfoTargetRegionEntryInfo190   TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {}
191   TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
192                         unsigned FileID, unsigned Line, unsigned Count = 0)
ParentNameTargetRegionEntryInfo193       : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
194         Count(Count) {}
195 
196   static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
197                                          StringRef ParentName,
198                                          unsigned DeviceID, unsigned FileID,
199                                          unsigned Line, unsigned Count);
200 
201   bool operator<(const TargetRegionEntryInfo RHS) const {
202     return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
203            std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
204                            RHS.Count);
205   }
206 };
207 
208 /// Class that manages information about offload code regions and data
209 class OffloadEntriesInfoManager {
  /// The OpenMPIRBuilder this manager was constructed with. (The number of
  /// entries registered so far is kept in OffloadingEntriesNum below.)
211   OpenMPIRBuilder *OMPBuilder;
212   unsigned OffloadingEntriesNum = 0;
213 
214 public:
215   /// Base class of the entries info.
216   class OffloadEntryInfo {
217   public:
218     /// Kind of a given entry.
219     enum OffloadingEntryInfoKinds : unsigned {
220       /// Entry is a target region.
221       OffloadingEntryInfoTargetRegion = 0,
222       /// Entry is a declare target variable.
223       OffloadingEntryInfoDeviceGlobalVar = 1,
224       /// Invalid entry info.
225       OffloadingEntryInfoInvalid = ~0u
226     };
227 
228   protected:
229     OffloadEntryInfo() = delete;
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)230     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)231     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
232                               uint32_t Flags)
233         : Flags(Flags), Order(Order), Kind(Kind) {}
234     ~OffloadEntryInfo() = default;
235 
236   public:
isValid()237     bool isValid() const { return Order != ~0u; }
getOrder()238     unsigned getOrder() const { return Order; }
getKind()239     OffloadingEntryInfoKinds getKind() const { return Kind; }
getFlags()240     uint32_t getFlags() const { return Flags; }
setFlags(uint32_t NewFlags)241     void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
getAddress()242     Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
setAddress(Constant * V)243     void setAddress(Constant *V) {
244       assert(!Addr.pointsToAliveValue() && "Address has been set before!");
245       Addr = V;
246     }
classof(const OffloadEntryInfo * Info)247     static bool classof(const OffloadEntryInfo *Info) { return true; }
248 
249   private:
250     /// Address of the entity that has to be mapped for offloading.
251     WeakTrackingVH Addr;
252 
253     /// Flags associated with the device global.
254     uint32_t Flags = 0u;
255 
256     /// Order this entry was emitted.
257     unsigned Order = ~0u;
258 
259     OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
260   };
261 
  /// Return true if there are no entries defined.
263   bool empty() const;
264   /// Return number of entries defined so far.
size()265   unsigned size() const { return OffloadingEntriesNum; }
266 
OffloadEntriesInfoManager(OpenMPIRBuilder * builder)267   OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
268 
269   //
270   // Target region entries related.
271   //
272 
273   /// Kind of the target registry entry.
274   enum OMPTargetRegionEntryKind : uint32_t {
275     /// Mark the entry as target region.
276     OMPTargetRegionEntryTargetRegion = 0x0,
277   };
278 
279   /// Target region entries info.
280   class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
281     /// Address that can be used as the ID of the entry.
282     Constant *ID = nullptr;
283 
284   public:
OffloadEntryInfoTargetRegion()285     OffloadEntryInfoTargetRegion()
286         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)287     explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
288                                           Constant *ID,
289                                           OMPTargetRegionEntryKind Flags)
290         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
291           ID(ID) {
292       setAddress(Addr);
293     }
294 
getID()295     Constant *getID() const { return ID; }
setID(Constant * V)296     void setID(Constant *V) {
297       assert(!ID && "ID has been set before!");
298       ID = V;
299     }
classof(const OffloadEntryInfo * Info)300     static bool classof(const OffloadEntryInfo *Info) {
301       return Info->getKind() == OffloadingEntryInfoTargetRegion;
302     }
303   };
304 
305   /// Initialize target region entry.
306   /// This is ONLY needed for DEVICE compilation.
307   void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
308                                        unsigned Order);
309   /// Register target region entry.
310   void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
311                                      Constant *Addr, Constant *ID,
312                                      OMPTargetRegionEntryKind Flags);
313   /// Return true if a target region entry with the provided information
314   /// exists.
315   bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
316                                 bool IgnoreAddressId = false) const;
317 
318   // Return the Name based on \a EntryInfo using the next available Count.
319   void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
320                                   const TargetRegionEntryInfo &EntryInfo);
321 
  /// Applies action \a Action on all registered entries.
323   typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
324                             const OffloadEntryInfoTargetRegion &)>
325       OffloadTargetRegionEntryInfoActTy;
326   void
327   actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
328 
329   //
330   // Device global variable entries related.
331   //
332 
  /// Kind of the global variable entry.
334   enum OMPTargetGlobalVarEntryKind : uint32_t {
    /// Mark the entry as a declare target 'to' entry.
336     OMPTargetGlobalVarEntryTo = 0x0,
    /// Mark the entry as a declare target 'link' entry.
338     OMPTargetGlobalVarEntryLink = 0x1,
339     /// Mark the entry as a declare target enter.
340     OMPTargetGlobalVarEntryEnter = 0x2,
341     /// Mark the entry as having no declare target entry kind.
342     OMPTargetGlobalVarEntryNone = 0x3,
343     /// Mark the entry as a declare target indirect global.
344     OMPTargetGlobalVarEntryIndirect = 0x8,
345     /// Mark the entry as a register requires global.
346     OMPTargetGlobalRegisterRequires = 0x10,
347   };
348 
349   /// Kind of device clause for declare target variables
350   /// and functions
351   /// NOTE: Currently not used as a part of a variable entry
352   /// used for Flang and Clang to interface with the variable
353   /// related registration functions
354   enum OMPTargetDeviceClauseKind : uint32_t {
355     /// The target is marked for all devices
356     OMPTargetDeviceClauseAny = 0x0,
357     /// The target is marked for non-host devices
358     OMPTargetDeviceClauseNoHost = 0x1,
359     /// The target is marked for host devices
360     OMPTargetDeviceClauseHost = 0x2,
361     /// The target is marked as having no clause
362     OMPTargetDeviceClauseNone = 0x3
363   };
364 
365   /// Device global variable entries info.
366   class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
367     /// Type of the global variable.
368     int64_t VarSize;
369     GlobalValue::LinkageTypes Linkage;
370     const std::string VarName;
371 
372   public:
OffloadEntryInfoDeviceGlobalVar()373     OffloadEntryInfoDeviceGlobalVar()
374         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)375     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
376                                              OMPTargetGlobalVarEntryKind Flags)
377         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage,const std::string & VarName)378     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
379                                              int64_t VarSize,
380                                              OMPTargetGlobalVarEntryKind Flags,
381                                              GlobalValue::LinkageTypes Linkage,
382                                              const std::string &VarName)
383         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
384           VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
385       setAddress(Addr);
386     }
387 
getVarSize()388     int64_t getVarSize() const { return VarSize; }
getVarName()389     StringRef getVarName() const { return VarName; }
setVarSize(int64_t Size)390     void setVarSize(int64_t Size) { VarSize = Size; }
getLinkage()391     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
setLinkage(GlobalValue::LinkageTypes LT)392     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
classof(const OffloadEntryInfo * Info)393     static bool classof(const OffloadEntryInfo *Info) {
394       return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
395     }
396   };
397 
398   /// Initialize device global variable entry.
399   /// This is ONLY used for DEVICE compilation.
400   void initializeDeviceGlobalVarEntryInfo(StringRef Name,
401                                           OMPTargetGlobalVarEntryKind Flags,
402                                           unsigned Order);
403 
404   /// Register device global variable entry.
405   void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
406                                         int64_t VarSize,
407                                         OMPTargetGlobalVarEntryKind Flags,
408                                         GlobalValue::LinkageTypes Linkage);
409   /// Checks if the variable with the given name has been registered already.
hasDeviceGlobalVarEntryInfo(StringRef VarName)410   bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
411     return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
412   }
413   /// Applies action \a Action on all registered entries.
414   typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
415       OffloadDeviceGlobalVarEntryInfoActTy;
416   void actOnDeviceGlobalVarEntriesInfo(
417       const OffloadDeviceGlobalVarEntryInfoActTy &Action);
418 
419 private:
420   /// Return the count of entries at a particular source location.
421   unsigned
422   getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
423 
424   /// Update the count of entries at a particular source location.
425   void
426   incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
427 
428   static TargetRegionEntryInfo
getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)429   getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
430     return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
431                                  EntryInfo.FileID, EntryInfo.Line, 0);
432   }
433 
434   // Count of entries at a location.
435   std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
436 
437   // Storage for target region entries kind.
438   typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
439       OffloadEntriesTargetRegionTy;
440   OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
441   /// Storage for device global variable entries kind. The storage is to be
442   /// indexed by mangled name.
443   typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
444       OffloadEntriesDeviceGlobalVarTy;
445   OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
446 };
447 
448 /// An interface to create LLVM-IR for OpenMP directives.
449 ///
450 /// Each OpenMP directive has a corresponding public generator method.
451 class OpenMPIRBuilder {
452 public:
453   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
454   /// not have an effect on \p M (see initialize)
OpenMPIRBuilder(Module & M)455   OpenMPIRBuilder(Module &M)
456       : M(M), Builder(M.getContext()), OffloadInfoManager(this),
457         T(Triple(M.getTargetTriple())) {}
458   ~OpenMPIRBuilder();
459 
  /// Initialize the internal state, this will put structure types and
  /// potentially other helpers into the underlying module. Must be called
  /// before any other method and only once! This internal state includes types
  /// used in the OpenMPIRBuilder generated from OMPKinds.def.
464   void initialize();
465 
setConfig(OpenMPIRBuilderConfig C)466   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
467 
468   /// Finalize the underlying module, e.g., by outlining regions.
469   /// \param Fn                    The function to be finalized. If not used,
470   ///                              all functions are finalized.
471   void finalize(Function *Fn = nullptr);
472 
473   /// Add attributes known for \p FnID to \p Fn.
474   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
475 
476   /// Type used throughout for insertion points.
477   using InsertPointTy = IRBuilder<>::InsertPoint;
478 
  /// Create a name using the platform-specific separators.
  /// \param Parts parts of the final name that need separation
  /// The created name has a first separator between the first and second part
  /// and a second separator between all other parts.
  /// E.g. with FirstSeparator "$" and Separator "." and
  /// parts: "p1", "p2", "p3", "p4"
  /// The resulting name is "p1$p2.p3.p4"
  /// The separators are retrieved from the OpenMPIRBuilderConfig.
487   std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
488 
489   /// Callback type for variable finalization (think destructors).
490   ///
491   /// \param CodeGenIP is the insertion point at which the finalization code
492   ///                  should be placed.
493   ///
494   /// A finalize callback knows about all objects that need finalization, e.g.
495   /// destruction, when the scope of the currently generated construct is left
496   /// at the time, and location, the callback is invoked.
497   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
498 
499   struct FinalizationInfo {
500     /// The finalization callback provided by the last in-flight invocation of
501     /// createXXXX for the directive of kind DK.
502     FinalizeCallbackTy FiniCB;
503 
504     /// The directive kind of the innermost directive that has an associated
505     /// region which might require finalization when it is left.
506     omp::Directive DK;
507 
508     /// Flag to indicate if the directive is cancellable.
509     bool IsCancellable;
510   };
511 
512   /// Push a finalization callback on the finalization stack.
513   ///
514   /// NOTE: Temporary solution until Clang CG is gone.
pushFinalizationCB(const FinalizationInfo & FI)515   void pushFinalizationCB(const FinalizationInfo &FI) {
516     FinalizationStack.push_back(FI);
517   }
518 
519   /// Pop the last finalization callback from the finalization stack.
520   ///
521   /// NOTE: Temporary solution until Clang CG is gone.
popFinalizationCB()522   void popFinalizationCB() { FinalizationStack.pop_back(); }
523 
524   /// Callback type for body (=inner region) code generation
525   ///
526   /// The callback takes code locations as arguments, each describing a
527   /// location where additional instructions can be inserted.
528   ///
  /// The CodeGenIP may be in the middle of a basic block or point to the end of
  /// it. The basic block may have a terminator or be degenerate. The callback
  /// function may just insert instructions at that position, but also split the
  /// block (without the Before argument of BasicBlock::splitBasicBlock such
  /// that the identity of the split predecessor block is preserved) and insert
  /// additional control flow, including branches that do not lead back to what
  /// follows the CodeGenIP. Note that since the callback is allowed to split
  /// the block, callers must assume that InsertPoints to positions in the
  /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
  /// such InsertPoints need to be preserved, the caller can split the block
  /// itself before calling the callback.
540   ///
541   /// AllocaIP and CodeGenIP must not point to the same position.
542   ///
543   /// \param AllocaIP is the insertion point at which new alloca instructions
544   ///                 should be placed. The BasicBlock it is pointing to must
545   ///                 not be split.
546   /// \param CodeGenIP is the insertion point at which the body code should be
547   ///                  placed.
548   using BodyGenCallbackTy =
549       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
550 
  // This is created primarily for the sections construct, as
  // llvm::function_ref (BodyGenCallbackTy) is not storable: as described in
  // the comments of the function_ref class, it contains a non-owning
  // reference to the callable.
555   using StorableBodyGenCallbackTy =
556       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
557 
558   /// Callback type for loop body code generation.
559   ///
560   /// \param CodeGenIP is the insertion point where the loop's body code must be
561   ///                  placed. This will be a dedicated BasicBlock with a
562   ///                  conditional branch from the loop condition check and
563   ///                  terminated with an unconditional branch to the loop
564   ///                  latch.
565   /// \param IndVar    is the induction variable usable at the insertion point.
566   using LoopBodyGenCallbackTy =
567       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
568 
569   /// Callback type for variable privatization (think copy & default
570   /// constructor).
571   ///
572   /// \param AllocaIP is the insertion point at which new alloca instructions
573   ///                 should be placed.
574   /// \param CodeGenIP is the insertion point at which the privatization code
575   ///                  should be placed.
576   /// \param Original The value being copied/created, should not be used in the
577   ///                 generated IR.
578   /// \param Inner The equivalent of \p Original that should be used in the
579   ///              generated IR; this is equal to \p Original if the value is
580   ///              a pointer and can thus be passed directly, otherwise it is
581   ///              an equivalent but different value.
582   /// \param ReplVal The replacement value, thus a copy or new created version
583   ///                of \p Inner.
584   ///
585   /// \returns The new insertion point where code generation continues and
586   ///          \p ReplVal the replacement value.
587   using PrivatizeCallbackTy = function_ref<InsertPointTy(
588       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
589       Value &Inner, Value *&ReplVal)>;
590 
591   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
592   /// (filename, line, column, ...).
593   struct LocationDescription {
LocationDescriptionLocationDescription594     LocationDescription(const IRBuilderBase &IRB)
595         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescriptionLocationDescription596     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescriptionLocationDescription597     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
598         : IP(IP), DL(DL) {}
599     InsertPointTy IP;
600     DebugLoc DL;
601   };
602 
603   /// Emitter methods for OpenMP directives.
604   ///
605   ///{
606 
607   /// Generator for '#omp barrier'
608   ///
609   /// \param Loc The location where the barrier directive was encountered.
610   /// \param DK The kind of directive that caused the barrier.
611   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
612   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
613   ///                        should be checked and acted upon.
614   ///
615   /// \returns The insertion point after the barrier.
616   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
617                               bool ForceSimpleCall = false,
618                               bool CheckCancelFlag = true);
619 
  /// Generator for '#omp cancel'
  ///
  /// \param Loc The location where the directive was encountered.
  /// \param IfCondition The evaluated 'if' clause expression, if any.
  /// \param CanceledDirective The kind of directive that is canceled.
  ///
  /// \returns The insertion point after the barrier.
627   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
628                              omp::Directive CanceledDirective);
629 
630   /// Generator for '#omp parallel'
631   ///
632   /// \param Loc The insert and source location description.
633   /// \param AllocaIP The insertion points to be used for alloca instructions.
634   /// \param BodyGenCB Callback that will generate the region code.
635   /// \param PrivCB Callback to copy a given variable (think copy constructor).
636   /// \param FiniCB Callback to finalize variable copies.
637   /// \param IfCondition The evaluated 'if' clause expression, if any.
638   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
639   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
640   /// \param IsCancellable Flag to indicate a cancellable parallel region.
641   ///
642   /// \returns The insertion position *after* the parallel.
643   IRBuilder<>::InsertPoint
644   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
645                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
646                  FinalizeCallbackTy FiniCB, Value *IfCondition,
647                  Value *NumThreads, omp::ProcBindKind ProcBind,
648                  bool IsCancellable);
649 
650   /// Generator for the control flow structure of an OpenMP canonical loop.
651   ///
652   /// This generator operates on the logical iteration space of the loop, i.e.
653   /// the caller only has to provide a loop trip count of the loop as defined by
654   /// base language semantics. The trip count is interpreted as an unsigned
655   /// integer. The induction variable passed to \p BodyGenCB will be of the same
656   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
657   /// convert the logical iteration variable to the loop counter variable in the
658   /// loop body.
659   ///
660   /// \param Loc       The insert and source location description. The insert
661   ///                  location can be between two instructions or the end of a
662   ///                  degenerate block (e.g. a BB under construction).
663   /// \param BodyGenCB Callback that will generate the loop body code.
664   /// \param TripCount Number of iterations the loop body is executed.
665   /// \param Name      Base name used to derive BB and instruction names.
666   ///
667   /// \returns An object representing the created control flow structure which
668   ///          can be used for loop-associated directives.
669   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
670                                          LoopBodyGenCallbackTy BodyGenCB,
671                                          Value *TripCount,
672                                          const Twine &Name = "loop");
673 
674   /// Generator for the control flow structure of an OpenMP canonical loop.
675   ///
676   /// Instead of a logical iteration space, this allows specifying user-defined
677   /// loop counter values using increment, upper- and lower bounds. To
678   /// disambiguate the terminology when counting downwards, instead of lower
679   /// bounds we use \p Start for the loop counter value in the first body
680   /// iteration.
681   ///
682   /// Consider the following limitations:
683   ///
684   ///  * A loop counter space over all integer values of its bit-width cannot be
685   ///    represented. E.g. using uint8_t, its loop trip count of 256 cannot be
686   ///    stored into an 8 bit integer:
687   ///
688   ///      DO I = 0, 255, 1
689   ///
690   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
691   ///    effectively counting downwards:
692   ///
693   ///      for (uint8_t i = 100u; i > 0; i += 127u)
694   ///
695   ///
696   /// TODO: May need to add additional parameters to represent:
697   ///
698   ///  * Allow representing downcounting with unsigned integers.
699   ///
700   ///  * Sign of the step and the comparison operator might disagree:
701   ///
702   ///      for (int i = 0; i < 42; i -= 1u)
703   ///
704   ///
705   /// \param Loc       The insert and source location description.
706   /// \param BodyGenCB Callback that will generate the loop body code.
707   /// \param Start     Value of the loop counter for the first iteration.
708   /// \param Stop      Loop counter values past this will stop the loop.
709   /// \param Step      Loop counter increment after each iteration; negative
710   ///                  means counting down.
711   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
712   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
713   ///                      counter.
714   /// \param ComputeIP Insertion point for instructions computing the trip
715   ///                  count. Can be used to ensure the trip count is available
716   ///                  at the outermost loop of a loop nest. If not set,
717   ///                  defaults to the preheader of the generated loop.
718   /// \param Name      Base name used to derive BB and instruction names.
719   ///
720   /// \returns An object representing the created control flow structure which
721   ///          can be used for loop-associated directives.
722   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
723                                          LoopBodyGenCallbackTy BodyGenCB,
724                                          Value *Start, Value *Stop, Value *Step,
725                                          bool IsSigned, bool InclusiveStop,
726                                          InsertPointTy ComputeIP = {},
727                                          const Twine &Name = "loop");
728 
729   /// Collapse a loop nest into a single loop.
730   ///
731   /// Merges loops of a loop nest into a single CanonicalLoopNest representation
732   /// that has the same number of innermost loop iterations as the original loop
733   /// nest. The induction variables of the input loops are derived from the
734   /// collapsed loop's induction variable. This is intended to be used to
735   /// implement OpenMP's collapse clause. Before applying a directive,
736   /// collapseLoops normalizes a loop nest to contain only a single loop and the
737   /// directive's implementation does not need to handle multiple loops itself.
738   /// This does not remove the need to handle all loop nest handling by
739   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
740   /// modifier of the worksharing-loop directive.
741   ///
742   /// Example:
743   /// \code
744   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
745   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
746   ///       body(i, j);
747   /// \endcode
748   ///
749   /// After collapsing with Loops={i,j}, the loop is changed to
750   /// \code
751   ///   for (int ij = 0; ij < 63; ++ij) {
752   ///     int i = ij / 9;
753   ///     int j = ij % 9;
754   ///     body(i, j);
755   ///   }
756   /// \endcode
757   ///
758   /// In the current implementation, the following limitations apply:
759   ///
760   ///  * All input loops have an induction variable of the same type.
761   ///
762   ///  * The collapsed loop will have the same trip count integer type as the
763   ///    input loops. Therefore it is possible that the collapsed loop cannot
764   ///    represent all iterations of the input loops. For instance, assuming a
765   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
766   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
767   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
768   ///    in this case.
769   ///
770   ///  * The trip counts of every input loop must be available at \p ComputeIP.
771   ///    Non-rectangular loops are not yet supported.
772   ///
773   ///  * At each nest level, code between a surrounding loop and its nested loop
774   ///    is hoisted into the loop body, and such code will be executed more
775   ///    often than before collapsing (or not at all if any inner loop iteration
776   ///    has a trip count of 0). This is permitted by the OpenMP specification.
777   ///
778   /// \param DL        Debug location for instructions added for collapsing,
779   ///                  such as instructions to compute/derive the input loop's
780   ///                  induction variables.
781   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
782   ///                  from outermost-to-innermost and every control flow of a
783   ///                  loop's body must pass through its directly nested loop.
784   /// \param ComputeIP Where to insert additional instructions that compute the
785   ///                  collapsed trip count. If not set, defaults to before the
786   ///                  generated loop.
787   ///
788   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
789   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
790                                    ArrayRef<CanonicalLoopInfo *> Loops,
791                                    InsertPointTy ComputeIP);
792 
793   /// Get the default alignment value for given target
794   ///
795   /// \param TargetTriple   Target triple
796   /// \param Features       StringMap which describes extra CPU features
797   static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
798                                             const StringMap<bool> &Features);
799 
800   /// Retrieve (or create if non-existent) the address of a declare
801   /// target variable, used in conjunction with registerTargetGlobalVariable
802   /// to create declare target global variables.
803   ///
804   /// \param CaptureClause - enumerator corresponding to the OpenMP capture
805   /// clause used in conjunction with the variable being registered (link,
806   /// to, enter).
807   /// \param DeviceClause - enumerator corresponding to the OpenMP device
808   /// clause used in conjunction with the variable being registered (nohost,
809   /// host, any)
810   /// \param IsDeclaration - boolean stating if the variable being registered
811   /// is a declaration-only and not a definition
812   /// \param IsExternallyVisible - boolean stating if the variable is externally
813   /// visible
814   /// \param EntryInfo - Unique entry information for the value generated
815   /// using getTargetEntryUniqueInfo, used to name generated pointer references
816   /// to the declare target variable
817   /// \param MangledName - the mangled name of the variable being registered
818   /// \param GeneratedRefs - references generated by invocations of
819   /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
820   /// these are required by Clang for book keeping.
821   /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
822   /// \param TargetTriple - The OpenMP device target triple we are compiling
823   /// for
824   /// \param LlvmPtrTy - The type of the variable we are generating or
825   /// retrieving an address for
826   /// \param GlobalInitializer - a lambda function which creates a constant
827   /// used for initializing a pointer reference to the variable in certain
828   /// cases. If a nullptr is passed, it will default to utilising the original
829   /// variable to initialize the pointer reference.
830   /// \param VariableLinkage - a lambda function which returns the variables
831   /// linkage type, if unspecified and a nullptr is given, it will instead
832   /// utilise the linkage stored on the existing global variable in the
833   /// LLVMModule.
834   Constant *getAddrOfDeclareTargetVar(
835       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
836       OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
837       bool IsDeclaration, bool IsExternallyVisible,
838       TargetRegionEntryInfo EntryInfo, StringRef MangledName,
839       std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
840       std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
841       std::function<Constant *()> GlobalInitializer,
842       std::function<GlobalValue::LinkageTypes()> VariableLinkage);
843 
844   /// Registers a target variable for device or host.
845   ///
846   /// \param CaptureClause - enumerator corresponding to the OpenMP capture
847   /// clause used in conjunction with the variable being registered (link,
848   /// to, enter).
849   /// \param DeviceClause - enumerator corresponding to the OpenMP device
850   /// clause used in conjunction with the variable being registered (nohost,
851   /// host, any)
852   /// \param IsDeclaration - boolean stating if the variable being registered
853   /// is a declaration-only and not a definition
854   /// \param IsExternallyVisible - boolean stating if the variable is externally
855   /// visible
856   /// \param EntryInfo - Unique entry information for the value generated
857   /// using getTargetEntryUniqueInfo, used to name generated pointer references
858   /// to the declare target variable
859   /// \param MangledName - the mangled name of the variable being registered
860   /// \param GeneratedRefs - references generated by invocations of
861   /// registerTargetGlobalVariable these are required by Clang for book
862   /// keeping.
863   /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
864   /// \param TargetTriple - The OpenMP device target triple we are compiling
865   /// for
866   /// \param GlobalInitializer - a lambda function which creates a constant
867   /// used for initializing a pointer reference to the variable in certain
868   /// cases. If a nullptr is passed, it will default to utilising the original
869   /// variable to initialize the pointer reference.
870   /// \param VariableLinkage - a lambda function which returns the variables
871   /// linkage type, if unspecified and a nullptr is given, it will instead
872   /// utilise the linkage stored on the existing global variable in the
873   /// LLVMModule.
874   /// \param LlvmPtrTy - The type of the variable we are generating or
875   /// retrieving an address for
876   /// \param Addr - the original llvm value (addr) of the variable to be
877   /// registered
878   void registerTargetGlobalVariable(
879       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
880       OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
881       bool IsDeclaration, bool IsExternallyVisible,
882       TargetRegionEntryInfo EntryInfo, StringRef MangledName,
883       std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
884       std::vector<Triple> TargetTriple,
885       std::function<Constant *()> GlobalInitializer,
886       std::function<GlobalValue::LinkageTypes()> VariableLinkage,
887       Type *LlvmPtrTy, Constant *Addr);
888 
889   /// Get the offset of the OMP_MAP_MEMBER_OF field.
890   unsigned getFlagMemberOffset();
891 
892   /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
893   /// the position given.
894   /// \param Position - A value indicating the position of the parent
895   /// of the member in the kernel argument structure, often retrieved
896   /// by the parent's position in the combined information vectors used
897   /// to generate the structure itself. Multiple children (members of)
898   /// with the same parent will use the same returned member flag.
899   omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position);
900 
901   /// Given an initial flag set, this function modifies it to contain
902   /// the passed in MemberOfFlag generated from the getMemberOfFlag
903   /// function. The results are dependent on the existing flag bits
904   /// set in the original flag set.
905   /// \param Flags - The original set of flags to be modified with the
906   /// passed in MemberOfFlag.
907   /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
908   /// slightly based on the getMemberOfFlag which adjusts the flag bits
909   /// based on the member's position in its parent.
910   void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags,
911                               omp::OpenMPOffloadMappingFlags MemberOfFlag);
912 
913 private:
914   /// Modifies the canonical loop to be a statically-scheduled workshare loop
915   /// which is executed on the device.
916   ///
917   /// This takes a \p CLI representing a canonical loop, such as the one
918   /// created by createCanonicalLoop, and emits additional instructions to
919   /// turn it into a workshare loop. In particular, it emits a call in the
920   /// preheader to an OpenMP device runtime function which handles
921   /// worksharing of loop body iterations.
922   ///
923   /// \param DL       Debug location for instructions added for the
924   ///                 workshare-loop construct itself.
925   /// \param CLI      A descriptor of the canonical loop to workshare.
926   /// \param AllocaIP An insertion point for Alloca instructions usable in the
927   ///                 preheader of the loop.
928   /// \param LoopType Information about type of loop worksharing.
929   ///                 It corresponds to type of loop workshare OpenMP pragma.
930   ///
931   /// \returns Point where to insert code after the workshare construct.
932   InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
933                                          InsertPointTy AllocaIP,
934                                          omp::WorksharingLoopType LoopType);
935 
936   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
937   ///
938   /// This takes a \p CLI representing a canonical loop, such as the one
939   /// created by createCanonicalLoop, and emits additional instructions to
940   /// turn it into a workshare loop. In particular, it calls to an OpenMP
941   /// runtime function in the preheader to obtain the loop bounds to be used in
942   /// the current thread, updates the relevant instructions in the canonical
943   /// loop and calls to an OpenMP runtime finalization function after the loop.
944   ///
945   /// \param DL       Debug location for instructions added for the
946   ///                 workshare-loop construct itself.
947   /// \param CLI      A descriptor of the canonical loop to workshare.
948   /// \param AllocaIP An insertion point for Alloca instructions usable in the
949   ///                 preheader of the loop.
950   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
951   ///                     the loop.
952   ///
953   /// \returns Point where to insert code after the workshare construct.
954   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
955                                          InsertPointTy AllocaIP,
956                                          bool NeedsBarrier);
957 
958   /// Modifies the canonical loop to be a statically-scheduled workshare loop
959   /// with a user-specified chunk size.
960   ///
961   /// \param DL           Debug location for instructions added for the
962   ///                     workshare-loop construct itself.
963   /// \param CLI          A descriptor of the canonical loop to workshare.
964   /// \param AllocaIP     An insertion point for Alloca instructions usable in
965   ///                     the preheader of the loop.
966   /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
967   ///                     loop.
968   /// \param ChunkSize    The user-specified chunk size.
969   ///
970   /// \returns Point where to insert code after the workshare construct.
971   InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
972                                                 CanonicalLoopInfo *CLI,
973                                                 InsertPointTy AllocaIP,
974                                                 bool NeedsBarrier,
975                                                 Value *ChunkSize);
976 
977   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
978   ///
979   /// This takes a \p CLI representing a canonical loop, such as the one
980   /// created by createCanonicalLoop, and emits additional instructions to
981   /// turn it into a workshare loop. In particular, it calls to an OpenMP
982   /// runtime function in the preheader to obtain, and then in each iteration
983   /// to update the loop counter.
984   ///
985   /// \param DL       Debug location for instructions added for the
986   ///                 workshare-loop construct itself.
987   /// \param CLI      A descriptor of the canonical loop to workshare.
988   /// \param AllocaIP An insertion point for Alloca instructions usable in the
989   ///                 preheader of the loop.
990   /// \param SchedType Type of scheduling to be passed to the init function.
991   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
992   ///                     the loop.
993   /// \param Chunk    The size of loop chunk considered as a unit when
994   ///                 scheduling. If \p nullptr, defaults to 1.
995   ///
996   /// \returns Point where to insert code after the workshare construct.
997   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
998                                           InsertPointTy AllocaIP,
999                                           omp::OMPScheduleType SchedType,
1000                                           bool NeedsBarrier,
1001                                           Value *Chunk = nullptr);
1002 
1003   /// Create an alternative version of the loop to support the if clause.
1004   ///
1005   /// An OpenMP if clause can require generating a second loop. This loop
1006   /// will be executed when if clause condition is not met. createIfVersion
1007   /// adds branch instruction to the copied loop if \p IfCond is not met.
1008   ///
1009   /// \param Loop       Original loop which should be versioned.
1010   /// \param IfCond     Value which corresponds to if clause condition.
1011   /// \param VMap       Value to value map to define relation between
1012   ///                   original and copied loop values and loop blocks.
1013   /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1014   void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1015                        ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
1016 
1017 public:
1018   /// Modifies the canonical loop to be a workshare loop.
1019   ///
1020   /// This takes a \p CLI representing a canonical loop, such as the one
1021   /// created by createCanonicalLoop, and emits additional instructions to
1022   /// turn it into a workshare loop. In particular, it calls to an OpenMP
1023   /// runtime function in the preheader to obtain the loop bounds to be used in
1024   /// the current thread, updates the relevant instructions in the canonical
1025   /// loop and calls to an OpenMP runtime finalization function after the loop.
1026   ///
1027   /// The concrete transformation is done by applyStaticWorkshareLoop,
1028   /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1029   /// on the value of \p SchedKind and \p ChunkSize.
1030   ///
1031   /// \param DL       Debug location for instructions added for the
1032   ///                 workshare-loop construct itself.
1033   /// \param CLI      A descriptor of the canonical loop to workshare.
1034   /// \param AllocaIP An insertion point for Alloca instructions usable in the
1035   ///                 preheader of the loop.
1036   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1037   ///                     the loop.
1038   /// \param SchedKind Scheduling algorithm to use.
1039   /// \param ChunkSize The chunk size for the inner loop.
1040   /// \param HasSimdModifier Whether the simd modifier is present in the
1041   ///                        schedule clause.
1042   /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1043   ///                             the schedule clause.
1044   /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1045   ///                                present in the schedule clause.
1046   /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1047   ///                         present.
1048   /// \param LoopType Information about type of loop worksharing.
1049   ///                 It corresponds to type of loop workshare OpenMP pragma.
1050   ///
1051   /// \returns Point where to insert code after the workshare construct.
1052   InsertPointTy applyWorkshareLoop(
1053       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1054       bool NeedsBarrier,
1055       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1056       Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1057       bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1058       bool HasOrderedClause = false,
1059       omp::WorksharingLoopType LoopType =
1060           omp::WorksharingLoopType::ForStaticLoop);
1061 
1062   /// Tile a loop nest.
1063   ///
1064   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1065   /// \p Loops must be perfectly nested, from outermost to innermost loop
1066   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1067   /// of every loop and every tile size must be usable in the outermost
1068   /// loop's preheader. This implies that the loop nest is rectangular.
1069   ///
1070   /// Example:
1071   /// \code
1072   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
1073   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
1074   ///         body(i, j);
1075   /// \endcode
1076   ///
1077   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1078   /// \code
1079   ///   for (int i1 = 0; i1 < 3; ++i1)
1080   ///     for (int j1 = 0; j1 < 2; ++j1)
1081   ///       for (int i2 = 0; i2 < 5; ++i2)
1082   ///         for (int j2 = 0; j2 < 7; ++j2)
1083   ///           body(i1*5+i2, j1*7+j2);
1084   /// \endcode
1085   ///
1086   /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
1087   /// are referred to as the floor loops, and the loops i2 and j2 are the tile
1088   /// loops. Tiling also handles non-constant trip counts, non-constant tile
1089   /// sizes and trip counts that are not multiples of the tile size. In the
1090   /// latter case the tile loop of the last floor-loop iteration will have fewer
1091   /// iterations than specified as its tile size.
1092   ///
1093   ///
1094   /// \param DL        Debug location for instructions added by tiling, for
1095   ///                  instance the floor- and tile trip count computation.
1096   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
1097   ///                  invalidated by this method, i.e. should not be used
1098   ///                  after tiling.
1099   /// \param TileSizes For each loop in \p Loops, the tile size for that
1100   ///                  dimension.
1101   ///
1102   /// \returns A list of generated loops. Contains twice as many loops as the
1103   ///          input loop nest; the first half are the floor loops and the
1104   ///          second half are the tile loops.
1105   std::vector<CanonicalLoopInfo *>
1106   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1107             ArrayRef<Value *> TileSizes);
1108 
1109   /// Fully unroll a loop.
1110   ///
1111   /// Instead of unrolling the loop immediately (and duplicating its body
1112   /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1113   /// metadata.
1114   ///
1115   /// \param DL   Debug location for instructions added by unrolling.
1116   /// \param Loop The loop to unroll. The loop will be invalidated.
1117   void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
1118 
1119   /// Fully or partially unroll a loop. How the loop is unrolled is determined
1120   /// using LLVM's LoopUnrollPass.
1121   ///
1122   /// \param DL   Debug location for instructions added by unrolling.
1123   /// \param Loop The loop to unroll. The loop will be invalidated.
1124   void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
1125 
1126   /// Partially unroll a loop.
1127   ///
1128   /// The CanonicalLoopInfo of the unrolled loop for use with chained
1129   /// loop-associated directive can be requested using \p UnrolledCLI. Not
1130   /// needing the CanonicalLoopInfo allows more efficient code generation by
1131   /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1132   /// A loop-associated directive applied to the unrolled loop needs to know the
1133   /// new trip count which means that if using a heuristically determined unroll
1134   /// factor (\p Factor == 0), that factor must be computed immediately. We are
1135   /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1136   /// but which assumes that some canonicalization has taken place (e.g.
1137   /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1138   /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1139   ///
1140   /// \param DL          Debug location for instructions added by unrolling.
1141   /// \param Loop        The loop to unroll. The loop will be invalidated.
1142   /// \param Factor      The factor to unroll the loop by. A factor of 0
1143   ///                    indicates that a heuristic should be used to determine
1144   ///                    the unroll-factor.
1145   /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1146   ///                    partially unrolled loop. Otherwise, uses loop metadata
1147   ///                    to defer unrolling to the LoopUnrollPass.
1148   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1149                          CanonicalLoopInfo **UnrolledCLI);
1150 
1151   /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
1152   /// is cloned. The metadata which prevents vectorization is added to
1153   /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
1154   /// to false.
1155   ///
1156   /// \param Loop        The loop to simd-ize.
1157   /// \param AlignedVars The map which contains pairs of the pointer
1158   ///                    and its corresponding alignment.
1159   /// \param IfCond      The value which corresponds to the if clause
1160   ///                    condition.
1161   /// \param Order       The enum to map order clause.
1162   /// \param Simdlen     The Simdlen length to apply to the simd loop.
1163   /// \param Safelen     The Safelen length to apply to the simd loop.
1164   void applySimd(CanonicalLoopInfo *Loop,
1165                  MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1166                  omp::OrderKind Order, ConstantInt *Simdlen,
1167                  ConstantInt *Safelen);
1168 
1169   /// Generator for '#omp flush'
1170   ///
1171   /// \param Loc The location where the flush directive was encountered
1172   void createFlush(const LocationDescription &Loc);
1173 
1174   /// Generator for '#omp taskwait'
1175   ///
1176   /// \param Loc The location where the taskwait directive was encountered.
1177   void createTaskwait(const LocationDescription &Loc);
1178 
1179   /// Generator for '#omp taskyield'
1180   ///
1181   /// \param Loc The location where the taskyield directive was encountered.
1182   void createTaskyield(const LocationDescription &Loc);
1183 
1184   /// A struct to pack the relevant information for an OpenMP depend clause.
1185   struct DependData {
1186     omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
1187     Type *DepValueType = nullptr; ///< Null when default-constructed.
1188     Value *DepVal = nullptr;      ///< Null when default-constructed.
1189     explicit DependData() = default; // Yields DepUnknown with null pointers.
1190     DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
1191                Value *DepVal)
1192         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
1193   };
1194 
  /// Generator for `#omp task`
  ///
  /// \param Loc The location where the task construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param Tied True if the task is tied, false if the task is untied.
  ///             Defaults to true.
  /// \param Final i1 value which is `true` if the task is final, `false` if the
  ///              task is not final. Defaults to null.
  /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
  ///                    task is generated, and the encountering thread must
  ///                    suspend the current task region, for which execution
  ///                    cannot be resumed until execution of the structured
  ///                    block that is associated with the generated task is
  ///                    completed. Defaults to null.
  /// \param Dependencies Vector of DependData entries (presumably one per
  ///                     item of the 'depend' clause -- confirm against the
  ///                     caller). Defaults to empty.
  ///
  /// \returns An insertion point; presumably the position after the generated
  ///          task construct -- confirm against the implementation.
  InsertPointTy createTask(const LocationDescription &Loc,
                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                           bool Tied = true, Value *Final = nullptr,
                           Value *IfCondition = nullptr,
                           SmallVector<DependData> Dependencies = {});
1214 
  /// Generator for the taskgroup construct
  ///
  /// \param Loc The location where the taskgroup construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  ///
  /// \returns An insertion point; presumably the position after the generated
  ///          taskgroup construct -- confirm against the implementation.
  InsertPointTy createTaskgroup(const LocationDescription &Loc,
                                InsertPointTy AllocaIP,
                                BodyGenCallbackTy BodyGenCB);
1223 
  /// Callback type taking no arguments and returning a (string, uint64_t)
  /// tuple. getTargetEntryUniqueInfo describes the two elements as the file
  /// name and the line number of a target entry.
  using FileIdentifierInfoCallbackTy =
      std::function<std::tuple<std::string, uint64_t>()>;
1226 
  /// Creates a unique info for a target entry from the filename and line
  /// number supplied by \p CallBack.
  ///
  /// \param CallBack A callback function which should return the filename the
  /// entry resides in as well as the line number for the target entry.
  /// \param ParentName The name of the parent the target entry resides in, if
  /// any. Defaults to the empty string.
  ///
  /// \returns The TargetRegionEntryInfo built for the entry.
  static TargetRegionEntryInfo
  getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
                           StringRef ParentName = "");
1237 
  /// Functions used to generate reductions. Such functions take two Values
  /// representing LHS and RHS of the reduction, respectively, and a reference
  /// to the value that is updated to refer to the reduction result.
  ///
  /// The callback additionally receives an insertion point as its first
  /// argument and returns an insertion point (presumably where to emit and
  /// where emission ended, respectively -- confirm against the callers).
  using ReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
1243 
  /// Functions used to generate atomic reductions. Such functions take two
  /// Values representing pointers to LHS and RHS of the reduction, as well as
  /// the element type of these pointers. They are expected to atomically
  /// update the LHS to the reduced value.
  ///
  /// Argument order is: insertion point, element type, LHS pointer, RHS
  /// pointer. The callback returns an insertion point (presumably the position
  /// after the emitted code -- confirm against the callers).
  using AtomicReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
1250 
  /// Information about an OpenMP reduction.
  ///
  /// NOTE(review): both generator members are function_ref objects, which per
  /// LLVM's ADT documentation do not own the callable they refer to; the
  /// callables presumably have to outlive this struct -- confirm at the use
  /// sites.
  struct ReductionInfo {
    /// Store the given element type, variables and generator callbacks as-is.
    ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                  ReductionGenTy ReductionGen,
                  AtomicReductionGenTy AtomicReductionGen)
        : ElementType(ElementType), Variable(Variable),
          PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
          AtomicReductionGen(AtomicReductionGen) {}

    /// Reduction element type, must match pointee type of variable.
    Type *ElementType;

    /// Reduction variable of pointer type.
    Value *Variable;

    /// Thread-private partial reduction variable.
    Value *PrivateVariable;

    /// Callback for generating the reduction body. The IR produced by this will
    /// be used to combine two values in a thread-safe context, e.g., under
    /// lock or within the same thread, and therefore need not be atomic.
    ReductionGenTy ReductionGen;

    /// Callback for generating the atomic reduction body, may be null. The IR
    /// produced by this will be used to atomically combine two values during
    /// reduction. If null, the implementation will use the non-atomic version
    /// along with the appropriate synchronization mechanisms.
    AtomicReductionGenTy AtomicReductionGen;
  };
1280 
1281   // TODO: provide atomic and non-atomic reduction generators for reduction
1282   // operators defined by the OpenMP specification.
1283 
1284   /// Generator for '#omp reduction'.
1285   ///
1286   /// Emits the IR instructing the runtime to perform the specific kind of
1287   /// reductions. Expects reduction variables to have been privatized and
1288   /// initialized to reduction-neutral values separately. Emits the calls to
1289   /// runtime functions as well as the reduction function and the basic blocks
1290   /// performing the reduction atomically and non-atomically.
1291   ///
1292   /// The code emitted for the following:
1293   ///
1294   /// \code
1295   ///   type var_1;
1296   ///   type var_2;
1297   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1298   ///   /* body */;
1299   /// \endcode
1300   ///
1301   /// corresponds to the following sketch.
1302   ///
1303   /// \code
1304   /// void _outlined_par() {
1305   ///   // N is the number of different reductions.
1306   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1307   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1308   ///                        _omp_reduction_func,
1309   ///                        _gomp_critical_user.reduction.var)) {
1310   ///   case 1: {
1311   ///     var_1 = var_1 <reduction-op> privatized_var_1;
1312   ///     var_2 = var_2 <reduction-op> privatized_var_2;
1313   ///     // ...
1314   ///    __kmpc_end_reduce(...);
1315   ///     break;
1316   ///   }
1317   ///   case 2: {
1318   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
1319   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
1320   ///     // ...
1321   ///     break;
1322   ///   }
1323   ///   default: break;
1324   ///   }
1325   /// }
1326   ///
1327   /// void _omp_reduction_func(void **lhs, void **rhs) {
1328   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1329   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1330   ///   // ...
1331   /// }
1332   /// \endcode
1333   ///
  /// \param Loc                The location where the reduction was
  ///                           encountered. Must be within the associated
  ///                           directive and after the last local access to the
  ///                           reduction variables.
  /// \param AllocaIP           An insertion point suitable for allocas usable
  ///                           in reductions.
  /// \param ReductionInfos     A list of info on each reduction variable.
  /// \param IsNoWait           A flag set if the reduction is marked as nowait.
  ///                           Defaults to false.
  /// \param IsByRef            A flag set if the reduction is using reference
  ///                           rather than direct value. Defaults to false.
  ///
  /// \returns An insertion point; presumably the position after the emitted
  ///          reduction code -- confirm against the implementation.
  InsertPointTy createReductions(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP,
                                 ArrayRef<ReductionInfo> ReductionInfos,
                                 bool IsNoWait = false, bool IsByRef = false);
1348 
1349   ///}
1350 
1351   /// Return the insertion point used by the underlying IRBuilder.
getInsertionPoint()1352   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
1353 
1354   /// Update the internal location to \p Loc.
updateToLocation(const LocationDescription & Loc)1355   bool updateToLocation(const LocationDescription &Loc) {
1356     Builder.restoreIP(Loc.IP);
1357     Builder.SetCurrentDebugLocation(Loc.DL);
1358     return Loc.IP.getBlock() != nullptr;
1359   }
1360 
  /// Return the function declaration for the runtime function with \p FnID.
  ///
  /// \param M    The module to look the declaration up in (and, going by the
  ///             name, presumably to create it in when missing -- confirm).
  /// \param FnID Identifier of the requested OpenMP runtime function.
  FunctionCallee getOrCreateRuntimeFunction(Module &M,
                                            omp::RuntimeFunction FnID);
1364 
  /// Return the runtime function identified by \p FnID as a Function pointer.
  ///
  /// Unlike getOrCreateRuntimeFunction, no module argument is taken
  /// (presumably the builder's own module \c M is used -- confirm against the
  /// implementation).
  Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
1366 
  /// Return the (LLVM-IR) string describing the source location \p LocStr.
  ///
  /// \param SrcLocStrSize Output parameter passed by reference (presumably set
  ///                      to the size of the returned string -- confirm).
  Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);

  /// Return the (LLVM-IR) string describing the default source location.
  ///
  /// \param SrcLocStrSize Output parameter passed by reference (presumably set
  ///                      to the size of the returned string -- confirm).
  Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);

  /// Return the (LLVM-IR) string describing the source location identified by
  /// the arguments.
  ///
  /// \param FunctionName  Name of the enclosing function.
  /// \param FileName      Name of the source file.
  /// \param Line          Line number of the location.
  /// \param Column        Column number of the location.
  /// \param SrcLocStrSize Output parameter passed by reference (presumably set
  ///                      to the size of the returned string -- confirm).
  Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                                 unsigned Line, unsigned Column,
                                 uint32_t &SrcLocStrSize);

  /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
  /// fallback if \p DL does not specify the function name.
  ///
  /// \param SrcLocStrSize Output parameter passed by reference (presumably set
  ///                      to the size of the returned string -- confirm).
  Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                                 Function *F = nullptr);

  /// Return the (LLVM-IR) string describing the source location \p Loc.
  ///
  /// \param SrcLocStrSize Output parameter passed by reference (presumably set
  ///                      to the size of the returned string -- confirm).
  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                                 uint32_t &SrcLocStrSize);
1387 
  /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  /// TODO: Create an enum class for the Reserve2Flags
  ///
  /// \param SrcLocStr     The source location string constant.
  /// \param SrcLocStrSize Size associated with \p SrcLocStr (presumably the
  ///                      value produced by getOrCreateSrcLocStr -- confirm).
  /// \param Flags         Ident flags; defaults to omp::IdentFlag(0).
  /// \param Reserve2Flags Additional flags; defaults to 0.
  Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                             omp::IdentFlag Flags = omp::IdentFlag(0),
                             unsigned Reserve2Flags = 0);
1393 
  /// Create a hidden global flag \p Name in the module with initial value \p
  /// Value.
  ///
  /// \param Value The initial value of the flag.
  /// \param Name  The name of the global flag.
  ///
  /// \returns The created global.
  GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
1397 
  /// Generate control flow and cleanup for cancellation.
  ///
  /// \param CancelFlag Flag indicating if the cancellation is performed.
  /// \param CanceledDirective The kind of directive that is canceled.
  /// \param ExitCB Extra code to be generated in the exit block. Defaults to
  ///               an empty callback.
  void emitCancelationCheckImpl(Value *CancelFlag,
                                omp::Directive CanceledDirective,
                                FinalizeCallbackTy ExitCB = {});
1406 
  /// Generate a target region entry call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param Return Return value of the created function returned by reference.
  /// \param Ident Source location identifier (presumably an ident_t* as
  ///              produced by getOrCreateIdent -- confirm).
  /// \param DeviceID Identifier for the device via the 'device' clause.
  /// \param NumTeams Number of teams for the region via the 'num_teams' clause
  ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
  /// \param NumThreads Number of threads via the 'thread_limit' clause.
  /// \param HostPtr Pointer to the host-side pointer of the target kernel.
  /// \param KernelArgs Array of arguments to the kernel.
  ///
  /// \returns An insertion point; presumably the position after the emitted
  ///          call -- confirm against the implementation.
  InsertPointTy emitTargetKernel(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP, Value *&Return,
                                 Value *Ident, Value *DeviceID, Value *NumTeams,
                                 Value *NumThreads, Value *HostPtr,
                                 ArrayRef<Value *> KernelArgs);
1423 
  /// Generate a barrier runtime call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param DK The directive which caused the barrier.
  /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  ///                        should be checked and acted upon.
  ///
  /// \returns The insertion point after the barrier.
  InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
                                omp::Directive DK, bool ForceSimpleCall,
                                bool CheckCancelFlag);
1436 
  /// Generate a flush runtime call. Returns nothing.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  void emitFlush(const LocationDescription &Loc);
1441 
1442   /// The finalization stack made up of finalize callbacks currently in-flight,
1443   /// wrapped into FinalizationInfo objects that reference also the finalization
1444   /// target block and the kind of cancellable directive.
1445   SmallVector<FinalizationInfo, 8> FinalizationStack;
1446 
1447   /// Return true if the last entry in the finalization stack is of kind \p DK
1448   /// and cancellable.
isLastFinalizationInfoCancellable(omp::Directive DK)1449   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1450     return !FinalizationStack.empty() &&
1451            FinalizationStack.back().IsCancellable &&
1452            FinalizationStack.back().DK == DK;
1453   }
1454 
  /// Generate a taskwait runtime call. Returns nothing.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  void emitTaskwaitImpl(const LocationDescription &Loc);
1459 
  /// Generate a taskyield runtime call. Returns nothing.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  void emitTaskyieldImpl(const LocationDescription &Loc);
1464 
  /// Return the current thread ID.
  ///
  /// \param Ident The ident (ident_t*) describing the query origin.
  ///
  /// \returns A Value representing the thread ID.
  Value *getOrCreateThreadID(Value *Ident);
1469 
1470   /// The OpenMPIRBuilder Configuration
1471   OpenMPIRBuilderConfig Config;
1472 
1473   /// The underlying LLVM-IR module
1474   Module &M;
1475 
1476   /// The LLVM-IR Builder used to create IR.
1477   IRBuilder<> Builder;
1478 
1479   /// Map to remember source location strings
1480   StringMap<Constant *> SrcLocStrMap;
1481 
1482   /// Map to remember existing ident_t*.
1483   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
1484 
1485   /// Info manager to keep track of target regions.
1486   OffloadEntriesInfoManager OffloadInfoManager;
1487 
1488   /// The target triple of the underlying module.
1489   const Triple T;
1490 
1491   /// Helper that contains information about regions we need to outline
1492   /// during finalization.
1493   struct OutlineInfo {
1494     using PostOutlineCBTy = std::function<void(Function &)>;
1495     PostOutlineCBTy PostOutlineCB;
1496     BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1497     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1498 
1499     /// Collect all blocks in between EntryBB and ExitBB in both the given
1500     /// vector and set.
1501     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1502                        SmallVectorImpl<BasicBlock *> &BlockVector);
1503 
1504     /// Return the function that contains the region to be outlined.
getFunctionOutlineInfo1505     Function *getFunction() const { return EntryBB->getParent(); }
1506   };
1507 
1508   /// Collection of regions that need to be outlined during finalization.
1509   SmallVector<OutlineInfo, 16> OutlineInfos;
1510 
  /// A collection of candidate target functions whose constant allocas we will
  /// attempt to raise on a call of finalize after all currently enqueued
  /// outline infos have been processed.
1514   SmallVector<llvm::Function *, 16> ConstantAllocaRaiseCandidates;
1515 
1516   /// Collection of owned canonical loop objects that eventually need to be
1517   /// free'd.
1518   std::forward_list<CanonicalLoopInfo> LoopInfos;
1519 
1520   /// Add a new region that will be outlined later.
addOutlineInfo(OutlineInfo && OI)1521   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1522 
1523   /// An ordered map of auto-generated variables to their unique names.
1524   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1525   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1526   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1527   /// variables.
1528   StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
1529 
  /// Computes the size of type in bytes.
  ///
  /// \param BasePtr The value the size is computed from (presumably a pointer
  ///                whose underlying type is measured -- confirm against the
  ///                implementation).
  ///
  /// \returns A Value representing the size in bytes.
  Value *getSizeInBytes(Value *BasePtr);
1532 
  /// Emit a branch from the current block to the \p Target block only if
  /// the current block has a terminator.
  ///
  /// NOTE(review): the description of emitBlock states that a branch is added
  /// when the current block does *not* have a terminator, so the condition
  /// worded above may be inverted -- confirm against the implementation.
  ///
  /// \param Target The block to branch to.
  void emitBranch(BasicBlock *Target);
1536 
  /// If \p BB has no use then delete it and return. Else place \p BB after the
  /// current block, if possible, or else at the end of the function. Also add
  /// a branch from current block to \p BB if current block does not have a
  /// terminator.
  ///
  /// \param BB         The block to emit.
  /// \param CurFn      The function \p BB is placed into.
  /// \param IsFinished Defaults to false. NOTE(review): presumably gates the
  ///                   "no use, delete it" path described above -- confirm
  ///                   against the implementation.
  void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1541 
  /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
  /// Here is the logic:
  /// if (Cond) {
  ///   ThenGen();
  /// } else {
  ///   ElseGen();
  /// }
  ///
  /// \param Cond     The condition selecting between the two branches.
  /// \param ThenGen  Callback generating the code for the true branch.
  /// \param ElseGen  Callback generating the code for the false branch.
  /// \param AllocaIP Insertion point for allocas (presumably forwarded to the
  ///                 callbacks -- confirm); defaults to an empty insert point.
  void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
                    BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1551 
  /// Create the global variable holding the offload mappings information.
  ///
  /// \param Mappings The map-type values to store.
  /// \param VarName  The name for the created global variable.
  ///
  /// \returns The created global variable.
  GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                        std::string VarName);
1555 
  /// Create the global variable holding the offload names information.
  ///
  /// \param Names   The name constants to store.
  /// \param VarName The name for the created global variable.
  ///
  /// \returns The created global variable.
  GlobalVariable *
  createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                        std::string VarName);
1560 
  /// The alloca instructions used in calls to mapper functions (filled in by
  /// createMapperAllocas and consumed by emitMapperCall). All members are
  /// null until set.
  struct MapperAllocas {
    /// Alloca for the base-pointer arguments (per the name -- confirm).
    AllocaInst *ArgsBase = nullptr;
    /// Alloca for the arguments (per the name -- confirm).
    AllocaInst *Args = nullptr;
    /// Alloca for the argument sizes (per the name -- confirm).
    AllocaInst *ArgSizes = nullptr;
  };
1566 
  /// Create the allocas instruction used in call to mapper functions.
  ///
  /// \param Loc           The source location description.
  /// \param AllocaIP      The insertion point to be used for alloca
  ///                      instructions.
  /// \param NumOperands   Number of operands in the mapper call.
  /// \param MapperAllocas Output: receives the created allocas.
  void createMapperAllocas(const LocationDescription &Loc,
                           InsertPointTy AllocaIP, unsigned NumOperands,
                           struct MapperAllocas &MapperAllocas);
1571 
  /// Create the call for the target mapper function. Returns nothing.
  ///
  /// \param Loc The source location description.
  /// \param MapperFunc Function to be called.
  /// \param SrcLocInfo Source location information global.
  /// \param MaptypesArg The argument types.
  /// \param MapnamesArg The argument names.
  /// \param MapperAllocas The AllocaInst used for the call.
  /// \param DeviceID Device ID for the call.
  /// \param NumOperands Number of operands in the call.
  void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                      Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                      struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                      unsigned NumOperands);
1585 
  /// Container for the arguments used to pass data to the runtime library.
  /// Every array pointer is null unless explicitly provided.
  struct TargetDataRTArgs {
    /// The array of base pointers passed to the runtime library.
    Value *BasePointersArray = nullptr;
    /// The array of section pointers passed to the runtime library.
    Value *PointersArray = nullptr;
    /// The array of sizes passed to the runtime library.
    Value *SizesArray = nullptr;
    /// The array of map types passed to the runtime library for the beginning
    /// of the region or for the entire region if there are no separate map
    /// types for the region end.
    Value *MapTypesArray = nullptr;
    /// The array of map types passed to the runtime library for the end of the
    /// region, or nullptr if there are no separate map types for the region
    /// end.
    Value *MapTypesArrayEnd = nullptr;
    /// The array of user-defined mappers passed to the runtime library.
    Value *MappersArray = nullptr;
    /// The array of original declaration names of mapped pointers sent to the
    /// runtime library for debugging.
    Value *MapNamesArray = nullptr;

    /// Construct with all arrays null.
    explicit TargetDataRTArgs() {}
    /// Construct from explicitly provided arrays; each argument is stored in
    /// the member of the same name.
    explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
                              Value *SizesArray, Value *MapTypesArray,
                              Value *MapTypesArrayEnd, Value *MappersArray,
                              Value *MapNamesArray)
        : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
          SizesArray(SizesArray), MapTypesArray(MapTypesArray),
          MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
          MapNamesArray(MapNamesArray) {}
  };
1618 
  /// Data structure that contains the needed information to construct the
  /// kernel args vector.
  ///
  /// There is no default constructor; every member is set by the single
  /// constructor below.
  struct TargetKernelArgs {
    /// Number of arguments passed to the runtime library.
    unsigned NumTargetItems;
    /// Arguments passed to the runtime library.
    TargetDataRTArgs RTArgs;
    /// The number of iterations.
    Value *NumIterations;
    /// The number of teams.
    Value *NumTeams;
    /// The number of threads.
    Value *NumThreads;
    /// The size of the dynamic shared memory.
    Value *DynCGGroupMem;
    /// True if the kernel has 'no wait' clause.
    bool HasNoWait;

    /// Constructor for TargetKernelArgs; each argument is stored in the member
    /// of the same name.
    TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
                     Value *NumIterations, Value *NumTeams, Value *NumThreads,
                     Value *DynCGGroupMem, bool HasNoWait)
        : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
          NumIterations(NumIterations), NumTeams(NumTeams),
          NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
          HasNoWait(HasNoWait) {}
  };
1646 
  /// Create the kernel args vector used by emitTargetKernel. This function
  /// creates various constant values that are used in the resulting args
  /// vector.
  ///
  /// \param KernelArgs The kernel argument description to read from.
  /// \param Builder    The IR builder used to create the constant values.
  /// \param ArgsVector Output: receives the resulting argument values.
  static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
                                  IRBuilderBase &Builder,
                                  SmallVector<Value *> &ArgsVector);
1653 
1654   /// Struct that keeps the information that should be kept throughout
1655   /// a 'target data' region.
1656   class TargetDataInfo {
1657     /// Set to true if device pointer information have to be obtained.
1658     bool RequiresDevicePointerInfo = false;
1659     /// Set to true if Clang emits separate runtime calls for the beginning and
1660     /// end of the region.  These calls might have separate map type arrays.
1661     bool SeparateBeginEndCalls = false;
1662 
1663   public:
1664     TargetDataRTArgs RTArgs;
1665 
1666     SmallMapVector<const Value *, std::pair<Value *, Value *>, 4>
1667         DevicePtrInfoMap;
1668 
1669     /// Indicate whether any user-defined mapper exists.
1670     bool HasMapper = false;
1671     /// The total number of pointers passed to the runtime library.
1672     unsigned NumberOfPtrs = 0u;
1673 
TargetDataInfo()1674     explicit TargetDataInfo() {}
TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1675     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1676                             bool SeparateBeginEndCalls)
1677         : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1678           SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1679     /// Clear information about the data arrays.
clearArrayInfo()1680     void clearArrayInfo() {
1681       RTArgs = TargetDataRTArgs();
1682       HasMapper = false;
1683       NumberOfPtrs = 0u;
1684     }
1685     /// Return true if the current target data information has valid arrays.
isValid()1686     bool isValid() {
1687       return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1688              RTArgs.SizesArray && RTArgs.MapTypesArray &&
1689              (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1690     }
requiresDevicePointerInfo()1691     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
separateBeginEndCalls()1692     bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1693   };
1694 
  /// Kind of device information attached to a mapped entry (see
  /// MapInfosTy::DevicePointers). NOTE(review): the precise meaning of
  /// Pointer vs. Address is not visible here -- confirm against the users.
  enum class DeviceInfoTy { None, Pointer, Address };
  /// Element containers used by MapInfosTy; each has an inline capacity of 4.
  using MapValuesArrayTy = SmallVector<Value *, 4>;
  using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
  using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
  using MapNamesArrayTy = SmallVector<Constant *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// A vector of value vectors, used for the non-contiguous descriptors.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
1702 
1703   /// This structure contains combined information generated for mappable
1704   /// clauses, including base pointers, pointers, sizes, map types, user-defined
1705   /// mappers, and non-contiguous information.
1706   struct MapInfosTy {
1707     struct StructNonContiguousInfo {
1708       bool IsNonContiguous = false;
1709       MapDimArrayTy Dims;
1710       MapNonContiguousArrayTy Offsets;
1711       MapNonContiguousArrayTy Counts;
1712       MapNonContiguousArrayTy Strides;
1713     };
1714     MapValuesArrayTy BasePointers;
1715     MapValuesArrayTy Pointers;
1716     MapDeviceInfoArrayTy DevicePointers;
1717     MapValuesArrayTy Sizes;
1718     MapFlagsArrayTy Types;
1719     MapNamesArrayTy Names;
1720     StructNonContiguousInfo NonContigInfo;
1721 
1722     /// Append arrays in \a CurInfo.
appendMapInfosTy1723     void append(MapInfosTy &CurInfo) {
1724       BasePointers.append(CurInfo.BasePointers.begin(),
1725                           CurInfo.BasePointers.end());
1726       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1727       DevicePointers.append(CurInfo.DevicePointers.begin(),
1728                             CurInfo.DevicePointers.end());
1729       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1730       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1731       Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1732       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
1733                                 CurInfo.NonContigInfo.Dims.end());
1734       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
1735                                    CurInfo.NonContigInfo.Offsets.end());
1736       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
1737                                   CurInfo.NonContigInfo.Counts.end());
1738       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
1739                                    CurInfo.NonContigInfo.Strides.end());
1740     }
1741   };
1742 
  /// Callback function type for functions emitting the host fallback code that
  /// is executed when the kernel launch fails. It takes an insertion point as
  /// parameter where the code should be emitted. It returns an insertion point
  /// that points right after the emitted code.
  using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
1748 
  /// Generate a target region entry call and host fallback call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param OutlinedFn The outlined kernel function.
  /// \param OutlinedFnID The outlined function ID.
  /// \param EmitTargetCallFallbackCB Call back function to generate host
  ///        fallback code.
  /// \param Args Data structure holding information about the kernel arguments.
  /// \param DeviceID Identifier for the device via the 'device' clause.
  /// \param RTLoc Source location identifier
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  ///
  /// \returns An insertion point; presumably the position after the emitted
  ///          code -- confirm against the implementation.
  InsertPointTy emitKernelLaunch(
      const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
      EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
      Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1764 
  /// Emit the arguments to be passed to the runtime library based on the
  /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
  /// ForEndCall, emit map types to be passed for the end of the region instead
  /// of the beginning.
  ///
  /// \param Builder    The IR builder used for emission.
  /// \param RTArgs     The runtime argument container (presumably filled in
  ///                   by this call -- confirm).
  /// \param Info       The target data information holding the arrays.
  /// \param EmitDebug  Defaults to false. NOTE(review): presumably controls
  ///                   whether map names are emitted -- confirm.
  /// \param ForEndCall Select the region-end map types; defaults to false.
  void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                    OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                    OpenMPIRBuilder::TargetDataInfo &Info,
                                    bool EmitDebug = false,
                                    bool ForEndCall = false);
1774 
  /// Emit an array of struct descriptors to be assigned to the offload args.
  ///
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param CodeGenIP    The insertion point for the remaining generated code.
  /// \param CombinedInfo The combined map information (its NonContigInfo is
  ///                     presumably the source of the descriptors -- confirm).
  /// \param Info         The target data information of the region.
  void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                   InsertPointTy CodeGenIP,
                                   MapInfosTy &CombinedInfo,
                                   TargetDataInfo &Info);
1780 
  /// Emit the arrays used to pass the captures and map information to the
  /// offloading runtime library. If there is no map or capture information,
  /// return nullptr by reference.
  ///
  /// \param AllocaIP        The insertion point to be used for alloca
  ///                        instructions.
  /// \param CodeGenIP       The insertion point for the remaining generated
  ///                        code.
  /// \param CombinedInfo    The combined map information to emit.
  /// \param Info            The target data information that receives the
  ///                        emitted arrays.
  /// \param IsNonContiguous Defaults to false. NOTE(review): presumably
  ///                        enables emission of non-contiguous descriptors --
  ///                        confirm.
  /// \param DeviceAddrCB    Optional callback taking an index and a Value;
  ///                        defaults to null. NOTE(review): invocation
  ///                        conditions are not visible here.
  /// \param CustomMapperCB  Optional callback taking an index and returning a
  ///                        Value; defaults to null. NOTE(review): presumably
  ///                        yields the user-defined mapper for that entry --
  ///                        confirm.
  void emitOffloadingArrays(
      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
      TargetDataInfo &Info, bool IsNonContiguous = false,
      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1789 
  /// Creates offloading entry for the provided entry ID \a ID, address \a
  /// Addr, size \a Size, and flags \a Flags.
  ///
  /// The fifth parameter (unnamed in this declaration) is a linkage type.
  /// \param Name Name for the entry; defaults to the empty string.
  void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                          int32_t Flags, GlobalValue::LinkageTypes,
                          StringRef Name = "");
1795 
  /// The kind of errors that can occur when emitting the offload entries and
  /// metadata. Passed as first argument to EmitMetadataErrorReportFunctionTy.
  enum EmitMetadataErrorKind {
    /// Error related to a target region entry.
    EMIT_MD_TARGET_REGION_ERROR,
    /// Error related to a declare target entry.
    EMIT_MD_DECLARE_TARGET_ERROR,
    /// Error related to a global variable link entry.
    EMIT_MD_GLOBAL_VAR_LINK_ERROR
  };
1803 
  /// Callback function type used to report an error: it receives the error
  /// kind and a TargetRegionEntryInfo, and returns nothing.
  using EmitMetadataErrorReportFunctionTy =
      std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1807 
  /// Emit the offloading entries and metadata so that the device codegen side
  /// can easily figure out what to emit. The produced metadata looks like
  /// this:
  ///
  /// !omp_offload.info = !{!1, ...}
  ///
  /// We only generate metadata for functions that contain target regions.
  ///
  /// \param ErrorReportFunction Callback used to report errors encountered
  ///                            while emitting.
  void createOffloadEntriesAndInfoMetadata(
      EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1817 
1818 public:
  /// Generator for __kmpc_copyprivate
  ///
  /// \param Loc The source location description.
  /// \param BufSize Number of elements in the buffer.
  /// \param CpyBuf List of pointers to data to be copied.
  /// \param CpyFn function to call for copying data.
  /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
  ///
  /// \return The insertion position *after* the CopyPrivate call.
  InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                  llvm::Value *BufSize, llvm::Value *CpyBuf,
                                  llvm::Value *CpyFn, llvm::Value *DidIt);
1832 
  /// Generator for '#omp single'
  ///
  /// \param Loc The source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsNowait If false, a barrier is emitted.
  /// \param CPVars copyprivate variables. Defaults to empty.
  /// \param CPFuncs copy functions to use for each copyprivate variable.
  ///                Defaults to empty. NOTE(review): presumably expected to
  ///                have one entry per element of \p CPVars -- confirm.
  ///
  /// \returns The insertion position *after* the single call.
  InsertPointTy createSingle(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB, bool IsNowait,
                             ArrayRef<llvm::Value *> CPVars = {},
                             ArrayRef<llvm::Function *> CPFuncs = {});
1848 
1849   /// Generator for '#omp master'
1850   ///
1851   /// \param Loc The insert and source location description.
1852   /// \param BodyGenCB Callback that will generate the region code.
1853   /// \param FiniCB Callback to finalize variable copies.
1854   ///
1855   /// \returns The insertion position *after* the master.
1856   InsertPointTy createMaster(const LocationDescription &Loc,
1857                              BodyGenCallbackTy BodyGenCB,
1858                              FinalizeCallbackTy FiniCB);
1859 
1860   /// Generator for '#omp masked'
1861   ///
1862   /// \param Loc The insert and source location description.
1863   /// \param BodyGenCB Callback that will generate the region code.
1864   /// \param FiniCB Callback to finalize variable copies.
1865   ///
1866   /// \returns The insertion position *after* the masked.
1867   InsertPointTy createMasked(const LocationDescription &Loc,
1868                              BodyGenCallbackTy BodyGenCB,
1869                              FinalizeCallbackTy FiniCB, Value *Filter);
1870 
1871   /// Generator for '#omp critical'
1872   ///
1873   /// \param Loc The insert and source location description.
1874   /// \param BodyGenCB Callback that will generate the region body code.
1875   /// \param FiniCB Callback to finalize variable copies.
1876   /// \param CriticalName name of the lock used by the critical directive
1877   /// \param HintInst Hint Instruction for hint clause associated with critical
1878   ///
1879   /// \returns The insertion position *after* the critical.
1880   InsertPointTy createCritical(const LocationDescription &Loc,
1881                                BodyGenCallbackTy BodyGenCB,
1882                                FinalizeCallbackTy FiniCB,
1883                                StringRef CriticalName, Value *HintInst);
1884 
1885   /// Generator for '#omp ordered depend (source | sink)'
1886   ///
1887   /// \param Loc The insert and source location description.
1888   /// \param AllocaIP The insertion point to be used for alloca instructions.
1889   /// \param NumLoops The number of loops in depend clause.
1890   /// \param StoreValues The values that will be stored in the vector address.
1891   /// \param Name The name of alloca instruction.
1892   /// \param IsDependSource If true, depend source; otherwise, depend sink.
1893   ///
1894   /// \return The insertion position *after* the ordered.
1895   InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1896                                     InsertPointTy AllocaIP, unsigned NumLoops,
1897                                     ArrayRef<llvm::Value *> StoreValues,
1898                                     const Twine &Name, bool IsDependSource);
1899 
1900   /// Generator for '#omp ordered [threads | simd]'
1901   ///
1902   /// \param Loc The insert and source location description.
1903   /// \param BodyGenCB Callback that will generate the region code.
1904   /// \param FiniCB Callback to finalize variable copies.
1905   /// \param IsThreads If true, with threads clause or without clause;
1906   /// otherwise, with simd clause;
1907   ///
1908   /// \returns The insertion position *after* the ordered.
1909   InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1910                                          BodyGenCallbackTy BodyGenCB,
1911                                          FinalizeCallbackTy FiniCB,
1912                                          bool IsThreads);
1913 
1914   /// Generator for '#omp sections'
1915   ///
1916   /// \param Loc The insert and source location description.
1917   /// \param AllocaIP The insertion points to be used for alloca instructions.
1918   /// \param SectionCBs Callbacks that will generate body of each section.
1919   /// \param PrivCB Callback to copy a given variable (think copy constructor).
1920   /// \param FiniCB Callback to finalize variable copies.
1921   /// \param IsCancellable Flag to indicate a cancellable parallel region.
1922   /// \param IsNowait If true, barrier - to ensure all sections are executed
1923   /// before moving forward will not be generated.
1924   /// \returns The insertion position *after* the sections.
1925   InsertPointTy createSections(const LocationDescription &Loc,
1926                                InsertPointTy AllocaIP,
1927                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1928                                PrivatizeCallbackTy PrivCB,
1929                                FinalizeCallbackTy FiniCB, bool IsCancellable,
1930                                bool IsNowait);
1931 
1932   /// Generator for '#omp section'
1933   ///
1934   /// \param Loc The insert and source location description.
1935   /// \param BodyGenCB Callback that will generate the region body code.
1936   /// \param FiniCB Callback to finalize variable copies.
1937   /// \returns The insertion position *after* the section.
1938   InsertPointTy createSection(const LocationDescription &Loc,
1939                               BodyGenCallbackTy BodyGenCB,
1940                               FinalizeCallbackTy FiniCB);
1941 
1942   /// Generator for `#omp teams`
1943   ///
1944   /// \param Loc The location where the teams construct was encountered.
1945   /// \param BodyGenCB Callback that will generate the region code.
1946   /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
1947   ///        it is as if lower bound is specified as equal to upperbound. If
1948   ///        this is non-null, then upperbound must also be non-null.
1949   /// \param NumTeamsUpper Upper bound on the number of teams.
1950   /// \param ThreadLimit on the number of threads that may participate in a
1951   ///        contention group created by each team.
1952   /// \param IfExpr is the integer argument value of the if condition on the
1953   ///        teams clause.
1954   InsertPointTy
1955   createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1956               Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
1957               Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1958 
1959   /// Generate conditional branch and relevant BasicBlocks through which private
1960   /// threads copy the 'copyin' variables from Master copy to threadprivate
1961   /// copies.
1962   ///
1963   /// \param IP insertion block for copyin conditional
1964   /// \param MasterAddr a pointer to the master variable
1965   /// \param PrivateAddr a pointer to the threadprivate variable
1966   /// \param IntPtrTy Pointer size type
1967   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1968   ///        and copy.in.end block
1969   ///
1970   /// \returns The insertion point where the copying operation is to be emitted.
1971   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
1972                                          Value *PrivateAddr,
1973                                          llvm::IntegerType *IntPtrTy,
1974                                          bool BranchtoEnd = true);
1975 
1976   /// Create a runtime call for kmpc_Alloc
1977   ///
1978   /// \param Loc The insert and source location description.
1979   /// \param Size Size of allocated memory space
1980   /// \param Allocator Allocator information instruction
1981   /// \param Name Name of call Instruction for OMP_alloc
1982   ///
1983   /// \returns CallInst to the OMP_Alloc call
1984   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1985                            Value *Allocator, std::string Name = "");
1986 
1987   /// Create a runtime call for kmpc_free
1988   ///
1989   /// \param Loc The insert and source location description.
1990   /// \param Addr Address of memory space to be freed
1991   /// \param Allocator Allocator information instruction
1992   /// \param Name Name of call Instruction for OMP_Free
1993   ///
1994   /// \returns CallInst to the OMP_Free call
1995   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1996                           Value *Allocator, std::string Name = "");
1997 
1998   /// Create a runtime call for kmpc_threadprivate_cached
1999   ///
2000   /// \param Loc The insert and source location description.
2001   /// \param Pointer pointer to data to be cached
2002   /// \param Size size of data to be cached
2003   /// \param Name Name of call Instruction for callinst
2004   ///
2005   /// \returns CallInst to the thread private cache call.
2006   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
2007                                       llvm::Value *Pointer,
2008                                       llvm::ConstantInt *Size,
2009                                       const llvm::Twine &Name = Twine(""));
2010 
2011   /// Create a runtime call for __tgt_interop_init
2012   ///
2013   /// \param Loc The insert and source location description.
2014   /// \param InteropVar variable to be allocated
2015   /// \param InteropType type of interop operation
2016   /// \param Device device to which offloading will occur
2017   /// \param NumDependences  number of dependence variables
2018   /// \param DependenceAddress pointer to dependence variables
2019   /// \param HaveNowaitClause does nowait clause exist
2020   ///
2021   /// \returns CallInst to the __tgt_interop_init call
2022   CallInst *createOMPInteropInit(const LocationDescription &Loc,
2023                                  Value *InteropVar,
2024                                  omp::OMPInteropType InteropType, Value *Device,
2025                                  Value *NumDependences,
2026                                  Value *DependenceAddress,
2027                                  bool HaveNowaitClause);
2028 
2029   /// Create a runtime call for __tgt_interop_destroy
2030   ///
2031   /// \param Loc The insert and source location description.
2032   /// \param InteropVar variable to be allocated
2033   /// \param Device device to which offloading will occur
2034   /// \param NumDependences  number of dependence variables
2035   /// \param DependenceAddress pointer to dependence variables
2036   /// \param HaveNowaitClause does nowait clause exist
2037   ///
2038   /// \returns CallInst to the __tgt_interop_destroy call
2039   CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
2040                                     Value *InteropVar, Value *Device,
2041                                     Value *NumDependences,
2042                                     Value *DependenceAddress,
2043                                     bool HaveNowaitClause);
2044 
2045   /// Create a runtime call for __tgt_interop_use
2046   ///
2047   /// \param Loc The insert and source location description.
2048   /// \param InteropVar variable to be allocated
2049   /// \param Device device to which offloading will occur
2050   /// \param NumDependences  number of dependence variables
2051   /// \param DependenceAddress pointer to dependence variables
2052   /// \param HaveNowaitClause does nowait clause exist
2053   ///
2054   /// \returns CallInst to the __tgt_interop_use call
2055   CallInst *createOMPInteropUse(const LocationDescription &Loc,
2056                                 Value *InteropVar, Value *Device,
2057                                 Value *NumDependences, Value *DependenceAddress,
2058                                 bool HaveNowaitClause);
2059 
2060   /// The `omp target` interface
2061   ///
2062   /// For more information about the usage of this interface,
2063   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2064   ///
2065   ///{
2066 
2067   /// Create a runtime call for kmpc_target_init
2068   ///
2069   /// \param Loc The insert and source location description.
2070   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
2071   /// \param MinThreadsVal Minimal number of threads, or 0.
2072   /// \param MaxThreadsVal Maximal number of threads, or 0.
2073   /// \param MinTeamsVal Minimal number of teams, or 0.
2074   /// \param MaxTeamsVal Maximal number of teams, or 0.
2075   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
2076                                  int32_t MinThreadsVal = 0,
2077                                  int32_t MaxThreadsVal = 0,
2078                                  int32_t MinTeamsVal = 0,
2079                                  int32_t MaxTeamsVal = 0);
2080 
2081   /// Create a runtime call for kmpc_target_deinit
2082   ///
2083   /// \param Loc The insert and source location description.
2084   /// \param TeamsReductionDataSize The maximal size of all the reduction data
2085   ///        for teams reduction.
2086   /// \param TeamsReductionBufferLength The number of elements (each of up to
2087   ///        \p TeamsReductionDataSize size), in the teams reduction buffer.
2088   void createTargetDeinit(const LocationDescription &Loc,
2089                           int32_t TeamsReductionDataSize = 0,
2090                           int32_t TeamsReductionBufferLength = 1024);
2091 
2092   ///}
2093 
2094   /// Helpers to read/write kernel annotations from the IR.
2095   ///
2096   ///{
2097 
2098   /// Read/write the bounds on threads for \p Kernel. Read will return 0 if none
2099   /// is set.
2100   static std::pair<int32_t, int32_t>
2101   readThreadBoundsForKernel(const Triple &T, Function &Kernel);
2102   static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
2103                                          int32_t LB, int32_t UB);
2104 
2105   /// Read/write the bounds on teams for \p Kernel. Read will return 0 if none
2106   /// is set.
2107   static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
2108                                                              Function &Kernel);
2109   static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
2110                                   int32_t UB);
2111   ///}
2112 
2113 private:
2114   // Sets the function attributes expected for the outlined function
2115   void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2116 
2117   // Creates the function ID/Address for the given outlined function.
2118   // In the case of an embedded device function the address of the function is
2119   // used, in the case of a non-offload function a constant is created.
2120   Constant *createOutlinedFunctionID(Function *OutlinedFn,
2121                                      StringRef EntryFnIDName);
2122 
2123   // Creates the region entry address for the outlined function
2124   Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2125                                         StringRef EntryFnName);
2126 
2127 public:
2128   /// Functions used to generate a function with the given name.
2129   using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2130 
2131   /// Create a unique name for the entry function using the source location
2132   /// information of the current target region. The name will be something like:
2133   ///
2134   /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2135   ///
2136   /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2137   /// mangled name of the function that encloses the target region and BB is the
2138   /// line number of the target region. CC is a count added when more than one
2139   /// region is located at the same location.
2140   ///
2141   /// If this target outline function is not an offload entry, we don't need to
2142   /// register it. This may happen if it is guarded by an if clause that is
2143   /// false at compile time, or no target archs have been specified.
2144   ///
2145   /// The created target region ID is used by the runtime library to identify
2146   /// the current target region, so it only has to be unique and not
2147   /// necessarily point to anything. It could be the pointer to the outlined
2148   /// function that implements the target region, but we aren't using that so
2149   /// that the compiler doesn't need to keep that, and could therefore inline
2150   /// the host function if proven worthwhile during optimization. On the other
2151   /// hand, if emitting code for the device, the ID has to be the function
2152   /// address so that it can be retrieved from the offloading entry and launched
2153   /// by the runtime library. We also mark the outlined function to have
2154   /// external linkage in case we are emitting code for the device, because
2155   /// these functions will be entry points to the device.
2156   ///
2157   /// \param EntryInfo The entry information about the function
2158   /// \param GenerateFunctionCallback The callback function to generate the code
2159   /// \param IsOffloadEntry Whether the outlined function is an offload entry
2160   /// \param OutlinedFn Pointer to the outlined function
2161   /// \param OutlinedFnID The ID to be created
2162   void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2163                                 FunctionGenCallback &GenerateFunctionCallback,
2164                                 bool IsOffloadEntry, Function *&OutlinedFn,
2165                                 Constant *&OutlinedFnID);
2166 
2167   /// Registers the given function and sets up the attributes of the function.
2168   ///
2169   /// \param EntryInfo The entry information about the function
2170   /// \param OutlinedFunction Pointer to the outlined function
2171   /// \param EntryFnName Name of the outlined function
2172   /// \param EntryFnIDName Name of the ID to be created
2173   ///
2174   /// \returns The FunctionID.
2175   Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2176                                          Function *OutlinedFunction,
2177                                          StringRef EntryFnName,
2178                                          StringRef EntryFnIDName);
2179 
2180   /// Type of BodyGen to use for region codegen
2181   ///
2182   /// Priv: If device pointer privatization is required, emit the body of the
2183   /// region here. It will have to be duplicated: with and without
2184   /// privatization.
2185   /// DupNoPriv: If we need device pointer privatization, we need
2186   /// to emit the body of the region with no privatization in the 'else' branch
2187   /// of the conditional.
2188   /// NoPriv: If we don't require privatization of device
2189   /// pointers, we emit the body in between the runtime calls. This avoids
2190   /// duplicating the body code.
2191   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
2192 
2193   /// Callback type for creating the map infos for the kernel parameters.
2194   /// \param CodeGenIP is the insertion point where code should be generated,
2195   ///        if any.
2196   using GenMapInfoCallbackTy =
2197       function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
2198 
2199   /// Generator for '#omp target data'
2200   ///
2201   /// \param Loc The location where the target data construct was encountered.
2202   /// \param AllocaIP The insertion points to be used for alloca instructions.
2203   /// \param CodeGenIP The insertion point at which the target directive code
2204   /// should be placed.
2205   /// \param DeviceID Stores the DeviceID from the device clause.
2206   /// \param IfCond Value which corresponds to the if clause condition.
2207   /// \param Info Stores all information related to the Target Data directive.
2208   /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2209   /// \param MapperFunc Optional; presumably the mapper runtime call to emit.
2210   /// \param BodyGenCB Optional Callback to generate the region code.
2211   /// \param DeviceAddrCB Optional callback to generate code related to
2212   /// use_device_ptr and use_device_addr.
2213   /// \param CustomMapperCB Optional callback to generate code related to
2214   /// custom mappers.
2215   /// \param SrcLocInfo Optional source location info; semantics TODO confirm.
2216   OpenMPIRBuilder::InsertPointTy createTargetData(
2217       const LocationDescription &Loc, InsertPointTy AllocaIP,
2218       InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2219       TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
2220       omp::RuntimeFunction *MapperFunc = nullptr,
2221       function_ref<InsertPointTy(InsertPointTy CodeGenIP,
2222                                  BodyGenTy BodyGenType)>
2223           BodyGenCB = nullptr,
2224       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2225       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2226       Value *SrcLocInfo = nullptr);
2227 
2228   using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
2229       InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2230 
2231   using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy(
2232       Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
2233       InsertPointTy CodeGenIP)>;
2234 
2235   /// Generator for '#omp target'
2236   ///
2237   /// \param Loc Where the target construct was encountered.
2238   /// \param AllocaIP The insertion point to be used for alloca instructions.
2239   /// \param CodeGenIP Insertion point for the call to the outlined function.
2240   /// \param EntryInfo The entry information about the function.
2241   /// \param NumTeams Number of teams specified in the num_teams clause.
2242   /// \param NumThreads Number of threads specified in the thread_limit clause.
2243   /// \param Inputs Region inputs passed as arguments to the outlined function.
2244   /// \param GenMapInfoCB Callback creating the map infos for kernel parameters.
2245   /// \param BodyGenCB Callback that will generate the region code.
2246   /// \param ArgAccessorFuncCB Callback that will generate accessors
2247   /// instructions for passed in target arguments where necessary
2248   InsertPointTy createTarget(const LocationDescription &Loc,
2249                              OpenMPIRBuilder::InsertPointTy AllocaIP,
2250                              OpenMPIRBuilder::InsertPointTy CodeGenIP,
2251                              TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2252                              int32_t NumThreads,
2253                              SmallVectorImpl<Value *> &Inputs,
2254                              GenMapInfoCallbackTy GenMapInfoCB,
2255                              TargetBodyGenCallbackTy BodyGenCB,
2256                              TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2257 
2258   /// Returns __kmpc_for_static_init_* runtime function for the specified
2259   /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2260   /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2261   FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2262                                              bool IsGPUDistribute);
2263 
2264   /// Returns __kmpc_dispatch_init_* runtime function for the specified
2265   /// size \a IVSize and sign \a IVSigned.
2266   FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2267 
2268   /// Returns __kmpc_dispatch_next_* runtime function for the specified
2269   /// size \a IVSize and sign \a IVSigned.
2270   FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2271 
2272   /// Returns __kmpc_dispatch_fini_* runtime function for the specified
2273   /// size \a IVSize and sign \a IVSigned.
2274   FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2275 
2276   /// Declarations for LLVM-IR types (simple, array, function and structure) are
2277   /// generated below. Their names are defined and used in OpenMPKinds.def. Here
2278   /// we provide the declarations, the initializeTypes function will provide the
2279   /// values.
2280   ///
2281   ///{
2282 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2283 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
2284   ArrayType *VarName##Ty = nullptr;                                            \
2285   PointerType *VarName##PtrTy = nullptr;
2286 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
2287   FunctionType *VarName = nullptr;                                             \
2288   PointerType *VarName##Ptr = nullptr;
2289 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
2290   StructType *VarName = nullptr;                                               \
2291   PointerType *VarName##Ptr = nullptr;
2292 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2293 
2294   ///}
2295 
2296 private:
2297   /// Create all simple and struct types exposed by the runtime and remember
2298   /// the llvm::PointerTypes of them for easy access later.
2299   void initializeTypes(Module &M);
2300 
2301   /// Common interface for generating entry calls for OMP Directives.
2302   /// if the directive has a region/body, It will set the insertion
2303   /// point to the body
2304   ///
2305   /// \param OMPD Directive to generate entry blocks for
2306   /// \param EntryCall Call to the entry OMP Runtime Function
2307   /// \param ExitBB block where the region ends.
2308   /// \param Conditional indicate if the entry call result will be used
2309   ///        to evaluate a conditional of whether a thread will execute
2310   ///        body code or not.
2311   ///
2312   /// \return The insertion position in exit block
2313   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2314                                          BasicBlock *ExitBB,
2315                                          bool Conditional = false);
2316 
2317   /// Common interface to finalize the region
2318   ///
2319   /// \param OMPD Directive to generate exiting code for
2320   /// \param FinIP Insertion point for emitting Finalization code and exit call
2321   /// \param ExitCall Call to the ending OMP Runtime Function
2322   /// \param HasFinalize indicate if the directive will require finalization
2323   ///         and has a finalization callback in the stack that
2324   ///        should be called.
2325   ///
2326   /// \return The insertion position in exit block
2327   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2328                                         InsertPointTy FinIP,
2329                                         Instruction *ExitCall,
2330                                         bool HasFinalize = true);
2331 
2332   /// Common Interface to generate OMP inlined regions
2333   ///
2334   /// \param OMPD Directive to generate inlined region for
2335   /// \param EntryCall Call to the entry OMP Runtime Function
2336   /// \param ExitCall Call to the ending OMP Runtime Function
2337   /// \param BodyGenCB Body code generation callback.
2338   /// \param FiniCB Finalization Callback. Will be called when finalizing region
2339   /// \param Conditional indicate if the entry call result will be used
2340   ///        to evaluate a conditional of whether a thread will execute
2341   ///        body code or not.
2342   /// \param HasFinalize indicate if the directive will require finalization
2343   ///        and has a finalization callback in the stack that
2344   ///        should be called.
2345   /// \param IsCancellable if HasFinalize is set to true, indicate if the
2346   ///        directive should be cancellable.
2347   /// \return The insertion point after the region
2348 
2349   InsertPointTy
2350   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2351                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2352                        FinalizeCallbackTy FiniCB, bool Conditional = false,
2353                        bool HasFinalize = true, bool IsCancellable = false);
2354 
2355   /// Get the platform-specific name separator.
2356   /// \param Parts different parts of the final name that needs separation
2357   /// \param FirstSeparator First separator used between the initial two
2358   ///        parts of the name.
2359   /// \param Separator separator used between all of the rest consecutive
2360   ///        parts of the name
2361   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2362                                            StringRef FirstSeparator,
2363                                            StringRef Separator);
2364 
2365   /// Returns corresponding lock object for the specified critical region
2366   /// name. If the lock object does not exist it is created, otherwise the
2367   /// reference to the existing copy is returned.
2368   /// \param CriticalName Name of the critical region.
2369   ///
2370   Value *getOMPCriticalRegionLock(StringRef CriticalName);
2371 
2372   /// Callback type for Atomic Expression update
2373   /// ex:
2374   /// \code{.cpp}
2375   /// unsigned x = 0;
2376   /// #pragma omp atomic update
2377   /// x = Expr(x_old);  //Expr() is any legal operation
2378   /// \endcode
2379   ///
2380   /// \param XOld the value of the atomic memory address to use for update
2381   /// \param IRB reference to the IRBuilder to use
2382   ///
2383   /// \returns Value to update X to.
2384   using AtomicUpdateCallbackTy =
2385       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2386 
2387 private:
2388   enum AtomicKind { Read, Write, Update, Capture, Compare };
2389 
2390   /// Determine whether to emit flush or not
2391   ///
2392   /// \param Loc    The insert and source location description.
2393   /// \param AO     The required atomic ordering
2394   /// \param AK     The OpenMP atomic operation kind used.
2395   ///
2396   /// \returns Whether a flush was emitted or not.
2397   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2398                                     AtomicOrdering AO, AtomicKind AK);
2399 
2400   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
2401   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2402   /// Only Scalar data types.
2403   ///
2404   /// \param AllocaIP     The insertion point to be used for alloca
2405   ///                     instructions.
2406   /// \param X            The target atomic pointer to be updated
2407   /// \param XElemTy      The element type of the atomic pointer.
2408   /// \param Expr         The value to update X with.
2409   /// \param AO           Atomic ordering of the generated atomic
2410   ///                     instructions.
2411   /// \param RMWOp        The binary operation used for update. If the
2412   ///                     operation is not supported by atomicRMW, or
2413   ///                     belongs to {FADD, FSUB, BAD_BINOP}, then a
2414   ///                     `cmpExch` based atomic will be generated.
2415   /// \param UpdateOp     Code generator for complex expressions that cannot be
2416   ///                     expressed through atomicrmw instruction.
2417   /// \param VolatileX    true if \a X is volatile.
2418   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2419   ///                     update expression, false otherwise.
2420   ///                     (e.g. true for X = X BinOp Expr)
2421   ///
2422   /// \returns A pair of the old value of X before the update, and the value
2423   ///          used for the update.
2424   std::pair<Value *, Value *>
2425   emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2426                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2427                    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2428                    bool IsXBinopExpr);
2429 
2430   /// Emit the binary op described by \p RMWOp, using \p Src1 and \p Src2.
2431   ///
2432   /// \returns The emitted instruction.
2433   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2434                                 AtomicRMWInst::BinOp RMWOp);
2435 
2436 public:
2437   /// A struct to pack relevant information while generating atomic ops.
2438   struct AtomicOpValue {
2439     Value *Var = nullptr;    // NOTE(review): presumably the pointer operated on
2440     Type *ElemTy = nullptr;  // NOTE(review): presumably the element type of Var
2441     bool IsSigned = false;
2442     bool IsVolatile = false;
2443   };
2444 
2445   /// Emit atomic read for: V = X --- Only scalar data types.
2446   ///
2447   /// \param Loc The insert and source location description.
2448   /// \param X   The target pointer to be atomically read.
2449   /// \param V   Memory address where to store the atomically read
2450   ///            value.
2451   /// \param AO  Atomic ordering of the generated atomic
2452   ///            instructions.
2453   ///
2454   /// \return Insertion point after generated atomic read IR.
2455   InsertPointTy createAtomicRead(const LocationDescription &Loc,
2456                                  AtomicOpValue &X, AtomicOpValue &V,
2457                                  AtomicOrdering AO);
2458 
2459   /// Emit atomic write for: X = Expr --- Only scalar data types.
2460   ///
2461   /// \param Loc  The insert and source location description.
2462   /// \param X    The target pointer to be atomically written to.
2463   /// \param Expr The value to store.
2464   /// \param AO   Atomic ordering of the generated atomic
2465   ///             instructions.
2466   ///
2467   /// \return Insertion point after generated atomic write IR.
2468   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
2469                                   AtomicOpValue &X, Value *Expr,
2470                                   AtomicOrdering AO);
2471 
2472   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
2473   /// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X).
2474   /// Only scalar data types.
2475   ///
2476   /// \param Loc      The insert and source location description.
2477   /// \param AllocaIP The insertion point to be used for alloca instructions.
2478   /// \param X        The target atomic pointer to be updated.
2479   /// \param Expr     The value to update X with.
2480   /// \param AO       Atomic ordering of the generated atomic instructions.
2481   /// \param RMWOp    The binary operation used for update. If the operation
2482   ///                 is not supported by atomicRMW, or belongs to
2483   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
2484   ///                 atomic will be generated.
2485   /// \param UpdateOp Code generator for complex expressions that cannot be
2486   ///                 expressed through atomicrmw instruction.
2487   /// \param IsXBinopExpr True if \a X is Left H.S. in Right H.S. part of the
2488   ///                     update expression, false otherwise.
2489   ///                     (e.g. true for X = X BinOp Expr)
2490   ///
2491   /// \return Insertion point after generated atomic update IR.
2492   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
2493                                    InsertPointTy AllocaIP, AtomicOpValue &X,
2494                                    Value *Expr, AtomicOrdering AO,
2495                                    AtomicRMWInst::BinOp RMWOp,
2496                                    AtomicUpdateCallbackTy &UpdateOp,
2497                                    bool IsXBinopExpr);
2498 
2499   /// Emit atomic capture for constructs: --- Only scalar data types
2500   /// V = X; X = X BinOp Expr,
2501   /// X = X BinOp Expr; V = X,
2502   /// V = X; X = Expr BinOp X,
2503   /// X = Expr BinOp X; V = X,
2504   /// V = X; X = UpdateOp(X),
2505   /// X = UpdateOp(X); V = X.
2506   ///
2507   /// \param Loc        The insert and source location description.
2508   /// \param AllocaIP   The insertion point to be used for alloca instructions.
2509   /// \param X          The target atomic pointer to be updated.
2510   /// \param V          Memory address where to store captured value.
2511   /// \param Expr       The value to update X with.
2512   /// \param AO         Atomic ordering of the generated atomic instructions.
2513   /// \param RMWOp      The binary operation used for update. If the
2514   ///                   operation is not supported by atomicRMW, or belongs to
2515   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
2516   ///                   atomic will be generated.
2517   /// \param UpdateOp   Code generator for complex expressions that cannot be
2518   ///                   expressed through atomicrmw instruction.
2519   /// \param UpdateExpr True if X is an in place update of the form
2520   ///                   X = X BinOp Expr or X = Expr BinOp X.
2521   /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2522   ///                        'v', not an updated one.
2523   /// \param IsXBinopExpr True if X is Left H.S. in Right H.S. part of the
2524   ///                     update expression, false otherwise.
2525   ///                     (e.g. true for X = X BinOp Expr)
2526   ///
2527   /// \return Insertion point after generated atomic capture IR.
2528   InsertPointTy
2529   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
2530                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2531                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2532                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2533                       bool IsPostfixUpdate, bool IsXBinopExpr);
2534 
2535   /// Emit atomic compare for constructs: --- Only scalar data types
2536   /// cond-expr-stmt:
2537   /// x = x ordop expr ? expr : x;
2538   /// x = expr ordop x ? expr : x;
2539   /// x = x == e ? d : x;
2540   /// x = e == x ? d : x; (this one is not in the spec)
2541   /// cond-update-stmt:
2542   /// if (x ordop expr) { x = expr; }
2543   /// if (expr ordop x) { x = expr; }
2544   /// if (x == e) { x = d; }
2545   /// if (e == x) { x = d; } (this one is not in the spec)
2546   /// conditional-update-capture-atomic:
2547   /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2548   /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2549   /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2550   ///                                         IsFailOnly=true)
2551   /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2552   /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2553   ///                                                IsFailOnly=true)
2554   ///
2555   /// \param Loc          The insert and source location description.
2556   /// \param X            The target atomic pointer to be updated.
2557   /// \param V            Memory address where to store captured value (for
2558   ///                     compare capture only).
2559   /// \param R            Memory address where to store comparison result
2560   ///                     (for compare capture with '==' only).
2561   /// \param E            The expected value ('e') for forms that use an
2562   ///                     equality comparison or an expression ('expr') for
2563   ///                     forms that use 'ordop' (logically an atomic maximum or
2564   ///                     minimum).
2565   /// \param D            The desired value for forms that use an equality
2566   ///                     comparison. For forms that use 'ordop', it should be
2567   ///                     \p nullptr.
2568   /// \param AO           Atomic ordering of the generated atomic instructions.
2569   /// \param Op           Atomic compare operation. It can only be ==, <, or >.
2570   /// \param IsXBinopExpr True if the conditional statement is in the form where
2571   ///                     x is on LHS. It only matters for < or >.
2572   /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
2573   ///                         'v', not an updated one (for compare capture
2574   ///                         only).
2575   /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
2576   ///                     only when the comparison fails. This is only valid
2577   ///                     when the comparison is '=='.
2578   ///
2579   /// \return Insertion point after generated atomic compare IR.
2580   InsertPointTy
2581   createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
2582                       AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
2583                       AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
2584                       bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
  /// Overload that takes an additional \p Failure atomic ordering after the
  /// parameters of the overload declared directly above.
  /// NOTE(review): \p Failure is presumably the ordering applied when the
  /// compare-exchange fails -- confirm against the implementation.
2585   InsertPointTy createAtomicCompare(const LocationDescription &Loc,
2586                                     AtomicOpValue &X, AtomicOpValue &V,
2587                                     AtomicOpValue &R, Value *E, Value *D,
2588                                     AtomicOrdering AO,
2589                                     omp::OMPAtomicCompareOp Op,
2590                                     bool IsXBinopExpr, bool IsPostfixUpdate,
2591                                     bool IsFailOnly, AtomicOrdering Failure);
2592 
2593   /// Create the control flow structure of a canonical OpenMP loop.
2594   ///
2595   /// The emitted loop will be disconnected, i.e. no edge to the loop's
2596   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2597   /// IRBuilder location is not preserved.
2598   ///
2599   /// \param DL        DebugLoc used for the instructions in the skeleton.
2600   /// \param TripCount Value to be used for the trip count.
2601   /// \param F         Function in which to insert the BasicBlocks.
2602   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
2603   ///                         typically the body itself.
2604   /// \param PostInsertBefore Where to insert BBs that execute after the body.
2605   /// \param Name      Base name used to derive BB and instruction names;
2606   ///                  defaults to an empty name.
2607   ///
2608   /// \returns The CanonicalLoopInfo that represents the emitted loop.
2609   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
2610                                         Function *F,
2611                                         BasicBlock *PreInsertBefore,
2612                                         BasicBlock *PostInsertBefore,
2613                                         const Twine &Name = {});
2614   /// Name string of the OpenMP offload info metadata.
2615   const std::string ompOffloadInfoName = "omp_offload.info";
2616 
2617   /// Loads all the offload entries information from the host IR
2618   /// metadata. This function is only meant to be used with device code
2619   /// generation.
2620   ///
2621   /// \param M Module to load metadata info from. The module passed may be
2622   /// loaded from a bitcode file, i.e., different from OpenMPIRBuilder::M.
2623   void loadOffloadInfoMetadata(Module &M);
2624 
2625   /// Loads all the offload entries information from the host IR
2626   /// metadata read from the file passed in as the HostFilePath argument. This
2627   /// function is only meant to be used with device code generation.
2628   ///
2629   /// \param HostFilePath The path to the host IR file, used to load in offload
2630   /// metadata for the device, allowing host and device to maintain the same
2631   /// metadata mapping.
2632   void loadOffloadInfoMetadata(StringRef HostFilePath);
2633 
2634   /// Gets (if a variable with the given name already exists) or creates an
2635   /// internal global variable with the specified Name. The created variable has
2636   /// linkage CommonLinkage by default and is initialized by null value.
2637   /// \param Ty Type of the global variable. If it already exists, the type
2638   /// must be the same.
2639   /// \param Name Name of the variable.
  /// \param AddressSpace Address space for the variable; defaults to 0.
2640   GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
2641                                               unsigned AddressSpace = 0);
2642 };
2643 
2644 /// Class to represent the control flow structure of an OpenMP canonical loop.
2645 ///
2646 /// The control-flow structure is standardized for easy consumption by
2647 /// directives associated with loops. For instance, the worksharing-loop
2648 /// construct may change this control flow such that each loop iteration is
2649 /// executed on only one thread. The constraints of a canonical loop in brief
2650 /// are:
2651 ///
2652 ///  * The number of loop iterations must have been computed before entering the
2653 ///    loop.
2654 ///
2655 ///  * Has an (unsigned) logical induction variable that starts at zero and
2656 ///    increments by one.
2657 ///
2658 ///  * The loop's CFG itself has no side-effects. The OpenMP specification
2659 ///    itself allows side-effects, but the order in which they happen, including
2660 ///    how often or whether at all, is unspecified. We expect that the frontend
2661 ///    will emit those side-effect instructions somewhere (e.g. before the loop)
2662 ///    such that the CanonicalLoopInfo itself can be side-effect free.
2663 ///
2664 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2665 /// execution of a loop body that satisfies these constraints. It does NOT
2666 /// represent arbitrary SESE regions that happen to contain a loop. Do not use
2667 /// CanonicalLoopInfo for such purposes.
2668 ///
2669 /// The control flow can be described as follows:
2670 ///
2671 ///     Preheader
2672 ///        |
2673 ///  /-> Header
2674 ///  |     |
2675 ///  |    Cond---\
2676 ///  |     |     |
2677 ///  |    Body   |
2678 ///  |    | |    |
2679 ///  |   <...>   |
2680 ///  |    | |    |
2681 ///   \--Latch   |
2682 ///              |
2683 ///             Exit
2684 ///              |
2685 ///            After
2686 ///
2687 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2688 /// including) and end at AfterIP (at the After's first instruction, excluding).
2689 /// That is, instructions in the Preheader and After blocks (except the
2690 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2691 /// side-effects. Typically, the Preheader is used to compute the loop's trip
2692 /// count. The instructions from BodyIP (at the Body block's first instruction,
2693 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2694 /// control and thus can have side-effects. The body block is the single entry
2695 /// point into the loop body, which may contain arbitrary control flow as long
2696 /// as all control paths eventually branch to the Latch block.
2697 ///
2698 /// TODO: Consider adding another standardized BasicBlock between Body CFG and
2699 /// Latch to guarantee that there is only a single edge to the latch. It would
2700 /// make loop transformations easier by not needing to consider multiple
2701 /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2702 /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2703 /// executes after each body iteration.
2704 ///
2705 /// There must be no loop-carried dependencies through llvm::Values. This is
2706 /// equivalent to the Latch having no PHINode and the Header's only PHINode is
2707 /// for the induction variable.
2708 ///
2709 /// All code in Header, Cond, Latch and Exit (plus the terminator of the
2710 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2711 /// by assertOK(). They are expected to not be modified unless explicitly
2712 /// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2713 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2714 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2715 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2716 /// anymore as its underlying control flow may not exist anymore.
2717 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2718 /// may also return a new CanonicalLoopInfo that can be passed to other
2719 /// loop-associated construct implementing methods. These loop-transforming
2720 /// methods may either create a new CanonicalLoopInfo usually using
2721 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2722 /// modify one of the input CanonicalLoopInfo and return it as representing the
2723 /// modified loop. What is done is an implementation detail of the
2724 /// transformation-implementing method and callers should always assume that the
2725 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2726 /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2727 /// created by createCanonicalLoop, such that transforming methods do not have
2728 /// to special case where the CanonicalLoopInfo originated from.
2729 ///
2730 /// Generally, methods consuming CanonicalLoopInfo do not need an
2731 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2732 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2733 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2734 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2735 /// any InsertPoint in the Preheader, After or Body can still be used after
2736 /// calling such a method.
2737 ///
2738 /// TODO: Provide mechanisms for exception handling and cancellation points.
2739 ///
2740 /// Defined outside OpenMPIRBuilder because nested classes cannot be
2741 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2742 class CanonicalLoopInfo {
2743   friend class OpenMPIRBuilder;
2744 
2745 private:
  // Control blocks of the loop. They default to null; a null Header is what
  // isValid() reports as "not a valid loop".
2746   BasicBlock *Header = nullptr;
2747   BasicBlock *Cond = nullptr;
2748   BasicBlock *Latch = nullptr;
2749   BasicBlock *Exit = nullptr;
2750 
2751   /// Add the control blocks of this loop to \p BBs.
2752   ///
2753   /// This does not include any block from the body, including the one returned
2754   /// by getBody().
2755   ///
2756   /// FIXME: This currently includes the Preheader and After blocks even though
2757   /// their content is (mostly) not under CanonicalLoopInfo's control.
2758   /// Re-evaluate whether this makes sense.
2759   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2760 
2761   /// Sets the number of loop iterations to the given value. This value must be
2762   /// valid in the condition block (i.e., defined in the preheader) and is
2763   /// interpreted as an unsigned integer.
2764   void setTripCount(Value *TripCount);
2765 
2766   /// Replace all uses of the canonical induction variable in the loop body with
2767   /// a new one.
2768   ///
2769   /// The intended use case is to update the induction variable for an updated
2770   /// iteration space such that it can stay normalized in the 0...tripcount-1
2771   /// range.
2772   ///
2773   /// The \p Updater is called with the (presumably updated) current normalized
2774   /// induction variable and is expected to return the value that uses of the
2775   /// pre-updated induction values should use instead, typically dependent on
2776   /// the new induction variable. This is a lambda (instead of e.g. just passing
2777   /// the new value) to be able to distinguish the uses of the pre-updated
2778   /// induction variable and uses of the induction variable to compute the
2779   /// updated induction variable value.
2780   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2781 
2782 public:
2783   /// Returns whether this object currently represents the IR of a loop. If
2784   /// returning false, it may have been consumed by a loop transformation or not
2785   /// been initialized. Do not use in this case.
isValid()2786   bool isValid() const { return Header; }
2787 
2788   /// The preheader ensures that there is only a single edge entering the loop.
2789   /// Code that must be executed before any loop iteration can be emitted here,
2790   /// such as computing the loop trip count and begin lifetime markers. Code in
2791   /// the preheader is not considered part of the canonical loop.
2792   BasicBlock *getPreheader() const;
2793 
2794   /// The header is the entry for each iteration. In the canonical control flow,
2795   /// it only contains the PHINode for the induction variable.
getHeader()2796   BasicBlock *getHeader() const {
2797     assert(isValid() && "Requires a valid canonical loop");
2798     return Header;
2799   }
2800 
2801   /// The condition block computes whether there is another loop iteration. If
2802   /// yes, branches to the body; otherwise to the exit block.
getCond()2803   BasicBlock *getCond() const {
2804     assert(isValid() && "Requires a valid canonical loop");
2805     return Cond;
2806   }
2807 
2808   /// The body block is the single entry for a loop iteration and not controlled
2809   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2810   /// eventually branch to the \p Latch block.
getBody()2811   BasicBlock *getBody() const {
2812     assert(isValid() && "Requires a valid canonical loop");
    // The body is not stored; it is read back as successor 0 of the branch
    // that terminates Cond.
2813     return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2814   }
2815 
2816   /// Reaching the latch indicates the end of the loop body code. In the
2817   /// canonical control flow, it only contains the increment of the induction
2818   /// variable.
getLatch()2819   BasicBlock *getLatch() const {
2820     assert(isValid() && "Requires a valid canonical loop");
2821     return Latch;
2822   }
2823 
2824   /// Reaching the exit indicates no more iterations are being executed.
getExit()2825   BasicBlock *getExit() const {
2826     assert(isValid() && "Requires a valid canonical loop");
2827     return Exit;
2828   }
2829 
2830   /// The after block is intended for clean-up code such as lifetime end
2831   /// markers. It is separate from the exit block to ensure, analogous to the
2832   /// preheader, it having just a single entry edge and being free from PHI
2833   /// nodes should there be multiple loop exits (such as from break
2834   /// statements/cancellations).
getAfter()2835   BasicBlock *getAfter() const {
2836     assert(isValid() && "Requires a valid canonical loop");
    // The after block is not stored; it is the single successor of Exit.
2837     return Exit->getSingleSuccessor();
2838   }
2839 
2840   /// Returns the llvm::Value containing the number of loop iterations. It must
2841   /// be valid in the preheader and always interpreted as an unsigned integer of
2842   /// any bit-width.
getTripCount()2843   Value *getTripCount() const {
2844     assert(isValid() && "Requires a valid canonical loop");
    // The trip count is operand 1 of the compare that is the first
    // instruction of Cond.
2845     Instruction *CmpI = &Cond->front();
2846     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2847     return CmpI->getOperand(1);
2848   }
2849 
2850   /// Returns the instruction representing the current logical induction
2851   /// variable. Always unsigned, always starting at 0 with an increment of one.
getIndVar()2852   Instruction *getIndVar() const {
2853     assert(isValid() && "Requires a valid canonical loop");
    // The induction variable is the PHI that is the first instruction of
    // Header.
2854     Instruction *IndVarPHI = &Header->front();
2855     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2856     return IndVarPHI;
2857   }
2858 
2859   /// Return the type of the induction variable (and the trip count).
getIndVarType()2860   Type *getIndVarType() const {
2861     assert(isValid() && "Requires a valid canonical loop");
2862     return getIndVar()->getType();
2863   }
2864 
2865   /// Return the insertion point for user code before the loop.
getPreheaderIP()2866   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2867     assert(isValid() && "Requires a valid canonical loop");
2868     BasicBlock *Preheader = getPreheader();
    // Points at the last instruction of the preheader.
2869     return {Preheader, std::prev(Preheader->end())};
2870   };
2871 
2872   /// Return the insertion point for user code in the body.
getBodyIP()2873   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2874     assert(isValid() && "Requires a valid canonical loop");
2875     BasicBlock *Body = getBody();
2876     return {Body, Body->begin()};
2877   };
2878 
2879   /// Return the insertion point for user code after the loop.
getAfterIP()2880   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2881     assert(isValid() && "Requires a valid canonical loop");
2882     BasicBlock *After = getAfter();
2883     return {After, After->begin()};
2884   };
2885 
  /// Return the parent of the loop's header block, i.e. the function that
  /// contains this loop.
getFunction()2886   Function *getFunction() const {
2887     assert(isValid() && "Requires a valid canonical loop");
2888     return Header->getParent();
2889   }
2890 
2891   /// Consistency self-check.
2892   void assertOK() const;
2893 
2894   /// Invalidate this loop. That is, the underlying IR does not fulfill the
2895   /// requirements of an OpenMP canonical loop anymore.
2896   void invalidate();
2897 };
2898 
2899 } // end namespace llvm
2900 
2901 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
2902