• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create scalar and OpenMP parallel loops
10 // as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 #ifndef POLLY_LOOP_GENERATORS_KMP_H
14 #define POLLY_LOOP_GENERATORS_KMP_H
15 
16 #include "polly/CodeGen/IRBuilder.h"
17 #include "polly/CodeGen/LoopGenerators.h"
18 #include "polly/Support/ScopHelper.h"
19 #include "llvm/ADT/SetVector.h"
20 
21 namespace polly {
22 using llvm::GlobalValue;
23 using llvm::GlobalVariable;
24 
25 /// This ParallelLoopGenerator subclass handles the generation of parallelized
26 /// code, utilizing the LLVM OpenMP library.
27 class ParallelLoopGeneratorKMP final : public ParallelLoopGenerator {
28 public:
29   /// Create a parallel loop generator for the current function.
ParallelLoopGeneratorKMP(PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,const DataLayout & DL)30   ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
31                            DominatorTree &DT, const DataLayout &DL)
32       : ParallelLoopGenerator(Builder, LI, DT, DL) {
33     SourceLocationInfo = createSourceLocation();
34   }
35 
36 protected:
37   /// The source location struct of this loop.
38   /// ident_t = type { i32, i32, i32, i32, i8* }
39   GlobalValue *SourceLocationInfo;
40 
41   /// Convert the combination of given chunk size and scheduling type (which
42   /// might have been set via the command line) into the corresponding
43   /// scheduling type. This may result (e.g.) in a 'change' from
44   /// "static chunked" scheduling to "static non-chunked" (regarding the
45   /// provided and returned scheduling types).
46   ///
47   /// @param ChunkSize    The chunk size, set via command line or its default.
48   /// @param Scheduling   The scheduling, set via command line or its default.
49   ///
50   /// @return The corresponding OMPGeneralSchedulingType.
51   OMPGeneralSchedulingType
52   getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const;
53 
54   /// Returns True if 'LongType' is 64bit wide, otherwise: False.
55   bool is64BitArch();
56 
57 public:
58   // The functions below may be used if one does not want to generate a
59   // specific OpenMP parallel loop, but generate individual parts of it
60   // (e.g. the subfunction definition).
61 
62   /// Create a runtime library call to spawn the worker threads.
63   ///
64   /// @param SubFn      The subfunction which holds the loop body.
65   /// @param SubFnParam The parameter for the subfunction (basically the struct
66   ///                   filled with the outside values).
67   /// @param LB         The lower bound for the loop we parallelize.
68   /// @param UB         The upper bound for the loop we parallelize.
69   /// @param Stride     The stride of the loop we parallelize.
70   void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
71                               Value *UB, Value *Stride);
72 
73   void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB,
74                                Value *UB, Value *Stride) override;
75 
76   Function *prepareSubFnDefinition(Function *F) const override;
77 
78   std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
79                                               SetVector<Value *> UsedValues,
80                                               ValueMapT &VMap) override;
81 
82   /// Create a runtime library call to get the current global thread number.
83   ///
84   /// @return A Value ref which holds the current global thread number.
85   Value *createCallGlobalThreadNum();
86 
87   /// Create a runtime library call to request a number of threads.
88   /// Which will be used in the next OpenMP section (by the next fork).
89   ///
90   /// @param GlobalThreadID   The global thread ID.
91   /// @param NumThreads       The number of threads to use.
92   void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads);
93 
94   /// Create a runtime library call to prepare the OpenMP runtime.
95   /// For dynamically scheduled loops, saving the loop arguments.
96   ///
97   /// @param GlobalThreadID   The global thread ID.
98   /// @param LB               The loop's lower bound.
99   /// @param UB               The loop's upper bound.
100   /// @param Inc              The loop increment.
101   /// @param ChunkSize        The chunk size of the parallel loop.
102   void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB,
103                               Value *Inc, Value *ChunkSize);
104 
105   /// Create a runtime library call to retrieve the next (dynamically)
106   /// allocated chunk of work for this thread.
107   ///
108   /// @param GlobalThreadID   The global thread ID.
109   /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
110   ///                         the last chunk of work, or 0 otherwise.
111   /// @param LBPtr            Pointer to the lower bound for the next chunk.
112   /// @param UBPtr            Pointer to the upper bound for the next chunk.
113   /// @param StridePtr        Pointer to the stride for the next chunk.
114   ///
115   /// @return A Value which holds 1 if there is work to be done, 0 otherwise.
116   Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr,
117                                 Value *LBPtr, Value *UBPtr, Value *StridePtr);
118 
119   /// Create a runtime library call to prepare the OpenMP runtime.
120   /// For statically scheduled loops, saving the loop arguments.
121   ///
122   /// @param GlobalThreadID   The global thread ID.
123   /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
124   ///                         the last chunk of work, or 0 otherwise.
125   /// @param LBPtr            Pointer to the lower bound for the next chunk.
126   /// @param UBPtr            Pointer to the upper bound for the next chunk.
127   /// @param StridePtr        Pointer to the stride for the next chunk.
128   /// @param ChunkSize        The chunk size of the parallel loop.
129   void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr,
130                             Value *LBPtr, Value *UBPtr, Value *StridePtr,
131                             Value *ChunkSize);
132 
133   /// Create a runtime library call to mark the end of
134   /// a statically scheduled loop.
135   ///
136   /// @param GlobalThreadID   The global thread ID.
137   void createCallStaticFini(Value *GlobalThreadID);
138 
139   /// Create the current source location.
140   ///
141   /// TODO: Generates only(!) dummy values.
142   GlobalVariable *createSourceLocation();
143 };
144 } // end namespace polly
145 #endif
146