1 //===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar and OpenMP parallel loops 10 // as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #ifndef POLLY_LOOP_GENERATORS_H 14 #define POLLY_LOOP_GENERATORS_H 15 16 #include "polly/CodeGen/IRBuilder.h" 17 #include "polly/Support/ScopHelper.h" 18 #include "llvm/ADT/SetVector.h" 19 20 namespace polly { 21 using llvm::AllocaInst; 22 using llvm::BasicBlock; 23 using llvm::DataLayout; 24 using llvm::DominatorTree; 25 using llvm::Function; 26 using llvm::ICmpInst; 27 using llvm::LoopInfo; 28 using llvm::Module; 29 using llvm::SetVector; 30 using llvm::Type; 31 using llvm::Value; 32 33 /// General scheduling types of parallel OpenMP for loops. 34 /// Initialization values taken from OpenMP's enum in kmp.h: sched_type. 35 /// Currently, only 'static' scheduling may change from chunked to non-chunked. 36 enum class OMPGeneralSchedulingType { 37 StaticChunked = 33, 38 StaticNonChunked = 34, 39 Dynamic = 35, 40 Guided = 36, 41 Runtime = 37 42 }; 43 44 extern int PollyNumThreads; 45 extern OMPGeneralSchedulingType PollyScheduling; 46 extern int PollyChunkSize; 47 48 /// Create a scalar do/for-style loop. 49 /// 50 /// @param LowerBound The starting value of the induction variable. 51 /// @param UpperBound The upper bound of the induction variable. 52 /// @param Stride The value by which the induction variable 53 /// is incremented. 54 /// 55 /// @param Builder The builder used to create the loop. 56 /// @param P A pointer to the pass that uses this function. 57 /// It is used to update analysis information. 58 /// @param LI The loop info for the current function 59 /// @param DT The dominator tree we need to update 60 /// @param ExitBlock The block the loop will exit to. 61 /// @param Predicate The predicate used to generate the upper loop 62 /// bound. 63 /// @param Annotator This function can (optionally) take 64 /// a ScopAnnotator which 65 /// annotates loops and alias information in the SCoP. 66 /// @param Parallel If this loop should be marked parallel in 67 /// the Annotator. 68 /// @param UseGuard Create a guard in front of the header to check if 69 /// the loop is executed at least once, otherwise just 70 /// assume it. 71 /// @param LoopVectDisabled If the Loop vectorizer should be disabled for this 72 /// loop. 73 /// 74 /// @return Value* The newly created induction variable for this loop. 75 Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, 76 PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, 77 BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, 78 ScopAnnotator *Annotator = nullptr, bool Parallel = false, 79 bool UseGuard = true, bool LoopVectDisabled = false); 80 81 /// Create a DebugLoc representing generated instructions. 82 /// 83 /// The IR verifier requires !dbg metadata to be set in some situations. For 84 /// instance, if an (inlinable) function has debug info, all its call site must 85 /// have debug info as well. 86 llvm::DebugLoc createDebugLocForGeneratedCode(Function *F); 87 88 /// The ParallelLoopGenerator allows to create parallelized loops 89 /// 90 /// To parallelize a loop, we perform the following steps: 91 /// o Generate a subfunction which will hold the loop body. 92 /// o Create a struct to hold all outer values needed in the loop body. 93 /// o Create calls to a runtime library to achieve the actual parallelism. 94 /// These calls will spawn and join threads, define how the work (here the 95 /// iterations) are distributed between them and make sure each has access 96 /// to the struct holding all needed values. 97 /// 98 /// At the moment we support only one parallel runtime, OpenMP. 99 /// 100 /// If we parallelize the outer loop of the following loop nest, 101 /// 102 /// S0; 103 /// for (int i = 0; i < N; i++) 104 /// for (int j = 0; j < M; j++) 105 /// S1(i, j); 106 /// S2; 107 /// 108 /// we will generate the following code (with different runtime function names): 109 /// 110 /// S0; 111 /// auto *values = storeValuesIntoStruct(); 112 /// // Execute subfunction with multiple threads 113 /// spawn_threads(subfunction, values); 114 /// join_threads(); 115 /// S2; 116 /// 117 /// // This function is executed in parallel by different threads 118 /// void subfunction(values) { 119 /// while (auto *WorkItem = getWorkItem()) { 120 /// int LB = WorkItem.begin(); 121 /// int UB = WorkItem.end(); 122 /// for (int i = LB; i < UB; i++) 123 /// for (int j = 0; j < M; j++) 124 /// S1(i, j); 125 /// } 126 /// cleanup_thread(); 127 /// } 128 class ParallelLoopGenerator { 129 public: 130 /// Create a parallel loop generator for the current function. ParallelLoopGenerator(PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,const DataLayout & DL)131 ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI, 132 DominatorTree &DT, const DataLayout &DL) 133 : Builder(Builder), LI(LI), DT(DT), 134 LongType( 135 Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())), 136 M(Builder.GetInsertBlock()->getParent()->getParent()), 137 DLGenerated(createDebugLocForGeneratedCode( 138 Builder.GetInsertBlock()->getParent())) {} 139 ~ParallelLoopGenerator()140 virtual ~ParallelLoopGenerator() {} 141 142 /// Create a parallel loop. 143 /// 144 /// This function is the main function to automatically generate a parallel 145 /// loop with all its components. 146 /// 147 /// @param LB The lower bound for the loop we parallelize. 148 /// @param UB The upper bound for the loop we parallelize. 149 /// @param Stride The stride of the loop we parallelize. 150 /// @param Values A set of LLVM-IR Values that should be available in 151 /// the new loop body. 152 /// @param VMap A map to allow outside access to the new versions of 153 /// the values in @p Values. 154 /// @param LoopBody A pointer to an iterator that is set to point to the 155 /// body of the created loop. It should be used to insert 156 /// instructions that form the actual loop body. 157 /// 158 /// @return The newly created induction variable for this loop. 159 Value *createParallelLoop(Value *LB, Value *UB, Value *Stride, 160 SetVector<Value *> &Values, ValueMapT &VMap, 161 BasicBlock::iterator *LoopBody); 162 163 protected: 164 /// The IR builder we use to create instructions. 165 PollyIRBuilder &Builder; 166 167 /// The loop info of the current function we need to update. 168 LoopInfo &LI; 169 170 /// The dominance tree of the current function we need to update. 171 DominatorTree &DT; 172 173 /// The type of a "long" on this hardware used for backend calls. 174 Type *LongType; 175 176 /// The current module 177 Module *M; 178 179 /// Debug location for generated code without direct link to any specific 180 /// line. 181 /// 182 /// We only set the DebugLoc where the IR Verifier requires us to. Otherwise, 183 /// absent debug location for optimized code should be fine. 184 llvm::DebugLoc DLGenerated; 185 186 public: 187 /// Create a struct for all @p Values and store them in there. 188 /// 189 /// @param Values The values which should be stored in the struct. 190 /// 191 /// @return The created struct. 192 AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values); 193 194 /// Extract all values from the @p Struct and construct the mapping. 195 /// 196 /// @param Values The values which were stored in the struct. 197 /// @param Struct The struct holding all the values in @p Values. 198 /// @param VMap A map to associate every element of @p Values with the 199 /// new llvm value loaded from the @p Struct. 200 void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty, 201 Value *Struct, ValueMapT &VMap); 202 203 /// Create the definition of the parallel subfunction. 204 /// 205 /// @return A pointer to the subfunction. 206 Function *createSubFnDefinition(); 207 208 /// Create the runtime library calls for spawn and join of the worker threads. 209 /// Additionally, places a call to the specified subfunction. 210 /// 211 /// @param SubFn The subfunction which holds the loop body. 212 /// @param SubFnParam The parameter for the subfunction (basically the struct 213 /// filled with the outside values). 214 /// @param LB The lower bound for the loop we parallelize. 215 /// @param UB The upper bound for the loop we parallelize. 216 /// @param Stride The stride of the loop we parallelize. 217 virtual void deployParallelExecution(Function *SubFn, Value *SubFnParam, 218 Value *LB, Value *UB, Value *Stride) = 0; 219 220 /// Prepare the definition of the parallel subfunction. 221 /// Creates the argument list and names them (as well as the subfunction). 222 /// 223 /// @param F A pointer to the (parallel) subfunction's parent function. 224 /// 225 /// @return The pointer to the (parallel) subfunction. 226 virtual Function *prepareSubFnDefinition(Function *F) const = 0; 227 228 /// Create the parallel subfunction. 229 /// 230 /// @param Stride The induction variable increment. 231 /// @param Struct A struct holding all values in @p Values. 232 /// @param Values A set of LLVM-IR Values that should be available in 233 /// the new loop body. 234 /// @param VMap A map to allow outside access to the new versions of 235 /// the values in @p Values. 236 /// @param SubFn The newly created subfunction is returned here. 237 /// 238 /// @return The newly created induction variable. 239 virtual std::tuple<Value *, Function *> 240 createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues, 241 ValueMapT &VMap) = 0; 242 }; 243 } // end namespace polly 244 #endif 245