1 // Copyright (c) 2018 Google LLC. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef SOURCE_OPT_LOOP_PEELING_H_ 16 #define SOURCE_OPT_LOOP_PEELING_H_ 17 18 #include <algorithm> 19 #include <limits> 20 #include <memory> 21 #include <tuple> 22 #include <unordered_map> 23 #include <unordered_set> 24 #include <utility> 25 #include <vector> 26 27 #include "source/opt/ir_context.h" 28 #include "source/opt/loop_descriptor.h" 29 #include "source/opt/loop_utils.h" 30 #include "source/opt/pass.h" 31 #include "source/opt/scalar_analysis.h" 32 33 namespace spvtools { 34 namespace opt { 35 36 // Utility class to perform the peeling of a given loop. 37 // The loop peeling transformation make a certain amount of a loop iterations to 38 // be executed either before (peel before) or after (peel after) the transformed 39 // loop. 40 // 41 // For peeling cases the transformation does the following steps: 42 // - It clones the loop and inserts the cloned loop before the original loop; 43 // - It connects all iterating values of the cloned loop with the 44 // corresponding original loop values so that the second loop starts with 45 // the appropriate values. 46 // - It inserts a new induction variable "i" is inserted into the cloned that 47 // starts with the value 0 and increment by step of one. 48 // 49 // The last step is specific to each case: 50 // - Peel before: the transformation is to peel the "N" first iterations. 51 // The exit condition of the cloned loop is changed so that the loop 52 // exits when "i < N" becomes false. The original loop is then protected to 53 // only execute if there is any iteration left to do. 54 // - Peel after: the transformation is to peel the "N" last iterations, 55 // then the exit condition of the cloned loop is changed so that the loop 56 // exits when "i + N < max_iteration" becomes false, where "max_iteration" 57 // is the upper bound of the loop. The cloned loop is then protected to 58 // only execute if there is any iteration left to do no covered by the 59 // second. 60 // 61 // To be peelable: 62 // - The loop must be in LCSSA form; 63 // - The loop must not contain any breaks; 64 // - The loop must not have any ambiguous iterators updates (see 65 // "CanPeelLoop"). 66 // The method "CanPeelLoop" checks that those constrained are met. 67 class LoopPeeling { 68 public: 69 // LoopPeeling constructor. 70 // |loop| is the loop to peel. 71 // |loop_iteration_count| is the instruction holding the |loop| iteration 72 // count, must be invariant for |loop| and must be of an int 32 type (signed 73 // or unsigned). 74 // |canonical_induction_variable| is an induction variable that can be used to 75 // count the number of iterations, must be of the same type as 76 // |loop_iteration_count| and start at 0 and increase by step of one at each 77 // iteration. The value nullptr is interpreted as no suitable variable exists 78 // and one will be created. 79 LoopPeeling(Loop* loop, Instruction* loop_iteration_count, 80 Instruction* canonical_induction_variable = nullptr) 81 : context_(loop->GetContext()), 82 loop_utils_(loop->GetContext(), loop), 83 loop_(loop), 84 loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) 85 ? loop_iteration_count 86 : nullptr), 87 int_type_(nullptr), 88 original_loop_canonical_induction_variable_( 89 canonical_induction_variable), 90 canonical_induction_variable_(nullptr) { 91 if (loop_iteration_count_) { 92 int_type_ = context_->get_type_mgr() 93 ->GetType(loop_iteration_count_->type_id()) 94 ->AsInteger(); 95 if (canonical_induction_variable_) { 96 assert(canonical_induction_variable_->type_id() == 97 loop_iteration_count_->type_id() && 98 "loop_iteration_count and canonical_induction_variable do not " 99 "have the same type"); 100 } 101 } 102 GetIteratingExitValues(); 103 } 104 105 // Returns true if the loop can be peeled. 106 // To be peelable, all operation involved in the update of the loop iterators 107 // must not dominates the exit condition. This restriction is a work around to 108 // not miss compile code like: 109 // 110 // for (int i = 0; i + 1 < N; i++) {} 111 // for (int i = 0; ++i < N; i++) {} 112 // 113 // The increment will happen before the test on the exit condition leading to 114 // very look-a-like code. 115 // 116 // This restriction will not apply if a loop rotate is applied before (i.e. 117 // becomes a do-while loop). CanPeelLoop()118 bool CanPeelLoop() const { 119 CFG& cfg = *context_->cfg(); 120 121 if (!loop_iteration_count_) { 122 return false; 123 } 124 if (!int_type_) { 125 return false; 126 } 127 if (int_type_->width() != 32) { 128 return false; 129 } 130 if (!loop_->IsLCSSA()) { 131 return false; 132 } 133 if (!loop_->GetMergeBlock()) { 134 return false; 135 } 136 if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { 137 return false; 138 } 139 if (!IsConditionCheckSideEffectFree()) { 140 return false; 141 } 142 143 return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), 144 [](std::pair<uint32_t, Instruction*> it) { 145 return it.second == nullptr; 146 }); 147 } 148 149 // Moves the execution of the |factor| first iterations of the loop into a 150 // dedicated loop. 151 void PeelBefore(uint32_t factor); 152 153 // Moves the execution of the |factor| last iterations of the loop into a 154 // dedicated loop. 155 void PeelAfter(uint32_t factor); 156 157 // Returns the cloned loop. GetClonedLoop()158 Loop* GetClonedLoop() { return cloned_loop_; } 159 // Returns the original loop. GetOriginalLoop()160 Loop* GetOriginalLoop() { return loop_; } 161 162 private: 163 IRContext* context_; 164 LoopUtils loop_utils_; 165 // The original loop. 166 Loop* loop_; 167 // The initial |loop_| upper bound. 168 Instruction* loop_iteration_count_; 169 // The int type to use for the canonical_induction_variable_. 170 analysis::Integer* int_type_; 171 // The cloned loop. 172 Loop* cloned_loop_; 173 // This is set to true when the exit and back-edge branch instruction is the 174 // same. 175 bool do_while_form_; 176 // The canonical induction variable from the original loop if it exists. 177 Instruction* original_loop_canonical_induction_variable_; 178 // The canonical induction variable of the cloned loop. The induction variable 179 // is initialized to 0 and incremented by step of 1. 180 Instruction* canonical_induction_variable_; 181 // Map between loop iterators and exit values. Loop iterators 182 std::unordered_map<uint32_t, Instruction*> exit_value_; 183 184 // Duplicate |loop_| and place the new loop before the cloned loop. Iterating 185 // values from the cloned loop are then connected to the original loop as 186 // initializer. 187 void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); 188 189 // Insert the canonical induction variable into the first loop as a simplified 190 // counter. 191 void InsertCanonicalInductionVariable( 192 LoopUtils::LoopCloningResult* clone_results); 193 194 // Fixes the exit condition of the before loop. The function calls 195 // |condition_builder| to get the condition to use in the conditional branch 196 // of the loop exit. The loop will be exited if the condition evaluate to 197 // true. |condition_builder| takes an Instruction* that represent the 198 // insertion point. 199 void FixExitCondition( 200 const std::function<uint32_t(Instruction*)>& condition_builder); 201 202 // Gathers all operations involved in the update of |iterator| into 203 // |operations|. 204 void GetIteratorUpdateOperations( 205 const Loop* loop, Instruction* iterator, 206 std::unordered_set<Instruction*>* operations); 207 208 // Gathers exiting iterator values. The function builds a map between each 209 // iterating value in the loop (a phi instruction in the loop header) and its 210 // SSA value when it exit the loop. If no exit value can be accurately found, 211 // it is map to nullptr (see comment on CanPeelLoop). 212 void GetIteratingExitValues(); 213 214 // Returns true if a for-loop has no instruction with effects before the 215 // condition check. 216 bool IsConditionCheckSideEffectFree() const; 217 218 // Creates a new basic block and insert it between |bb| and the predecessor of 219 // |bb|. 220 BasicBlock* CreateBlockBefore(BasicBlock* bb); 221 222 // Inserts code to only execute |loop| only if the given |condition| is true. 223 // |if_merge| is a suitable basic block to be used by the if condition as 224 // merge block. 225 // The function returns the if block protecting the loop. 226 BasicBlock* ProtectLoop(Loop* loop, Instruction* condition, 227 BasicBlock* if_merge); 228 }; 229 230 // Implements a loop peeling optimization. 231 // For each loop, the pass will try to peel it if there is conditions that 232 // are true for the "N" first or last iterations of the loop. 233 // To avoid code size explosion, too large loops will not be peeled. 234 class LoopPeelingPass : public Pass { 235 public: 236 // Describes the peeling direction. 237 enum class PeelDirection { 238 kNone, // Cannot peel 239 kBefore, // Can peel before 240 kAfter // Can peel last 241 }; 242 243 // Holds some statistics about peeled function. 244 struct LoopPeelingStats { 245 std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_; 246 }; 247 stats_(stats)248 LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} 249 250 // Sets the loop peeling growth threshold. If the code size increase is above 251 // |code_grow_threshold|, the loop will not be peeled. The code size is 252 // measured in terms of SPIR-V instructions. SetLoopPeelingThreshold(size_t code_grow_threshold)253 static void SetLoopPeelingThreshold(size_t code_grow_threshold) { 254 code_grow_threshold_ = code_grow_threshold; 255 } 256 257 // Returns the loop peeling code growth threshold. GetLoopPeelingThreshold()258 static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } 259 name()260 const char* name() const override { return "loop-peeling"; } 261 262 // Processes the given |module|. Returns Status::Failure if errors occur when 263 // processing. Returns the corresponding Status::Success if processing is 264 // succesful to indicate whether changes have been made to the modue. 265 Pass::Status Process() override; 266 267 private: 268 // Describes the peeling direction. 269 enum class CmpOperator { 270 kLT, // less than 271 kGT, // greater than 272 kLE, // less than or equal 273 kGE, // greater than or equal 274 }; 275 276 class LoopPeelingInfo { 277 public: 278 using Direction = std::pair<PeelDirection, uint32_t>; 279 LoopPeelingInfo(Loop * loop,size_t loop_max_iterations,ScalarEvolutionAnalysis * scev_analysis)280 LoopPeelingInfo(Loop* loop, size_t loop_max_iterations, 281 ScalarEvolutionAnalysis* scev_analysis) 282 : context_(loop->GetContext()), 283 loop_(loop), 284 scev_analysis_(scev_analysis), 285 loop_max_iterations_(loop_max_iterations) {} 286 287 // Returns by how much and to which direction a loop should be peeled to 288 // make the conditional branch of the basic block |bb| an unconditional 289 // branch. If |bb|'s terminator is not a conditional branch or the condition 290 // is not workable then it returns PeelDirection::kNone and a 0 factor. 291 Direction GetPeelingInfo(BasicBlock* bb) const; 292 293 private: 294 // Returns the id of the loop invariant operand of the conditional 295 // expression |condition|. It returns if no operand is invariant. 296 uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const; 297 // Returns the id of the non loop invariant operand of the conditional 298 // expression |condition|. It returns if all operands are invariant. 299 uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const; 300 301 // Returns the value of |rec| at the first loop iteration. 302 SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; 303 // Returns the value of |rec| at the given |iteration|. 304 SExpression GetValueAtIteration(SERecurrentNode* rec, 305 int64_t iteration) const; 306 // Returns the value of |rec| at the last loop iteration. 307 SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; 308 309 bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, 310 bool* result) const; 311 312 Direction HandleEquality(SExpression lhs, SExpression rhs) const; 313 Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, 314 SERecurrentNode* rhs) const; 315 GetNoneDirection()316 static Direction GetNoneDirection() { 317 return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; 318 } 319 IRContext* context_; 320 Loop* loop_; 321 ScalarEvolutionAnalysis* scev_analysis_; 322 size_t loop_max_iterations_; 323 }; 324 // Peel profitable loops in |f|. 325 bool ProcessFunction(Function* f); 326 // Peel |loop| if profitable. 327 std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size); 328 329 static size_t code_grow_threshold_; 330 LoopPeelingStats* stats_; 331 }; 332 333 } // namespace opt 334 } // namespace spvtools 335 336 #endif // SOURCE_OPT_LOOP_PEELING_H_ 337