1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 18 19 #include "optimizer/ir/basicblock.h" 20 #include "optimizer/ir/graph.h" 21 #include "utils/arena_containers.h" 22 #include "utils/hash.h" 23 24 namespace panda::compiler { 25 class BasicBlock; 26 class Graph; 27 class Inst; 28 29 // NOLINTNEXTLINE(readability-redundant-declaration) 30 bool IsLoopSingleBackEdgeExitPoint(Loop *loop); 31 32 enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT }; 33 34 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS }; 35 36 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC }; 37 38 /** 39 * Helper-class, provides methods to: 40 * - Clone the whole graph; 41 * - Clone loop; 42 * - Unroll loop; 43 * - Peel loop; 44 */ 45 class GraphCloner { 46 using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>; 47 48 struct LoopUnrollData { 49 BasicBlock *header {nullptr}; 50 BasicBlock *backedge {nullptr}; 51 BasicBlock *exit_block {nullptr}; 52 BasicBlock *outer {nullptr}; 53 ArenaVector<BasicBlock *> *blocks {nullptr}; 54 InstVector *phi_update_inputs {nullptr}; 55 PhiInputsMap *phi_replaced_inputs {nullptr}; 56 }; 57 58 struct LoopClonerData { 59 BasicBlock *outer {nullptr}; 60 BasicBlock *header {nullptr}; 61 BasicBlock *pre_header {nullptr}; 62 ArenaVector<BasicBlock *> *blocks {nullptr}; 63 }; 64 65 public: 66 explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator); 67 68 Graph *CloneGraph(); 69 BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred); 70 Loop *CloneLoop(Loop *loop); 71 bool IsLoopClonable(Loop *loop, size_t inst_limit); 72 73 /** 74 * Make equal to the `factor` number of clones of loop body and insert them into the graph 75 * 76 * /----[pre-loop] 77 * | | 78 * | v 79 * | [loop-body]<----\ 80 * | | | | 81 * | | \-------/ 82 * | | 83 * | v 84 * \--->[outside-block] 85 * | 86 * v 87 * ... 88 * 89 * Cloning with side-exits: 90 * 91 * /----[pre-loop] 92 * | | 93 * | v 94 * | [loop-body]---------\ 95 * | | | 96 * | v | 97 * | [loop-body']------->| 98 * | | | 99 * | v | 100 * | [loop-body'']------>| 101 * | | | 102 * | v | 103 * | [resolver-block]<----/ 104 * | | 105 * | v 106 * \--->[outside-block] 107 * ... 108 * 109 * Cloning without side-exits: 110 * 111 * /----[pre-loop] 112 * | | 113 * | v 114 * | [loop-body] 115 * | | 116 * | v 117 * | [loop-body'] 118 * | | 119 * | v 120 * | [loop-body''] 121 * | | 122 * | v 123 * \--->[outside-block] 124 */ 125 template <UnrollType type> UnrollLoopBody(Loop * loop,size_t factor)126 void UnrollLoopBody(Loop *loop, size_t factor) 127 { 128 ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point"); 129 auto marker_holder = MarkerHolder(GetGraph()); 130 clone_marker_ = marker_holder.GetMarker(); 131 auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS); 132 133 auto clone_count = factor - 1; 134 for (size_t i = 0; i < clone_count; i++) { 135 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph()); 136 BuildLoopUnrollControlFlow(unroll_data); 137 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 138 if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) { 139 // Users update should be done on the last no-side-exits unroll iteration 140 // before building loop data-flow 141 if (i + 1 == clone_count) { // last_iteration 142 UpdateUsersAfterNoSideExitsUnroll(unroll_data); 143 } 144 } 145 BuildLoopUnrollDataFlow(unroll_data); 146 } 147 148 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 149 if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) { 150 RemoveLoopBackEdge(unroll_data); 151 } 152 } 153 154 private: 155 // Whole graph cloning 156 void CopyLoop(Loop *loop, Loop *cloned_loop); 157 void CloneLinearOrder(Graph *new_graph); 158 void BuildControlFlow(); 159 void BuildDataFlow(); 160 void CloneAnalyses(Graph *new_graph); 161 // Loop cloning 162 LoopClonerData *PrepareLoopToClone(Loop *loop); 163 BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header); 164 void BuildLoopCloneControlFlow(LoopClonerData *unroll_data); 165 void BuildLoopCloneDataFlow(LoopClonerData *unroll_data); 166 void MakeLoopCloneInfo(LoopClonerData *unroll_data); 167 // Unroll cloning 168 LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits); 169 BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge); 170 BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge); 171 void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data); 172 void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data); 173 void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data); 174 void RemoveLoopBackEdge(const LoopUnrollData *unroll_data); 175 // Loop header cloning 176 void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone); 177 void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block); 178 // Cloned blocks and instructions getters HasClone(const BasicBlock * block)179 bool HasClone(const BasicBlock *block) 180 { 181 return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr); 182 } 183 GetClone(const BasicBlock * block)184 BasicBlock *GetClone(const BasicBlock *block) 185 { 186 ASSERT(block != nullptr); 187 ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block"); 188 ASSERT_DO(HasClone(block), block->Dump(&std::cerr)); 189 return clone_blocks_[block->GetId()]; 190 } 191 HasClone(Inst * inst)192 bool HasClone(Inst *inst) 193 { 194 return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size()); 195 } 196 GetClone(Inst * inst)197 Inst *GetClone(Inst *inst) 198 { 199 ASSERT(inst != nullptr); 200 ASSERT(HasClone(inst)); 201 202 // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here 203 ASSERT(inst->GetBasicBlock() != nullptr); 204 ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(), 205 "GraphCloner probably caught an instruction from disconnected block"); 206 // Empty clone_blocks_ means we are cloning only one basic block 207 ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock())); 208 209 return clone_instructions_[inst->GetCloneNumber()]; 210 } 211 212 /** 213 * For each block of input vector create a new one empty block and populate it with the instructions, 214 * cloned form the original block 215 */ 216 template <InstCloneType type, bool skip_safepoints> CloneBlocksAndInstructions(const ArenaVector<BasicBlock * > & blocks,Graph * target_graph)217 void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph) 218 { 219 clone_blocks_.clear(); 220 clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr); 221 clone_instructions_.clear(); 222 size_t inst_count = 0; 223 for (const auto &block : blocks) { 224 if (block != nullptr) { 225 auto clone = block->Clone(target_graph); 226 clone_blocks_[block->GetId()] = clone; 227 CloneInstructions<type, skip_safepoints>(block, clone, &inst_count); 228 if (block->IsTryBegin()) { 229 target_graph->AppendTryBeginBlock(clone); 230 } 231 } 232 } 233 } 234 235 /** 236 * Clone block's instructions and append to the block's clone 237 */ 238 template <InstCloneType type, bool skip_safepoints> CloneInstructions(const BasicBlock * block,BasicBlock * clone,size_t * inst_count)239 void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count) 240 { 241 for (auto inst : block->Insts()) { 242 clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph())); 243 } 244 245 if constexpr (type == InstCloneType::CLONE_ALL) { // NOLINT 246 for (auto phi : block->PhiInsts()) { 247 auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph()); 248 clone->AppendPhi(phi_clone->CastToPhi()); 249 } 250 } 251 } 252 253 /** 254 * Clone instruction and mark both: clone and cloned 255 */ CloneInstruction(Inst * inst,size_t * inst_count,Graph * target_graph)256 Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph) 257 { 258 inst->SetCloneNumber((*inst_count)++); 259 auto inst_clone = inst->Clone(target_graph); 260 clone_instructions_.push_back(inst_clone); 261 inst->SetMarker(clone_marker_); 262 inst_clone->SetMarker(clone_marker_); 263 if (inst->GetBasicBlock()->GetGraph() != target_graph) { 264 inst_clone->SetId(inst->GetId()); 265 } 266 return inst_clone; 267 } 268 269 inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop); 270 271 /** 272 * Use the following rules cloning the inputs: 273 * - if input of the original instruction has clone - insert this clone as input 274 * - otherwise - use original input as clone instruction input 275 */ 276 template <bool replace_header_phi> 277 void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr) 278 { 279 auto clone = GetClone(inst); 280 for (size_t i = 0; i < inst->GetInputsCount(); i++) { 281 auto input = inst->GetInput(i).GetInst(); 282 if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) { 283 ASSERT(block != nullptr); 284 input = input->CastToPhi()->GetPhiInput(block); 285 } else if (HasClone(input)) { 286 input = GetClone(input); 287 } 288 289 if (clone->IsOperandsDynamic()) { 290 clone->AppendInput(input); 291 if (clone->IsSaveState()) { 292 static_cast<SaveStateInst *>(clone)->SetVirtualRegister( 293 i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i)); 294 } else if (clone->IsPhi()) { 295 clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i)); 296 } 297 } else { 298 clone->SetInput(i, input); 299 } 300 } 301 } 302 303 template <CloneEdgeType edge_type> CloneEdges(BasicBlock * block)304 void CloneEdges(BasicBlock *block) 305 { 306 auto clone = GetClone(block); 307 auto block_edges = &block->GetPredsBlocks(); 308 auto clone_edges = &clone->GetPredsBlocks(); 309 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 310 if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) { 311 block_edges = &block->GetSuccsBlocks(); 312 clone_edges = &clone->GetSuccsBlocks(); 313 } 314 ASSERT(clone_edges->empty()); 315 clone_edges->reserve(block_edges->size()); 316 for (auto edge : *block_edges) { 317 clone_edges->push_back(GetClone(edge)); 318 } 319 } 320 GetGraph()321 Graph *GetGraph() 322 { 323 return graph_; 324 } 325 326 void UpdateCaller(Inst *inst); 327 328 private: 329 Graph *graph_; 330 ArenaAllocator *allocator_; 331 ArenaAllocator *local_allocator_; 332 ArenaVector<BasicBlock *> clone_blocks_; 333 InstVector clone_instructions_; 334 Marker clone_marker_ {UNDEF_MARKER}; 335 }; 336 } // namespace panda::compiler 337 338 #endif // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 339