1 /* 2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 18 19 #include "inst.h" 20 #include "optimizer/analysis/loop_analyzer.h" 21 #include "optimizer/analysis/rpo.h" 22 #include "optimizer/ir/analysis.h" 23 #include "optimizer/ir/basicblock.h" 24 #include "optimizer/ir/graph.h" 25 #include "utils/arena_containers.h" 26 #include "utils/hash.h" 27 28 namespace ark::compiler { 29 class BasicBlock; 30 class Graph; 31 class Inst; 32 33 // NOLINTNEXTLINE(readability-redundant-declaration) 34 bool IsLoopSingleBackEdgeExitPoint(Loop *loop); 35 36 enum UnrollType : uint8_t { 37 UNROLL_WITH_SIDE_EXITS = 0, 38 UNROLL_WITHOUT_SIDE_EXITS = 1, 39 UNROLL_REMOVE_BACK_EDGE = 2, 40 UNROLL_POST_INCREMENT = UNROLL_REMOVE_BACK_EDGE, 41 UNROLL_CONSTANT_ITERATIONS = UNROLL_WITHOUT_SIDE_EXITS | UNROLL_REMOVE_BACK_EDGE 42 }; 43 44 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS }; 45 46 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC }; 47 48 /** 49 * Helper-class, provides methods to: 50 * - Clone the whole graph; 51 * - Clone loop; 52 * - Unroll loop; 53 * - Peel loop; 54 */ 55 class GraphCloner { 56 using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>; 57 58 struct LoopUnrollData { 59 BasicBlock *header {nullptr}; 60 BasicBlock *backedge {nullptr}; 61 BasicBlock *exitBlock {nullptr}; 62 BasicBlock *outer {nullptr}; 63 ArenaVector<BasicBlock *> *blocks {nullptr}; 64 InstVector *phiUpdateInputs {nullptr}; 65 PhiInputsMap *phiReplacedInputs {nullptr}; 66 }; 67 68 protected: 69 struct LoopClonerData { 70 BasicBlock *outer {nullptr}; 71 BasicBlock *header {nullptr}; 72 BasicBlock *preHeader {nullptr}; 73 ArenaVector<BasicBlock *> *blocks {nullptr}; 74 }; 75 76 public: 77 PANDA_PUBLIC_API explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *localAllocator); 78 79 PANDA_PUBLIC_API Graph *CloneGraph(); 80 BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceablePred); 81 Loop *CloneLoop(Loop *loop); 82 83 /** 84 * Make equal to the `factor` number of clones of loop body and insert them into the graph 85 * 86 * /----[pre-loop] 87 * | | 88 * | v 89 * | [loop-body]<----\ 90 * | | | | 91 * | | \-------/ 92 * | | 93 * | v 94 * \--->[outside-block] 95 * | 96 * v 97 * ... 98 * 99 * Cloning with side-exits: 100 * 101 * /----[pre-loop] 102 * | | 103 * | v 104 * | [loop-body]---------\ 105 * | | | 106 * | v | 107 * | [loop-body']------->| 108 * | | | 109 * | v | 110 * | [loop-body'']------>| 111 * | | | 112 * | v | 113 * | [resolver-block]<----/ 114 * | | 115 * | v 116 * \--->[outside-block] 117 * ... 118 * 119 * Cloning without side-exits: 120 * 121 * /----[pre-loop] 122 * | | 123 * | v 124 * | [loop-body] 125 * | | 126 * | v 127 * | [loop-body'] 128 * | | 129 * | v 130 * | [loop-body''] 131 * | | 132 * | v 133 * \--->[outside-block] 134 */ 135 template <UnrollType TYPE> UnrollLoopBody(Loop * loop,size_t factor)136 void UnrollLoopBody(Loop *loop, size_t factor) 137 { 138 ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point"); 139 auto markerHolder = MarkerHolder(GetGraph()); 140 cloneMarker_ = markerHolder.GetMarker(); 141 auto unrollData = PrepareLoopToUnroll(loop, (TYPE & UnrollType::UNROLL_WITHOUT_SIDE_EXITS) == 0); 142 143 ASSERT(factor > 0); 144 auto cloneCount = factor - 1; 145 for (size_t i = 0; i < cloneCount; i++) { 146 ASSERT(unrollData != nullptr); 147 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unrollData->blocks, GetGraph()); 148 BuildLoopUnrollControlFlow(unrollData); 149 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 150 if constexpr ((TYPE & UnrollType::UNROLL_WITHOUT_SIDE_EXITS) != 0) { 151 // Users update should be done on the last no-side-exits unroll iteration 152 // before building loop data-flow 153 if (i + 1 == cloneCount) { // last_iteration 154 UpdateUsersAfterNoSideExitsUnroll(unrollData); 155 } 156 } 157 BuildLoopUnrollDataFlow(unrollData); 158 } 159 160 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 161 if constexpr ((TYPE & UnrollType::UNROLL_REMOVE_BACK_EDGE) != 0) { 162 RemoveLoopBackEdge(unrollData); 163 } 164 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 165 if constexpr (TYPE == UnrollType::UNROLL_CONSTANT_ITERATIONS) { 166 RemoveLoopPreHeader(unrollData); 167 } 168 ssb_.FixPhisWithCheckInputs(unrollData->outer); 169 } 170 171 protected: GetGraph()172 Graph *GetGraph() 173 { 174 return graph_; 175 } 176 GetClone(const BasicBlock * block)177 BasicBlock *GetClone(const BasicBlock *block) 178 { 179 ASSERT(block != nullptr); 180 ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block"); 181 ASSERT_DO(HasClone(block), block->Dump(&std::cerr)); 182 return cloneBlocks_[block->GetId()]; 183 } 184 185 template <CloneEdgeType EDGE_TYPE> CloneEdges(BasicBlock * block)186 void CloneEdges(BasicBlock *block) 187 { 188 auto clone = GetClone(block); 189 190 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 191 if constexpr (EDGE_TYPE == CloneEdgeType::EDGE_SUCC) { 192 auto blockEdges = &block->GetSuccsBlocks(); 193 auto cloneEdges = &clone->GetSuccsBlocks(); 194 ASSERT(cloneEdges->empty()); 195 cloneEdges->reserve(blockEdges->size()); 196 for (auto edge : *blockEdges) { 197 cloneEdges->push_back(GetClone(edge)); 198 } 199 } else { 200 auto blockEdges = &block->GetPredsBlocks(); 201 auto cloneEdges = &clone->GetPredsBlocks(); 202 203 ASSERT(cloneEdges->empty()); 204 cloneEdges->reserve(blockEdges->size()); 205 for (auto edge : *blockEdges) { 206 cloneEdges->push_back(GetClone(edge)); 207 } 208 } 209 } 210 211 /** 212 * For each block of input vector create a new one empty block and populate it with the instructions, 213 * cloned form the original block 214 */ 215 template <InstCloneType TYPE, bool SKIP_SAFEPOINTS> CloneBlocksAndInstructions(const ArenaVector<BasicBlock * > & blocks,Graph * targetGraph)216 void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *targetGraph) 217 { 218 cloneBlocks_.clear(); 219 cloneBlocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr); 220 cloneInstructions_.clear(); 221 size_t instCount = 0; 222 for (const auto &block : blocks) { 223 if (block != nullptr) { 224 auto clone = block->Clone(targetGraph); 225 cloneBlocks_[block->GetId()] = clone; 226 CloneInstructions<TYPE, SKIP_SAFEPOINTS>(block, clone, &instCount); 227 if (block->IsTryBegin()) { 228 clone->SetTryBegin(true); 229 targetGraph->AppendTryBeginBlock(clone); 230 } 231 if (block->IsTryEnd()) { 232 clone->SetTryEnd(true); 233 } 234 if (block->IsTry()) { 235 clone->SetTry(true); 236 } 237 if (block->IsCatch()) { 238 clone->SetCatch(true); 239 } 240 } 241 } 242 } 243 244 void MakeLoopCloneInfo(LoopClonerData *unrollData); 245 LoopClonerData *PrepareLoopToClone(Loop *loop); 246 void ProcessMarkedInsts(LoopClonerData *data); 247 BasicBlock *CreateNewOutsideSucc(BasicBlock *outsideSucc, BasicBlock *backEdge, BasicBlock *preHeader); 248 249 /** 250 * Use the following rules cloning the inputs: 251 * - if input of the original instruction has clone - insert this clone as input 252 * - otherwise - use original input as clone instruction input 253 */ 254 template <bool REPLACE_HEADER_PHI> 255 void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr) 256 { 257 auto clone = GetClone(inst); 258 for (size_t i = 0; i < inst->GetInputsCount(); i++) { 259 auto input = inst->GetInput(i).GetInst(); 260 if (REPLACE_HEADER_PHI && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) { 261 ASSERT(block != nullptr); 262 input = input->CastToPhi()->GetPhiInput(block); 263 } else if (HasClone(input)) { 264 input = GetClone(input); 265 } 266 267 if (clone->IsOperandsDynamic()) { 268 clone->AppendInput(input); 269 if (clone->IsSaveState()) { 270 static_cast<SaveStateInst *>(clone)->SetVirtualRegister( 271 i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i)); 272 } else if (clone->IsPhi()) { 273 clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i)); 274 } 275 } else { 276 clone->SetInput(i, input); 277 } 278 } 279 } 280 281 void UpdateCaller(Inst *inst); 282 GetClone(Inst * inst)283 Inst *GetClone(Inst *inst) 284 { 285 ASSERT(inst != nullptr); 286 ASSERT(HasClone(inst)); 287 288 // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here 289 ASSERT(inst->GetBasicBlock() != nullptr); 290 ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(), 291 "GraphCloner probably caught an instruction from disconnected block"); 292 // Empty clone_blocks_ means we are cloning only one basic block 293 ASSERT(cloneBlocks_.empty() || HasClone(inst->GetBasicBlock())); 294 295 return cloneInstructions_[inst->GetCloneNumber()]; 296 } 297 298 // Cloned blocks and instructions getters HasClone(const BasicBlock * block)299 bool HasClone(const BasicBlock *block) 300 { 301 return (block->GetId() < cloneBlocks_.size()) && (cloneBlocks_[block->GetId()] != nullptr); 302 } 303 HasClone(Inst * inst)304 bool HasClone(Inst *inst) 305 { 306 return inst->IsMarked(cloneMarker_) && (inst->GetCloneNumber() < cloneInstructions_.size()); 307 } 308 309 private: 310 // Whole graph cloning 311 void CopyLoop(Loop *loop, Loop *clonedLoop); 312 void CloneLinearOrder(Graph *newGraph); 313 void BuildControlFlow(); 314 void BuildTryCatchLogic(Graph *newGraph); 315 void BuildDataFlow(); 316 void CloneConstantsInfo(Graph *newGraph); 317 void CloneAnalyses(Graph *newGraph); 318 // Loop cloning 319 void SplitPreHeader(Loop *loop); 320 GraphCloner::LoopClonerData *PopulateLoopClonerData(Loop *loop, BasicBlock *preHeader, BasicBlock *outsideSucc); 321 void BuildLoopCloneControlFlow(LoopClonerData *unrollData); 322 void BuildLoopCloneDataFlow(LoopClonerData *unrollData); 323 // Unroll cloning 324 LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool cloneSideExits); 325 BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *backEdge); 326 BasicBlock *SplitBackEdge(LoopUnrollData *unrollData, Loop *loop, BasicBlock *backEdge); 327 Inst *GetCompareInst(Inst *ifimm); 328 void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unrollData); 329 void UpdateOutloopUsers(Loop *loop, Inst *inst); 330 void BuildLoopUnrollControlFlow(LoopUnrollData *unrollData); 331 void BuildLoopUnrollDataFlow(LoopUnrollData *unrollData); 332 void RemoveLoopBackEdge(const LoopUnrollData *unrollData); 333 void RemoveLoopPreHeader(const LoopUnrollData *unrollData); 334 // Loop header cloning 335 void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone); 336 void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outerBlock); 337 338 /// Clone block's instructions and append to the block's clone 339 template <InstCloneType TYPE, bool SKIP_SAFEPOINTS> CloneInstructions(const BasicBlock * block,BasicBlock * clone,size_t * instCount)340 void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *instCount) 341 { 342 for (auto inst : block->Insts()) { 343 if constexpr (SKIP_SAFEPOINTS) { // NOLINT 344 if (inst->GetOpcode() == Opcode::SafePoint) { 345 continue; 346 } 347 } 348 if constexpr (TYPE != InstCloneType::CLONE_ALL) { // NOLINT 349 if (inst->GetOpcode() == Opcode::NOP) { 350 continue; 351 } 352 } 353 354 ASSERT(clone != nullptr); 355 auto *clonedInst = CloneInstruction(inst, instCount, clone->GetGraph()); 356 357 cloneInstMap_[inst] = clonedInst; 358 cloneInstMap_[clonedInst] = inst; 359 360 clone->AppendInst(clonedInst); 361 } 362 363 if constexpr (TYPE == InstCloneType::CLONE_ALL) { // NOLINT 364 for (auto phi : block->PhiInsts()) { 365 auto phiClone = CloneInstruction(phi, instCount, clone->GetGraph()); 366 cloneInstMap_[phi] = phiClone; 367 cloneInstMap_[phiClone] = phi; 368 clone->AppendPhi(phiClone->CastToPhi()); 369 } 370 } 371 } 372 373 /// Clone instruction and mark both: clone and cloned CloneInstruction(Inst * inst,size_t * instCount,Graph * targetGraph)374 Inst *CloneInstruction(Inst *inst, size_t *instCount, Graph *targetGraph) 375 { 376 inst->SetCloneNumber((*instCount)++); 377 auto instClone = inst->Clone(targetGraph); 378 cloneInstructions_.push_back(instClone); 379 inst->SetMarker(cloneMarker_); 380 instClone->SetMarker(cloneMarker_); 381 if (inst->GetBasicBlock()->GetGraph() != targetGraph) { 382 instClone->SetId(inst->GetId()); 383 } 384 return instClone; 385 } 386 387 bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop); 388 389 // clone throwable inst -> catch blocks mapping 390 void CloneThrowableInstHandlers(Graph *newGraph); 391 392 protected: 393 Marker cloneMarker_ {UNDEF_MARKER}; // NOLINT(misc-non-private-member-variables-in-classes) 394 395 private: 396 Graph *graph_; 397 ArenaAllocator *allocator_; 398 ArenaAllocator *localAllocator_; 399 ArenaVector<BasicBlock *> cloneBlocks_; 400 InstVector cloneInstructions_; 401 SaveStateBridgesBuilder ssb_; 402 ArenaMap<const Inst *, const Inst *> cloneInstMap_; 403 }; 404 } // namespace ark::compiler 405 406 #endif // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 407