/** * Copyright (c) 2021-2022 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H #include "optimizer/ir/basicblock.h" #include "optimizer/ir/graph.h" #include "utils/arena_containers.h" #include "utils/hash.h" namespace panda::compiler { class BasicBlock; class Graph; class Inst; // NOLINTNEXTLINE(readability-redundant-declaration) bool IsLoopSingleBackEdgeExitPoint(Loop *loop); enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT }; enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS }; enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC }; /** * Helper-class, provides methods to: * - Clone the whole graph; * - Clone loop; * - Unroll loop; * - Peel loop; */ class GraphCloner { using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>; struct LoopUnrollData { BasicBlock *header {nullptr}; BasicBlock *backedge {nullptr}; BasicBlock *exit_block {nullptr}; BasicBlock *outer {nullptr}; ArenaVector<BasicBlock *> *blocks {nullptr}; InstVector *phi_update_inputs {nullptr}; PhiInputsMap *phi_replaced_inputs {nullptr}; }; struct LoopClonerData { BasicBlock *outer {nullptr}; BasicBlock *header {nullptr}; BasicBlock *pre_header {nullptr}; ArenaVector<BasicBlock *> *blocks {nullptr}; }; public: explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator); Graph *CloneGraph(); BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred); Loop *CloneLoop(Loop *loop); bool IsLoopClonable(Loop *loop, size_t inst_limit); /** * Make equal to the `factor` number of clones of loop body and insert them into the graph * * /----[pre-loop] * | | * | v * | [loop-body]<----\ * | | | | * | | \-------/ * | | * | v * \--->[outside-block] * | * v * ... * * Cloning with side-exits: * * /----[pre-loop] * | | * | v * | [loop-body]---------\ * | | | * | v | * | [loop-body']------->| * | | | * | v | * | [loop-body'']------>| * | | | * | v | * | [resolver-block]<----/ * | | * | v * \--->[outside-block] * ... * * Cloning without side-exits: * * /----[pre-loop] * | | * | v * | [loop-body] * | | * | v * | [loop-body'] * | | * | v * | [loop-body''] * | | * | v * \--->[outside-block] */ template <UnrollType type> void UnrollLoopBody(Loop *loop, size_t factor) { ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point"); auto marker_holder = MarkerHolder(GetGraph()); clone_marker_ = marker_holder.GetMarker(); auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS); auto clone_count = factor - 1; for (size_t i = 0; i < clone_count; i++) { CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph()); BuildLoopUnrollControlFlow(unroll_data); // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) { // Users update should be done on the last no-side-exits unroll iteration // before building loop data-flow if (i + 1 == clone_count) { // last_iteration UpdateUsersAfterNoSideExitsUnroll(unroll_data); } } BuildLoopUnrollDataFlow(unroll_data); } // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) { RemoveLoopBackEdge(unroll_data); } } private: // Whole graph cloning void CopyLoop(Loop *loop, Loop *cloned_loop); void CloneLinearOrder(Graph *new_graph); void BuildControlFlow(); void BuildDataFlow(); void CloneAnalyses(Graph *new_graph); // Loop cloning LoopClonerData *PrepareLoopToClone(Loop *loop); BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header); void BuildLoopCloneControlFlow(LoopClonerData *unroll_data); void BuildLoopCloneDataFlow(LoopClonerData *unroll_data); void MakeLoopCloneInfo(LoopClonerData *unroll_data); // Unroll cloning LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits); BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge); BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge); void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data); void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data); void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data); void RemoveLoopBackEdge(const LoopUnrollData *unroll_data); // Loop header cloning void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone); void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block); // Cloned blocks and instructions getters bool HasClone(const BasicBlock *block) { return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr); } BasicBlock *GetClone(const BasicBlock *block) { ASSERT(block != nullptr); ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block"); ASSERT_DO(HasClone(block), block->Dump(&std::cerr)); return clone_blocks_[block->GetId()]; } bool HasClone(Inst *inst) { return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size()); } Inst *GetClone(Inst *inst) { ASSERT(inst != nullptr); ASSERT(HasClone(inst)); // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here ASSERT(inst->GetBasicBlock() != nullptr); ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(), "GraphCloner probably caught an instruction from disconnected block"); // Empty clone_blocks_ means we are cloning only one basic block ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock())); return clone_instructions_[inst->GetCloneNumber()]; } /** * For each block of input vector create a new one empty block and populate it with the instructions, * cloned form the original block */ template <InstCloneType type, bool skip_safepoints> void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph) { clone_blocks_.clear(); clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr); clone_instructions_.clear(); size_t inst_count = 0; for (const auto &block : blocks) { if (block != nullptr) { auto clone = block->Clone(target_graph); clone_blocks_[block->GetId()] = clone; CloneInstructions<type, skip_safepoints>(block, clone, &inst_count); if (block->IsTryBegin()) { target_graph->AppendTryBeginBlock(clone); } } } } /** * Clone block's instructions and append to the block's clone */ template <InstCloneType type, bool skip_safepoints> void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count) { for (auto inst : block->Insts()) { clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph())); } if constexpr (type == InstCloneType::CLONE_ALL) { // NOLINT for (auto phi : block->PhiInsts()) { auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph()); clone->AppendPhi(phi_clone->CastToPhi()); } } } /** * Clone instruction and mark both: clone and cloned */ Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph) { inst->SetCloneNumber((*inst_count)++); auto inst_clone = inst->Clone(target_graph); clone_instructions_.push_back(inst_clone); inst->SetMarker(clone_marker_); inst_clone->SetMarker(clone_marker_); if (inst->GetBasicBlock()->GetGraph() != target_graph) { inst_clone->SetId(inst->GetId()); } return inst_clone; } inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop); /** * Use the following rules cloning the inputs: * - if input of the original instruction has clone - insert this clone as input * - otherwise - use original input as clone instruction input */ template <bool replace_header_phi> void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr) { auto clone = GetClone(inst); for (size_t i = 0; i < inst->GetInputsCount(); i++) { auto input = inst->GetInput(i).GetInst(); if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) { ASSERT(block != nullptr); input = input->CastToPhi()->GetPhiInput(block); } else if (HasClone(input)) { input = GetClone(input); } if (clone->IsOperandsDynamic()) { clone->AppendInput(input); if (clone->IsSaveState()) { static_cast<SaveStateInst *>(clone)->SetVirtualRegister( i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i)); } else if (clone->IsPhi()) { clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i)); } } else { clone->SetInput(i, input); } } } template <CloneEdgeType edge_type> void CloneEdges(BasicBlock *block) { auto clone = GetClone(block); auto block_edges = &block->GetPredsBlocks(); auto clone_edges = &clone->GetPredsBlocks(); // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) { block_edges = &block->GetSuccsBlocks(); clone_edges = &clone->GetSuccsBlocks(); } ASSERT(clone_edges->empty()); clone_edges->reserve(block_edges->size()); for (auto edge : *block_edges) { clone_edges->push_back(GetClone(edge)); } } Graph *GetGraph() { return graph_; } void UpdateCaller(Inst *inst); private: Graph *graph_; ArenaAllocator *allocator_; ArenaAllocator *local_allocator_; ArenaVector<BasicBlock *> clone_blocks_; InstVector clone_instructions_; Marker clone_marker_ {UNDEF_MARKER}; }; } // namespace panda::compiler #endif // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H