1 /* 2 * Copyright (c) 2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_ANALYSIS_HOTNESS_PROPAGATION_H 17 #define COMPILER_OPTIMIZER_ANALYSIS_HOTNESS_PROPAGATION_H 18 19 #include <optional> 20 #include "optimizer/ir/graph.h" 21 #include "optimizer/ir/inst.h" 22 23 namespace ark::compiler { 24 25 class HotnessPropagation { 26 public: HotnessPropagation(Graph * graph)27 explicit HotnessPropagation(Graph *graph) : graph_(graph) {} 28 Run()29 void Run() 30 { 31 if (graph_->HasIrreducibleLoop()) { 32 return; 33 } 34 auto markerHolder = MarkerHolder(graph_); 35 mrk_ = markerHolder.GetMarker(); 36 auto *eb = graph_->GetEndBlock(); 37 auto *bb = graph_->GetStartBlock(); 38 if (eb != nullptr) { 39 auto retHotness = EnsureBackedgeResolvable<true>(eb, nullptr); 40 bb->SetHotness(retHotness); 41 } 42 bb->SetMarker(mrk_); 43 44 ASSERT(bb->GetPredsBlocks().empty()); 45 ASSERT(bb->GetSuccsBlocks().size() == 1); 46 47 PropagateHotness(bb->GetSuccessor(0)); 48 } 49 50 private: PropagateHotness(BasicBlock * bb)51 void PropagateHotness(BasicBlock *bb) 52 { 53 if (!bb->IsMarked(mrk_)) { 54 // The block hotness is unknown, resolve it: 55 if (bb->IsLoopHeader()) { 56 auto canPropagateToOuterSuccessor = FindLoopHeaderHotness(bb); 57 if (!canPropagateToOuterSuccessor) { 58 return; 59 } 60 } 61 62 // Check all preds are resolved, if not - come back here later via currently unresolved pred: 63 for (auto *p : bb->GetPredsBlocks()) { 64 ASSERT(p->GetSuccsBlocks().size() != 2U || p->IsTryBegin() || p->IsTryCatch() || 65 p->GetLastInst()->GetOpcode() == Opcode::IfImm || p->GetLastInst()->GetOpcode() == Opcode::If || 66 p->GetLastInst()->GetOpcode() == Opcode::AddOverflow || 67 p->GetLastInst()->GetOpcode() == Opcode::SubOverflow); 68 69 auto predLastOpcode = p->GetLastInst() == nullptr ? Opcode::INVALID : p->GetLastInst()->GetOpcode(); 70 bool predIsBranch = (predLastOpcode == Opcode::IfImm) || (predLastOpcode == Opcode::If); 71 if (!p->IsMarked(mrk_) && !p->IsCatch() && !predIsBranch) { 72 return; 73 } 74 } 75 76 size_t predsHtn = 0; 77 for (auto *p : bb->GetPredsBlocks()) { 78 predsHtn += GetEdgeHotness(p, bb); 79 } 80 bb->SetHotness(predsHtn); 81 bb->SetMarker(mrk_); 82 } 83 84 if (bb->IsTryBegin() || bb->IsTryEnd()) { 85 // Do not propagate hotness to catch blocks: 86 if (bb->GetSuccsBlocks().size() > 1) { 87 auto *tb = bb->GetSuccessor(0); 88 if (!tb->IsMarked(mrk_)) { 89 PropagateHotness(tb); 90 } 91 } 92 } else { 93 ASSERT(bb->GetSuccsBlocks().size() <= 2U); 94 for (auto *s : bb->GetSuccsBlocks()) { 95 if (!s->IsMarked(mrk_)) { 96 PropagateHotness(s); 97 } 98 } 99 } 100 } 101 FindLoopHeaderHotness(BasicBlock * loopHeader)102 bool FindLoopHeaderHotness(BasicBlock *loopHeader) 103 { 104 // Check that loop has conditional branches (i.e. has profile info). 105 // There may be loops in try-block without conditional branches, they considered as finite, so 106 // it is neccessary to conservatively check the loop if a graph has try-begin construction. 107 if (loopHeader->GetLoop()->IsInfinite() || !loopHeader->GetGraph()->GetTryBeginBlocks().empty()) { 108 if (!ProcessInfiniteLoop(loopHeader->GetLoop())) { 109 return false; 110 } 111 } 112 auto ol = loopHeader->GetLoop()->GetOuterLoop(); 113 bool allNonBackEdgesPredsVisited = true; 114 for (auto *p : loopHeader->GetPredsBlocks()) { 115 if (ol == p->GetLoop()) { 116 allNonBackEdgesPredsVisited &= p->IsMarked(mrk_); 117 } else { 118 if (!p->IsMarked(mrk_)) { 119 // This should be resolveable for each non-infinite-loop because of existance of loop exits. 120 // The result is not used here. There are two scenarios: 121 // 1. `p` contains `IfInst`, so later actual profiled info will be loaded (again). 122 // 2. `loopHeader` is the single successor of `p` so `p` hotness will be resolved and stored in 123 // p::hotness_ and `p` will be marked. 124 EnsureBackedgeResolvable(p, loopHeader); 125 } 126 } 127 } 128 return allNonBackEdgesPredsVisited; 129 } 130 ProcessInfiniteLoop(Loop * l)131 bool ProcessInfiniteLoop(Loop *l) 132 { 133 bool hasBranches = false; 134 auto checkHasBranches = [&hasBranches](Loop *loop) { 135 for (auto *loopBlock : loop->GetBlocks()) { 136 if ((loopBlock->GetSuccsBlocks().size() == 2U) && (loopBlock->GetLastInst() != nullptr)) { 137 auto opc = loopBlock->GetLastInst()->GetOpcode(); 138 hasBranches |= (opc == Opcode::If || opc == Opcode::IfImm) && !loopBlock->IsCatch(); 139 } 140 } 141 }; 142 143 int64_t infLoopHotness = 0; 144 // Avoid catch-loops (i.e. when catch handler contains associated try-block itself): 145 if (!l->GetHeader()->IsCatchBegin()) { 146 checkHasBranches(l); 147 if (!hasBranches) { 148 for (auto *innerLoop : l->GetInnerLoops()) { 149 checkHasBranches(innerLoop); 150 } 151 } 152 infLoopHotness = std::numeric_limits<int64_t>::max() / 2U; 153 } 154 155 if (!hasBranches) { 156 // Set some large value to mark infinite loop with no profile info: 157 auto setInfiniteHotness = [=](Loop *loop) { 158 for (auto *loopBlock : loop->GetBlocks()) { 159 loopBlock->SetHotness(infLoopHotness); 160 loopBlock->SetMarker(mrk_); 161 } 162 }; 163 setInfiniteHotness(l); 164 for (auto *innerLoop : l->GetInnerLoops()) { 165 setInfiniteHotness(innerLoop); 166 } 167 return false; 168 } 169 return true; 170 } 171 GetEdgeHotness(BasicBlock * pred,BasicBlock * succ)172 size_t GetEdgeHotness(BasicBlock *pred, BasicBlock *succ) 173 { 174 ASSERT(std::find(pred->GetSuccsBlocks().begin(), pred->GetSuccsBlocks().end(), succ) != 175 pred->GetSuccsBlocks().end()); 176 if (pred->IsCatch()) { 177 return 0; 178 } 179 if (pred->IsTryBegin() || pred->IsTryEnd()) { 180 return pred->GetHotness(); 181 } 182 if (pred->GetSuccsBlocks().size() == 2U) { 183 ASSERT(pred->GetTrueSuccessor() == succ || pred->GetFalseSuccessor() == succ); 184 return pred->GetGraph()->GetBranchCounter(pred, pred->GetTrueSuccessor() == succ); 185 } 186 ASSERT(pred->GetSuccsBlocks().size() == 1); 187 return pred->GetHotness(); 188 } 189 TryGetProfiledEdge(BasicBlock * pred,BasicBlock * succ)190 std::optional<size_t> TryGetProfiledEdge(BasicBlock *pred, BasicBlock *succ) 191 { 192 if (pred->IsCatch()) { 193 // Catch-blocks are considered as cold blocks: 194 return 0; 195 } 196 // This algo shouldn't reach catch-begin: 197 ASSERT(!pred->IsCatchBegin()); 198 199 if (pred->IsTryEnd() || pred->IsTryBegin()) { 200 // Try-end and try-begin are transitive blocks: 201 return std::nullopt; 202 } 203 auto pli = pred->GetLastInst(); 204 if (pli != nullptr && (pli->GetOpcode() == Opcode::AddOverflow || pli->GetOpcode() == Opcode::SubOverflow)) { 205 if (pred->GetTrueSuccessor() == succ) { 206 return 0; 207 } 208 ASSERT(pred->GetFalseSuccessor() == succ); 209 return std::nullopt; 210 } 211 switch (pred->GetSuccsBlocks().size()) { 212 case 0: 213 case 1: { 214 return std::nullopt; 215 } 216 case 2U: { 217 if (pred->GetGraph()->IsThrowApplied() && pli != nullptr && pli->GetOpcode() == Opcode::Throw) { 218 return 0; 219 } 220 ASSERT(pli != nullptr && (pli->GetOpcode() == Opcode::If || pli->GetOpcode() == Opcode::IfImm)); 221 ASSERT(pred->GetTrueSuccessor() == succ || pred->GetFalseSuccessor() == succ); 222 return pred->GetGraph()->GetBranchCounter(pred, pred->GetTrueSuccessor() == succ); 223 } 224 } 225 226 UNREACHABLE(); 227 } 228 229 template <bool IS_END_BLOCK = false> EnsureBackedgeResolvable(BasicBlock * pred,BasicBlock * succ)230 size_t EnsureBackedgeResolvable(BasicBlock *pred, [[maybe_unused]] BasicBlock *succ) 231 { 232 ASSERT(pred != nullptr); 233 ASSERT(!pred->IsMarked(mrk_)); 234 if constexpr (!IS_END_BLOCK) { 235 auto res = TryGetProfiledEdge(pred, succ); 236 if (res) { 237 // There is immediate branch, return actual profiled info: 238 return res.value(); 239 } 240 } else { 241 ASSERT(succ == nullptr); 242 } 243 244 // Otherwise, it is a transition-block (i.e. have only one successor), resolve its hotness firstly and return 245 // it: 246 if constexpr (!IS_END_BLOCK) { 247 ASSERT(pred->GetSuccsBlocks().size() == 1 || pred->IsTryBegin() || pred->IsTryEnd() || 248 pred->GetLastInst()->GetOpcode() == Opcode::AddOverflow || 249 pred->GetLastInst()->GetOpcode() == Opcode::SubOverflow || 250 (pred->GetGraph()->IsThrowApplied() && pred->IsEndWithThrow())); 251 } 252 size_t sumPreds = 0; 253 for (auto *newPred : pred->GetPredsBlocks()) { 254 sumPreds += EnsureBackedgeResolvable(newPred, pred); 255 } 256 257 // These blocks can be safely marked as each of them end up in a backedge and cannot transfer CF out of loop 258 // (so no side-exits skipped): 259 pred->SetMarker(mrk_); 260 pred->SetHotness(sumPreds); 261 262 return sumPreds; 263 } 264 265 private: 266 Marker mrk_ {UNDEF_MARKER}; 267 Graph *graph_ {}; 268 }; 269 270 } // namespace ark::compiler 271 272 #endif // COMPILER_OPTIMIZER_ANALYSIS_HOTNESS_PROPAGATION_H 273