1 /* 2 * Copyright (c) 2021-2023 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H 17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H 18 19 #include "optimizer/ir/graph.h" 20 #include "optimizer/ir/analysis.h" 21 #include "optimizer/ir/basicblock.h" 22 #include "optimizer/ir/inst.h" 23 #include "optimizer/pass.h" 24 #include "utils/arena_containers.h" 25 #include "vn.h" 26 #include "compiler_logger.h" 27 #include "utils/logger.h" 28 29 namespace panda::compiler { 30 class Cse : public Optimization { 31 using PairInsts = std::pair<Inst *, Inst *>; 32 using PairVectorsInsts = std::pair<InstVector, InstVector>; 33 34 public: Cse(Graph * graph)35 explicit Cse(Graph *graph) 36 : Optimization(graph), 37 replacePair_(graph->GetLocalAllocator()->Adapter()), 38 minReplaceStar_(graph->GetLocalAllocator()->Adapter()), 39 deletedInsts_(graph->GetLocalAllocator()->Adapter()), 40 sameExpPair_(graph->GetLocalAllocator()->Adapter()), 41 matchedTuple_(graph->GetLocalAllocator()->Adapter()), 42 candidates_(graph->GetLocalAllocator()->Adapter()) 43 { 44 } 45 46 NO_MOVE_SEMANTIC(Cse); 47 NO_COPY_SEMANTIC(Cse); 48 ~Cse() override = default; 49 50 bool RunImpl() override; 51 IsEnable()52 bool IsEnable() const override 53 { 54 return g_options.IsCompilerCse(); 55 } 56 GetPassName()57 const char *GetPassName() const override 58 { 59 return "Cse"; 60 } 61 62 private: 63 void LocalCse(); 64 /* 65 LocalCse eliminates the duplicate computations for every basic block. 66 If there are two instructions whose inputs and opcodes and types are all the same, 67 then we move users from second instruction to first instructions(first instruction 68 is at front of the second), and then delete the second instruction. 69 */ 70 71 void DomTreeCse(); 72 /* 73 DomTreeCse eliminates the duplicate computations along dominator tree. 74 If there are two instructions inst1 and inst2 with the same expression 75 such that inst1's block dominates inst2's block, then we can move 76 the users of inst2 into inst1, and then delete inst2. Here we make a 77 convention that a block does not dominate itself. It can be viewed as 78 a generation of LocalCse. 79 */ 80 81 void GlobalCse(); 82 /* 83 GlobalCse eliminates the redundant computations whose result can be obtained 84 from its (two) predecessors. In this case we will use a new Phi instruction 85 to take place of the redundant instruction. For example, 86 __________________________________ ____________________________________ 87 |BB 3: ... | |BB 4: ... | 88 | 6.u32 Add v0, v1 -> (v8) | | 9.u32 Add v0, v1 -> (v15) | 89 | ... | | ... | 90 |_________________________________| |___________________________________| 91 \ / 92 \ / 93 \ / 94 \ / 95 \____________________________________/ 96 |BB 5: ... | 97 | 14.u32 Add v0, v1 -> (v20) | 98 | ... | 99 |____________________________________| 100 101 GlobalCse will add a new Phi instruction, to take place of inst 14., as follows: 102 __________________________________ ____________________________________ 103 |BB 3: ... | |BB 4: ... | 104 | 6.u32 Add v0, v1 -> (v8, v33p) | | 9.u32 Add v0, v1 -> (v15, v33p) | 105 | ... | | ... | 106 |_________________________________| |___________________________________| 107 \ / 108 \ / 109 \ / 110 \ / 111 __ \_____________________________________/___ 112 |BB 5: 33.u32 Phi v6(bb3), v9(bb4) -> (v20) | 113 | ... | 114 | ... | 115 |_____________________________________________| 116 */ 117 struct Exp { 118 Opcode opcode; 119 DataType::Type type; 120 Inst *inputl; 121 Inst *inputr; 122 }; NotSameExp(Exp exp1,Exp exp2)123 static inline bool NotSameExp(Exp exp1, Exp exp2) 124 { 125 return exp1.opcode != exp2.opcode || exp1.type != exp2.type || exp1.inputl != exp2.inputl || 126 exp1.inputr != exp2.inputr; 127 } 128 /* Exp is the key of the instruction. 129 * We will use ArenaMap<Exp, ArenaVector<Inst*>>canditates to record the insts that have been visited. 130 * The instructions that have the same Exp will be put in a ArenaVector whose key is Exp. 131 * With such map, we can search duplicate computations more efficiently. 132 */ 133 GetExp(Inst * inst)134 static Exp GetExp(Inst *inst) 135 { 136 ASSERT(IsLegalExp(inst)); 137 Exp exp = {inst->GetOpcode(), inst->GetType(), inst->GetDataFlowInput(inst->GetInput(0).GetInst()), 138 inst->GetDataFlowInput(inst->GetInput(1).GetInst())}; 139 return exp; 140 } 141 GetExpCommutative(Inst * inst)142 static Exp GetExpCommutative(Inst *inst) 143 { 144 ASSERT(IsLegalExp(inst)); 145 Exp exp = {inst->GetOpcode(), inst->GetType(), inst->GetDataFlowInput(inst->GetInput(1).GetInst()), 146 inst->GetDataFlowInput(inst->GetInput(0).GetInst())}; 147 return exp; 148 } 149 150 struct Cmpexp { operatorCmpexp151 bool operator()(Exp exp1, Exp exp2) const 152 { 153 if (exp1.opcode != exp2.opcode) { 154 return static_cast<int>(exp1.opcode) < static_cast<int>(exp2.opcode); 155 } 156 if (exp1.type != exp2.type) { 157 return exp1.type < exp2.type; 158 } 159 if (exp1.inputl != exp2.inputl) { 160 return exp1.inputl->GetId() < exp2.inputl->GetId(); 161 } 162 if (exp1.inputr != exp2.inputr) { 163 return exp1.inputr->GetId() < exp2.inputr->GetId(); 164 } 165 return false; 166 } 167 }; 168 169 // We eliminate the duplicate expressions which contain one of the following binary operators. One can add other 170 // operators if necessary. IsLegalExp(Inst * inst)171 static bool IsLegalExp(Inst *inst) 172 { 173 if (inst->IsNotCseApplicable() || !inst->HasUsers()) { 174 return false; 175 } 176 switch (inst->GetOpcode()) { 177 case Opcode::Add: 178 case Opcode::Sub: 179 case Opcode::Mul: 180 case Opcode::Div: 181 case Opcode::Mod: 182 case Opcode::Min: 183 case Opcode::Max: 184 case Opcode::Shl: 185 case Opcode::Shr: 186 case Opcode::AShr: 187 case Opcode::And: 188 case Opcode::Or: 189 case Opcode::Xor: 190 return true; 191 default: 192 return false; 193 } 194 } 195 IsCommutative(Inst * inst)196 static bool IsCommutative(Inst *inst) 197 { 198 switch (inst->GetOpcode()) { 199 case Opcode::Add: 200 case Opcode::Mul: 201 case Opcode::Min: 202 case Opcode::Max: 203 case Opcode::And: 204 case Opcode::Or: 205 case Opcode::Xor: 206 return true; 207 default: 208 return false; 209 } 210 } 211 212 class Finder { 213 public: Finder(Inst * inst)214 explicit Finder(Inst *inst) : instruction_(inst) {} operator()215 bool operator()(Inst *instn) const 216 { 217 return !HasOsrEntryBetween(instn, instruction_); 218 } 219 220 private: 221 Inst *instruction_; 222 }; 223 224 template <typename T> NotIn(const T & candidates,Exp exp)225 bool NotIn(const T &candidates, Exp exp) 226 { 227 return candidates.find(exp) == candidates.end(); 228 } 229 230 template <typename T> AllNotIn(const T & candidates,Inst * inst)231 bool AllNotIn(const T &candidates, Inst *inst) 232 { 233 Exp exp = GetExp(inst); 234 Exp expc = GetExpCommutative(inst); 235 return NotIn(candidates, exp) && (!IsCommutative(inst) || NotIn(candidates, expc)); 236 } 237 InVector(const InstVector & dele,Inst * inst)238 static bool InVector(const InstVector &dele, Inst *inst) 239 { 240 return std::find(dele.begin(), dele.end(), inst) != dele.end(); 241 } 242 DeleteInstLog(Inst * inst)243 void DeleteInstLog(Inst *inst) 244 { 245 COMPILER_LOG(DEBUG, CSE_OPT) << " Cse deletes inst " << inst->GetId(); 246 GetGraph()->GetEventWriter().EventCse(inst->GetId(), inst->GetPc()); 247 } 248 RemoveInstsIn(InstVector * deletedInsts)249 void RemoveInstsIn(InstVector *deletedInsts) 250 { 251 // delete redundant insts 252 if (deletedInsts->empty()) { 253 return; 254 } 255 for (auto inst : *deletedInsts) { 256 DeleteInstLog(inst); 257 auto bb = inst->GetBasicBlock(); 258 bb->RemoveInst(inst); 259 } 260 isApplied_ = true; 261 } 262 263 void ConvertTreeForestToStarForest(); 264 void EliminateAlongDomTree(); 265 void CollectTreeForest(); 266 void BuildSetOfPairs(BasicBlock *block); 267 268 private: 269 bool isApplied_ = false; 270 ArenaVector<PairInsts> replacePair_; 271 ArenaVector<PairInsts> minReplaceStar_; 272 InstVector deletedInsts_; 273 ArenaMap<Exp, PairVectorsInsts, Cmpexp> sameExpPair_; 274 ArenaVector<std::pair<Inst *, PairInsts>> matchedTuple_; 275 ArenaMap<Exp, InstVector, Cmpexp> candidates_; 276 }; 277 } // namespace panda::compiler 278 279 #endif // COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H 280