• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H
17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H
18 
19 #include "optimizer/ir/graph.h"
20 #include "optimizer/ir/analysis.h"
21 #include "optimizer/ir/basicblock.h"
22 #include "optimizer/ir/inst.h"
23 #include "optimizer/pass.h"
24 #include "utils/arena_containers.h"
25 #include "vn.h"
26 #include "compiler_logger.h"
27 #include "utils/logger.h"
28 
29 namespace panda::compiler {
30 class Cse : public Optimization {
31     using PairInsts = std::pair<Inst *, Inst *>;
32     using PairVectorsInsts = std::pair<InstVector, InstVector>;
33 
34 public:
Cse(Graph * graph)35     explicit Cse(Graph *graph)
36         : Optimization(graph),
37           replacePair_(graph->GetLocalAllocator()->Adapter()),
38           minReplaceStar_(graph->GetLocalAllocator()->Adapter()),
39           deletedInsts_(graph->GetLocalAllocator()->Adapter()),
40           sameExpPair_(graph->GetLocalAllocator()->Adapter()),
41           matchedTuple_(graph->GetLocalAllocator()->Adapter()),
42           candidates_(graph->GetLocalAllocator()->Adapter())
43     {
44     }
45 
46     NO_MOVE_SEMANTIC(Cse);
47     NO_COPY_SEMANTIC(Cse);
48     ~Cse() override = default;
49 
50     bool RunImpl() override;
51 
IsEnable()52     bool IsEnable() const override
53     {
54         return g_options.IsCompilerCse();
55     }
56 
GetPassName()57     const char *GetPassName() const override
58     {
59         return "Cse";
60     }
61 
62 private:
63     void LocalCse();
64     /*
65        LocalCse eliminates the duplicate computations for every basic block.
66        If there are two instructions whose inputs and opcodes and types are all the same,
67        then we move users from second instruction to first instructions(first instruction
68        is at front of the second), and then delete the second instruction.
69     */
70 
71     void DomTreeCse();
72     /*
73        DomTreeCse eliminates the duplicate computations along dominator tree.
74        If there are two instructions inst1 and inst2 with the same expression
75        such that inst1's block dominates inst2's block, then we can move
76        the users of inst2 into inst1, and then delete inst2. Here we make a
77        convention that a block does not dominate itself. It can be viewed as
78        a generation of LocalCse.
79     */
80 
81     void GlobalCse();
82     /*
83        GlobalCse eliminates the redundant computations whose result can be obtained
84        from its (two) predecessors. In this case we will use a new Phi instruction
85        to take place of the redundant instruction. For example,
86        __________________________________          ____________________________________
87        |BB 3:         ...                |         |BB 4:           ...                |
88        |      6.u32 Add  v0, v1 -> (v8)  |         |       9.u32 Add  v0, v1 -> (v15)  |
89        |              ...                |         |                ...                |
90        |_________________________________|         |___________________________________|
91                         \                                            /
92                          \                                          /
93                           \                                        /
94                            \                                      /
95                             \____________________________________/
96                             |BB 5:            ...                |
97                             |       14.u32 Add  v0, v1 -> (v20)  |
98                             |                 ...                |
99                             |____________________________________|
100 
101         GlobalCse will add a new Phi instruction, to take place of inst 14., as follows:
102         __________________________________          ____________________________________
103        |BB 3:         ...                |         |BB 4:           ...                |
104        | 6.u32 Add  v0, v1 -> (v8, v33p) |         | 9.u32 Add  v0, v1 -> (v15, v33p)  |
105        |              ...                |         |                ...                |
106        |_________________________________|         |___________________________________|
107                         \                                             /
108                          \                                           /
109                           \                                         /
110                            \                                       /
111                          __ \_____________________________________/___
112                         |BB 5:  33.u32 Phi  v6(bb3), v9(bb4) -> (v20) |
113                         |                     ...                     |
114                         |                     ...                     |
115                         |_____________________________________________|
116     */
117     struct Exp {
118         Opcode opcode;
119         DataType::Type type;
120         Inst *inputl;
121         Inst *inputr;
122     };
NotSameExp(Exp exp1,Exp exp2)123     static inline bool NotSameExp(Exp exp1, Exp exp2)
124     {
125         return exp1.opcode != exp2.opcode || exp1.type != exp2.type || exp1.inputl != exp2.inputl ||
126                exp1.inputr != exp2.inputr;
127     }
128     /*  Exp is the key of the instruction.
129      *  We will use ArenaMap<Exp, ArenaVector<Inst*>>canditates to record the insts that have been visited.
130      *  The instructions that have the same Exp will be put in a ArenaVector whose key is Exp.
131      *  With such map, we can search duplicate computations more efficiently.
132      */
133 
GetExp(Inst * inst)134     static Exp GetExp(Inst *inst)
135     {
136         ASSERT(IsLegalExp(inst));
137         Exp exp = {inst->GetOpcode(), inst->GetType(), inst->GetDataFlowInput(inst->GetInput(0).GetInst()),
138                    inst->GetDataFlowInput(inst->GetInput(1).GetInst())};
139         return exp;
140     }
141 
GetExpCommutative(Inst * inst)142     static Exp GetExpCommutative(Inst *inst)
143     {
144         ASSERT(IsLegalExp(inst));
145         Exp exp = {inst->GetOpcode(), inst->GetType(), inst->GetDataFlowInput(inst->GetInput(1).GetInst()),
146                    inst->GetDataFlowInput(inst->GetInput(0).GetInst())};
147         return exp;
148     }
149 
150     struct Cmpexp {
operatorCmpexp151         bool operator()(Exp exp1, Exp exp2) const
152         {
153             if (exp1.opcode != exp2.opcode) {
154                 return static_cast<int>(exp1.opcode) < static_cast<int>(exp2.opcode);
155             }
156             if (exp1.type != exp2.type) {
157                 return exp1.type < exp2.type;
158             }
159             if (exp1.inputl != exp2.inputl) {
160                 return exp1.inputl->GetId() < exp2.inputl->GetId();
161             }
162             if (exp1.inputr != exp2.inputr) {
163                 return exp1.inputr->GetId() < exp2.inputr->GetId();
164             }
165             return false;
166         }
167     };
168 
169     // We eliminate the duplicate expressions which contain one of the following binary operators. One can add other
170     // operators if necessary.
IsLegalExp(Inst * inst)171     static bool IsLegalExp(Inst *inst)
172     {
173         if (inst->IsNotCseApplicable() || !inst->HasUsers()) {
174             return false;
175         }
176         switch (inst->GetOpcode()) {
177             case Opcode::Add:
178             case Opcode::Sub:
179             case Opcode::Mul:
180             case Opcode::Div:
181             case Opcode::Mod:
182             case Opcode::Min:
183             case Opcode::Max:
184             case Opcode::Shl:
185             case Opcode::Shr:
186             case Opcode::AShr:
187             case Opcode::And:
188             case Opcode::Or:
189             case Opcode::Xor:
190                 return true;
191             default:
192                 return false;
193         }
194     }
195 
IsCommutative(Inst * inst)196     static bool IsCommutative(Inst *inst)
197     {
198         switch (inst->GetOpcode()) {
199             case Opcode::Add:
200             case Opcode::Mul:
201             case Opcode::Min:
202             case Opcode::Max:
203             case Opcode::And:
204             case Opcode::Or:
205             case Opcode::Xor:
206                 return true;
207             default:
208                 return false;
209         }
210     }
211 
212     class Finder {
213     public:
Finder(Inst * inst)214         explicit Finder(Inst *inst) : instruction_(inst) {}
operator()215         bool operator()(Inst *instn) const
216         {
217             return !HasOsrEntryBetween(instn, instruction_);
218         }
219 
220     private:
221         Inst *instruction_;
222     };
223 
224     template <typename T>
NotIn(const T & candidates,Exp exp)225     bool NotIn(const T &candidates, Exp exp)
226     {
227         return candidates.find(exp) == candidates.end();
228     }
229 
230     template <typename T>
AllNotIn(const T & candidates,Inst * inst)231     bool AllNotIn(const T &candidates, Inst *inst)
232     {
233         Exp exp = GetExp(inst);
234         Exp expc = GetExpCommutative(inst);
235         return NotIn(candidates, exp) && (!IsCommutative(inst) || NotIn(candidates, expc));
236     }
237 
InVector(const InstVector & dele,Inst * inst)238     static bool InVector(const InstVector &dele, Inst *inst)
239     {
240         return std::find(dele.begin(), dele.end(), inst) != dele.end();
241     }
242 
DeleteInstLog(Inst * inst)243     void DeleteInstLog(Inst *inst)
244     {
245         COMPILER_LOG(DEBUG, CSE_OPT) << " Cse deletes inst " << inst->GetId();
246         GetGraph()->GetEventWriter().EventCse(inst->GetId(), inst->GetPc());
247     }
248 
RemoveInstsIn(InstVector * deletedInsts)249     void RemoveInstsIn(InstVector *deletedInsts)
250     {
251         // delete redundant insts
252         if (deletedInsts->empty()) {
253             return;
254         }
255         for (auto inst : *deletedInsts) {
256             DeleteInstLog(inst);
257             auto bb = inst->GetBasicBlock();
258             bb->RemoveInst(inst);
259         }
260         isApplied_ = true;
261     }
262 
263     void ConvertTreeForestToStarForest();
264     void EliminateAlongDomTree();
265     void CollectTreeForest();
266     void BuildSetOfPairs(BasicBlock *block);
267 
268 private:
269     bool isApplied_ = false;
270     ArenaVector<PairInsts> replacePair_;
271     ArenaVector<PairInsts> minReplaceStar_;
272     InstVector deletedInsts_;
273     ArenaMap<Exp, PairVectorsInsts, Cmpexp> sameExpPair_;
274     ArenaVector<std::pair<Inst *, PairInsts>> matchedTuple_;
275     ArenaMap<Exp, InstVector, Cmpexp> candidates_;
276 };
277 }  // namespace panda::compiler
278 
279 #endif  // COMPILER_OPTIMIZER_OPTIMIZATIONS_CSE_H
280