• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20 
21 namespace panda::bytecodeopt {
22 
23 /**
24  * Decide if accumulator register gets dirty between two instructions.
25  */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28     ASSERT(src_inst != dst_inst);
29 
30     compiler::BasicBlock *block = src_inst->GetBasicBlock();
31     compiler::Inst *inst = src_inst->GetNext();
32 
33     while (inst != dst_inst) {
34         if (UNLIKELY(inst == nullptr)) {
35             do {
36                 // TODO(rtakacs): visit all the successors to get information about the
37                 // accumulator usage. Only linear flow is supported right now.
38                 if (block->GetSuccsBlocks().size() > 1) {
39                     return true;
40                 }
41 
42                 ASSERT(block->GetSuccsBlocks().size() == 1);
43                 block = block->GetSuccessor(0);
44                 // TODO(rtakacs): only linear flow is supported right now.
45                 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
46                     return true;
47                 }
48             } while (block->IsEmpty() && !block->HasPhi());
49 
50             // Get first phi instruction if exist.
51             // This is requred if dst_inst is a phi node.
52             inst = *(block->AllInsts());
53         } else {
54             if (inst->IsAccWrite()) {
55                 return true;
56             }
57 
58             if (inst->IsAccRead()) {
59                 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
60 
61                 if (input->GetDstReg() != compiler::ACC_REG_ID) {
62                     return true;
63                 }
64             }
65 
66             inst = inst->GetNext();
67         }
68     }
69 
70     return false;
71 }
72 
73 /**
74  * Return true if Phi instruction is marked as optimizable.
75  */
IsPhiOptimizable(compiler::Inst * phi) const76 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
77 {
78     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
79     return phi->IsMarked(acc_marker_);
80 }
81 
82 /**
83  * Return true if instruction can read the accumulator.
84  */
IsAccRead(compiler::Inst * inst) const85 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
86 {
87     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
88 }
89 
90 /**
91  * Return true if instruction can write the accumulator.
92  */
IsAccWrite(compiler::Inst * inst) const93 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
94 {
95     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
96 }
97 
98 /**
99  * Decide if user can use accumulator as source.
100  * Do modifications on the order of inputs if necessary.
101  *
102  * Return true, if user can be optimized.
103  */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const104 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
105 {
106     if (user->IsPhi()) {
107         return IsPhiOptimizable(user);
108     }
109 
110     if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
111         return false;
112     }
113 
114     bool found = false;
115     // Check if the instrucion occures more times as input.
116     // v2. SUB v0, v1
117     // v3. Add v2, v2
118     for (auto input : user->GetInputs()) {
119         compiler::Inst *uinput = input.GetInst();
120 
121         if (uinput != inst) {
122             continue;
123         }
124 
125         if (!found) {
126             found = true;
127         } else {
128             return false;
129         }
130     }
131 
132     if (user->IsCall()) {
133         return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1);  // +1 for SaveState
134     }
135 
136     return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
137 }
138 
139 /**
140  * Check if all the Phi inputs and outputs can use the accumulator register.
141  *
142  * Return true, if Phi can be optimized.
143  */
IsPhiAccReady(compiler::Inst * phi) const144 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
145 {
146     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
147 
148     // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
149     // These cases are not optimized for accumulator.
150     for (auto input : phi->GetInputs()) {
151         compiler::Inst *phi_input = input.GetInst();
152 
153         if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
154             return false;
155         }
156     }
157 
158     for (auto &user : phi->GetUsers()) {
159         compiler::Inst *uinst = user.GetInst();
160 
161         if (!CanUserReadAcc(phi, uinst)) {
162             return false;
163         }
164     }
165     return true;
166 }
167 
168 /**
169  * For most insts we can use their src_reg on the acc-read position
170  * to characterise whether we need lda in the codegen pass.
171  */
SetNeedLda(compiler::Inst * inst,bool need)172 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
173 {
174     if (inst->IsPhi() || inst->IsCatchPhi()) {
175         return;
176     }
177     if (!IsAccRead(inst)) {
178         return;
179     }
180     if (inst->IsCall()) {  // we never need lda for calls
181         return;
182     }
183     compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
184     inst->SetSrcReg(AccReadIndex(inst), reg);
185 }
186 
187 /**
188  * Determine the accumulator usage between instructions.
189  * Eliminate unnecessary register allocations by applying
190  * a special value (ACC_REG_ID) to the destination and
191  * source registers.
192  * This special value is a marker for the code generator
193  * not to produce lda/sta instructions.
194  */
RunImpl()195 bool RegAccAlloc::RunImpl()
196 {
197     GetGraph()->InitDefaultLocations();
198     // Initialize all source register of all instructions.
199     for (auto block : GetGraph()->GetBlocksRPO()) {
200         for (auto inst : block->Insts()) {
201             if (inst->IsSaveState() || inst->IsCatchPhi()) {
202                 continue;
203             }
204             if (inst->IsConst()) {
205                 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
206             }
207             for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
208                 inst->SetSrcReg(i, compiler::INVALID_REG);
209                 if (inst->IsConst()) {
210                     inst->SetDstReg(compiler::INVALID_REG);
211                 }
212             }
213         }
214     }
215 
216     // Mark Phi instructions if they can be optimized for acc.
217     for (auto block : GetGraph()->GetBlocksRPO()) {
218         for (auto phi : block->PhiInsts()) {
219             if (IsPhiAccReady(phi)) {
220                 phi->SetMarker(acc_marker_);
221             }
222         }
223     }
224 
225     // Mark instructions if they can be optimized for acc.
226     for (auto block : GetGraph()->GetBlocksRPO()) {
227         for (auto inst : block->AllInsts()) {
228             if (inst->NoDest() || !IsAccWrite(inst)) {
229                 continue;
230             }
231 
232             bool use_acc_dst_reg = true;
233 
234             for (auto &user : inst->GetUsers()) {
235                 compiler::Inst *uinst = user.GetInst();
236                 if (uinst->IsSaveState()) {
237                     continue;
238                 }
239                 if (CanUserReadAcc(inst, uinst)) {
240                     SetNeedLda(uinst, false);
241                 } else {
242                     use_acc_dst_reg = false;
243                 }
244             }
245 
246             if (use_acc_dst_reg) {
247                 inst->SetDstReg(compiler::ACC_REG_ID);
248             } else if (inst->IsConst()) {
249                 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
250                 for (auto &user : inst->GetUsers()) {
251                     compiler::Inst *uinst = user.GetInst();
252                     if (uinst->IsSaveState()) {
253                         continue;
254                     }
255                     SetNeedLda(uinst, true);
256                 }
257             }
258         }
259 
260         for (auto inst : block->Insts()) {
261             if (inst->GetInputsCount() == 0) {
262                 continue;
263             }
264 
265             if (inst->IsCall()) {
266                 continue;
267             }
268 
269             compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
270 
271             if (IsAccWriteBetween(input, inst)) {
272                 input->SetDstReg(compiler::INVALID_REG);
273                 SetNeedLda(inst, true);
274 
275                 if (input->IsConst()) {
276                     input->ClearFlag(compiler::inst_flags::ACC_WRITE);
277                     for (auto &user : input->GetUsers()) {
278                         compiler::Inst *uinst = user.GetInst();
279                         SetNeedLda(uinst, true);
280                     }
281                 }
282             }
283         }
284     }
285 
286 #ifndef NDEBUG
287     GetGraph()->SetRegAccAllocApplied();
288 #endif  // NDEBUG
289 
290     return true;
291 }
292 
293 }  // namespace panda::bytecodeopt
294