• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20 
21 namespace panda::bytecodeopt {
22 
23 /**
24  * Decide if accumulator register gets dirty between two instructions.
25  */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28     ASSERT(src_inst != dst_inst);
29 
30     compiler::BasicBlock *block = src_inst->GetBasicBlock();
31     compiler::Inst *inst = src_inst->GetNext();
32 
33     while (inst != dst_inst) {
34         if (UNLIKELY(inst == nullptr)) {
35             do {
36                 // TODO(rtakacs): visit all the successors to get information about the
37                 // accumulator usage. Only linear flow is supported right now.
38                 if (block->GetSuccsBlocks().size() > 1) {
39                     return true;
40                 }
41 
42                 ASSERT(block->GetSuccsBlocks().size() == 1);
43                 block = block->GetSuccessor(0);
44 
45                 // TODO(rtakacs): only linear flow is supported right now.
46                 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
47                     return true;
48                 }
49             } while (block->IsEmpty() && !block->HasPhi());
50 
51             // Get first phi instruction if exist.
52             // This is requred if dst_inst is a phi node.
53             inst = *(block->AllInsts());
54         } else {
55             if (inst->IsAccWrite()) {
56                 return true;
57             }
58 
59             if (inst->IsAccRead()) {
60                 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
61 
62                 if (input->GetDstReg() != compiler::ACC_REG_ID) {
63                     return true;
64                 }
65             }
66 
67             inst = inst->GetNext();
68         }
69     }
70 
71     return false;
72 }
73 
74 /**
75  * Return true if Phi instruction is marked as optimizable.
76  */
IsPhiOptimizable(compiler::Inst * phi) const77 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
78 {
79     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
80     return phi->IsMarked(acc_marker_);
81 }
82 
83 /**
84  * Return true if instruction can read the accumulator.
85  */
IsAccRead(compiler::Inst * inst) const86 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
87 {
88     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
89 }
90 
UserNeedSwapInputs(compiler::Inst * inst,compiler::Inst * user)91 bool UserNeedSwapInputs(compiler::Inst *inst, compiler::Inst *user)
92 {
93     if (!user->IsCommutative()) {
94         return false;
95     }
96     return user->GetInput(AccReadIndex(user)).GetInst() != inst;
97 }
98 
99 /**
100  * Return true if instruction can write the accumulator.
101  */
IsAccWrite(compiler::Inst * inst) const102 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
103 {
104     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
105 }
106 
107 /**
108  * Decide if user can use accumulator as source.
109  * Do modifications on the order of inputs if necessary.
110  *
111  * Return true, if user can be optimized.
112  */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const113 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
114 {
115     if (user->IsPhi()) {
116         return IsPhiOptimizable(user);
117     }
118 
119     if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
120         return false;
121     }
122 
123     bool found = false;
124     // Check if the instrucion occures more times as input.
125     // v2. SUB v0, v1
126     // v3. Add v2, v2
127     for (auto input : user->GetInputs()) {
128         compiler::Inst *uinput = input.GetInst();
129 
130         if (uinput != inst) {
131             continue;
132         }
133 
134         if (!found) {
135             found = true;
136         } else {
137             return false;
138         }
139     }
140 
141     if (user->IsCall()) {
142         return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1);  // +1 for SaveState
143     }
144 
145     return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
146 }
147 
148 /**
149  * Check if all the Phi inputs and outputs can use the accumulator register.
150  *
151  * Return true, if Phi can be optimized.
152  */
IsPhiAccReady(compiler::Inst * phi) const153 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
154 {
155     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
156 
157     // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
158     // These cases are not optimized for accumulator.
159     for (auto input : phi->GetInputs()) {
160         compiler::Inst *phi_input = input.GetInst();
161 
162         if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
163             return false;
164         }
165     }
166 
167     std::unordered_set<compiler::Inst *> users_that_required_swap_inputs;
168     for (auto &user : phi->GetUsers()) {
169         compiler::Inst *uinst = user.GetInst();
170 
171         if (!CanUserReadAcc(phi, uinst)) {
172             return false;
173         }
174         if (UserNeedSwapInputs(phi, uinst)) {
175             users_that_required_swap_inputs.insert(uinst);
176         }
177     }
178     for (auto uinst : users_that_required_swap_inputs) {
179         uinst->SwapInputs();
180     }
181 
182     return true;
183 }
184 
185 /**
186  * For most insts we can use their src_reg on the acc-read position
187  * to characterise whether we need lda in the codegen pass.
188  */
SetNeedLda(compiler::Inst * inst,bool need)189 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
190 {
191     if (inst->IsPhi() || inst->IsCatchPhi()) {
192         return;
193     }
194     if (!IsAccRead(inst)) {
195         return;
196     }
197     if (inst->IsCall()) {  // we never need lda for calls
198         return;
199     }
200     compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
201     inst->SetSrcReg(AccReadIndex(inst), reg);
202 }
203 
204 /**
205  * Determine the accumulator usage between instructions.
206  * Eliminate unnecessary register allocations by applying
207  * a special value (ACC_REG_ID) to the destination and
208  * source registers.
209  * This special value is a marker for the code generator
210  * not to produce lda/sta instructions.
211  */
RunImpl()212 bool RegAccAlloc::RunImpl()
213 {
214     GetGraph()->InitDefaultLocations();
215     // Initialize all source register of all instructions.
216     for (auto block : GetGraph()->GetBlocksRPO()) {
217         for (auto inst : block->Insts()) {
218             if (inst->IsSaveState() || inst->IsCatchPhi()) {
219                 continue;
220             }
221             if (inst->IsConst()) {
222                 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
223             }
224             for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
225                 inst->SetSrcReg(i, compiler::INVALID_REG);
226                 if ((inst->GetOpcode() == compiler::Opcode::LoadObject) || (inst->IsConst())) {
227                     inst->SetDstReg(compiler::INVALID_REG);
228                 }
229             }
230         }
231     }
232 
233     // Drop the pass if the function contains unsupported opcodes
234     // TODO(rtakacs): support these opcodes.
235     if (!GetGraph()->IsDynamicMethod()) {
236         for (auto block : GetGraph()->GetBlocksRPO()) {
237             for (auto inst : block->AllInsts()) {
238                 if (inst->GetOpcode() == compiler::Opcode::Builtin) {
239                     return false;
240                 }
241             }
242         }
243     }
244 
245     // Mark Phi instructions if they can be optimized for acc.
246     for (auto block : GetGraph()->GetBlocksRPO()) {
247         for (auto phi : block->PhiInsts()) {
248             if (IsPhiAccReady(phi)) {
249                 phi->SetMarker(acc_marker_);
250             }
251         }
252     }
253 
254     // Mark instructions if they can be optimized for acc.
255     for (auto block : GetGraph()->GetBlocksRPO()) {
256         for (auto inst : block->AllInsts()) {
257             if (inst->NoDest() || !IsAccWrite(inst)) {
258                 continue;
259             }
260 
261             bool use_acc_dst_reg = true;
262 
263             std::unordered_set<compiler::Inst *> users_that_required_swap_inputs;
264             for (auto &user : inst->GetUsers()) {
265                 compiler::Inst *uinst = user.GetInst();
266                 if (uinst->IsSaveState()) {
267                     continue;
268                 }
269                 if (CanUserReadAcc(inst, uinst)) {
270                     if (UserNeedSwapInputs(inst, uinst)) {
271                         users_that_required_swap_inputs.insert(uinst);
272                     }
273                     SetNeedLda(uinst, false);
274                 } else {
275                     use_acc_dst_reg = false;
276                 }
277             }
278             for (auto uinst : users_that_required_swap_inputs) {
279                 uinst->SwapInputs();
280             }
281 
282             if (use_acc_dst_reg) {
283                 inst->SetDstReg(compiler::ACC_REG_ID);
284             } else if ((inst->GetOpcode() == compiler::Opcode::LoadObject) || inst->IsConst()) {
285                 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
286                 for (auto &user : inst->GetUsers()) {
287                     compiler::Inst *uinst = user.GetInst();
288                     if (uinst->IsSaveState()) {
289                         continue;
290                     }
291                     SetNeedLda(uinst, true);
292                 }
293             }
294         }
295 
296         for (auto inst : block->Insts()) {
297             if (inst->GetInputsCount() == 0) {
298                 continue;
299             }
300 
301             if (inst->IsCall()) {
302                 continue;
303             }
304 
305             compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
306 
307             if (IsAccWriteBetween(input, inst)) {
308                 input->SetDstReg(compiler::INVALID_REG);
309                 SetNeedLda(inst, true);
310 
311                 if ((input->GetOpcode() == compiler::Opcode::LoadObject) || (input->IsConst())) {
312                     input->ClearFlag(compiler::inst_flags::ACC_WRITE);
313                     for (auto &user : input->GetUsers()) {
314                         compiler::Inst *uinst = user.GetInst();
315                         SetNeedLda(uinst, true);
316                     }
317                 }
318             }
319         }
320     }
321 
322 #ifndef NDEBUG
323     GetGraph()->SetRegAccAllocApplied();
324 #endif  // NDEBUG
325 
326     return true;
327 }
328 
329 }  // namespace panda::bytecodeopt
330