1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace panda::bytecodeopt {
22
23 /**
24 * Decide if accumulator register gets dirty between two instructions.
25 */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28 ASSERT(src_inst != dst_inst);
29
30 compiler::BasicBlock *block = src_inst->GetBasicBlock();
31 compiler::Inst *inst = src_inst->GetNext();
32
33 while (inst != dst_inst) {
34 if (UNLIKELY(inst == nullptr)) {
35 do {
36 // TODO(rtakacs): visit all the successors to get information about the
37 // accumulator usage. Only linear flow is supported right now.
38 if (block->GetSuccsBlocks().size() > 1) {
39 return true;
40 }
41
42 ASSERT(block->GetSuccsBlocks().size() == 1);
43 block = block->GetSuccessor(0);
44
45 // TODO(rtakacs): only linear flow is supported right now.
46 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
47 return true;
48 }
49 } while (block->IsEmpty() && !block->HasPhi());
50
51 // Get first phi instruction if exist.
52 // This is requred if dst_inst is a phi node.
53 inst = *(block->AllInsts());
54 } else {
55 if (inst->IsAccWrite()) {
56 return true;
57 }
58
59 if (inst->IsAccRead()) {
60 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
61
62 if (input->GetDstReg() != compiler::ACC_REG_ID) {
63 return true;
64 }
65 }
66
67 inst = inst->GetNext();
68 }
69 }
70
71 return false;
72 }
73
74 /**
75 * Return true if Phi instruction is marked as optimizable.
76 */
IsPhiOptimizable(compiler::Inst * phi) const77 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
78 {
79 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
80 return phi->IsMarked(acc_marker_);
81 }
82
83 /**
84 * Return true if instruction can read the accumulator.
85 */
IsAccRead(compiler::Inst * inst) const86 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
87 {
88 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
89 }
90
UserNeedSwapInputs(compiler::Inst * inst,compiler::Inst * user)91 bool UserNeedSwapInputs(compiler::Inst *inst, compiler::Inst *user)
92 {
93 if (!user->IsCommutative()) {
94 return false;
95 }
96 return user->GetInput(AccReadIndex(user)).GetInst() != inst;
97 }
98
99 /**
100 * Return true if instruction can write the accumulator.
101 */
IsAccWrite(compiler::Inst * inst) const102 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
103 {
104 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
105 }
106
107 /**
108 * Decide if user can use accumulator as source.
109 * Do modifications on the order of inputs if necessary.
110 *
111 * Return true, if user can be optimized.
112 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const113 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
114 {
115 if (user->IsPhi()) {
116 return IsPhiOptimizable(user);
117 }
118
119 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
120 return false;
121 }
122
123 bool found = false;
124 // Check if the instrucion occures more times as input.
125 // v2. SUB v0, v1
126 // v3. Add v2, v2
127 for (auto input : user->GetInputs()) {
128 compiler::Inst *uinput = input.GetInst();
129
130 if (uinput != inst) {
131 continue;
132 }
133
134 if (!found) {
135 found = true;
136 } else {
137 return false;
138 }
139 }
140
141 if (user->IsCall()) {
142 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1); // +1 for SaveState
143 }
144
145 return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
146 }
147
148 /**
149 * Check if all the Phi inputs and outputs can use the accumulator register.
150 *
151 * Return true, if Phi can be optimized.
152 */
IsPhiAccReady(compiler::Inst * phi) const153 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
154 {
155 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
156
157 // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
158 // These cases are not optimized for accumulator.
159 for (auto input : phi->GetInputs()) {
160 compiler::Inst *phi_input = input.GetInst();
161
162 if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
163 return false;
164 }
165 }
166
167 std::unordered_set<compiler::Inst *> users_that_required_swap_inputs;
168 for (auto &user : phi->GetUsers()) {
169 compiler::Inst *uinst = user.GetInst();
170
171 if (!CanUserReadAcc(phi, uinst)) {
172 return false;
173 }
174 if (UserNeedSwapInputs(phi, uinst)) {
175 users_that_required_swap_inputs.insert(uinst);
176 }
177 }
178 for (auto uinst : users_that_required_swap_inputs) {
179 uinst->SwapInputs();
180 }
181
182 return true;
183 }
184
185 /**
186 * For most insts we can use their src_reg on the acc-read position
187 * to characterise whether we need lda in the codegen pass.
188 */
SetNeedLda(compiler::Inst * inst,bool need)189 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
190 {
191 if (inst->IsPhi() || inst->IsCatchPhi()) {
192 return;
193 }
194 if (!IsAccRead(inst)) {
195 return;
196 }
197 if (inst->IsCall()) { // we never need lda for calls
198 return;
199 }
200 compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
201 inst->SetSrcReg(AccReadIndex(inst), reg);
202 }
203
204 /**
205 * Determine the accumulator usage between instructions.
206 * Eliminate unnecessary register allocations by applying
207 * a special value (ACC_REG_ID) to the destination and
208 * source registers.
209 * This special value is a marker for the code generator
210 * not to produce lda/sta instructions.
211 */
RunImpl()212 bool RegAccAlloc::RunImpl()
213 {
214 GetGraph()->InitDefaultLocations();
215 // Initialize all source register of all instructions.
216 for (auto block : GetGraph()->GetBlocksRPO()) {
217 for (auto inst : block->Insts()) {
218 if (inst->IsSaveState() || inst->IsCatchPhi()) {
219 continue;
220 }
221 if (inst->IsConst()) {
222 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
223 }
224 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
225 inst->SetSrcReg(i, compiler::INVALID_REG);
226 if ((inst->GetOpcode() == compiler::Opcode::LoadObject) || (inst->IsConst())) {
227 inst->SetDstReg(compiler::INVALID_REG);
228 }
229 }
230 }
231 }
232
233 // Drop the pass if the function contains unsupported opcodes
234 // TODO(rtakacs): support these opcodes.
235 if (!GetGraph()->IsDynamicMethod()) {
236 for (auto block : GetGraph()->GetBlocksRPO()) {
237 for (auto inst : block->AllInsts()) {
238 if (inst->GetOpcode() == compiler::Opcode::Builtin) {
239 return false;
240 }
241 }
242 }
243 }
244
245 // Mark Phi instructions if they can be optimized for acc.
246 for (auto block : GetGraph()->GetBlocksRPO()) {
247 for (auto phi : block->PhiInsts()) {
248 if (IsPhiAccReady(phi)) {
249 phi->SetMarker(acc_marker_);
250 }
251 }
252 }
253
254 // Mark instructions if they can be optimized for acc.
255 for (auto block : GetGraph()->GetBlocksRPO()) {
256 for (auto inst : block->AllInsts()) {
257 if (inst->NoDest() || !IsAccWrite(inst)) {
258 continue;
259 }
260
261 bool use_acc_dst_reg = true;
262
263 std::unordered_set<compiler::Inst *> users_that_required_swap_inputs;
264 for (auto &user : inst->GetUsers()) {
265 compiler::Inst *uinst = user.GetInst();
266 if (uinst->IsSaveState()) {
267 continue;
268 }
269 if (CanUserReadAcc(inst, uinst)) {
270 if (UserNeedSwapInputs(inst, uinst)) {
271 users_that_required_swap_inputs.insert(uinst);
272 }
273 SetNeedLda(uinst, false);
274 } else {
275 use_acc_dst_reg = false;
276 }
277 }
278 for (auto uinst : users_that_required_swap_inputs) {
279 uinst->SwapInputs();
280 }
281
282 if (use_acc_dst_reg) {
283 inst->SetDstReg(compiler::ACC_REG_ID);
284 } else if ((inst->GetOpcode() == compiler::Opcode::LoadObject) || inst->IsConst()) {
285 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
286 for (auto &user : inst->GetUsers()) {
287 compiler::Inst *uinst = user.GetInst();
288 if (uinst->IsSaveState()) {
289 continue;
290 }
291 SetNeedLda(uinst, true);
292 }
293 }
294 }
295
296 for (auto inst : block->Insts()) {
297 if (inst->GetInputsCount() == 0) {
298 continue;
299 }
300
301 if (inst->IsCall()) {
302 continue;
303 }
304
305 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
306
307 if (IsAccWriteBetween(input, inst)) {
308 input->SetDstReg(compiler::INVALID_REG);
309 SetNeedLda(inst, true);
310
311 if ((input->GetOpcode() == compiler::Opcode::LoadObject) || (input->IsConst())) {
312 input->ClearFlag(compiler::inst_flags::ACC_WRITE);
313 for (auto &user : input->GetUsers()) {
314 compiler::Inst *uinst = user.GetInst();
315 SetNeedLda(uinst, true);
316 }
317 }
318 }
319 }
320 }
321
322 #ifndef NDEBUG
323 GetGraph()->SetRegAccAllocApplied();
324 #endif // NDEBUG
325
326 return true;
327 }
328
329 } // namespace panda::bytecodeopt
330