1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19
20 namespace panda::bytecodeopt {
21
22 /**
23 * Decide if accumulator register gets dirty between two instructions.
24 */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)25 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
26 {
27 ASSERT(src_inst != dst_inst);
28
29 compiler::BasicBlock *block = src_inst->GetBasicBlock();
30 compiler::Inst *inst = src_inst->GetNext();
31
32 while (inst != dst_inst) {
33 if (UNLIKELY(inst == nullptr)) {
34 do {
35 // TODO(rtakacs): visit all the successors to get information about the
36 // accumulator usage. Only linear flow is supported right now.
37 if (block->GetSuccsBlocks().size() > 1) {
38 return true;
39 }
40
41 ASSERT(block->GetSuccsBlocks().size() == 1);
42 block = block->GetSuccessor(0);
43 // TODO(rtakacs): only linear flow is supported right now.
44 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
45 return true;
46 }
47 } while (block->IsEmpty() && !block->HasPhi());
48
49 // Get first phi instruction if exist.
50 // This is requred if dst_inst is a phi node.
51 inst = *(block->AllInsts());
52 } else {
53 if (inst->IsAccWrite()) {
54 return true;
55 }
56
57 if (inst->IsAccRead()) {
58 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
59
60 if (input->GetDstReg() != compiler::ACC_REG_ID) {
61 return true;
62 }
63 }
64
65 inst = inst->GetNext();
66 }
67 }
68
69 return false;
70 }
71
72 /**
73 * Return true if Phi instruction is marked as optimizable.
74 */
IsPhiOptimizable(compiler::Inst * phi) const75 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
76 {
77 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
78 return phi->IsMarked(acc_marker_);
79 }
80
81 /**
82 * Return true if instruction can read the accumulator.
83 */
IsAccRead(compiler::Inst * inst) const84 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
85 {
86 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
87 }
88
89 /**
90 * Return true if instruction can write the accumulator.
91 */
IsAccWrite(compiler::Inst * inst) const92 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
93 {
94 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
95 }
96
97 /**
98 * Decide if user can use accumulator as source.
99 * Do modifications on the order of inputs if necessary.
100 *
101 * Return true, if user can be optimized.
102 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const103 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
104 {
105 if (user->IsPhi()) {
106 return IsPhiOptimizable(user);
107 }
108
109 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
110 return false;
111 }
112
113 bool found = false;
114 // Check if the instrucion occures more times as input.
115 // v2. SUB v0, v1
116 // v3. Add v2, v2
117 for (auto input : user->GetInputs()) {
118 compiler::Inst *uinput = input.GetInst();
119
120 if (uinput != inst) {
121 continue;
122 }
123
124 if (!found) {
125 found = true;
126 } else {
127 return false;
128 }
129 }
130
131 if (user->IsCall()) {
132 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1); // +1 for SaveState
133 }
134
135 return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
136 }
137
138 /**
139 * Check if all the Phi inputs and outputs can use the accumulator register.
140 *
141 * Return true, if Phi can be optimized.
142 */
IsPhiAccReady(compiler::Inst * phi) const143 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
144 {
145 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
146
147 // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
148 // These cases are not optimized for accumulator.
149 for (auto input : phi->GetInputs()) {
150 compiler::Inst *phi_input = input.GetInst();
151
152 if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
153 return false;
154 }
155 }
156
157 for (auto &user : phi->GetUsers()) {
158 compiler::Inst *uinst = user.GetInst();
159
160 if (!CanUserReadAcc(phi, uinst)) {
161 return false;
162 }
163 }
164 return true;
165 }
166
167 /**
168 * For most insts we can use their src_reg on the acc-read position
169 * to characterise whether we need lda in the codegen pass.
170 */
SetNeedLda(compiler::Inst * inst,bool need)171 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
172 {
173 if (inst->IsPhi() || inst->IsCatchPhi()) {
174 return;
175 }
176 if (!IsAccRead(inst)) {
177 return;
178 }
179 if (inst->IsCall()) { // we never need lda for calls
180 return;
181 }
182 compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
183 inst->SetSrcReg(AccReadIndex(inst), reg);
184 }
185
InitializeSourceRegisters()186 void RegAccAlloc::InitializeSourceRegisters()
187 {
188 for (auto block : GetGraph()->GetBlocksRPO()) {
189 for (auto inst : block->Insts()) {
190 if (inst->IsSaveState() || inst->IsCatchPhi()) {
191 continue;
192 }
193 if (inst->IsConst()) {
194 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
195 }
196 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
197 inst->SetSrcReg(i, compiler::INVALID_REG);
198 }
199 if (inst->IsConst()) {
200 inst->SetDstReg(compiler::INVALID_REG);
201 }
202 }
203 }
204 }
205
MarkAccForPhiInstructions()206 void RegAccAlloc::MarkAccForPhiInstructions()
207 {
208 for (auto block : GetGraph()->GetBlocksRPO()) {
209 for (auto phi : block->PhiInsts()) {
210 if (IsPhiAccReady(phi)) {
211 phi->SetMarker(acc_marker_);
212 }
213 }
214 }
215 }
216
MarkAccForInstructions(compiler::BasicBlock * block)217 void RegAccAlloc::MarkAccForInstructions(compiler::BasicBlock *block)
218 {
219 for (auto inst : block->AllInsts()) {
220 if (inst->NoDest() || !IsAccWrite(inst)) {
221 continue;
222 }
223
224 bool use_acc_dst_reg = true;
225
226 for (auto &user : inst->GetUsers()) {
227 compiler::Inst *uinst = user.GetInst();
228 if (uinst->IsSaveState()) {
229 continue;
230 }
231 if (CanUserReadAcc(inst, uinst)) {
232 SetNeedLda(uinst, false);
233 } else {
234 use_acc_dst_reg = false;
235 }
236 }
237
238 if (use_acc_dst_reg) {
239 inst->SetDstReg(compiler::ACC_REG_ID);
240 continue;
241 }
242
243 if (!inst->IsConst()) {
244 continue;
245 }
246
247 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
248
249 for (auto &user : inst->GetUsers()) {
250 compiler::Inst *uinst = user.GetInst();
251
252 if (uinst->IsSaveState()) {
253 continue;
254 }
255
256 SetNeedLda(uinst, true);
257 }
258 }
259 }
260
UpdateInstructionsAfterMark(compiler::BasicBlock * block)261 void RegAccAlloc::UpdateInstructionsAfterMark(compiler::BasicBlock *block)
262 {
263 for (auto inst : block->Insts()) {
264 if (inst->GetInputsCount() == 0) {
265 continue;
266 }
267
268 if (inst->IsCall()) {
269 continue;
270 }
271
272 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
273 if (!IsAccWriteBetween(input, inst)) {
274 continue;
275 }
276
277 input->SetDstReg(compiler::INVALID_REG);
278 SetNeedLda(inst, true);
279
280 if (input->IsConst()) {
281 input->ClearFlag(compiler::inst_flags::ACC_WRITE);
282 for (auto &user : input->GetUsers()) {
283 compiler::Inst *uinst = user.GetInst();
284 SetNeedLda(uinst, true);
285 }
286 }
287 }
288 }
289
290 /**
291 * Determine the accumulator usage between instructions.
292 * Eliminate unnecessary register allocations by applying
293 * a special value (ACC_REG_ID) to the destination and
294 * source registers.
295 * This special value is a marker for the code generator
296 * not to produce lda/sta instructions.
297 */
RunImpl()298 bool RegAccAlloc::RunImpl()
299 {
300 GetGraph()->InitDefaultLocations();
301 // Initialize all source register of all instructions.
302 InitializeSourceRegisters();
303 // Mark Phi instructions if they can be optimized for acc.
304 MarkAccForPhiInstructions();
305 // Mark instructions if they can be optimized for acc.
306 for (auto block : GetGraph()->GetBlocksRPO()) {
307 MarkAccForInstructions(block);
308 UpdateInstructionsAfterMark(block);
309 }
310
311 #ifndef NDEBUG
312 GetGraph()->SetRegAccAllocApplied();
313 #endif // NDEBUG
314
315 return true;
316 }
317
318 } // namespace panda::bytecodeopt
319