1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace panda::bytecodeopt {
22
23 /**
24 * Decide if accumulator register gets dirty between two instructions.
25 */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28 ASSERT(src_inst != dst_inst);
29
30 compiler::BasicBlock *block = src_inst->GetBasicBlock();
31 compiler::Inst *inst = src_inst->GetNext();
32
33 while (inst != dst_inst) {
34 if (UNLIKELY(inst == nullptr)) {
35 do {
36 // TODO(rtakacs): visit all the successors to get information about the
37 // accumulator usage. Only linear flow is supported right now.
38 if (block->GetSuccsBlocks().size() > 1) {
39 return true;
40 }
41
42 ASSERT(block->GetSuccsBlocks().size() == 1);
43 block = block->GetSuccessor(0);
44
45 // TODO(rtakacs): only linear flow is supported right now.
46 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
47 return true;
48 }
49 } while (block->IsEmpty() && !block->HasPhi());
50
51 // Get first phi instruction if exist.
52 // This is requred if dst_inst is a phi node.
53 inst = *(block->AllInsts());
54 } else {
55 if (inst->IsAccWrite()) {
56 return true;
57 }
58
59 if (inst->IsAccRead()) {
60 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
61
62 if (input->GetDstReg() != compiler::ACC_REG_ID) {
63 return true;
64 }
65 }
66
67 inst = inst->GetNext();
68 }
69 }
70
71 return false;
72 }
73
74 /**
75 * Return true if Phi instruction is marked as optimizable.
76 */
IsPhiOptimizable(compiler::Inst * phi) const77 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
78 {
79 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
80 return phi->IsMarked(acc_marker_);
81 }
82
83 /**
84 * Return true if instruction can read the accumulator.
85 */
IsAccRead(compiler::Inst * inst) const86 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
87 {
88 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
89 }
90
91 /**
92 * Return true if instruction can write the accumulator.
93 */
IsAccWrite(compiler::Inst * inst) const94 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
95 {
96 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
97 }
98
99 /**
100 * Decide if user can use accumulator as source.
101 * Do modifications on the order of inputs if necessary.
102 *
103 * Return true, if user can be optimized.
104 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const105 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
106 {
107 if (user->IsPhi()) {
108 return IsPhiOptimizable(user);
109 }
110
111 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
112 return false;
113 }
114
115 bool found = false;
116 // Check if the instrucion occures more times as input.
117 // v2. SUB v0, v1
118 // v3. Add v2, v2
119 for (auto input : user->GetInputs()) {
120 compiler::Inst *uinput = input.GetInst();
121
122 if (uinput != inst) {
123 continue;
124 }
125
126 if (!found) {
127 found = true;
128 } else {
129 return false;
130 }
131 }
132
133 if (user->IsCall()) {
134 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1); // +1 for SaveState
135 }
136
137 return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
138 }
139
140 /**
141 * Check if all the Phi inputs and outputs can use the accumulator register.
142 *
143 * Return true, if Phi can be optimized.
144 */
IsPhiAccReady(compiler::Inst * phi) const145 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
146 {
147 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
148
149 // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
150 // These cases are not optimized for accumulator.
151 for (auto input : phi->GetInputs()) {
152 compiler::Inst *phi_input = input.GetInst();
153
154 if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
155 return false;
156 }
157 }
158
159 for (auto &user : phi->GetUsers()) {
160 compiler::Inst *uinst = user.GetInst();
161
162 if (!CanUserReadAcc(phi, uinst)) {
163 return false;
164 }
165 }
166 return true;
167 }
168
169 /**
170 * For most insts we can use their src_reg on the acc-read position
171 * to characterise whether we need lda in the codegen pass.
172 */
SetNeedLda(compiler::Inst * inst,bool need)173 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
174 {
175 if (inst->IsPhi() || inst->IsCatchPhi()) {
176 return;
177 }
178 if (!IsAccRead(inst)) {
179 return;
180 }
181 if (inst->IsCall()) { // we never need lda for calls
182 return;
183 }
184 compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
185 inst->SetSrcReg(AccReadIndex(inst), reg);
186 }
187
188 /**
189 * Determine the accumulator usage between instructions.
190 * Eliminate unnecessary register allocations by applying
191 * a special value (ACC_REG_ID) to the destination and
192 * source registers.
193 * This special value is a marker for the code generator
194 * not to produce lda/sta instructions.
195 */
RunImpl()196 bool RegAccAlloc::RunImpl()
197 {
198 GetGraph()->InitDefaultLocations();
199 // Initialize all source register of all instructions.
200 for (auto block : GetGraph()->GetBlocksRPO()) {
201 for (auto inst : block->Insts()) {
202 if (inst->IsSaveState() || inst->IsCatchPhi()) {
203 continue;
204 }
205 if (inst->IsConst()) {
206 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
207 }
208 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
209 inst->SetSrcReg(i, compiler::INVALID_REG);
210 if (inst->IsConst()) {
211 inst->SetDstReg(compiler::INVALID_REG);
212 }
213 }
214 }
215 }
216
217 // Mark Phi instructions if they can be optimized for acc.
218 for (auto block : GetGraph()->GetBlocksRPO()) {
219 for (auto phi : block->PhiInsts()) {
220 if (IsPhiAccReady(phi)) {
221 phi->SetMarker(acc_marker_);
222 }
223 }
224 }
225
226 // Mark instructions if they can be optimized for acc.
227 for (auto block : GetGraph()->GetBlocksRPO()) {
228 for (auto inst : block->AllInsts()) {
229 if (inst->NoDest() || !IsAccWrite(inst)) {
230 continue;
231 }
232
233 bool use_acc_dst_reg = true;
234
235 for (auto &user : inst->GetUsers()) {
236 compiler::Inst *uinst = user.GetInst();
237 if (uinst->IsSaveState()) {
238 continue;
239 }
240 if (CanUserReadAcc(inst, uinst)) {
241 SetNeedLda(uinst, false);
242 } else {
243 use_acc_dst_reg = false;
244 }
245 }
246
247 if (use_acc_dst_reg) {
248 inst->SetDstReg(compiler::ACC_REG_ID);
249 } else if (inst->IsConst()) {
250 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
251 for (auto &user : inst->GetUsers()) {
252 compiler::Inst *uinst = user.GetInst();
253 if (uinst->IsSaveState()) {
254 continue;
255 }
256 SetNeedLda(uinst, true);
257 }
258 }
259 }
260
261 for (auto inst : block->Insts()) {
262 if (inst->GetInputsCount() == 0) {
263 continue;
264 }
265
266 if (inst->IsCall()) {
267 continue;
268 }
269
270 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
271
272 if (IsAccWriteBetween(input, inst)) {
273 input->SetDstReg(compiler::INVALID_REG);
274 SetNeedLda(inst, true);
275
276 if (input->IsConst()) {
277 input->ClearFlag(compiler::inst_flags::ACC_WRITE);
278 for (auto &user : input->GetUsers()) {
279 compiler::Inst *uinst = user.GetInst();
280 SetNeedLda(uinst, true);
281 }
282 }
283 }
284 }
285 }
286
287 #ifndef NDEBUG
288 GetGraph()->SetRegAccAllocApplied();
289 #endif // NDEBUG
290
291 return true;
292 }
293
294 } // namespace panda::bytecodeopt
295