1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace panda::bytecodeopt {
22
23 /**
24 * Decide if accumulator register gets dirty between two instructions.
25 */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28 ASSERT(src_inst != dst_inst);
29
30 compiler::BasicBlock *block = src_inst->GetBasicBlock();
31 compiler::Inst *inst = src_inst->GetNext();
32
33 while (inst != dst_inst) {
34 if (UNLIKELY(inst == nullptr)) {
35 do {
36 // TODO(rtakacs): visit all the successors to get information about the
37 // accumulator usage. Only linear flow is supported right now.
38 if (block->GetSuccsBlocks().size() > 1) {
39 return true;
40 }
41
42 ASSERT(block->GetSuccsBlocks().size() == 1);
43 block = block->GetSuccessor(0);
44 // TODO(rtakacs): only linear flow is supported right now.
45 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
46 return true;
47 }
48 } while (block->IsEmpty() && !block->HasPhi());
49
50 // Get first phi instruction if exist.
51 // This is requred if dst_inst is a phi node.
52 inst = *(block->AllInsts());
53 } else {
54 if (inst->IsAccWrite()) {
55 return true;
56 }
57
58 if (inst->IsAccRead()) {
59 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
60
61 if (input->GetDstReg() != compiler::ACC_REG_ID) {
62 return true;
63 }
64 }
65
66 inst = inst->GetNext();
67 }
68 }
69
70 return false;
71 }
72
73 /**
74 * Return true if Phi instruction is marked as optimizable.
75 */
IsPhiOptimizable(compiler::Inst * phi) const76 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
77 {
78 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
79 return phi->IsMarked(acc_marker_);
80 }
81
82 /**
83 * Return true if instruction can read the accumulator.
84 */
IsAccRead(compiler::Inst * inst) const85 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
86 {
87 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
88 }
89
90 /**
91 * Return true if instruction can write the accumulator.
92 */
IsAccWrite(compiler::Inst * inst) const93 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
94 {
95 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
96 }
97
98 /**
99 * Decide if user can use accumulator as source.
100 * Do modifications on the order of inputs if necessary.
101 *
102 * Return true, if user can be optimized.
103 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const104 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
105 {
106 if (user->IsPhi()) {
107 return IsPhiOptimizable(user);
108 }
109
110 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
111 return false;
112 }
113
114 bool found = false;
115 // Check if the instrucion occures more times as input.
116 // v2. SUB v0, v1
117 // v3. Add v2, v2
118 for (auto input : user->GetInputs()) {
119 compiler::Inst *uinput = input.GetInst();
120
121 if (uinput != inst) {
122 continue;
123 }
124
125 if (!found) {
126 found = true;
127 } else {
128 return false;
129 }
130 }
131
132 if (user->IsCall()) {
133 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1); // +1 for SaveState
134 }
135
136 return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
137 }
138
139 /**
140 * Check if all the Phi inputs and outputs can use the accumulator register.
141 *
142 * Return true, if Phi can be optimized.
143 */
IsPhiAccReady(compiler::Inst * phi) const144 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
145 {
146 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
147
148 // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
149 // These cases are not optimized for accumulator.
150 for (auto input : phi->GetInputs()) {
151 compiler::Inst *phi_input = input.GetInst();
152
153 if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
154 return false;
155 }
156 }
157
158 for (auto &user : phi->GetUsers()) {
159 compiler::Inst *uinst = user.GetInst();
160
161 if (!CanUserReadAcc(phi, uinst)) {
162 return false;
163 }
164 }
165 return true;
166 }
167
168 /**
169 * For most insts we can use their src_reg on the acc-read position
170 * to characterise whether we need lda in the codegen pass.
171 */
SetNeedLda(compiler::Inst * inst,bool need)172 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
173 {
174 if (inst->IsPhi() || inst->IsCatchPhi()) {
175 return;
176 }
177 if (!IsAccRead(inst)) {
178 return;
179 }
180 if (inst->IsCall()) { // we never need lda for calls
181 return;
182 }
183 compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
184 inst->SetSrcReg(AccReadIndex(inst), reg);
185 }
186
187 /**
188 * Determine the accumulator usage between instructions.
189 * Eliminate unnecessary register allocations by applying
190 * a special value (ACC_REG_ID) to the destination and
191 * source registers.
192 * This special value is a marker for the code generator
193 * not to produce lda/sta instructions.
194 */
RunImpl()195 bool RegAccAlloc::RunImpl()
196 {
197 GetGraph()->InitDefaultLocations();
198 // Initialize all source register of all instructions.
199 for (auto block : GetGraph()->GetBlocksRPO()) {
200 for (auto inst : block->Insts()) {
201 if (inst->IsSaveState() || inst->IsCatchPhi()) {
202 continue;
203 }
204 if (inst->IsConst()) {
205 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
206 }
207 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
208 inst->SetSrcReg(i, compiler::INVALID_REG);
209 if (inst->IsConst()) {
210 inst->SetDstReg(compiler::INVALID_REG);
211 }
212 }
213 }
214 }
215
216 // Mark Phi instructions if they can be optimized for acc.
217 for (auto block : GetGraph()->GetBlocksRPO()) {
218 for (auto phi : block->PhiInsts()) {
219 if (IsPhiAccReady(phi)) {
220 phi->SetMarker(acc_marker_);
221 }
222 }
223 }
224
225 // Mark instructions if they can be optimized for acc.
226 for (auto block : GetGraph()->GetBlocksRPO()) {
227 for (auto inst : block->AllInsts()) {
228 if (inst->NoDest() || !IsAccWrite(inst)) {
229 continue;
230 }
231
232 bool use_acc_dst_reg = true;
233
234 for (auto &user : inst->GetUsers()) {
235 compiler::Inst *uinst = user.GetInst();
236 if (uinst->IsSaveState()) {
237 continue;
238 }
239 if (CanUserReadAcc(inst, uinst)) {
240 SetNeedLda(uinst, false);
241 } else {
242 use_acc_dst_reg = false;
243 }
244 }
245
246 if (use_acc_dst_reg) {
247 inst->SetDstReg(compiler::ACC_REG_ID);
248 } else if (inst->IsConst()) {
249 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
250 for (auto &user : inst->GetUsers()) {
251 compiler::Inst *uinst = user.GetInst();
252 if (uinst->IsSaveState()) {
253 continue;
254 }
255 SetNeedLda(uinst, true);
256 }
257 }
258 }
259
260 for (auto inst : block->Insts()) {
261 if (inst->GetInputsCount() == 0) {
262 continue;
263 }
264
265 if (inst->IsCall()) {
266 continue;
267 }
268
269 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
270
271 if (IsAccWriteBetween(input, inst)) {
272 input->SetDstReg(compiler::INVALID_REG);
273 SetNeedLda(inst, true);
274
275 if (input->IsConst()) {
276 input->ClearFlag(compiler::inst_flags::ACC_WRITE);
277 for (auto &user : input->GetUsers()) {
278 compiler::Inst *uinst = user.GetInst();
279 SetNeedLda(uinst, true);
280 }
281 }
282 }
283 }
284 }
285
286 #ifndef NDEBUG
287 GetGraph()->SetRegAccAllocApplied();
288 #endif // NDEBUG
289
290 return true;
291 }
292
293 } // namespace panda::bytecodeopt
294