1 /*
2 * Copyright (c) 2023-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace ark::bytecodeopt {
22
23 // If false returns also BasicBlock pointer in which search stopped
IsAccWriteBetweenBlocks(compiler::BasicBlock * block,compiler::Inst * dstInst)24 static std::pair<bool, compiler::BasicBlock *> IsAccWriteBetweenBlocks(compiler::BasicBlock *block,
25 compiler::Inst *dstInst)
26 {
27 do {
28 // NOTE(rtakacs): visit all the successors to get information about the
29 // accumulator usage. Only linear flow is supported right now.
30 if (block->GetSuccsBlocks().size() > 1U) {
31 return {true, nullptr};
32 }
33
34 ASSERT(block->GetSuccsBlocks().size() == 1U);
35 block = block->GetSuccessor(0U);
36 // NOTE(rtakacs): only linear flow is supported right now.
37 if (!dstInst->IsPhi() && block->GetPredsBlocks().size() > 1U) {
38 return {true, nullptr};
39 }
40 } while (block->IsEmpty() && !block->HasPhi());
41 return {false, block};
42 }
43
IsAccWriteInInst(compiler::Inst * inst)44 static bool IsAccWriteInInst(compiler::Inst *inst)
45 {
46 if (!inst->IsAccWrite()) {
47 return false;
48 }
49 if (inst->GetBasicBlock()->GetGraph()->IsAbcKit()) {
50 return true;
51 }
52 if (!inst->IsConst()) {
53 return true;
54 }
55 if (inst->GetDstReg() == compiler::GetAccReg()) {
56 return true;
57 }
58 return false;
59 }
60
61 // Check that acc_read instruction reads from input allocated to vreg
IsAccReadFromReg(compiler::Inst * srcInst,compiler::Inst * inst)62 static bool IsAccReadFromReg(compiler::Inst *srcInst, compiler::Inst *inst)
63 {
64 if (!inst->IsAccRead()) {
65 return false;
66 }
67 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
68 return (input != srcInst && input->GetDstReg() != compiler::GetAccReg());
69 }
70
71 /// Decide if accumulator register gets dirty between two instructions.
IsAccWriteBetween(compiler::Inst * srcInst,compiler::Inst * dstInst)72 bool IsAccWriteBetween(compiler::Inst *srcInst, compiler::Inst *dstInst)
73 {
74 ASSERT(srcInst != dstInst);
75
76 compiler::BasicBlock *block = srcInst->GetBasicBlock();
77 compiler::Inst *inst = srcInst->GetNext();
78
79 while (inst != dstInst) {
80 if (UNLIKELY(inst == nullptr)) {
81 bool accWrite;
82 std::tie(accWrite, block) = IsAccWriteBetweenBlocks(block, dstInst);
83 if (accWrite) {
84 // fast exit if we know the answer
85 return true;
86 }
87 // Get first phi instruction if exist.
88 // This is required if dstInst is a phi node.
89 inst = *(block->AllInsts());
90 } else {
91 if (IsAccWriteInInst(inst)) {
92 return true;
93 }
94
95 if (IsAccReadFromReg(srcInst, inst)) {
96 return true;
97 }
98
99 inst = inst->GetNext();
100 }
101 }
102
103 return false;
104 }
105 /// Return true if Phi instruction is marked as optimizable.
IsPhiOptimizable(compiler::Inst * phi) const106 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
107 {
108 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
109 return phi->IsMarked(accMarker_);
110 }
111
112 /// Return true if instruction can read the accumulator.
IsAccRead(compiler::Inst * inst) const113 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
114 {
115 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
116 }
117
IsCommutative(compiler::Inst * inst)118 static bool IsCommutative(compiler::Inst *inst)
119 {
120 if (inst->GetOpcode() == compiler::Opcode::If) {
121 auto cc = inst->CastToIf()->GetCc();
122 return cc == compiler::ConditionCode::CC_EQ || cc == compiler::ConditionCode::CC_NE;
123 }
124 return inst->IsCommutative();
125 }
126
UserNeedSwapInputs(compiler::Inst * inst,compiler::Inst * user)127 bool UserNeedSwapInputs(compiler::Inst *inst, compiler::Inst *user)
128 {
129 if (!IsCommutative(user)) {
130 return false;
131 }
132 return user->GetInput(AccReadIndex(user)).GetInst() != inst;
133 }
134
135 /// Return true if instruction can write the accumulator.
IsAccWrite(compiler::Inst * inst) const136 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
137 {
138 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
139 }
140
CanIntrinsicReadAcc(compiler::IntrinsicInst * inst) const141 bool RegAccAlloc::CanIntrinsicReadAcc(compiler::IntrinsicInst *inst) const
142 {
143 ASSERT(GetGraph()->IsAbcKit());
144 return inst->IsAccRead();
145 }
146 /**
147 * Decide if user can use accumulator as source.
148 * Do modifications on the order of inputs if necessary.
149 *
150 * Return true, if user can be optimized.
151 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const152 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
153 {
154 if (user->IsPhi()) {
155 return IsPhiOptimizable(user);
156 }
157
158 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
159 return false;
160 }
161
162 bool found = false;
163 // Check if the instrucion occures more times as input.
164 // v2. SUB v0, v1
165 // v3. Add v2, v2
166 for (auto input : user->GetInputs()) {
167 compiler::Inst *uinput = input.GetInst();
168
169 if (uinput != inst) {
170 continue;
171 }
172
173 if (!found) {
174 found = true;
175 } else {
176 return false;
177 }
178 }
179
180 for (auto &input : user->GetInputs()) {
181 auto inputInst = input.GetInst();
182 if (inputInst != user && inputInst->GetDstReg() == compiler::GetAccReg()) {
183 return false;
184 }
185 if (inst->IsPhi() && inputInst->IsConst() &&
186 (inputInst->GetBasicBlock()->GetId() != user->GetBasicBlock()->GetId())) {
187 return false;
188 }
189 }
190
191 if (GetGraph()->IsAbcKit()) {
192 if (user->IsIntrinsic()) {
193 return CanIntrinsicReadAcc(user->CastToIntrinsic()) && user->GetInput(AccReadIndex(user)).GetInst() == inst;
194 }
195 if (user->IsCall()) {
196 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS);
197 }
198 }
199
200 if (user->IsCallOrIntrinsic()) {
201 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1U); // +1 for SaveState
202 }
203
204 return user->GetInput(AccReadIndex(user)).GetInst() == inst || IsCommutative(user);
205 }
206
207 /**
208 * Check if all the Phi inputs and outputs can use the accumulator register.
209 *
210 * Return true, if Phi can be optimized.
211 */
IsPhiAccReady(compiler::Inst * phi) const212 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
213 {
214 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
215
216 // NOTE(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
217 // These cases are not optimized for accumulator.
218 for (auto input : phi->GetInputs()) {
219 compiler::Inst *phiInput = input.GetInst();
220
221 if (!IsAccWrite(phiInput) || IsAccWriteBetween(phiInput, phi)) {
222 return false;
223 }
224 }
225
226 std::unordered_set<compiler::Inst *> usersThatRequiredSwapInputs;
227 for (auto &user : phi->GetUsers()) {
228 compiler::Inst *uinst = user.GetInst();
229
230 if (!CanUserReadAcc(phi, uinst)) {
231 return false;
232 }
233 if (UserNeedSwapInputs(phi, uinst)) {
234 usersThatRequiredSwapInputs.insert(uinst);
235 }
236 }
237 for (auto uinst : usersThatRequiredSwapInputs) {
238 uinst->SwapInputs();
239 }
240
241 return true;
242 }
243
244 /**
245 * For most insts we can use their src_reg on the acc-read position
246 * to characterise whether we need lda in the codegen pass.
247 */
SetNeedLda(compiler::Inst * inst,bool need)248 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
249 {
250 if (inst->IsPhi() || inst->IsCatchPhi()) {
251 return;
252 }
253 if (!IsAccRead(inst)) {
254 return;
255 }
256 if (IsCall(inst)) {
257 return;
258 }
259
260 compiler::Register reg = need ? compiler::GetInvalidReg() : compiler::GetAccReg();
261 inst->SetSrcReg(AccReadIndex(inst), reg);
262 }
263
MaybeRegDst(compiler::Inst * inst)264 static inline bool MaybeRegDst(compiler::Inst *inst)
265 {
266 compiler::Opcode opcode = inst->GetOpcode();
267 #ifdef ENABLE_LIBABCKIT
268 if (inst->GetBasicBlock()->GetGraph()->IsAbcKit() && inst->IsIntrinsic()) {
269 auto id = inst->CastToIntrinsic()->GetIntrinsicId();
270 return id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT ||
271 id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT_WIDE ||
272 id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT_OBJECT;
273 }
274 #endif
275 return inst->IsConst() || inst->IsBinaryInst() || inst->IsBinaryImmInst() || opcode == compiler::Opcode::LoadObject;
276 }
277
InitRegistersForInst(compiler::Inst * inst)278 static void InitRegistersForInst(compiler::Inst *inst)
279 {
280 if (inst->IsSaveState() || inst->IsCatchPhi()) {
281 return;
282 }
283 if (inst->IsConst()) {
284 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
285 }
286 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
287 inst->SetSrcReg(i, compiler::GetInvalidReg());
288 if (MaybeRegDst(inst)) {
289 inst->SetDstReg(compiler::GetInvalidReg());
290 }
291 }
292 }
293
InitRegisters(compiler::Graph * graph)294 static void InitRegisters(compiler::Graph *graph)
295 {
296 for (auto block : graph->GetBlocksRPO()) {
297 for (auto inst : block->Insts()) {
298 InitRegistersForInst(inst);
299 }
300 }
301 }
302
HasUnsupportedOpcode(compiler::Graph * graph)303 static bool HasUnsupportedOpcode(compiler::Graph *graph)
304 {
305 if (graph->IsDynamicMethod()) {
306 return false;
307 }
308 for (auto block : graph->GetBlocksRPO()) {
309 for (auto inst : block->AllInsts()) {
310 if (inst->GetOpcode() == compiler::Opcode::Builtin) {
311 return true;
312 }
313 }
314 }
315 return false;
316 }
317
MarkPhiInstructions() const318 void RegAccAlloc::MarkPhiInstructions() const
319 {
320 for (auto block : GetGraph()->GetBlocksRPO()) {
321 for (auto phi : block->PhiInsts()) {
322 if (IsPhiAccReady(phi)) {
323 phi->SetMarker(accMarker_);
324 }
325 }
326 }
327 }
328
ClearAccForInstAndUsers(compiler::Inst * inst)329 void RegAccAlloc::ClearAccForInstAndUsers(compiler::Inst *inst)
330 {
331 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
332 for (auto &user : inst->GetUsers()) {
333 compiler::Inst *uinst = user.GetInst();
334 if (uinst->IsSaveState()) {
335 continue;
336 }
337 SetNeedLda(uinst, true);
338 }
339 }
340
MarkInstruction(compiler::Inst * inst)341 void RegAccAlloc::MarkInstruction(compiler::Inst *inst)
342 {
343 if (inst->NoDest() || !IsAccWrite(inst)) {
344 return;
345 }
346
347 bool useAccDstReg = true;
348
349 std::unordered_set<compiler::Inst *> usersThatRequiredSwapInputs;
350 for (auto &user : inst->GetUsers()) {
351 compiler::Inst *uinst = user.GetInst();
352 if (uinst->IsSaveState()) {
353 continue;
354 }
355 if (CanUserReadAcc(inst, uinst)) {
356 if (UserNeedSwapInputs(inst, uinst)) {
357 usersThatRequiredSwapInputs.insert(uinst);
358 }
359 SetNeedLda(uinst, false);
360 } else {
361 useAccDstReg = false;
362 }
363 }
364 for (auto uinst : usersThatRequiredSwapInputs) {
365 uinst->SwapInputs();
366 }
367
368 if (useAccDstReg) {
369 inst->SetDstReg(compiler::GetAccReg());
370 } else if (MaybeRegDst(inst)) {
371 ClearAccForInstAndUsers(inst);
372 }
373 }
374
MarkInstructions()375 void RegAccAlloc::MarkInstructions()
376 {
377 for (auto block : GetGraph()->GetBlocksRPO()) {
378 for (auto inst : block->AllInsts()) {
379 MarkInstruction(inst);
380 }
381
382 // Check if acc is written between inst and its intput
383 for (auto inst : block->Insts()) {
384 if (inst->GetInputsCount() == 0U) {
385 continue;
386 }
387 if (IsCall(inst)) {
388 continue;
389 }
390
391 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
392
393 if (!IsAccWriteBetween(input, inst)) {
394 continue;
395 }
396 input->SetDstReg(compiler::GetInvalidReg());
397 SetNeedLda(inst, true);
398
399 if (MaybeRegDst(input)) {
400 ClearAccForInstAndUsers(input);
401 }
402 }
403 }
404 }
405
406 /**
407 * Determine the accumulator usage between instructions.
408 * Eliminate unnecessary register allocations by applying
409 * a special value (ACC_REG_ID) to the destination and
410 * source registers.
411 * This special value is a marker for the code generator
412 * not to produce lda/sta instructions.
413 */
RunImpl()414 bool RegAccAlloc::RunImpl()
415 {
416 GetGraph()->InitDefaultLocations();
417 InitRegisters(GetGraph());
418
419 if (HasUnsupportedOpcode(GetGraph())) {
420 return false;
421 }
422
423 MarkPhiInstructions();
424 MarkInstructions();
425
426 #ifdef COMPILER_DEBUG_CHECKS
427 GetGraph()->SetRegAccAllocApplied();
428 #endif // COMPILER_DEBUG_CHECKS
429
430 return true;
431 }
432
433 } // namespace ark::bytecodeopt
434