1 /*
2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace ark::bytecodeopt {
22
23 // If false returns also BasicBlock pointer in which search stopped
IsAccWriteBetweenBlocks(compiler::BasicBlock * block,compiler::Inst * dstInst)24 static std::pair<bool, compiler::BasicBlock *> IsAccWriteBetweenBlocks(compiler::BasicBlock *block,
25 compiler::Inst *dstInst)
26 {
27 do {
28 // NOTE(rtakacs): visit all the successors to get information about the
29 // accumulator usage. Only linear flow is supported right now.
30 if (block->GetSuccsBlocks().size() > 1U) {
31 return {true, nullptr};
32 }
33
34 ASSERT(block->GetSuccsBlocks().size() == 1U);
35 block = block->GetSuccessor(0U);
36 // NOTE(rtakacs): only linear flow is supported right now.
37 if (!dstInst->IsPhi() && block->GetPredsBlocks().size() > 1U) {
38 return {true, nullptr};
39 }
40 } while (block->IsEmpty() && !block->HasPhi());
41 return {false, block};
42 }
43
IsAccWriteInInst(compiler::Inst * inst)44 static bool IsAccWriteInInst(compiler::Inst *inst)
45 {
46 if (!inst->IsAccWrite()) {
47 return false;
48 }
49 if (inst->GetBasicBlock()->GetGraph()->IsAbcKit()) {
50 return true;
51 }
52 if (!inst->IsConst()) {
53 return true;
54 }
55 if (inst->GetDstReg() == compiler::GetAccReg()) {
56 return true;
57 }
58 return false;
59 }
60
61 // Check that acc_read instruction reads from input allocated to vreg
IsAccReadFromReg(compiler::Inst * srcInst,compiler::Inst * inst)62 static bool IsAccReadFromReg(compiler::Inst *srcInst, compiler::Inst *inst)
63 {
64 if (!inst->IsAccRead()) {
65 return false;
66 }
67 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
68 return (input != srcInst && input->GetDstReg() != compiler::GetAccReg());
69 }
70
71 /// Decide if accumulator register gets dirty between two instructions.
IsAccWriteBetween(compiler::Inst * srcInst,compiler::Inst * dstInst)72 bool IsAccWriteBetween(compiler::Inst *srcInst, compiler::Inst *dstInst)
73 {
74 ASSERT(srcInst != dstInst);
75
76 compiler::BasicBlock *block = srcInst->GetBasicBlock();
77 compiler::Inst *inst = srcInst->GetNext();
78
79 while (inst != dstInst) {
80 if (UNLIKELY(inst == nullptr)) {
81 bool accWrite;
82 std::tie(accWrite, block) = IsAccWriteBetweenBlocks(block, dstInst);
83 if (accWrite) {
84 // fast exit if we know the answer
85 return true;
86 }
87 // Get first phi instruction if exist.
88 // This is required if dstInst is a phi node.
89 inst = *(block->AllInsts());
90 } else {
91 if (IsAccWriteInInst(inst)) {
92 return true;
93 }
94
95 if (IsAccReadFromReg(srcInst, inst)) {
96 return true;
97 }
98
99 inst = inst->GetNext();
100 }
101 }
102
103 return false;
104 }
105 /// Return true if Phi instruction is marked as optimizable.
IsPhiOptimizable(compiler::Inst * phi) const106 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
107 {
108 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
109 return phi->IsMarked(accMarker_);
110 }
111
112 /// Return true if instruction can read the accumulator.
IsAccRead(compiler::Inst * inst) const113 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
114 {
115 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
116 }
117
IsCommutative(compiler::Inst * inst)118 static bool IsCommutative(compiler::Inst *inst)
119 {
120 if (inst->GetOpcode() == compiler::Opcode::If) {
121 auto cc = inst->CastToIf()->GetCc();
122 return cc == compiler::ConditionCode::CC_EQ || cc == compiler::ConditionCode::CC_NE;
123 }
124 return inst->IsCommutative();
125 }
126
UserNeedSwapInputs(compiler::Inst * inst,compiler::Inst * user)127 bool UserNeedSwapInputs(compiler::Inst *inst, compiler::Inst *user)
128 {
129 if (!IsCommutative(user)) {
130 return false;
131 }
132 return user->GetInput(AccReadIndex(user)).GetInst() != inst;
133 }
134
135 /// Return true if instruction can write the accumulator.
IsAccWrite(compiler::Inst * inst) const136 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
137 {
138 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
139 }
140
CanIntrinsicReadAcc(compiler::IntrinsicInst * inst) const141 bool RegAccAlloc::CanIntrinsicReadAcc(compiler::IntrinsicInst *inst) const
142 {
143 ASSERT(GetGraph()->IsAbcKit());
144 return inst->IsAccRead();
145 }
146 /**
147 * Decide if user can use accumulator as source.
148 * Do modifications on the order of inputs if necessary.
149 *
150 * Return true, if user can be optimized.
151 */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const152 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
153 {
154 if (user->IsPhi()) {
155 return IsPhiOptimizable(user);
156 }
157
158 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
159 return false;
160 }
161
162 bool found = false;
163 // Check if the instrucion occures more times as input.
164 // v2. SUB v0, v1
165 // v3. Add v2, v2
166 for (auto input : user->GetInputs()) {
167 compiler::Inst *uinput = input.GetInst();
168
169 if (uinput != inst) {
170 continue;
171 }
172
173 if (!found) {
174 found = true;
175 } else {
176 return false;
177 }
178 }
179
180 for (auto &input : user->GetInputs()) {
181 auto inputInst = input.GetInst();
182 if (inputInst != user && inputInst->GetDstReg() == compiler::GetAccReg()) {
183 return false;
184 }
185 if (inst->IsPhi() && inputInst->IsConst() &&
186 (inputInst->GetBasicBlock()->GetId() != user->GetBasicBlock()->GetId())) {
187 return false;
188 }
189 }
190
191 if (user->IsLaunchCall()) {
192 return false;
193 }
194
195 if (GetGraph()->IsAbcKit()) {
196 if (user->IsIntrinsic()) {
197 return CanIntrinsicReadAcc(user->CastToIntrinsic()) && user->GetInput(AccReadIndex(user)).GetInst() == inst;
198 }
199 if (user->IsCall()) {
200 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS);
201 }
202 }
203
204 if (user->IsCallOrIntrinsic()) {
205 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1U); // +1 for SaveState
206 }
207
208 return user->GetInput(AccReadIndex(user)).GetInst() == inst || IsCommutative(user);
209 }
210
211 /**
212 * Check if all the Phi inputs and outputs can use the accumulator register.
213 *
214 * Return true, if Phi can be optimized.
215 */
IsPhiAccReady(compiler::Inst * phi) const216 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
217 {
218 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
219
220 // NOTE(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
221 // These cases are not optimized for accumulator.
222 for (auto input : phi->GetInputs()) {
223 compiler::Inst *phiInput = input.GetInst();
224
225 if (!IsAccWrite(phiInput) || IsAccWriteBetween(phiInput, phi)) {
226 return false;
227 }
228 }
229
230 std::unordered_set<compiler::Inst *> usersThatRequiredSwapInputs;
231 for (auto &user : phi->GetUsers()) {
232 compiler::Inst *uinst = user.GetInst();
233
234 if (!CanUserReadAcc(phi, uinst)) {
235 return false;
236 }
237 if (UserNeedSwapInputs(phi, uinst)) {
238 usersThatRequiredSwapInputs.insert(uinst);
239 }
240 }
241 for (auto uinst : usersThatRequiredSwapInputs) {
242 uinst->SwapInputs();
243 }
244
245 return true;
246 }
247
248 /**
249 * For most insts we can use their src_reg on the acc-read position
250 * to characterise whether we need lda in the codegen pass.
251 */
SetNeedLda(compiler::Inst * inst,bool need)252 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
253 {
254 if (inst->IsPhi() || inst->IsCatchPhi()) {
255 return;
256 }
257 if (!IsAccRead(inst)) {
258 return;
259 }
260 if (IsCall(inst)) {
261 return;
262 }
263
264 compiler::Register reg = need ? compiler::GetInvalidReg() : compiler::GetAccReg();
265 inst->SetSrcReg(AccReadIndex(inst), reg);
266 }
267
MaybeRegDst(compiler::Inst * inst)268 static inline bool MaybeRegDst(compiler::Inst *inst)
269 {
270 compiler::Opcode opcode = inst->GetOpcode();
271 #ifdef ENABLE_LIBABCKIT
272 if (inst->GetBasicBlock()->GetGraph()->IsAbcKit() && inst->IsIntrinsic()) {
273 auto id = inst->CastToIntrinsic()->GetIntrinsicId();
274 return id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT ||
275 id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT_WIDE ||
276 id == ark::compiler::RuntimeInterface::IntrinsicId::INTRINSIC_ABCKIT_LOAD_OBJECT_OBJECT;
277 }
278 #endif
279 return inst->IsConst() || inst->IsBinaryInst() || inst->IsBinaryImmInst() || opcode == compiler::Opcode::LoadObject;
280 }
281
InitRegistersForInst(compiler::Inst * inst)282 static void InitRegistersForInst(compiler::Inst *inst)
283 {
284 if (inst->IsSaveState() || inst->IsCatchPhi()) {
285 return;
286 }
287 if (inst->IsConst()) {
288 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
289 }
290 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
291 inst->SetSrcReg(i, compiler::GetInvalidReg());
292 if (MaybeRegDst(inst)) {
293 inst->SetDstReg(compiler::GetInvalidReg());
294 }
295 }
296 }
297
InitRegisters(compiler::Graph * graph)298 static void InitRegisters(compiler::Graph *graph)
299 {
300 for (auto block : graph->GetBlocksRPO()) {
301 for (auto inst : block->Insts()) {
302 InitRegistersForInst(inst);
303 }
304 }
305 }
306
HasUnsupportedOpcode(compiler::Graph * graph)307 static bool HasUnsupportedOpcode(compiler::Graph *graph)
308 {
309 if (graph->IsDynamicMethod()) {
310 return false;
311 }
312 for (auto block : graph->GetBlocksRPO()) {
313 for (auto inst : block->AllInsts()) {
314 if (inst->GetOpcode() == compiler::Opcode::Builtin) {
315 return true;
316 }
317 }
318 }
319 return false;
320 }
321
MarkPhiInstructions() const322 void RegAccAlloc::MarkPhiInstructions() const
323 {
324 for (auto block : GetGraph()->GetBlocksRPO()) {
325 for (auto phi : block->PhiInsts()) {
326 if (IsPhiAccReady(phi)) {
327 phi->SetMarker(accMarker_);
328 }
329 }
330 }
331 }
332
ClearAccForInstAndUsers(compiler::Inst * inst)333 void RegAccAlloc::ClearAccForInstAndUsers(compiler::Inst *inst)
334 {
335 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
336 for (auto &user : inst->GetUsers()) {
337 compiler::Inst *uinst = user.GetInst();
338 if (uinst->IsSaveState()) {
339 continue;
340 }
341 SetNeedLda(uinst, true);
342 }
343 }
344
MarkInstruction(compiler::Inst * inst)345 void RegAccAlloc::MarkInstruction(compiler::Inst *inst)
346 {
347 if (inst->NoDest() || !IsAccWrite(inst)) {
348 return;
349 }
350
351 bool useAccDstReg = true;
352
353 std::unordered_set<compiler::Inst *> usersThatRequiredSwapInputs;
354 for (auto &user : inst->GetUsers()) {
355 compiler::Inst *uinst = user.GetInst();
356 if (uinst->IsSaveState()) {
357 continue;
358 }
359 if (CanUserReadAcc(inst, uinst)) {
360 if (UserNeedSwapInputs(inst, uinst)) {
361 usersThatRequiredSwapInputs.insert(uinst);
362 }
363 SetNeedLda(uinst, false);
364 } else {
365 useAccDstReg = false;
366 }
367 }
368 for (auto uinst : usersThatRequiredSwapInputs) {
369 uinst->SwapInputs();
370 }
371
372 if (useAccDstReg) {
373 inst->SetDstReg(compiler::GetAccReg());
374 } else if (MaybeRegDst(inst)) {
375 ClearAccForInstAndUsers(inst);
376 }
377 }
378
MarkInstructions()379 void RegAccAlloc::MarkInstructions()
380 {
381 for (auto block : GetGraph()->GetBlocksRPO()) {
382 for (auto inst : block->AllInsts()) {
383 MarkInstruction(inst);
384 }
385
386 // Check if acc is written between inst and its intput
387 for (auto inst : block->Insts()) {
388 if (inst->GetInputsCount() == 0U) {
389 continue;
390 }
391 if (IsCall(inst)) {
392 continue;
393 }
394
395 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
396
397 if (!IsAccWriteBetween(input, inst)) {
398 continue;
399 }
400 input->SetDstReg(compiler::GetInvalidReg());
401 SetNeedLda(inst, true);
402
403 if (MaybeRegDst(input)) {
404 ClearAccForInstAndUsers(input);
405 }
406 }
407 }
408 }
409
410 /**
411 * Determine the accumulator usage between instructions.
412 * Eliminate unnecessary register allocations by applying
413 * a special value (ACC_REG_ID) to the destination and
414 * source registers.
415 * This special value is a marker for the code generator
416 * not to produce lda/sta instructions.
417 */
RunImpl()418 bool RegAccAlloc::RunImpl()
419 {
420 GetGraph()->InitDefaultLocations();
421 InitRegisters(GetGraph());
422
423 if (HasUnsupportedOpcode(GetGraph())) {
424 return false;
425 }
426
427 MarkPhiInstructions();
428 MarkInstructions();
429
430 #ifdef COMPILER_DEBUG_CHECKS
431 GetGraph()->SetRegAccAllocApplied();
432 #endif // COMPILER_DEBUG_CHECKS
433
434 return true;
435 }
436
437 } // namespace ark::bytecodeopt
438