• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 
20 namespace panda::bytecodeopt {
21 
22 /**
23  * Decide if accumulator register gets dirty between two instructions.
24  */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)25 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
26 {
27     ASSERT(src_inst != dst_inst);
28 
29     compiler::BasicBlock *block = src_inst->GetBasicBlock();
30     compiler::Inst *inst = src_inst->GetNext();
31 
32     while (inst != dst_inst) {
33         if (UNLIKELY(inst == nullptr)) {
34             do {
35                 // TODO(rtakacs): visit all the successors to get information about the
36                 // accumulator usage. Only linear flow is supported right now.
37                 if (block->GetSuccsBlocks().size() > 1) {
38                     return true;
39                 }
40 
41                 ASSERT(block->GetSuccsBlocks().size() == 1);
42                 block = block->GetSuccessor(0);
43                 // TODO(rtakacs): only linear flow is supported right now.
44                 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
45                     return true;
46                 }
47             } while (block->IsEmpty() && !block->HasPhi());
48 
49             // Get first phi instruction if exist.
50             // This is requred if dst_inst is a phi node.
51             inst = *(block->AllInsts());
52         } else {
53             if (inst->IsAccWrite()) {
54                 return true;
55             }
56 
57             if (inst->IsAccRead()) {
58                 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
59 
60                 if (input->GetDstReg() != compiler::ACC_REG_ID) {
61                     return true;
62                 }
63             }
64 
65             inst = inst->GetNext();
66         }
67     }
68 
69     return false;
70 }
71 
72 /**
73  * Return true if Phi instruction is marked as optimizable.
74  */
IsPhiOptimizable(compiler::Inst * phi) const75 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
76 {
77     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
78     return phi->IsMarked(acc_marker_);
79 }
80 
81 /**
82  * Return true if instruction can read the accumulator.
83  */
IsAccRead(compiler::Inst * inst) const84 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
85 {
86     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
87 }
88 
89 /**
90  * Return true if instruction can write the accumulator.
91  */
IsAccWrite(compiler::Inst * inst) const92 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
93 {
94     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
95 }
96 
97 /**
98  * Decide if user can use accumulator as source.
99  * Do modifications on the order of inputs if necessary.
100  *
101  * Return true, if user can be optimized.
102  */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const103 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
104 {
105     if (user->IsPhi()) {
106         return IsPhiOptimizable(user);
107     }
108 
109     if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
110         return false;
111     }
112 
113     bool found = false;
114     // Check if the instrucion occures more times as input.
115     // v2. SUB v0, v1
116     // v3. Add v2, v2
117     for (auto input : user->GetInputs()) {
118         compiler::Inst *uinput = input.GetInst();
119 
120         if (uinput != inst) {
121             continue;
122         }
123 
124         if (!found) {
125             found = true;
126         } else {
127             return false;
128         }
129     }
130 
131     if (user->IsCall()) {
132         return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1);  // +1 for SaveState
133     }
134 
135     return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
136 }
137 
138 /**
139  * Check if all the Phi inputs and outputs can use the accumulator register.
140  *
141  * Return true, if Phi can be optimized.
142  */
IsPhiAccReady(compiler::Inst * phi) const143 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
144 {
145     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
146 
147     // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
148     // These cases are not optimized for accumulator.
149     for (auto input : phi->GetInputs()) {
150         compiler::Inst *phi_input = input.GetInst();
151 
152         if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
153             return false;
154         }
155     }
156 
157     for (auto &user : phi->GetUsers()) {
158         compiler::Inst *uinst = user.GetInst();
159 
160         if (!CanUserReadAcc(phi, uinst)) {
161             return false;
162         }
163     }
164     return true;
165 }
166 
167 /**
168  * For most insts we can use their src_reg on the acc-read position
169  * to characterise whether we need lda in the codegen pass.
170  */
SetNeedLda(compiler::Inst * inst,bool need)171 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
172 {
173     if (inst->IsPhi() || inst->IsCatchPhi()) {
174         return;
175     }
176     if (!IsAccRead(inst)) {
177         return;
178     }
179     if (inst->IsCall()) {  // we never need lda for calls
180         return;
181     }
182     compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
183     inst->SetSrcReg(AccReadIndex(inst), reg);
184 }
185 
InitializeSourceRegisters()186 void RegAccAlloc::InitializeSourceRegisters()
187 {
188     for (auto block : GetGraph()->GetBlocksRPO()) {
189         for (auto inst : block->Insts()) {
190             if (inst->IsSaveState() || inst->IsCatchPhi()) {
191                 continue;
192             }
193             if (inst->IsConst()) {
194                 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
195             }
196             for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
197                 inst->SetSrcReg(i, compiler::INVALID_REG);
198             }
199             if (inst->IsConst()) {
200                 inst->SetDstReg(compiler::INVALID_REG);
201             }
202         }
203     }
204 }
205 
MarkAccForPhiInstructions()206 void RegAccAlloc::MarkAccForPhiInstructions()
207 {
208     for (auto block : GetGraph()->GetBlocksRPO()) {
209         for (auto phi : block->PhiInsts()) {
210             if (IsPhiAccReady(phi)) {
211                 phi->SetMarker(acc_marker_);
212             }
213         }
214     }
215 }
216 
MarkAccForInstructions(compiler::BasicBlock * block)217 void RegAccAlloc::MarkAccForInstructions(compiler::BasicBlock *block)
218 {
219     for (auto inst : block->AllInsts()) {
220         if (inst->NoDest() || !IsAccWrite(inst)) {
221             continue;
222         }
223 
224         bool use_acc_dst_reg = true;
225 
226         for (auto &user : inst->GetUsers()) {
227             compiler::Inst *uinst = user.GetInst();
228             if (uinst->IsSaveState()) {
229                 continue;
230             }
231             if (CanUserReadAcc(inst, uinst)) {
232                 SetNeedLda(uinst, false);
233             } else {
234                 use_acc_dst_reg = false;
235             }
236         }
237 
238         if (use_acc_dst_reg) {
239             inst->SetDstReg(compiler::ACC_REG_ID);
240             continue;
241         }
242 
243         if (!inst->IsConst()) {
244             continue;
245         }
246 
247         inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
248 
249         for (auto &user : inst->GetUsers()) {
250             compiler::Inst *uinst = user.GetInst();
251 
252             if (uinst->IsSaveState()) {
253                 continue;
254             }
255 
256             SetNeedLda(uinst, true);
257         }
258     }
259 }
260 
UpdateInstructionsAfterMark(compiler::BasicBlock * block)261 void RegAccAlloc::UpdateInstructionsAfterMark(compiler::BasicBlock *block)
262 {
263     for (auto inst : block->Insts()) {
264         if (inst->GetInputsCount() == 0) {
265             continue;
266         }
267 
268         if (inst->IsCall()) {
269             continue;
270         }
271 
272         compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
273         if (!IsAccWriteBetween(input, inst)) {
274             continue;
275         }
276 
277         input->SetDstReg(compiler::INVALID_REG);
278         SetNeedLda(inst, true);
279 
280         if (input->IsConst()) {
281             input->ClearFlag(compiler::inst_flags::ACC_WRITE);
282             for (auto &user : input->GetUsers()) {
283                 compiler::Inst *uinst = user.GetInst();
284                 SetNeedLda(uinst, true);
285             }
286         }
287     }
288 }
289 
290 /**
291  * Determine the accumulator usage between instructions.
292  * Eliminate unnecessary register allocations by applying
293  * a special value (ACC_REG_ID) to the destination and
294  * source registers.
295  * This special value is a marker for the code generator
296  * not to produce lda/sta instructions.
297  */
RunImpl()298 bool RegAccAlloc::RunImpl()
299 {
300     GetGraph()->InitDefaultLocations();
301     // Initialize all source register of all instructions.
302     InitializeSourceRegisters();
303     // Mark Phi instructions if they can be optimized for acc.
304     MarkAccForPhiInstructions();
305     // Mark instructions if they can be optimized for acc.
306     for (auto block : GetGraph()->GetBlocksRPO()) {
307         MarkAccForInstructions(block);
308         UpdateInstructionsAfterMark(block);
309     }
310 
311 #ifndef NDEBUG
312     GetGraph()->SetRegAccAllocApplied();
313 #endif  // NDEBUG
314 
315     return true;
316 }
317 
318 }  // namespace panda::bytecodeopt
319