• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20 
21 namespace panda::bytecodeopt {
22 
23 /**
24  * Decide if accumulator register gets dirty between two instructions.
25  */
IsAccWriteBetween(compiler::Inst * src_inst,compiler::Inst * dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28     ASSERT(src_inst != dst_inst);
29 
30     compiler::BasicBlock *block = src_inst->GetBasicBlock();
31     compiler::Inst *inst = src_inst->GetNext();
32 
33     while (inst != dst_inst) {
34         if (UNLIKELY(inst == nullptr)) {
35             do {
36                 // TODO(rtakacs): visit all the successors to get information about the
37                 // accumulator usage. Only linear flow is supported right now.
38                 if (block->GetSuccsBlocks().size() > 1) {
39                     return true;
40                 }
41 
42                 ASSERT(block->GetSuccsBlocks().size() == 1);
43                 block = block->GetSuccessor(0);
44 
45                 // TODO(rtakacs): only linear flow is supported right now.
46                 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
47                     return true;
48                 }
49             } while (block->IsEmpty() && !block->HasPhi());
50 
51             // Get first phi instruction if exist.
52             // This is requred if dst_inst is a phi node.
53             inst = *(block->AllInsts());
54         } else {
55             if (inst->IsAccWrite()) {
56                 return true;
57             }
58 
59             if (inst->IsAccRead()) {
60                 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
61 
62                 if (input->GetDstReg() != compiler::ACC_REG_ID) {
63                     return true;
64                 }
65             }
66 
67             inst = inst->GetNext();
68         }
69     }
70 
71     return false;
72 }
73 
74 /**
75  * Return true if Phi instruction is marked as optimizable.
76  */
IsPhiOptimizable(compiler::Inst * phi) const77 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
78 {
79     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
80     return phi->IsMarked(acc_marker_);
81 }
82 
83 /**
84  * Return true if instruction can read the accumulator.
85  */
IsAccRead(compiler::Inst * inst) const86 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
87 {
88     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
89 }
90 
91 /**
92  * Return true if instruction can write the accumulator.
93  */
IsAccWrite(compiler::Inst * inst) const94 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
95 {
96     return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
97 }
98 
99 /**
100  * Decide if user can use accumulator as source.
101  * Do modifications on the order of inputs if necessary.
102  *
103  * Return true, if user can be optimized.
104  */
CanUserReadAcc(compiler::Inst * inst,compiler::Inst * user) const105 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
106 {
107     if (user->IsPhi()) {
108         return IsPhiOptimizable(user);
109     }
110 
111     if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
112         return false;
113     }
114 
115     bool found = false;
116     // Check if the instrucion occures more times as input.
117     // v2. SUB v0, v1
118     // v3. Add v2, v2
119     for (auto input : user->GetInputs()) {
120         compiler::Inst *uinput = input.GetInst();
121 
122         if (uinput != inst) {
123             continue;
124         }
125 
126         if (!found) {
127             found = true;
128         } else {
129             return false;
130         }
131     }
132 
133     if (user->IsCall()) {
134         return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1);  // +1 for SaveState
135     }
136 
137     return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
138 }
139 
140 /**
141  * Check if all the Phi inputs and outputs can use the accumulator register.
142  *
143  * Return true, if Phi can be optimized.
144  */
IsPhiAccReady(compiler::Inst * phi) const145 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
146 {
147     ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
148 
149     // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
150     // These cases are not optimized for accumulator.
151     for (auto input : phi->GetInputs()) {
152         compiler::Inst *phi_input = input.GetInst();
153 
154         if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
155             return false;
156         }
157     }
158 
159     for (auto &user : phi->GetUsers()) {
160         compiler::Inst *uinst = user.GetInst();
161 
162         if (!CanUserReadAcc(phi, uinst)) {
163             return false;
164         }
165     }
166     return true;
167 }
168 
169 /**
170  * For most insts we can use their src_reg on the acc-read position
171  * to characterise whether we need lda in the codegen pass.
172  */
SetNeedLda(compiler::Inst * inst,bool need)173 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
174 {
175     if (inst->IsPhi() || inst->IsCatchPhi()) {
176         return;
177     }
178     if (!IsAccRead(inst)) {
179         return;
180     }
181     if (inst->IsCall()) {  // we never need lda for calls
182         return;
183     }
184     compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
185     inst->SetSrcReg(AccReadIndex(inst), reg);
186 }
187 
188 /**
189  * Determine the accumulator usage between instructions.
190  * Eliminate unnecessary register allocations by applying
191  * a special value (ACC_REG_ID) to the destination and
192  * source registers.
193  * This special value is a marker for the code generator
194  * not to produce lda/sta instructions.
195  */
RunImpl()196 bool RegAccAlloc::RunImpl()
197 {
198     GetGraph()->InitDefaultLocations();
199     // Initialize all source register of all instructions.
200     for (auto block : GetGraph()->GetBlocksRPO()) {
201         for (auto inst : block->Insts()) {
202             if (inst->IsSaveState() || inst->IsCatchPhi()) {
203                 continue;
204             }
205             if (inst->IsConst()) {
206                 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
207             }
208             for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
209                 inst->SetSrcReg(i, compiler::INVALID_REG);
210                 if (inst->IsConst()) {
211                     inst->SetDstReg(compiler::INVALID_REG);
212                 }
213             }
214         }
215     }
216 
217     // Mark Phi instructions if they can be optimized for acc.
218     for (auto block : GetGraph()->GetBlocksRPO()) {
219         for (auto phi : block->PhiInsts()) {
220             if (IsPhiAccReady(phi)) {
221                 phi->SetMarker(acc_marker_);
222             }
223         }
224     }
225 
226     // Mark instructions if they can be optimized for acc.
227     for (auto block : GetGraph()->GetBlocksRPO()) {
228         for (auto inst : block->AllInsts()) {
229             if (inst->NoDest() || !IsAccWrite(inst)) {
230                 continue;
231             }
232 
233             bool use_acc_dst_reg = true;
234 
235             for (auto &user : inst->GetUsers()) {
236                 compiler::Inst *uinst = user.GetInst();
237                 if (uinst->IsSaveState()) {
238                     continue;
239                 }
240                 if (CanUserReadAcc(inst, uinst)) {
241                     SetNeedLda(uinst, false);
242                 } else {
243                     use_acc_dst_reg = false;
244                 }
245             }
246 
247             if (use_acc_dst_reg) {
248                 inst->SetDstReg(compiler::ACC_REG_ID);
249             } else if (inst->IsConst()) {
250                 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
251                 for (auto &user : inst->GetUsers()) {
252                     compiler::Inst *uinst = user.GetInst();
253                     if (uinst->IsSaveState()) {
254                         continue;
255                     }
256                     SetNeedLda(uinst, true);
257                 }
258             }
259         }
260 
261         for (auto inst : block->Insts()) {
262             if (inst->GetInputsCount() == 0) {
263                 continue;
264             }
265 
266             if (inst->IsCall()) {
267                 continue;
268             }
269 
270             compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
271 
272             if (IsAccWriteBetween(input, inst)) {
273                 input->SetDstReg(compiler::INVALID_REG);
274                 SetNeedLda(inst, true);
275 
276                 if (input->IsConst()) {
277                     input->ClearFlag(compiler::inst_flags::ACC_WRITE);
278                     for (auto &user : input->GetUsers()) {
279                         compiler::Inst *uinst = user.GetInst();
280                         SetNeedLda(uinst, true);
281                     }
282                 }
283             }
284         }
285     }
286 
287 #ifndef NDEBUG
288     GetGraph()->SetRegAccAllocApplied();
289 #endif  // NDEBUG
290 
291     return true;
292 }
293 
294 }  // namespace panda::bytecodeopt
295