• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_encoder.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 
20 namespace panda::bytecodeopt {
21 
IsIntrinsicRange(Inst * inst)22 static bool IsIntrinsicRange(Inst *inst)
23 {
24     if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
25         return false;
26     }
27 #ifdef ENABLE_BYTECODE_OPT
28     switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
29         case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
30         case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
31         case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
32         case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
33         case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
34         case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
35         case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
36         case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
37         case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
38         case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
39         case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
40         case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
41         case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
42             return true;
43         default:
44             return false;
45     }
46 #endif
47     return false;
48 }
49 
CanHoldRange(Inst * inst)50 static bool CanHoldRange(Inst *inst)
51 {
52     switch (inst->GetOpcode()) {
53         case compiler::Opcode::Intrinsic:
54             return IsIntrinsicRange(inst);
55         default:
56             return false;
57     }
58 }
59 
/// Scans the whole graph and returns the largest argument count among
/// instructions that must (or may, when exceeding the non-range limit) be
/// encoded as range calls — i.e. how many contiguous temps must be reserved.
static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
{
    compiler::Register max_needed = 0;

    for (auto *block : graph->GetBlocksRPO()) {
        for (const auto &inst : block->AllInsts()) {
            if (!CanHoldRange(inst)) {
                continue;
            }
            // SaveState input (if any) is not a real argument.
            auto num_args = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
            if (max_needed < num_args && (num_args > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst))) {
                max_needed = num_args;
            }
        }
    }

    return max_needed;
}
80 
/// Pass entry point. Runs the three encoder phases in order:
///  1) RENUMBER_ARGS  — compact locals/args so range temps fit in the frame;
///  2) RESERVE_TEMPS  — iterate to a fixed point on the number of temps needed
///                      for wide (non-compactly-encodable) source registers;
///  3) INSERT_SPILLS  — materialize the spill moves (only if temps are needed).
/// Returns false when the virtual frame runs out of registers.
bool RegEncoder::RunImpl()
{
    ASSERT(state_ == RegEncoderState::IDLE);

    num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());

    state_ = RegEncoderState::RENUMBER_ARGS;
    if (!RenumberArgRegs()) {
        return false;
    }

    state_ = RegEncoderState::RESERVE_TEMPS;
    ASSERT(num_temps_ == 0);

    const auto num_regs = GetNumRegs();

    // Fixed-point loop: reserving temps shifts registers upward, which can in
    // turn push more source registers past the compact-encoding limit and
    // require yet more temps. Repeat until num_temps_ stabilizes.
    auto max_num_temps = num_temps_;
    CalculateNumNeededTemps();

    while (max_num_temps != num_temps_) {
        ASSERT(num_temps_ > max_num_temps);

        if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) {  // to avoid overflow
            return false;                                            // no more free registers left in the frame
        }

        auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
        range_temps_start_ += delta;

        RenumberRegs(MIN_REGISTER_NUMBER, delta);

        max_num_temps = num_temps_;
        CalculateNumNeededTemps();
    }

    if (num_temps_ > 0 || num_max_range_input_ > 0) {
        state_ = RegEncoderState::INSERT_SPILLS;
        InsertSpills();

        // Shift the usage mask up by num_temps_ (copying from the top down to
        // avoid clobbering), then mark the freed low registers as used temps.
        auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
        for (compiler::Register r = 0; r < num_regs; r++) {
            usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
        }
        std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
    }

    SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
    state_ = RegEncoderState::IDLE;

    return true;
}
132 
GetRegType(panda::compiler::DataType::Type type)133 static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
134 {
135     if (type == panda::compiler::DataType::REFERENCE) {
136         return type;
137     }
138     if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
139         return panda::compiler::DataType::UINT32;
140     }
141     return panda::compiler::DataType::UINT64;
142 }
143 
RegNeedsRenumbering(panda::compiler::Register r)144 static bool RegNeedsRenumbering(panda::compiler::Register r)
145 {
146     return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
147 }
148 
/// Shifts a register number forward by delta; the accumulator is untouched.
static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
{
    return (r == panda::compiler::ACC_REG_ID) ? r : static_cast<panda::compiler::Register>(r + delta);
}
156 
RenumberSpillFillRegs(panda::compiler::SpillFillInst * inst,const panda::compiler::Register min_reg,const panda::compiler::Register delta)157 static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
158                                   const panda::compiler::Register delta)
159 {
160     for (auto &sf : inst->GetSpillFills()) {
161         if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
162             sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
163         }
164         if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
165             sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
166         }
167     }
168 }
169 
RenumberRegs(const compiler::Register min_reg,const compiler::Register delta)170 void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
171 {
172     // Renumbering always advances register number `delta` positions forward,
173     // wrapping around on overflows with well-defined behavour.
174     // Hence the requirement to keep delta unsigned.
175     static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
176     ASSERT(delta > 0);
177 
178     for (auto *bb : GetGraph()->GetBlocksRPO()) {
179         for (auto inst : bb->AllInsts()) {
180             // Renumber output of any instruction, if applicable:
181             if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
182                 inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
183             }
184 
185             if (inst->IsPhi() || inst->IsCatchPhi()) {
186                 continue;
187             }
188 
189             // Renumber inputs and outputs of SpillFill instructions:
190             if (inst->IsSpillFill()) {
191                 RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
192                 continue;
193             }
194 
195             // Fix inputs of common instructions:
196             for (size_t i = 0; i < inst->GetInputsCount(); i++) {
197                 if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
198                     inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
199                 }
200             }
201         }
202     }
203 }
204 
/// Compacts the register file so that locals sit at the bottom, the reserved
/// range-call temps follow them, and temps + arguments sit at the top.
/// Frame layout assumed here: [locals ... free ... temps][args].
/// Returns false when the free registers cannot accommodate the range temps.
bool RegEncoder::RenumberArgRegs()
{
    const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
    ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);

    auto frame_size = static_cast<compiler::Register>(usage_mask->size());
    const auto num_args = GetNumArgsFromGraph();
    ASSERT(frame_size >= num_args);

    auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
    if (num_max_range_input_ > num_non_args) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Count the contiguous run of used registers at the bottom of the frame —
    // these are the locals.
    compiler::Register num_locals = 0;
    if (num_non_args != 0) {
        while (num_locals != num_non_args && usage_mask->at(num_locals)) {
            ++num_locals;
        }
    }

    // Count the contiguous run of used registers at the top of the non-arg
    // area — these are the temps. The `r < num_non_args` condition also stops
    // the scan on unsigned wrap-around when r decrements below zero.
    compiler::Register num_temps = 0;
    if (num_locals != num_non_args) {
        compiler::Register r = num_non_args - 1;
        while (r < num_non_args && usage_mask->at(r)) {
            ++num_temps;
            --r;
        }
    }

    if (num_locals + num_temps > num_non_args - num_max_range_input_) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Range temps are placed immediately after the locals.
    range_temps_start_ = num_locals;

    bool do_renumber = true;

    if (num_non_args == 0 && num_max_range_input_ == 0) {  // all registers are arguments: no need to renumber
        do_renumber = false;
    }

    // All free regs will be just enough to encode call.range: no need to renumber
    if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
        do_renumber = false;
    }

    if (num_temps + num_args == 0) {  // no temps and no args: nothing to renumber
        do_renumber = false;
    }

    if (do_renumber) {
        const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
        ASSERT(min_reg > MIN_REGISTER_NUMBER);

        // Assert that if temps are present, they are marked allocated in the mask:
        for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
            ASSERT(usage_mask->at(r));
        }

        // Assert that there are no used regs between locals and temps + arguments:
        for (compiler::Register r = num_locals; r < min_reg; r++) {
            ASSERT(!usage_mask->at(r));
        }

        // Close the gap: move temps + args down so only the range-temp window
        // remains free between locals and temps.
        auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
        RenumberRegs(min_reg, delta);

        for (compiler::Register r = min_reg; r < frame_size; r++) {
            usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
            usage_mask->at(r) = false;
        }
    }

    SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
    return true;
}
284 
InsertSpills()285 void RegEncoder::InsertSpills()
286 {
287     ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));
288 
289     for (auto *bb : GetGraph()->GetBlocksRPO()) {
290         for (auto inst : bb->AllInstsSafe()) {
291             if (inst->GetInputsCount() == 0) {
292                 continue;
293             }
294 
295             VisitInstruction(inst);
296             // TODO(aantipina): Enable assert here for GetStatus() as soon code generation is fully supported
297         }
298     }
299 }
300 
CalculateNumNeededTemps()301 void RegEncoder::CalculateNumNeededTemps()
302 {
303     num_temps_ = 0;
304 
305     for (auto bb : GetGraph()->GetBlocksRPO()) {
306         for (auto inst : bb->AllInstsSafe()) {
307             if (inst->GetInputsCount() == 0) {
308                 continue;
309             }
310 
311             VisitInstruction(inst);
312             // TODO(aantipina): Enable here for GetStatus() as soon code generation is fully supported
313         }
314     }
315 
316     LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
317                                    << ": num_temps_ = " << std::to_string(num_temps_);
318 }
319 
320 template <typename T>
AddMoveBefore(Inst * inst,const T & sp_container)321 static void AddMoveBefore(Inst *inst, const T &sp_container)
322 {
323     if (sp_container.empty()) {
324         return;
325     }
326     auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
327     for (auto const &[src, dst] : sp_container) {
328         ASSERT(src != compiler::ACC_REG_ID);
329         sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
330         LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
331                                        << static_cast<int>(src) << " was added";
332     }
333     inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
334 }
335 
IsAccReadPosition(compiler::Inst * inst,size_t pos)336 static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
337 {
338     // Calls can have accumulator at any position, return false for them
339     return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
340 }
341 
/// Inserts spill moves for an intrinsic with dynamic inputs.
/// Non-range calls: each wide source register is mapped (deduplicated) to a
/// fresh low temp. Range calls: every argument is copied, in order, into the
/// contiguous block starting at range_temps_start_.
void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
    ASSERT(inst->IsIntrinsic());

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type), non-callrange
    RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter());  // spill_vec is used to handle callrange

    // SaveState input (if any) is not a real argument.
    auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
    size_t start = 0;
    bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));

    // Range calls fill the reserved contiguous window; non-range calls use
    // temps starting from register 0.
    compiler::Register temp = range ? range_temps_start_ : 0;

    for (size_t i = start; i < nargs; ++i) {
        auto src_reg = inst->GetSrcReg(i);
        auto type = inst->GetInputType(i);

        // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
        if (IsAccReadPosition(inst, i)) {
            continue;
        }

        if (!range) {
            // Only wide registers (not compactly encodable) need a temp.
            if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
                continue;
            }

            auto res = spill_map.emplace(src_reg, RegContent(temp, type));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: CallStatic v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        } else {
            // Range form: every argument is copied, no deduplication.
            spill_vec.emplace_back(src_reg, RegContent(temp, type));
            inst->SetSrcReg(i, temp++);
        }
    }

    AddMoveBefore(inst, spill_map);
    AddMoveBefore(inst, spill_vec);
}
389 
InsertSpillsForInst(compiler::Inst * inst)390 void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
391 {
392     ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
393 
394     RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type)
395 
396     if (inst->IsOperandsDynamic()) {
397         InsertSpillsForDynInputsInst(inst);
398         return;
399     }
400 
401     compiler::Register temp = 0;
402     for (size_t i = 0; i < inst->GetInputsCount(); i++) {
403         auto reg = inst->GetSrcReg(i);
404         if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
405             auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
406             if (res.second) {
407                 inst->SetSrcReg(i, temp++);
408             } else {
409                 // Such register is already in map.
410                 // It can be ok for cases like: and v49, v49
411                 // Such instructions can be generated by optimizer too.
412                 const RegContent &reg_cont = res.first->second;
413                 inst->SetSrcReg(i, reg_cont.reg);
414             }
415         }
416     }
417 
418     AddMoveBefore(inst, spill_map);
419 }
420 
IncTempsIfNeeded(const compiler::Register reg,compiler::Register & num_temps)421 static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
422 {
423     if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
424         num_temps++;
425     }
426 }
427 
CalculateNumNeededTempsForInst(compiler::Inst * inst)428 void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
429 {
430     ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);
431 
432     compiler::Register num_temps = 0;
433 
434     if (inst->IsOperandsDynamic()) {
435         if (IsIntrinsicRange(inst)) {
436             return;
437         }
438         ASSERT(inst->IsIntrinsic());
439 
440         auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
441         size_t start = 0;
442 
443         if (nargs - start > MAX_NUM_NON_RANGE_ARGS) {  // is call.range
444             return;
445         }
446 
447         for (size_t i = start; i < nargs; i++) {
448             if (IsAccReadPosition(inst, i)) {
449                 continue;
450             }
451             auto reg = inst->GetSrcReg(i);
452             if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
453                 num_temps++;
454             }
455         }
456     } else {
457         for (size_t i = 0; i < inst->GetInputsCount(); i++) {
458             IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
459         }
460     }
461 
462     ASSERT(num_temps <= MAX_NUM_INPUTS);
463 
464     num_temps_ = std::max(num_temps, num_temps_);
465 }
466 
Check4Width(compiler::Inst * inst)467 void RegEncoder::Check4Width(compiler::Inst *inst)
468 {
469     switch (state_) {
470         case RegEncoderState::RESERVE_TEMPS: {
471             CalculateNumNeededTempsForInst(inst);
472             break;
473         }
474         case RegEncoderState::INSERT_SPILLS: {
475             InsertSpillsForInst(inst);
476             break;
477         }
478         default:
479             UNREACHABLE();
480     }
481 }
482 
/// Intentionally a no-op: 8-bit operand width never overflows while register
/// numbers stay below 256, so there is nothing to check yet.
void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
{
    // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
}
487 
VisitIntrinsic(GraphVisitor * visitor,Inst * inst)488 void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
489 {
490     auto re = static_cast<RegEncoder *>(visitor);
491     if (IsIntrinsicRange(inst)) {
492         re->Check4Width(inst->CastToIntrinsic());
493         return;
494     }
495 
496     re->Check8Width(inst->CastToIntrinsic());
497 }
498 
499 #include "generated/check_width.cpp"
500 }  // namespace panda::bytecodeopt
501