• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "reg_encoder.h"
17 #include "common.h"
18 
19 namespace panda::bytecodeopt {
20 
IsIntrinsicRange(Inst * inst)21 static bool IsIntrinsicRange(Inst *inst)
22 {
23     if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
24         return false;
25     }
26 #ifdef ENABLE_BYTECODE_OPT
27     switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
28         case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
29         case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
30         case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
31         case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
32         case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
33         case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
34         case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
35         case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
36         case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
37         case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
38         case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
39         case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
40         case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
41             return true;
42         default:
43             return false;
44     }
45 #endif
46     return false;
47 }
48 
CanHoldRange(Inst * inst)49 static bool CanHoldRange(Inst *inst)
50 {
51     switch (inst->GetOpcode()) {
52         case compiler::Opcode::Intrinsic:
53             return IsIntrinsicRange(inst);
54         default:
55             return false;
56     }
57 }
58 
CalculateNumNeededRangeTemps(const compiler::Graph * graph)59 static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
60 {
61     compiler::Register ret = 0;
62 
63     for (auto bb : graph->GetBlocksRPO()) {
64         for (const auto &inst : bb->AllInsts()) {
65             if (!CanHoldRange(inst)) {
66                 continue;
67             }
68             auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
69             if (ret < nargs) {
70                 if (nargs > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst)) {
71                     ret = nargs;
72                 }
73             }
74         }
75     }
76 
77     return ret;
78 }
79 
RunImpl()80 bool RegEncoder::RunImpl()
81 {
82     ASSERT(state_ == RegEncoderState::IDLE);
83 
84     num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());
85 
86     state_ = RegEncoderState::RENUMBER_ARGS;
87     if (!RenumberArgRegs()) {
88         return false;
89     }
90 
91     state_ = RegEncoderState::RESERVE_TEMPS;
92     ASSERT(num_temps_ == 0);
93 
94     const auto num_regs = GetNumRegs();
95 
96     auto max_num_temps = num_temps_;
97     CalculateNumNeededTemps();
98 
99     while (max_num_temps != num_temps_) {
100         ASSERT(num_temps_ > max_num_temps);
101 
102         if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) {  // to avoid overflow
103             return false;                                            // no more free registers left in the frame
104         }
105 
106         auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
107         range_temps_start_ += delta;
108 
109         RenumberRegs(MIN_REGISTER_NUMBER, delta);
110 
111         max_num_temps = num_temps_;
112         CalculateNumNeededTemps();
113     }
114 
115     if (num_temps_ > 0 || num_max_range_input_ > 0) {
116         state_ = RegEncoderState::INSERT_SPILLS;
117         InsertSpills();
118 
119         auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
120         for (compiler::Register r = 0; r < num_regs; r++) {
121             usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
122         }
123         std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
124     }
125 
126     SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
127     state_ = RegEncoderState::IDLE;
128 
129     return true;
130 }
131 
GetRegType(panda::compiler::DataType::Type type)132 static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
133 {
134     if (type == panda::compiler::DataType::REFERENCE) {
135         return type;
136     }
137     if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
138         return panda::compiler::DataType::UINT32;
139     }
140     return panda::compiler::DataType::UINT64;
141 }
142 
RegNeedsRenumbering(panda::compiler::Register r)143 static bool RegNeedsRenumbering(panda::compiler::Register r)
144 {
145     return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
146 }
147 
RenumberReg(const panda::compiler::Register r,const panda::compiler::Register delta)148 static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
149 {
150     if (r == panda::compiler::ACC_REG_ID) {
151         return r;
152     }
153     return r + delta;
154 }
155 
RenumberSpillFillRegs(panda::compiler::SpillFillInst * inst,const panda::compiler::Register min_reg,const panda::compiler::Register delta)156 static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
157                                   const panda::compiler::Register delta)
158 {
159     for (auto &sf : inst->GetSpillFills()) {
160         if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
161             sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
162         }
163         if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
164             sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
165         }
166     }
167 }
168 
RenumberRegs(const compiler::Register min_reg,const compiler::Register delta)169 void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
170 {
171     // Renumbering always advances register number `delta` positions forward,
172     // wrapping around on overflows with well-defined behavour.
173     // Hence the requirement to keep delta unsigned.
174     static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
175     ASSERT(delta > 0);
176 
177     for (auto *bb : GetGraph()->GetBlocksRPO()) {
178         for (auto inst : bb->AllInsts()) {
179             // Renumber output of any instruction, if applicable:
180             if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
181                 inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
182             }
183 
184             if (inst->IsPhi() || inst->IsCatchPhi()) {
185                 continue;
186             }
187 
188             // Renumber inputs and outputs of SpillFill instructions:
189             if (inst->IsSpillFill()) {
190                 RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
191                 continue;
192             }
193 
194             // Fix inputs of common instructions:
195             for (size_t i = 0; i < inst->GetInputsCount(); i++) {
196                 if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
197                     inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
198                 }
199             }
200         }
201     }
202 }
203 
RenumberArgRegs()204 bool RegEncoder::RenumberArgRegs()
205 {
206     const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
207     ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);
208 
209     auto frame_size = static_cast<compiler::Register>(usage_mask->size());
210     const auto num_args = GetNumArgsFromGraph();
211     ASSERT(frame_size >= num_args);
212 
213     auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
214     if (num_max_range_input_ > num_non_args) {
215         LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
216         return false;
217     }
218 
219     compiler::Register num_locals = 0;
220     if (num_non_args != 0) {
221         while (num_locals != num_non_args && usage_mask->at(num_locals)) {
222             ++num_locals;
223         }
224     }
225 
226     compiler::Register num_temps = 0;
227     if (num_locals != num_non_args) {
228         compiler::Register r = num_non_args - 1;
229         while (r < num_non_args && usage_mask->at(r)) {
230             ++num_temps;
231             --r;
232         }
233     }
234 
235     if (num_locals + num_temps > num_non_args - num_max_range_input_) {
236         LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
237         return false;
238     }
239 
240     range_temps_start_ = num_locals;
241 
242     bool do_renumber = true;
243 
244     if (num_non_args == 0 && num_max_range_input_ == 0) {  // all registers are arguments: no need to renumber
245         do_renumber = false;
246     }
247 
248     // All free regs will be just enough to encode call.rang: no need to renumber
249     if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
250         do_renumber = false;
251     }
252 
253     if (num_temps + num_args == 0) {  // no temps and no args: nothing to renumber
254         do_renumber = false;
255     }
256 
257     if (do_renumber) {
258         const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
259         ASSERT(min_reg > MIN_REGISTER_NUMBER);
260 
261         // Assert that if temps are present, they are marked allocated in the mask:
262         for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
263             ASSERT(usage_mask->at(r));
264         }
265 
266         // Assert that there are no used regs between locals and temps + arguments:
267         for (compiler::Register r = num_locals; r < min_reg; r++) {
268             ASSERT(!usage_mask->at(r));
269         }
270 
271         auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
272         RenumberRegs(min_reg, delta);
273 
274         for (compiler::Register r = min_reg; r < frame_size; r++) {
275             usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
276             usage_mask->at(r) = false;
277         }
278     }
279 
280     SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
281     return true;
282 }
283 
InsertSpills()284 void RegEncoder::InsertSpills()
285 {
286     ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));
287 
288     for (auto *bb : GetGraph()->GetBlocksRPO()) {
289         for (auto inst : bb->AllInstsSafe()) {
290             if (inst->GetInputsCount() == 0) {
291                 continue;
292             }
293 
294             VisitInstruction(inst);
295             // TODO(aantipina): Enable assert here for GetStatus() as soon code generation is fully supported
296         }
297     }
298 }
299 
CalculateNumNeededTemps()300 void RegEncoder::CalculateNumNeededTemps()
301 {
302     num_temps_ = 0;
303 
304     for (auto bb : GetGraph()->GetBlocksRPO()) {
305         for (auto inst : bb->AllInstsSafe()) {
306             if (inst->GetInputsCount() == 0) {
307                 continue;
308             }
309 
310             VisitInstruction(inst);
311             // TODO(aantipina): Enable here for GetStatus() as soon code generation is fully supported
312         }
313     }
314 
315     LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
316                                    << ": num_temps_ = " << std::to_string(num_temps_);
317 }
318 
319 template <typename T>
AddMoveBefore(Inst * inst,const T & sp_container)320 static void AddMoveBefore(Inst *inst, const T &sp_container)
321 {
322     if (sp_container.empty()) {
323         return;
324     }
325     auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
326     for (auto const &[src, dst] : sp_container) {
327         ASSERT(src != compiler::ACC_REG_ID);
328         sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
329         LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
330                                        << static_cast<int>(src) << " was added";
331     }
332     inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
333 }
334 
IsAccReadPosition(compiler::Inst * inst,size_t pos)335 static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
336 {
337     // Calls can have accumulator at any position, return false for them
338     return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
339 }
340 
InsertSpillsForDynInputsInst(compiler::Inst * inst)341 void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
342 {
343     ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
344     ASSERT(inst->IsIntrinsic());
345 
346     RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type), non-callrange
347     RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter());  // spill_vec is used to handle callrange
348 
349     auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
350     size_t start = 0;
351     bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));
352 
353     compiler::Register temp = range ? range_temps_start_ : 0;
354 
355     for (size_t i = start; i < nargs; ++i) {
356         auto src_reg = inst->GetSrcReg(i);
357         auto type = inst->GetInputType(i);
358 
359         // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
360         if (IsAccReadPosition(inst, i)) {
361             continue;
362         }
363 
364         if (!range) {
365             if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
366                 continue;
367             }
368 
369             auto res = spill_map.emplace(src_reg, RegContent(temp, type));
370             if (res.second) {
371                 inst->SetSrcReg(i, temp++);
372             } else {
373                 // Such register is already in map.
374                 // It can be ok for cases like: CallStatic v49, v49
375                 // Such instructions can be generated by optimizer too.
376                 const RegContent &reg_cont = res.first->second;
377                 inst->SetSrcReg(i, reg_cont.reg);
378             }
379         } else {
380             spill_vec.emplace_back(src_reg, RegContent(temp, type));
381             inst->SetSrcReg(i, temp++);
382         }
383     }
384 
385     AddMoveBefore(inst, spill_map);
386     AddMoveBefore(inst, spill_vec);
387 }
388 
InsertSpillsForInst(compiler::Inst * inst)389 void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
390 {
391     ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
392 
393     RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type)
394 
395     if (inst->IsOperandsDynamic()) {
396         InsertSpillsForDynInputsInst(inst);
397         return;
398     }
399 
400     compiler::Register temp = 0;
401     for (size_t i = 0; i < inst->GetInputsCount(); i++) {
402         auto reg = inst->GetSrcReg(i);
403         if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
404             auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
405             if (res.second) {
406                 inst->SetSrcReg(i, temp++);
407             } else {
408                 // Such register is already in map.
409                 // It can be ok for cases like: and v49, v49
410                 // Such instructions can be generated by optimizer too.
411                 const RegContent &reg_cont = res.first->second;
412                 inst->SetSrcReg(i, reg_cont.reg);
413             }
414         }
415     }
416 
417     AddMoveBefore(inst, spill_map);
418 }
419 
IncTempsIfNeeded(const compiler::Register reg,compiler::Register & num_temps)420 static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
421 {
422     if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
423         num_temps++;
424     }
425 }
426 
CalculateNumNeededTempsForInst(compiler::Inst * inst)427 void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
428 {
429     ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);
430 
431     compiler::Register num_temps = 0;
432 
433     if (inst->IsOperandsDynamic()) {
434         if (IsIntrinsicRange(inst)) {
435             return;
436         }
437         ASSERT(inst->IsIntrinsic());
438 
439         auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
440         size_t start = 0;
441 
442         if (nargs - start > MAX_NUM_NON_RANGE_ARGS) {  // is call.range
443             return;
444         }
445 
446         for (size_t i = start; i < nargs; i++) {
447             if (IsAccReadPosition(inst, i)) {
448                 continue;
449             }
450             auto reg = inst->GetSrcReg(i);
451             if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
452                 num_temps++;
453             }
454         }
455     } else {
456         for (size_t i = 0; i < inst->GetInputsCount(); i++) {
457             IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
458         }
459     }
460 
461     ASSERT(num_temps <= MAX_NUM_INPUTS);
462 
463     num_temps_ = std::max(num_temps, num_temps_);
464 }
465 
Check4Width(compiler::Inst * inst)466 void RegEncoder::Check4Width(compiler::Inst *inst)
467 {
468     switch (state_) {
469         case RegEncoderState::RESERVE_TEMPS: {
470             CalculateNumNeededTempsForInst(inst);
471             break;
472         }
473         case RegEncoderState::INSERT_SPILLS: {
474             InsertSpillsForInst(inst);
475             break;
476         }
477         default:
478             UNREACHABLE();
479     }
480 }
481 
Check8Width(compiler::Inst * inst)482 void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
483 {
484     // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
485 }
486 
VisitIntrinsic(GraphVisitor * visitor,Inst * inst)487 void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
488 {
489     auto re = static_cast<RegEncoder *>(visitor);
490     if (IsIntrinsicRange(inst)) {
491         re->Check4Width(inst->CastToIntrinsic());
492         return;
493     }
494 
495     re->Check8Width(inst->CastToIntrinsic());
496 }
497 
498 #include "generated/check_width.cpp"
499 }  // namespace panda::bytecodeopt
500