1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_encoder.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19
20 namespace panda::bytecodeopt {
21
// Returns true when `inst` is one of the *RANGE* intrinsics, i.e. a call
// whose arguments are taken from a contiguous range of registers.
static bool IsIntrinsicRange(Inst *inst)
{
    if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
        return false;
    }
#ifdef ENABLE_BYTECODE_OPT
    switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
        case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
            return true;
        default:
            return false;
    }
#endif
    // Reached only when bytecode-opt support is compiled out: then no
    // intrinsic is treated as a range call.
    return false;
}
49
CanHoldRange(Inst * inst)50 static bool CanHoldRange(Inst *inst)
51 {
52 switch (inst->GetOpcode()) {
53 case compiler::Opcode::Intrinsic:
54 return IsIntrinsicRange(inst);
55 default:
56 return false;
57 }
58 }
59
// Scans the whole graph and returns the size (in registers) of the
// contiguous area needed to hold the arguments of the widest range call.
// Returns 0 when no instruction requires range encoding.
static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
{
    compiler::Register widest = 0;

    for (auto *block : graph->GetBlocksRPO()) {
        for (const auto &inst : block->AllInsts()) {
            if (!CanHoldRange(inst)) {
                continue;
            }
            // The state input (when required) is not a real argument.
            auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
            // Range encoding is needed either for dedicated range intrinsics
            // or when there are too many arguments for the non-range form.
            const bool needs_range = nargs > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst);
            if (needs_range && widest < nargs) {
                widest = nargs;
            }
        }
    }

    return widest;
}
80
// Pass entry point: renumbers argument registers, then iteratively reserves
// the temporary registers needed for spills and range calls, and finally
// inserts the SpillFill moves. Returns false when the virtual frame runs
// out of free registers.
bool RegEncoder::RunImpl()
{
    ASSERT(state_ == RegEncoderState::IDLE);

    num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());

    state_ = RegEncoderState::RENUMBER_ARGS;
    if (!RenumberArgRegs()) {
        return false;
    }

    state_ = RegEncoderState::RESERVE_TEMPS;
    ASSERT(num_temps_ == 0);

    const auto num_regs = GetNumRegs();

    auto max_num_temps = num_temps_;
    CalculateNumNeededTemps();

    // Reserving temps shifts every other register up by `delta`, which may
    // push more registers out of the compactly encodable area and thereby
    // create demand for additional temps; iterate until the count stabilizes.
    while (max_num_temps != num_temps_) {
        ASSERT(num_temps_ > max_num_temps);

        if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) { // to avoid overflow
            return false; // no more free registers left in the frame
        }

        auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
        range_temps_start_ += delta;

        RenumberRegs(MIN_REGISTER_NUMBER, delta);

        max_num_temps = num_temps_;
        CalculateNumNeededTemps();
    }

    if (num_temps_ > 0 || num_max_range_input_ > 0) {
        state_ = RegEncoderState::INSERT_SPILLS;
        InsertSpills();

        // Shift the register usage mask up by num_temps_ slots (copying from
        // the top down to avoid overwriting) and mark the freshly reserved
        // temp slots at the bottom of the frame as used.
        auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
        for (compiler::Register r = 0; r < num_regs; r++) {
            usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
        }
        std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
    }

    SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
    state_ = RegEncoderState::IDLE;

    return true;
}
132
GetRegType(panda::compiler::DataType::Type type)133 static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
134 {
135 if (type == panda::compiler::DataType::REFERENCE) {
136 return type;
137 }
138 if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
139 return panda::compiler::DataType::UINT32;
140 }
141 return panda::compiler::DataType::UINT64;
142 }
143
RegNeedsRenumbering(panda::compiler::Register r)144 static bool RegNeedsRenumbering(panda::compiler::Register r)
145 {
146 return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
147 }
148
// Advances a register number by `delta`, leaving the accumulator
// pseudo-register untouched. Unsigned wrap-around is intentional.
static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
{
    return (r == panda::compiler::ACC_REG_ID) ? r : static_cast<panda::compiler::Register>(r + delta);
}
156
RenumberSpillFillRegs(panda::compiler::SpillFillInst * inst,const panda::compiler::Register min_reg,const panda::compiler::Register delta)157 static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
158 const panda::compiler::Register delta)
159 {
160 for (auto &sf : inst->GetSpillFills()) {
161 if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
162 sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
163 }
164 if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
165 sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
166 }
167 }
168 }
169
// Shifts by `delta` every register number >= min_reg across the whole graph:
// instruction destinations, SpillFill moves and ordinary inputs.
void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
{
    // Renumbering always advances register number `delta` positions forward,
    // wrapping around on overflows with well-defined behaviour.
    // Hence the requirement to keep delta unsigned.
    static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
    ASSERT(delta > 0);

    for (auto *bb : GetGraph()->GetBlocksRPO()) {
        for (auto inst : bb->AllInsts()) {
            // Renumber output of any instruction, if applicable:
            if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
                inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
            }

            // Phi/catch-phi inputs are deliberately not renumbered here;
            // only their destination (handled above) carries a register.
            if (inst->IsPhi() || inst->IsCatchPhi()) {
                continue;
            }

            // Renumber inputs and outputs of SpillFill instructions:
            if (inst->IsSpillFill()) {
                RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
                continue;
            }

            // Fix inputs of common instructions:
            for (size_t i = 0; i < inst->GetInputsCount(); i++) {
                if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
                    inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
                }
            }
        }
    }
}
204
// Renumbers registers so that locals stay at the bottom of the frame and a
// contiguous area of num_max_range_input_ registers is freed right after
// them for range-call arguments, with temps and arguments shifted to follow.
// Returns false when there are not enough free registers.
bool RegEncoder::RenumberArgRegs()
{
    const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
    ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);

    auto frame_size = static_cast<compiler::Register>(usage_mask->size());
    const auto num_args = GetNumArgsFromGraph();
    ASSERT(frame_size >= num_args);

    // Arguments occupy the top num_args slots; everything below is available.
    auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
    if (num_max_range_input_ > num_non_args) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Count contiguously used registers from the bottom of the frame (locals).
    compiler::Register num_locals = 0;
    if (num_non_args != 0) {
        while (num_locals != num_non_args && usage_mask->at(num_locals)) {
            ++num_locals;
        }
    }

    // Count contiguously used registers just below the arguments (temps).
    // The loop guard relies on unsigned wrap-around: when r passes 0 it
    // wraps to a large value and `r < num_non_args` fails.
    compiler::Register num_temps = 0;
    if (num_locals != num_non_args) {
        compiler::Register r = num_non_args - 1;
        while (r < num_non_args && usage_mask->at(r)) {
            ++num_temps;
            --r;
        }
    }

    if (num_locals + num_temps > num_non_args - num_max_range_input_) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Range-call arguments will be placed immediately after the locals.
    range_temps_start_ = num_locals;

    bool do_renumber = true;

    if (num_non_args == 0 && num_max_range_input_ == 0) { // all registers are arguments: no need to renumber
        do_renumber = false;
    }

    // All free regs will be just enough to encode call.rang: no need to renumber
    if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
        do_renumber = false;
    }

    if (num_temps + num_args == 0) { // no temps and no args: nothing to renumber
        do_renumber = false;
    }

    if (do_renumber) {
        const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
        ASSERT(min_reg > MIN_REGISTER_NUMBER);

        // Assert that if temps are present, they are marked allocated in the mask:
        for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
            ASSERT(usage_mask->at(r));
        }

        // Assert that there are no used regs between locals and temps + arguments:
        for (compiler::Register r = num_locals; r < min_reg; r++) {
            ASSERT(!usage_mask->at(r));
        }

        // delta is computed modulo the register width (unsigned wrap-around);
        // RenumberReg wraps the same way, so registers land where intended.
        auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
        RenumberRegs(min_reg, delta);

        // Move the usage bits along with the renumbered registers.
        for (compiler::Register r = min_reg; r < frame_size; r++) {
            usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
            usage_mask->at(r) = false;
        }
    }

    SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
    return true;
}
284
InsertSpills()285 void RegEncoder::InsertSpills()
286 {
287 ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));
288
289 for (auto *bb : GetGraph()->GetBlocksRPO()) {
290 for (auto inst : bb->AllInstsSafe()) {
291 if (inst->GetInputsCount() == 0) {
292 continue;
293 }
294
295 VisitInstruction(inst);
296 // TODO(aantipina): Enable assert here for GetStatus() as soon code generation is fully supported
297 }
298 }
299 }
300
CalculateNumNeededTemps()301 void RegEncoder::CalculateNumNeededTemps()
302 {
303 num_temps_ = 0;
304
305 for (auto bb : GetGraph()->GetBlocksRPO()) {
306 for (auto inst : bb->AllInstsSafe()) {
307 if (inst->GetInputsCount() == 0) {
308 continue;
309 }
310
311 VisitInstruction(inst);
312 // TODO(aantipina): Enable here for GetStatus() as soon code generation is fully supported
313 }
314 }
315
316 LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
317 << ": num_temps_ = " << std::to_string(num_temps_);
318 }
319
320 template <typename T>
AddMoveBefore(Inst * inst,const T & sp_container)321 static void AddMoveBefore(Inst *inst, const T &sp_container)
322 {
323 if (sp_container.empty()) {
324 return;
325 }
326 auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
327 for (auto const &[src, dst] : sp_container) {
328 ASSERT(src != compiler::ACC_REG_ID);
329 sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
330 LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
331 << static_cast<int>(src) << " was added";
332 }
333 inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
334 }
335
IsAccReadPosition(compiler::Inst * inst,size_t pos)336 static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
337 {
338 // Calls can have accumulator at any position, return false for them
339 return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
340 }
341
// Inserts SpillFill moves before a dynamic-input intrinsic so that its
// arguments end up either in compactly encodable temp registers (non-range
// calls) or in the contiguous range-temp area (range calls).
void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
    ASSERT(inst->IsIntrinsic());

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type), non-callrange
    RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter()); // spill_vec is used to handle callrange

    // The state input (when required) is not a real argument.
    auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
    size_t start = 0;
    // Range encoding is used for dedicated range intrinsics, or when there
    // are too many arguments for the non-range form.
    bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));

    // Range arguments go to consecutive registers starting at
    // range_temps_start_; non-range temps are allocated from register 0.
    compiler::Register temp = range ? range_temps_start_ : 0;

    for (size_t i = start; i < nargs; ++i) {
        auto src_reg = inst->GetSrcReg(i);
        auto type = inst->GetInputType(i);

        // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
        if (IsAccReadPosition(inst, i)) {
            continue;
        }

        if (!range) {
            // Non-range call: only arguments that cannot be encoded
            // compactly need to be moved into temps.
            if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
                continue;
            }

            auto res = spill_map.emplace(src_reg, RegContent(temp, type));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: CallStatic v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        } else {
            // Range call: every argument is moved so they become consecutive.
            spill_vec.emplace_back(src_reg, RegContent(temp, type));
            inst->SetSrcReg(i, temp++);
        }
    }

    AddMoveBefore(inst, spill_map);
    AddMoveBefore(inst, spill_vec);
}
389
InsertSpillsForInst(compiler::Inst * inst)390 void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
391 {
392 ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
393
394 RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type)
395
396 if (inst->IsOperandsDynamic()) {
397 InsertSpillsForDynInputsInst(inst);
398 return;
399 }
400
401 compiler::Register temp = 0;
402 for (size_t i = 0; i < inst->GetInputsCount(); i++) {
403 auto reg = inst->GetSrcReg(i);
404 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
405 auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
406 if (res.second) {
407 inst->SetSrcReg(i, temp++);
408 } else {
409 // Such register is already in map.
410 // It can be ok for cases like: and v49, v49
411 // Such instructions can be generated by optimizer too.
412 const RegContent ®_cont = res.first->second;
413 inst->SetSrcReg(i, reg_cont.reg);
414 }
415 }
416 }
417
418 AddMoveBefore(inst, spill_map);
419 }
420
IncTempsIfNeeded(const compiler::Register reg,compiler::Register & num_temps)421 static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
422 {
423 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
424 num_temps++;
425 }
426 }
427
CalculateNumNeededTempsForInst(compiler::Inst * inst)428 void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
429 {
430 ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);
431
432 compiler::Register num_temps = 0;
433
434 if (inst->IsOperandsDynamic()) {
435 if (IsIntrinsicRange(inst)) {
436 return;
437 }
438 ASSERT(inst->IsIntrinsic());
439
440 auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
441 size_t start = 0;
442
443 if (nargs - start > MAX_NUM_NON_RANGE_ARGS) { // is call.range
444 return;
445 }
446
447 for (size_t i = start; i < nargs; i++) {
448 if (IsAccReadPosition(inst, i)) {
449 continue;
450 }
451 auto reg = inst->GetSrcReg(i);
452 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
453 num_temps++;
454 }
455 }
456 } else {
457 for (size_t i = 0; i < inst->GetInputsCount(); i++) {
458 IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
459 }
460 }
461
462 ASSERT(num_temps <= MAX_NUM_INPUTS);
463
464 num_temps_ = std::max(num_temps, num_temps_);
465 }
466
Check4Width(compiler::Inst * inst)467 void RegEncoder::Check4Width(compiler::Inst *inst)
468 {
469 switch (state_) {
470 case RegEncoderState::RESERVE_TEMPS: {
471 CalculateNumNeededTempsForInst(inst);
472 break;
473 }
474 case RegEncoderState::INSERT_SPILLS: {
475 InsertSpillsForInst(inst);
476 break;
477 }
478 default:
479 UNREACHABLE();
480 }
481 }
482
// Intentional no-op placeholder: 8-bit-width checks only become relevant
// once register numbers above 256 are possible.
void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
{
    // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
}
487
VisitIntrinsic(GraphVisitor * visitor,Inst * inst)488 void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
489 {
490 auto re = static_cast<RegEncoder *>(visitor);
491 if (IsIntrinsicRange(inst)) {
492 re->Check4Width(inst->CastToIntrinsic());
493 return;
494 }
495
496 re->Check8Width(inst->CastToIntrinsic());
497 }
498
499 #include "generated/check_width.cpp"
500 } // namespace panda::bytecodeopt
501