1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_encoder.h"
17 #include "common.h"
18
19 namespace panda::bytecodeopt {
20
IsIntrinsicRange(Inst * inst)21 static bool IsIntrinsicRange(Inst *inst)
22 {
23 if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
24 return false;
25 }
26 #ifdef ENABLE_BYTECODE_OPT
27 switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
28 case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
29 case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
30 case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
31 case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
32 case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
33 case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
34 case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
35 case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
36 case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
37 case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
38 case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
39 case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
40 case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
41 return true;
42 default:
43 return false;
44 }
45 #endif
46 return false;
47 }
48
CanHoldRange(Inst * inst)49 static bool CanHoldRange(Inst *inst)
50 {
51 switch (inst->GetOpcode()) {
52 case compiler::Opcode::Intrinsic:
53 return IsIntrinsicRange(inst);
54 default:
55 return false;
56 }
57 }
58
// Scans the whole graph and returns the largest contiguous register block
// required by any range-encoded instruction (0 when none is present).
static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
{
    compiler::Register needed = 0;

    for (auto *block : graph->GetBlocksRPO()) {
        for (const auto &inst : block->AllInsts()) {
            if (!CanHoldRange(inst)) {
                continue;
            }
            // The SaveState input (when present) is not a real argument.
            auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
            if (nargs > needed && (nargs > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst))) {
                needed = nargs;
            }
        }
    }

    return needed;
}
79
// Pass entry point. Renumbers argument registers, then reserves temp
// registers for spills (iterating to a fixed point, since each reservation
// shifts registers up and may create new out-of-window inputs), inserts the
// spill-fill moves, and finally updates the graph's usage mask and local
// count. Returns false when the virtual frame cannot fit the needed temps.
bool RegEncoder::RunImpl()
{
    ASSERT(state_ == RegEncoderState::IDLE);

    num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());

    state_ = RegEncoderState::RENUMBER_ARGS;
    if (!RenumberArgRegs()) {
        return false;
    }

    state_ = RegEncoderState::RESERVE_TEMPS;
    ASSERT(num_temps_ == 0);

    const auto num_regs = GetNumRegs();

    auto max_num_temps = num_temps_;
    CalculateNumNeededTemps();

    // Fixed-point loop: shifting registers by `delta` can push more of them
    // past the compact-encoding boundary, which in turn raises num_temps_.
    while (max_num_temps != num_temps_) {
        ASSERT(num_temps_ > max_num_temps);

        if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) { // to avoid overflow
            return false; // no more free registers left in the frame
        }

        auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
        range_temps_start_ += delta;

        RenumberRegs(MIN_REGISTER_NUMBER, delta);

        max_num_temps = num_temps_;
        CalculateNumNeededTemps();
    }

    if (num_temps_ > 0 || num_max_range_input_ > 0) {
        state_ = RegEncoderState::INSERT_SPILLS;
        InsertSpills();

        // Shift the usage mask up by num_temps_ (iterating from the top so
        // entries are not overwritten before being copied), then mark the
        // freed low registers — the reserved temps — as used.
        auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
        for (compiler::Register r = 0; r < num_regs; r++) {
            usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
        }
        std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
    }

    SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
    state_ = RegEncoderState::IDLE;

    return true;
}
131
GetRegType(panda::compiler::DataType::Type type)132 static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
133 {
134 if (type == panda::compiler::DataType::REFERENCE) {
135 return type;
136 }
137 if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
138 return panda::compiler::DataType::UINT32;
139 }
140 return panda::compiler::DataType::UINT64;
141 }
142
RegNeedsRenumbering(panda::compiler::Register r)143 static bool RegNeedsRenumbering(panda::compiler::Register r)
144 {
145 return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
146 }
147
// Shifts register `r` forward by `delta`; the accumulator is not part of
// the numbered frame and passes through unchanged.
static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
{
    return r == panda::compiler::ACC_REG_ID ? r : static_cast<panda::compiler::Register>(r + delta);
}
155
RenumberSpillFillRegs(panda::compiler::SpillFillInst * inst,const panda::compiler::Register min_reg,const panda::compiler::Register delta)156 static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
157 const panda::compiler::Register delta)
158 {
159 for (auto &sf : inst->GetSpillFills()) {
160 if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
161 sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
162 }
163 if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
164 sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
165 }
166 }
167 }
168
RenumberRegs(const compiler::Register min_reg,const compiler::Register delta)169 void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
170 {
171 // Renumbering always advances register number `delta` positions forward,
172 // wrapping around on overflows with well-defined behavour.
173 // Hence the requirement to keep delta unsigned.
174 static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
175 ASSERT(delta > 0);
176
177 for (auto *bb : GetGraph()->GetBlocksRPO()) {
178 for (auto inst : bb->AllInsts()) {
179 // Renumber output of any instruction, if applicable:
180 if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
181 inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
182 }
183
184 if (inst->IsPhi() || inst->IsCatchPhi()) {
185 continue;
186 }
187
188 // Renumber inputs and outputs of SpillFill instructions:
189 if (inst->IsSpillFill()) {
190 RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
191 continue;
192 }
193
194 // Fix inputs of common instructions:
195 for (size_t i = 0; i < inst->GetInputsCount(); i++) {
196 if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
197 inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
198 }
199 }
200 }
201 }
202 }
203
// Renumbers non-argument registers so that locals sit at the bottom of the
// frame, a gap of `num_max_range_input_` free registers follows them (for
// range-call staging), and temps + arguments sit at the top. Returns false
// when the frame has too few free registers for the required range window.
// Frame layout assumed: [locals ... free ... temps][args].
bool RegEncoder::RenumberArgRegs()
{
    const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
    ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);

    auto frame_size = static_cast<compiler::Register>(usage_mask->size());
    const auto num_args = GetNumArgsFromGraph();
    ASSERT(frame_size >= num_args);

    auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
    if (num_max_range_input_ > num_non_args) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Count the contiguous run of used registers at the bottom of the
    // non-arg area: these are the locals.
    compiler::Register num_locals = 0;
    if (num_non_args != 0) {
        while (num_locals != num_non_args && usage_mask->at(num_locals)) {
            ++num_locals;
        }
    }

    // Count the contiguous run of used registers at the top of the non-arg
    // area: these are the temps. (`r < num_non_args` also stops the loop on
    // unsigned wrap-around when r decrements past 0.)
    compiler::Register num_temps = 0;
    if (num_locals != num_non_args) {
        compiler::Register r = num_non_args - 1;
        while (r < num_non_args && usage_mask->at(r)) {
            ++num_temps;
            --r;
        }
    }

    if (num_locals + num_temps > num_non_args - num_max_range_input_) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Range-call staging registers start right after the locals.
    range_temps_start_ = num_locals;

    bool do_renumber = true;

    if (num_non_args == 0 && num_max_range_input_ == 0) { // all registers are arguments: no need to renumber
        do_renumber = false;
    }

    // All free regs will be just enough to encode call.rang: no need to renumber
    if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
        do_renumber = false;
    }

    if (num_temps + num_args == 0) { // no temps and no args: nothing to renumber
        do_renumber = false;
    }

    if (do_renumber) {
        const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
        ASSERT(min_reg > MIN_REGISTER_NUMBER);

        // Assert that if temps are present, they are marked allocated in the mask:
        for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
            ASSERT(usage_mask->at(r));
        }

        // Assert that there are no used regs between locals and temps + arguments:
        for (compiler::Register r = num_locals; r < min_reg; r++) {
            ASSERT(!usage_mask->at(r));
        }

        // Shift temps + args down/up so exactly num_max_range_input_ free
        // registers remain between locals and temps; delta relies on
        // well-defined unsigned wrap-around (see RenumberRegs).
        auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
        RenumberRegs(min_reg, delta);

        // Move the corresponding usage-mask bits to their new positions.
        for (compiler::Register r = min_reg; r < frame_size; r++) {
            usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
            usage_mask->at(r) = false;
        }
    }

    SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
    return true;
}
283
InsertSpills()284 void RegEncoder::InsertSpills()
285 {
286 ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));
287
288 for (auto *bb : GetGraph()->GetBlocksRPO()) {
289 for (auto inst : bb->AllInstsSafe()) {
290 if (inst->GetInputsCount() == 0) {
291 continue;
292 }
293
294 VisitInstruction(inst);
295 // TODO(aantipina): Enable assert here for GetStatus() as soon code generation is fully supported
296 }
297 }
298 }
299
CalculateNumNeededTemps()300 void RegEncoder::CalculateNumNeededTemps()
301 {
302 num_temps_ = 0;
303
304 for (auto bb : GetGraph()->GetBlocksRPO()) {
305 for (auto inst : bb->AllInstsSafe()) {
306 if (inst->GetInputsCount() == 0) {
307 continue;
308 }
309
310 VisitInstruction(inst);
311 // TODO(aantipina): Enable here for GetStatus() as soon code generation is fully supported
312 }
313 }
314
315 LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
316 << ": num_temps_ = " << std::to_string(num_temps_);
317 }
318
319 template <typename T>
AddMoveBefore(Inst * inst,const T & sp_container)320 static void AddMoveBefore(Inst *inst, const T &sp_container)
321 {
322 if (sp_container.empty()) {
323 return;
324 }
325 auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
326 for (auto const &[src, dst] : sp_container) {
327 ASSERT(src != compiler::ACC_REG_ID);
328 sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
329 LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
330 << static_cast<int>(src) << " was added";
331 }
332 inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
333 }
334
IsAccReadPosition(compiler::Inst * inst,size_t pos)335 static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
336 {
337 // Calls can have accumulator at any position, return false for them
338 return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
339 }
340
// Inserts spill moves for an intrinsic with dynamic operands. Non-range
// intrinsics only spill inputs that fall outside the compactly encodable
// window (deduplicated via spill_map); range intrinsics must move every
// input into the contiguous block starting at range_temps_start_, in order.
void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
    ASSERT(inst->IsIntrinsic());

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type), non-callrange
    RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter()); // spill_vec is used to handle callrange

    // The SaveState input (when present) is not a real argument.
    auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
    size_t start = 0;
    bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));

    // Range encodings consume registers starting at the reserved staging
    // area; non-range spills reuse the low temps from register 0.
    compiler::Register temp = range ? range_temps_start_ : 0;

    for (size_t i = start; i < nargs; ++i) {
        auto src_reg = inst->GetSrcReg(i);
        auto type = inst->GetInputType(i);

        // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
        if (IsAccReadPosition(inst, i)) {
            continue;
        }

        if (!range) {
            // Already encodable (or the accumulator): no spill needed.
            if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
                continue;
            }

            auto res = spill_map.emplace(src_reg, RegContent(temp, type));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: CallStatic v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        } else {
            // Range form: every argument moves into the next consecutive
            // staging register, duplicates included.
            spill_vec.emplace_back(src_reg, RegContent(temp, type));
            inst->SetSrcReg(i, temp++);
        }
    }

    AddMoveBefore(inst, spill_map);
    AddMoveBefore(inst, spill_vec);
}
388
InsertSpillsForInst(compiler::Inst * inst)389 void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
390 {
391 ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
392
393 RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type)
394
395 if (inst->IsOperandsDynamic()) {
396 InsertSpillsForDynInputsInst(inst);
397 return;
398 }
399
400 compiler::Register temp = 0;
401 for (size_t i = 0; i < inst->GetInputsCount(); i++) {
402 auto reg = inst->GetSrcReg(i);
403 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
404 auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
405 if (res.second) {
406 inst->SetSrcReg(i, temp++);
407 } else {
408 // Such register is already in map.
409 // It can be ok for cases like: and v49, v49
410 // Such instructions can be generated by optimizer too.
411 const RegContent ®_cont = res.first->second;
412 inst->SetSrcReg(i, reg_cont.reg);
413 }
414 }
415 }
416
417 AddMoveBefore(inst, spill_map);
418 }
419
IncTempsIfNeeded(const compiler::Register reg,compiler::Register & num_temps)420 static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
421 {
422 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
423 num_temps++;
424 }
425 }
426
CalculateNumNeededTempsForInst(compiler::Inst * inst)427 void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
428 {
429 ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);
430
431 compiler::Register num_temps = 0;
432
433 if (inst->IsOperandsDynamic()) {
434 if (IsIntrinsicRange(inst)) {
435 return;
436 }
437 ASSERT(inst->IsIntrinsic());
438
439 auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
440 size_t start = 0;
441
442 if (nargs - start > MAX_NUM_NON_RANGE_ARGS) { // is call.range
443 return;
444 }
445
446 for (size_t i = start; i < nargs; i++) {
447 if (IsAccReadPosition(inst, i)) {
448 continue;
449 }
450 auto reg = inst->GetSrcReg(i);
451 if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
452 num_temps++;
453 }
454 }
455 } else {
456 for (size_t i = 0; i < inst->GetInputsCount(); i++) {
457 IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
458 }
459 }
460
461 ASSERT(num_temps <= MAX_NUM_INPUTS);
462
463 num_temps_ = std::max(num_temps, num_temps_);
464 }
465
Check4Width(compiler::Inst * inst)466 void RegEncoder::Check4Width(compiler::Inst *inst)
467 {
468 switch (state_) {
469 case RegEncoderState::RESERVE_TEMPS: {
470 CalculateNumNeededTempsForInst(inst);
471 break;
472 }
473 case RegEncoderState::INSERT_SPILLS: {
474 InsertSpillsForInst(inst);
475 break;
476 }
477 default:
478 UNREACHABLE();
479 }
480 }
481
// Intentional stub: 8-bit-wide register checks only become meaningful once
// register numbers above 256 can appear.
void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
{
    // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
}
486
VisitIntrinsic(GraphVisitor * visitor,Inst * inst)487 void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
488 {
489 auto re = static_cast<RegEncoder *>(visitor);
490 if (IsIntrinsicRange(inst)) {
491 re->Check4Width(inst->CastToIntrinsic());
492 return;
493 }
494
495 re->Check8Width(inst->CastToIntrinsic());
496 }
497
498 #include "generated/check_width.cpp"
499 } // namespace panda::bytecodeopt
500