1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "codegen_fastpath.h"
17 #include "relocations.h"
18
19 namespace panda::compiler {
20
SaveCallerRegistersInFrame(RegMask mask,Encoder * encoder,const CFrameLayout & fl,bool is_fp)21 static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool is_fp)
22 {
23 if (mask.none()) {
24 return;
25 }
26 auto fp_reg = Target(fl.GetArch()).GetFrameReg();
27
28 mask &= GetCallerRegsMask(fl.GetArch(), is_fp);
29 auto start_slot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(is_fp);
30 encoder->SaveRegisters(mask, is_fp, -start_slot, fp_reg, GetCallerRegsMask(fl.GetArch(), is_fp));
31 }
32
RestoreCallerRegistersFromFrame(RegMask mask,Encoder * encoder,const CFrameLayout & fl,bool is_fp)33 static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool is_fp)
34 {
35 if (mask.none()) {
36 return;
37 }
38 auto fp_reg = Target(fl.GetArch()).GetFrameReg();
39
40 mask &= GetCallerRegsMask(fl.GetArch(), is_fp);
41 auto start_slot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(is_fp);
42 encoder->LoadRegisters(mask, is_fp, -start_slot, fp_reg, GetCallerRegsMask(fl.GetArch(), is_fp));
43 }
44
/**
 * Emit the FastPath prologue:
 *  1. spill live caller-saved GP registers (except argument registers) into
 *     the caller frame slots,
 *  2. push the used callee-saved GP registers and remember the mask in
 *     saved_registers_ so the epilogue pops exactly the same set,
 *  3. repeat both steps for FP/vector registers when any are used,
 *  4. reserve the spill area on the stack, rounded up to SP alignment.
 * GenerateEpilogue() must undo these steps in exact reverse order.
 */
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    // Save only caller-saved GP registers that are actually used; the method's
    // argument registers are excluded from the saved set.
    auto caller_regs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto args_num = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    caller_regs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(args_num);
    SaveCallerRegistersInFrame(caller_regs, GetEncoder(), GetFrameLayout(), false);

    // Push used callee-saved GP registers; the mask is kept for the epilogue.
    saved_registers_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    GetEncoder()->PushRegisters(saved_registers_, false, GetTarget().SupportLinkReg());

    if (GetUsedVRegs().Any()) {
        // Same treatment for the FP/vector registers.
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        saved_fp_registers_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
        GetEncoder()->PushRegisters(saved_fp_registers_, true, GetTarget().SupportLinkReg());
    }

    // Reserve stack space for spill slots, keeping SP aligned.
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}
70
GetCallerRegistersToRestore() const71 RegMask CodegenFastPath::GetCallerRegistersToRestore() const
72 {
73 RegMask caller_regs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
74
75 auto args_num = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
76 caller_regs &= ~GetTarget().GetParamRegsMask(args_num);
77
78 if (auto ret_type {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())}; ret_type != DataType::VOID) {
79 ASSERT(!DataType::IsFloatType(ret_type));
80 caller_regs.reset(GetTarget().GetReturnRegId());
81 }
82 return caller_regs;
83 }
84
/**
 * Emit the FastPath epilogue, undoing GeneratePrologue() in exact reverse
 * order: release the spill area, reload caller-saved registers from frame
 * slots, pop the callee-saved registers recorded in saved_fp_registers_ /
 * saved_registers_, and return.
 */
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    // Release the spill area reserved by the prologue (same rounded size).
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    // Reload caller-saved GP registers, excluding argument and return registers.
    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        // FP/vector registers: reload caller-saved, pop callee-saved.
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
        GetEncoder()->PopRegisters(saved_fp_registers_, true, GetTarget().SupportLinkReg());
    }

    // Pop exactly the callee-saved GP set the prologue pushed.
    GetEncoder()->PopRegisters(saved_registers_, false, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}
107
CreateFrameInfo()108 void CodegenFastPath::CreateFrameInfo()
109 {
110 auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
111 FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
112 FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false));
113 frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
114 CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount());
115
116 frame->SetCallersOffset(
117 fl.GetOffset<CFrameLayout::SP, CFrameLayout::SLOTS>(fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
118 frame->SetFpCallersOffset(
119 fl.GetOffset<CFrameLayout::SP, CFrameLayout::SLOTS>(fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
120 frame->SetCalleesOffset(
121 -fl.GetOffset<CFrameLayout::FP, CFrameLayout::SLOTS>(fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
122 frame->SetFpCalleesOffset(
123 -fl.GetOffset<CFrameLayout::FP, CFrameLayout::SLOTS>(fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));
124
125 SetFrameInfo(frame);
126 }
127
/**
 * Emit the transition from the fast path into the runtime slow path: unwind
 * everything the FastPath prologue set up (spill area, callee-saved pushes)
 * while saving the remaining caller-saved registers into frame slots, then
 * tail-jump to the runtime entrypoint. Control never returns here, which is
 * why temp registers can be released up front.
 */
void CodegenFastPath::IntrinsicSlowPathEntry(IntrinsicInst *inst)
{
    auto encoder = GetEncoder();

    // Drop the spill area, mirroring the prologue's EncodeSub.
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since slow path terminates execution */
    auto temps_mask = GetTarget().GetTempRegsMask();
    for (size_t reg = temps_mask.GetMinRegister(); reg <= temps_mask.GetMaxRegister(); reg++) {
        if (temps_mask.Test(reg)) {
            GetEncoder()->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    // Caller-saved GP registers NOT already saved by the prologue (note the
    // inverted mask: the prologue stored the used ones), minus argument regs.
    RegMask caller_regs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
    auto args_num = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    caller_regs &= ~GetTarget().GetParamRegsMask(args_num);

    if (GetUsedVRegs().Any()) {
        // Same for FP/vector: save the not-yet-saved caller regs, pop the
        // callee-saved regs pushed by the prologue.
        VRegMask fp_caller_regs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
        SaveCallerRegistersInFrame(fp_caller_regs, encoder, GetFrameLayout(), true);
        encoder->PopRegisters(saved_fp_registers_, true, GetTarget().SupportLinkReg());
    }

    SaveCallerRegistersInFrame(caller_regs, encoder, GetFrameLayout(), false);
    encoder->PopRegisters(saved_registers_, false, GetTarget().SupportLinkReg());

    /* Offset of the runtime entrypoint is stored in the first intrinsic's immediate */
    CHECK_EQ(inst->GetImms().size(), 1U);
    if (inst->GetRelocate()) {
        // Target resolved later by the relocation handler.
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        // Load the entrypoint address from the thread structure and jump to it.
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}
172
IntrinsicSaveRegisters(IntrinsicInst * inst)173 void CodegenFastPath::IntrinsicSaveRegisters([[maybe_unused]] IntrinsicInst *inst)
174 {
175 RegMask callee_regs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
176 // We need to save all caller regs, since caller doesn't care about registers at all (except parameters)
177 auto caller_regs = RegMask(GetCallerRegsMask(GetArch(), false));
178 auto caller_vregs = RegMask(GetCallerRegsMask(GetArch(), true));
179 // Alignment to 16 bytes is required in two cases:
180 // 1. If target supports link register, then it didn't push return address on the stack, so SP is already aligned,
181 // thus, we need to keep it aligned after pushing registers.
182 // 2. If amount of the saved registers in prologue is even(i.e. aligned), then even if target doesn't support LR,
183 // we need to align SP, because targets, that don't support LR, push LR on stack, thus SP is not aligned when we
184 // came to this prologue.
185 bool align = IsPrologueAligned() || GetTarget().SupportLinkReg();
186 for (auto &input : inst->GetInputs()) {
187 callee_regs.reset(input.GetInst()->GetDstReg());
188 caller_regs.reset(input.GetInst()->GetDstReg());
189 }
190 if (GetTarget().SupportLinkReg()) {
191 caller_regs.set(GetTarget().GetLinkReg().GetId());
192 }
193 GetEncoder()->PushRegisters(caller_regs | callee_regs, false, align);
194 GetEncoder()->PushRegisters(caller_vregs, true, align);
195 }
196
IntrinsicRestoreRegisters(IntrinsicInst * inst)197 void CodegenFastPath::IntrinsicRestoreRegisters([[maybe_unused]] IntrinsicInst *inst)
198 {
199 RegMask callee_regs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
200 // We need to restore all caller regs, since caller doesn't care about registers at all (except parameters)
201 auto caller_regs = RegMask(GetCallerRegsMask(GetArch(), false));
202 auto caller_vregs = RegMask(GetCallerRegsMask(GetArch(), true));
203 bool align = IsPrologueAligned() || GetTarget().SupportLinkReg();
204 for (auto &input : inst->GetInputs()) {
205 callee_regs.reset(input.GetInst()->GetDstReg());
206 caller_regs.reset(input.GetInst()->GetDstReg());
207 }
208 if (GetTarget().SupportLinkReg()) {
209 caller_regs.set(GetTarget().GetLinkReg().GetId());
210 }
211 GetEncoder()->PopRegisters(caller_vregs, true, align);
212 GetEncoder()->PopRegisters(caller_regs | callee_regs, false, align);
213 }
214
215 } // namespace panda::compiler
216