/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
Low-level calling convention
*/
#include "target/aarch64/target.h"

namespace ark::compiler::aarch64 {

constexpr int32_t IMM_2 = 2;

Aarch64CallingConvention::Aarch64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}

constexpr auto Aarch64CallingConvention::GetTarget()
{
    return ark::compiler::Target(Arch::AARCH64);
}

bool Aarch64CallingConvention::IsValid() const
{
    return true;
}

vixl::aarch64::MacroAssembler *Aarch64CallingConvention::GetMasm()
{
    return (static_cast<Aarch64Encoder *>(GetEncoder()))->GetMasm();
}

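// Builds parameter info for a native call. The first regsOffset scalar argument
// registers are consumed up front with dummy INT64 queries, so the locations
// handed out by the returned object start at x<regsOffset>.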
ParameterInfo *Aarch64CallingConvention::GetParameterInfo(uint8_t regsOffset)
{
    auto paramInfo = GetAllocator()->New<aarch64::Aarch64ParameterInfo>();
    for (int i = 0; i < regsOffset; ++i) {
        paramInfo->GetNativeParam(INT64_TYPE);
    }
    return paramInfo;
}

void *Aarch64CallingConvention::GetCodeEntry()
{
    return reinterpret_cast<void *>(GetMasm()->GetInstructionAt(0));
}

uint32_t Aarch64CallingConvention::GetCodeSize()
{
    return GetMasm()->GetSizeOfCodeGenerated();
}

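// Pushing an odd number of registers would break the 16-byte SP alignment
// required on AArch64, so odd-sized lists are padded to an even count: the
// scalar list with xzr, the vector list with a spare vreg reserved for this
// purpose by the register description.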
void Aarch64CallingConvention::PrepareToPushPopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
                                                    bool isCallee)
{
    if ((regs.GetCount() % IMM_2) == 1) {
        ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
        regs.Combine(vixl::aarch64::xzr);
    }
    if ((vregs.GetCount() % IMM_2) == 1) {
        auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
        uint8_t alignmentVreg = regdescr->GetAlignmentVreg(isCallee);
        ASSERT((vregs.GetList() & (UINT64_C(1) << alignmentVreg)) == 0);
        vregs.Combine(alignmentVreg);
    }
}

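// Push/pop the given scalar and vector register sets. Vector registers are
// pushed first and popped last, i.e. they live below the scalar ones on the
// stack. Returns the number of slots used, including any alignment padding
// added by PrepareToPushPopRegs.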
size_t Aarch64CallingConvention::PushRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
                                          bool isCallee)
{
    PrepareToPushPopRegs(regs, vregs, isCallee);
    GetMasm()->PushCPURegList(vregs);
    GetMasm()->PushCPURegList(regs);
    return vregs.GetCount() + regs.GetCount();
}

size_t Aarch64CallingConvention::PopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs, bool isCallee)
{
    PrepareToPushPopRegs(regs, vregs, isCallee);
    GetMasm()->PopCPURegList(regs);
    GetMasm()->PopCPURegList(vregs);
    return vregs.GetCount() + regs.GetCount();
}

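// Hands out native-call argument locations in AAPCS64 fashion: floating-point
// values go to v0..v<MAX_VECTOR_PARAM_ID>, scalars to x0..x<MAX_SCALAR_PARAM_ID>,
// and everything else to consecutive stack slots. A scalar wider than a double
// word consumes an extra register, so the value occupies a register pair.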
std::variant<Reg, uint8_t> Aarch64ParameterInfo::GetNativeParam(const TypeInfo &type)
{
    if (type.IsFloat()) {
        if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
            return currentStackOffset_++;
        }
        return Reg(currentVectorNumber_++, type);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return currentStackOffset_++;
    }
    auto ret = Reg(currentScalarNumber_++, type);
    if (type.GetSize() > DOUBLE_WORD_SIZE) {
        currentScalarNumber_++;
    }
    return ret;
}

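// Same walk as GetNativeParam, but yields Location objects and maps scalar
// arguments through the target's parameter register table instead of using raw
// register numbers.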
Location Aarch64ParameterInfo::GetNextLocation(DataType::Type type)
{
    if (DataType::IsFloatType(type)) {
        if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
            return Location::MakeStackArgument(currentStackOffset_++);
        }
        return Location::MakeFpRegister(currentVectorNumber_++);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return Location::MakeStackArgument(currentStackOffset_++);
    }
    Target target(Arch::AARCH64);
    return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
}

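// Builds the frame-flags value stored by the prologue. If no flag is set, the
// zero register is returned so the caller can store xzr directly; otherwise the
// value is materialized in the link register, which can serve as a scratch
// register here because FP/LR have already been saved when a frame is set up.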
Reg Aarch64CallingConvention::InitFlagsReg(bool hasFloatRegs)
{
    auto flags {static_cast<uint64_t>(hasFloatRegs) << CFrameLayout::HasFloatRegsFlag::START_BIT};
    auto flagsReg {GetTarget().GetZeroReg()};
    if (flags != 0U) {
        flagsReg = GetTarget().GetLinkReg();
        GetEncoder()->EncodeMov(flagsReg, Imm(flags));
    }
    return flagsReg;
}

using vixl::aarch64::CPURegList, vixl::aarch64::CPURegister, vixl::aarch64::MemOperand;

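// Stores the used callee-saved scalar and vector registers into their CFrame
// slots at negative offsets from the current SP. For 'native' code the register
// set additionally depends on whether optimized Irtoc mode is enabled.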
void Aarch64CallingConvention::SaveCalleeSavedRegs(const FrameInfo &frameInfo, const CFrameLayout &fl,
                                                   size_t spToRegsSlots, bool isNative)
{
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    bool irtocOptimized = isNative ? GetMode().IsOptIrtoc() : false;

    regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                           irtocOptimized);
    SET_CFI_CALLEE_REGS(calleeRegsMask);
    SET_CFI_CALLEE_VREGS(calleeVregsMask);
    auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
    auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());
    auto sp = GetTarget().GetStackReg();
    GetMasm()->StoreCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
    GetMasm()->StoreCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    SET_CFI_OFFSET(pushCallees, GetEncoder()->GetCursorOffset());
}

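// Pushes the FP/LR pair if the frame requires it or CFI info is emitted, and
// returns the number of slots consumed so the caller can update its slot
// accounting.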
size_t Aarch64CallingConvention::SaveFpLr(const FrameInfo &frameInfo, [[maybe_unused]] Encoder *encoder,
                                          [[maybe_unused]] Reg fp, [[maybe_unused]] Reg lr)
{
    if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
        GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
        SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
        return CFrameLayout::GetFpLrSlotsCount();
    }
    return 0;
}

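// Argument-count guard for dynamic calls: when the actual argument count (in
// the first parameter register) is below the expected one, load the expected
// count and call the expand entrypoint through its TLS slot, presumably to
// materialize the missing arguments, then fall through.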
void Aarch64CallingConvention::EncodeDynCallMode([[maybe_unused]] const FrameInfo &frameInfo, Encoder *encoder)
{
    static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
    static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);

    ASSERT(frameInfo.GetSaveFrameAndLinkRegs());

    constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
    constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
    auto numExpected = GetDynInfo().GetNumExpectedArgs();

    auto expandDone = encoder->CreateLabel();
    encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
    encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));

    MemRef expandEntrypoint(Reg(GetThreadReg(Arch::AARCH64), GetTarget().GetPtrRegType()),
                            GetDynInfo().GetExpandEntrypointTlsOffset());
    GetEncoder()->MakeCall(expandEntrypoint);
    encoder->BindLabel(expandDone);
}

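// Prologue steps, each gated by its FrameInfo flag: push FP/LR, set up FP,
// optionally run the dyn-call check, store the flags word and the method
// pointer with one pre-indexed stp, spill the callee-saved registers, and
// finally drop SP to the frame's bottom.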
void Aarch64CallingConvention::GeneratePrologue(const FrameInfo &frameInfo)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // Setup FP
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        // If the SetupFrame flag is set, then SaveFrameAndLinkRegs must be set as well.
        // They are separate flags because Irtoc does not need the frame setup itself
        // but still requires the frame and link registers to be saved.
        ASSERT(!frameInfo.GetSetupFrame() || frameInfo.GetSaveFrameAndLinkRegs());
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Reset flags and setup method
    if (frameInfo.GetSetupFrame()) {
        static_assert(CFrameMethod::End() == CFrameFlags::Start());
        constexpr int64_t SLOTS_COUNT = CFrameMethod::GetSize() + CFrameFlags::GetSize();

        GetMasm()->Stp(VixlReg(InitFlagsReg(frameInfo.GetHasFloatRegs())),  // Reset OSR flag and set HasFloatRegsFlag
                       VixlReg(GetTarget().GetParamReg(0)),                 // Set Method pointer
                       vixl::aarch64::MemOperand(VixlReg(sp), VixlImm(-SLOTS_COUNT * fl.GetSlotSize()),
                                                 vixl::aarch64::AddrMode::PreIndex));
        spToRegsSlots -= SLOTS_COUNT;
    }

    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, false);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}

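// Common epilogue: run the optional postJob, reload the callee-saved registers
// (their SP-relative offsets depend on whether SP was adjusted in the
// prologue), unwind SP, restore FP/LR (OSR frames reload them through FP), and
// return. IS_NATIVE selects the Irtoc-aware callee-saved set, mirroring the
// prologues.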
template <bool IS_NATIVE>
void Aarch64CallingConvention::GenerateEpilogueImpl(const FrameInfo &frameInfo, const std::function<void()> &postJob)
{
    const CFrameLayout &fl = GetEncoder()->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();

    if (postJob) {
        postJob();
    }

    // Restore callee-saved registers
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    if constexpr (IS_NATIVE) {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                               GetMode().IsOptIrtoc());
    } else {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs());
    }

    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());

    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto lastCalleeReg = fl.GetRegsSlotsCount() - calleeRegsMask.Count();
        auto lastCalleeVreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(lastCalleeVreg * fl.GetSlotSize())));
    } else {
        // SP points either to the frame's top or to the frame's top plus the FP/LR slot
        auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // Adjust for the FP/LR slot
            spToRegsSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
        auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    }
    SET_CFI_OFFSET(popCallees, GetEncoder()->GetCursorOffset());

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto spToFrameTopSlots = fl.GetRegsSlotsCount() + CFrameRegs::Start() - CFrameReturnAddr::Start();
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            spToFrameTopSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto spToFrameTopOffset = spToFrameTopSlots * fl.GetSlotSize();
        GetEncoder()->EncodeAdd(sp, sp, Imm(spToFrameTopOffset));
    }

    // Restore FP and LR
    if (IsOsrMode()) {
        GetEncoder()->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
        GetEncoder()->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
    } else if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
    }
    SET_CFI_OFFSET(popFplr, GetEncoder()->GetCursorOffset());

    GetMasm()->Ret();
}


void Aarch64CallingConvention::GenerateEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogueImpl<false>(frameInfo, postJob);
}

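// Prologue for 'native' (Irtoc) code: same shape as GeneratePrologue, but no
// method pointer or flags slot is written and the callee-saved register set is
// taken with isNative = true.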
void Aarch64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // 'Native' calling convention requires setting up FP for FastPath calls from the Irtoc interpreter entrypoint
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Save callee-saved registers
    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, true);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}

void Aarch64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogueImpl<true>(frameInfo, postJob);
}
}  // namespace ark::compiler::aarch64