/*
 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Low-level calling convention
 */
#include "target/aarch64/target.h"

namespace ark::compiler::aarch64 {

constexpr int32_t IMM_2 = 2;

Aarch64CallingConvention::Aarch64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}

constexpr auto Aarch64CallingConvention::GetTarget()
{
    return ark::compiler::Target(Arch::AARCH64);
}

bool Aarch64CallingConvention::IsValid() const
{
    return true;
}

vixl::aarch64::MacroAssembler *Aarch64CallingConvention::GetMasm()
{
    return (static_cast<Aarch64Encoder *>(GetEncoder()))->GetMasm();
}

ParameterInfo *Aarch64CallingConvention::GetParameterInfo(uint8_t regsOffset)
{
    auto paramInfo = GetAllocator()->New<aarch64::Aarch64ParameterInfo>();
    ASSERT(paramInfo != nullptr);
    for (int i = 0; i < regsOffset; ++i) {
        paramInfo->GetNativeParam(INT64_TYPE);
    }
    return paramInfo;
}
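
// Note: the loop above deliberately consumes the first regsOffset scalar
// argument registers, so e.g. GetParameterInfo(1) makes the first declared
// parameter land in x1 instead of x0. This is how slots for implicit
// arguments (such as a method pointer) are reserved ahead of the explicit
// parameter list.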

void *Aarch64CallingConvention::GetCodeEntry()
{
    return reinterpret_cast<void *>(GetMasm()->GetInstructionAt(0));
}

uint32_t Aarch64CallingConvention::GetCodeSize()
{
    return GetMasm()->GetSizeOfCodeGenerated();
}

void Aarch64CallingConvention::PrepareToPushPopRegs(vixl::aarch64::CPURegList &regs, vixl::aarch64::CPURegList &vregs,
                                                    bool isCallee)
{
    if ((regs.GetCount() % IMM_2) == 1) {
        ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
        regs.Combine(vixl::aarch64::xzr);
    }
    if ((vregs.GetCount() % IMM_2) == 1) {
        auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
        uint8_t alignmentVreg = regdescr->GetAlignmentVreg(isCallee);
        ASSERT((vregs.GetList() & (UINT64_C(1) << alignmentVreg)) == 0);
        vregs.Combine(alignmentVreg);
    }
}
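
// AArch64 stp/ldp move registers in pairs and SP must stay 16-byte aligned,
// so an odd-sized list is padded in place (hence the reference parameters)
// before being pushed or popped: the scalar list borrows xzr, whose stores
// write a literal zero and whose loads are discarded, while the vector list
// borrows a dedicated alignment vreg supplied by the register description.
// E.g. pushing {x19, x20, x21} actually pushes {x19, x20, x21, xzr}.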

size_t Aarch64CallingConvention::PushRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
                                          bool isCallee)
{
    PrepareToPushPopRegs(regs, vregs, isCallee);
    GetMasm()->PushCPURegList(vregs);
    GetMasm()->PushCPURegList(regs);
    return vregs.GetCount() + regs.GetCount();
}

size_t Aarch64CallingConvention::PopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs, bool isCallee)
{
    PrepareToPushPopRegs(regs, vregs, isCallee);
    GetMasm()->PopCPURegList(regs);
    GetMasm()->PopCPURegList(vregs);
    return vregs.GetCount() + regs.GetCount();
}
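
// Push and pop are mirrored: PushRegs stores vregs first and regs last, so
// PopRegs restores regs first and vregs second, unwinding in reverse order.
// Both return the combined register count including any alignment padding,
// letting callers account for the exact number of stack slots consumed.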

std::variant<Reg, uint8_t> Aarch64ParameterInfo::GetNativeParam(const TypeInfo &type)
{
    if (type.IsFloat()) {
        if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
            return currentStackOffset_++;
        }
        return Reg(currentVectorNumber_++, type);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return currentStackOffset_++;
    }
    auto ret = Reg(currentScalarNumber_++, type);
    if (type.GetSize() > DOUBLE_WORD_SIZE) {
        currentScalarNumber_++;
    }
    return ret;
}
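
// This mirrors the AAPCS64 native convention: scalar arguments occupy
// x0..x7 and floating-point arguments v0..v7, after which the variant holds
// a stack slot index (uint8_t) instead of a register. A value wider than a
// double word consumes a register pair, which is why an extra scalar number
// is skipped for it. For example, for an (i64, f64, i64) signature the
// successive calls yield x0, d0, x1.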

Location Aarch64ParameterInfo::GetNextLocation(DataType::Type type)
{
    if (DataType::IsFloatType(type)) {
        if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
            return Location::MakeStackArgument(currentStackOffset_++);
        }
        return Location::MakeFpRegister(currentVectorNumber_++);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return Location::MakeStackArgument(currentStackOffset_++);
    }
    Target target(Arch::AARCH64);
    return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
}
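
// Unlike GetNativeParam, this managed-code variant maps the scalar index
// through Target::GetParamRegId, so the resulting register numbering may
// differ from the plain x0..x7 sequence used for native calls. Float
// arguments still map directly to v-registers, and overflow arguments
// become stack locations in both variants.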

Reg Aarch64CallingConvention::InitFlagsReg(bool hasFloatRegs)
{
    auto flags {static_cast<uint64_t>(hasFloatRegs) << CFrameLayout::HasFloatRegsFlag::START_BIT};
    auto flagsReg {GetTarget().GetZeroReg()};
    if (flags != 0U) {
        flagsReg = GetTarget().GetLinkReg();
        GetEncoder()->EncodeMov(flagsReg, Imm(flags));
    }
    return flagsReg;
}
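
// When no flag bits are set, the zero register is returned: storing xzr
// writes a literal 0 to the flags slot without needing a mov or a scratch
// register. Otherwise LR serves as the temporary, which is safe here
// because the only caller is the prologue path that has already saved
// FP/LR.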

using vixl::aarch64::CPURegList, vixl::aarch64::CPURegister, vixl::aarch64::MemOperand;

void Aarch64CallingConvention::SaveCalleeSavedRegs(const FrameInfo &frameInfo, const CFrameLayout &fl,
                                                   size_t spToRegsSlots, bool isNative)
{
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    bool irtocOptimized = isNative ? GetMode().IsOptIrtoc() : false;

    regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                           irtocOptimized);
    SET_CFI_CALLEE_REGS(calleeRegsMask);
    SET_CFI_CALLEE_VREGS(calleeVregsMask);
    auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
    auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());
    auto sp = GetTarget().GetStackReg();
    GetMasm()->StoreCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
    GetMasm()->StoreCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    SET_CFI_OFFSET(pushCallees, GetEncoder()->GetCursorOffset());
}
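
// The stores use negative offsets because SP still sits near the frame top
// here (it has only moved past the optional FP/LR and flags/method pairs):
// the callee-saved area below SP is written before SP itself is moved by
// the single EncodeSub at the end of the prologue. Scalar and vector save
// areas are adjacent, so the vreg offset additionally skips the full scalar
// callee area reported by GetCalleeRegistersCount(false).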

size_t Aarch64CallingConvention::SaveFpLr(const FrameInfo &frameInfo, [[maybe_unused]] Encoder *encoder,
                                          [[maybe_unused]] Reg fp, [[maybe_unused]] Reg lr)
{
    if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
        GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
        SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
        return CFrameLayout::GetFpLrSlotsCount();
    }
    return 0;
}
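
// Pushing the {fp, lr} pair through PushCPURegList emits one pre-indexed
// stp, so the save costs a single instruction and keeps SP 16-byte aligned.
// The static_assert pins the layout invariant the caller relies on: the
// FP/LR slots fit inside the top-to-regs area whose remaining size is
// returned to (and decremented by) the prologue.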

void Aarch64CallingConvention::EncodeDynCallMode([[maybe_unused]] const FrameInfo &frameInfo, Encoder *encoder)
{
    static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
    static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);

    ASSERT(frameInfo.GetSaveFrameAndLinkRegs());

    constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
    constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
    auto numExpected = GetDynInfo().GetNumExpectedArgs();

    auto expandDone = encoder->CreateLabel();
    encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
    encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));

    MemRef expandEntrypoint(Reg(GetThreadReg(Arch::AARCH64), GetTarget().GetPtrRegType()),
                            GetDynInfo().GetExpandEntrypointTlsOffset());
    GetEncoder()->MakeCall(expandEntrypoint);
    encoder->BindLabel(expandDone);
}
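
// Rough shape of the emitted sequence, in pseudo-assembly:
//   cmp  <num_actual>, #num_expected
//   b.ge expand_done
//   mov  <num_expected_reg>, #num_expected
//   ldr  tmp, [<thread_reg>, #expand_entrypoint_tls_offset]
//   blr  tmp
// expand_done:
// i.e. when a dynamic call site supplies fewer arguments than the callee
// expects, the expansion entrypoint (reached through a TLS slot off the
// thread register) pads the argument list before the body executes.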

void Aarch64CallingConvention::GeneratePrologue(const FrameInfo &frameInfo)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // Set up FP
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        // If the SetupFrame flag is set, then SaveFrameAndLinkRegs must be set as well.
        // They are separate flags because Irtoc does not need the frame setup
        // but still requires the frame and link registers to be saved.
        ASSERT(!frameInfo.GetSetupFrame() || frameInfo.GetSaveFrameAndLinkRegs());
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Reset flags and set up the method
    if (frameInfo.GetSetupFrame()) {
        static_assert(CFrameMethod::End() == CFrameFlags::Start());
        constexpr int64_t SLOTS_COUNT = CFrameMethod::GetSize() + CFrameFlags::GetSize();

        GetMasm()->Stp(VixlReg(InitFlagsReg(frameInfo.GetHasFloatRegs())),  // Reset OSR flag and set HasFloatRegsFlag
                       VixlReg(GetTarget().GetParamReg(0)),                 // Set the Method pointer
                       vixl::aarch64::MemOperand(VixlReg(sp), VixlImm(-SLOTS_COUNT * fl.GetSlotSize()),
                                                 vixl::aarch64::AddrMode::PreIndex));
        spToRegsSlots -= SLOTS_COUNT;
    }

    // Save callee-saved registers
    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, false);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}
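
// Resulting stack picture for a fully set up frame (high addresses first):
//   [FP, LR]               <- pushed by SaveFpLr; FP points here afterwards
//   [flags] [method]       <- the pre-indexed stp above, when SetupFrame is set
//   [callee-saved x-regs]
//   [callee-saved v-regs]
//   [spills / locals ...]  <- SP after the final EncodeSub
// Each block is optional, which is why spToRegsSlots is decremented as the
// optional stores are emitted: the callee-saved offsets stay correct
// whichever subset of blocks a particular frame actually needs.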

template <bool IS_NATIVE>
void Aarch64CallingConvention::GenerateEpilogueImpl(const FrameInfo &frameInfo, const std::function<void()> &postJob)
{
    const CFrameLayout &fl = GetEncoder()->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();

    if (postJob) {
        postJob();
    }

    // Restore callee-saved registers
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    if constexpr (IS_NATIVE) {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                               GetMode().IsOptIrtoc());
    } else {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs());
    }

    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());

    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto lastCalleeReg = fl.GetRegsSlotsCount() - calleeRegsMask.Count();
        auto lastCalleeVreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(lastCalleeVreg * fl.GetSlotSize())));
    } else {
        // SP points either to the frame's top or to the frame's top plus the FP/LR slot
        auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // Adjust for the FP/LR slot
            spToRegsSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
        auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    }
    SET_CFI_OFFSET(popCallees, GetEncoder()->GetCursorOffset());

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto spToFrameTopSlots =
            fl.GetRegsSlotsCount() + static_cast<size_t>(CFrameRegs::Start() - CFrameReturnAddr::Start());
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            spToFrameTopSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto spToFrameTopOffset = spToFrameTopSlots * fl.GetSlotSize();
        GetEncoder()->EncodeAdd(sp, sp, Imm(spToFrameTopOffset));
    }

    // Restore FP and LR
    if (IsOsrMode()) {
        GetEncoder()->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
        GetEncoder()->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
    } else if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
    }
    SET_CFI_OFFSET(popFplr, GetEncoder()->GetCursorOffset());

    GetMasm()->Ret();
}
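
// The epilogue is the prologue run backwards, with two possible entry
// states: if the prologue adjusted SP, the restores use positive offsets
// from the frame bottom; otherwise SP is still near the frame top and the
// same negative offsets as in SaveCalleeSavedRegs apply. OSR frames are
// special-cased because their FP/LR pair is reached through FP rather than
// through SP.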

void Aarch64CallingConvention::GenerateEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogueImpl<false>(frameInfo, postJob);
}

void Aarch64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // The 'native' calling convention requires setting up FP for FastPath calls from the Irtoc interpreter entrypoint
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Save callee-saved registers
    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, true);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}
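
// Compared to GeneratePrologue, the native variant never stores the
// flags/method pair (there is no managed frame slot to fill) and passes
// isNative = true to SaveCalleeSavedRegs, which for optimized Irtoc code
// lets FillUsedCalleeSavedRegisters narrow the set of callee registers that
// actually need saving.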

void Aarch64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogueImpl<true>(frameInfo, postJob);
}
}  // namespace ark::compiler::aarch64