• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Low-level calling convention
17 */
18 #include "target/aarch64/target.h"
19 
20 namespace ark::compiler::aarch64 {
21 
22 constexpr int32_t IMM_2 = 2;
23 
/// Forwards all construction arguments to the base CallingConvention;
/// no AArch64-specific state is initialized here.
Aarch64CallingConvention::Aarch64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}
29 
GetTarget()30 constexpr auto Aarch64CallingConvention::GetTarget()
31 {
32     return ark::compiler::Target(Arch::AARCH64);
33 }
34 
/// The AArch64 calling convention is unconditionally available on this target.
bool Aarch64CallingConvention::IsValid() const
{
    return true;
}
39 
GetMasm()40 vixl::aarch64::MacroAssembler *Aarch64CallingConvention::GetMasm()
41 {
42     return (static_cast<Aarch64Encoder *>(GetEncoder()))->GetMasm();
43 }
44 
GetParameterInfo(uint8_t regsOffset)45 ParameterInfo *Aarch64CallingConvention::GetParameterInfo(uint8_t regsOffset)
46 {
47     auto paramInfo = GetAllocator()->New<aarch64::Aarch64ParameterInfo>();
48     ASSERT(paramInfo != nullptr);
49     for (int i = 0; i < regsOffset; ++i) {
50         paramInfo->GetNativeParam(INT64_TYPE);
51     }
52     return paramInfo;
53 }
54 
GetCodeEntry()55 void *Aarch64CallingConvention::GetCodeEntry()
56 {
57     return reinterpret_cast<void *>(GetMasm()->GetInstructionAt(0));
58 }
59 
GetCodeSize()60 uint32_t Aarch64CallingConvention::GetCodeSize()
61 {
62     return GetMasm()->GetSizeOfCodeGenerated();
63 }
64 
/**
 * Pads the scalar and vector register lists to an even register count so they
 * can be pushed/popped in 16-byte pairs: the scalar list is padded with XZR,
 * the vector list with the regfile's dedicated alignment vreg.
 *
 * NOTE(review): both lists are taken BY VALUE in this definition, so the
 * Combine() calls below cannot propagate to the caller's lists — verify
 * against the declaration in target.h (PushRegs/PopRegs appear to rely on the
 * padding being visible to them).
 */
void Aarch64CallingConvention::PrepareToPushPopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
                                                    bool isCallee)
{
    if ((regs.GetCount() % IMM_2) == 1) {
        // XZR must not already be a member of the list.
        ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
        regs.Combine(vixl::aarch64::xzr);
    }
    if ((vregs.GetCount() % IMM_2) == 1) {
        auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
        // The alignment vreg is reserved by the register description for exactly this purpose.
        uint8_t allignmentVreg = regdescr->GetAlignmentVreg(isCallee);
        ASSERT((vregs.GetList() & (UINT64_C(1) << allignmentVreg)) == 0);
        vregs.Combine(allignmentVreg);
    }
}
79 
PushRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool isCallee)80 size_t Aarch64CallingConvention::PushRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
81                                           bool isCallee)
82 {
83     PrepareToPushPopRegs(regs, vregs, isCallee);
84     GetMasm()->PushCPURegList(vregs);
85     GetMasm()->PushCPURegList(regs);
86     return vregs.GetCount() + regs.GetCount();
87 }
88 
PopRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool isCallee)89 size_t Aarch64CallingConvention::PopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs, bool isCallee)
90 {
91     PrepareToPushPopRegs(regs, vregs, isCallee);
92     GetMasm()->PopCPURegList(regs);
93     GetMasm()->PopCPURegList(vregs);
94     return vregs.GetCount() + regs.GetCount();
95 }
96 
GetNativeParam(const TypeInfo & type)97 std::variant<Reg, uint8_t> Aarch64ParameterInfo::GetNativeParam(const TypeInfo &type)
98 {
99     if (type.IsFloat()) {
100         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
101             return currentStackOffset_++;
102         }
103         return Reg(currentVectorNumber_++, type);
104     }
105     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
106         return currentStackOffset_++;
107     }
108     auto ret = Reg(currentScalarNumber_++, type);
109     if (type.GetSize() > DOUBLE_WORD_SIZE) {
110         currentScalarNumber_++;
111     }
112     return ret;
113 }
114 
GetNextLocation(DataType::Type type)115 Location Aarch64ParameterInfo::GetNextLocation(DataType::Type type)
116 {
117     if (DataType::IsFloatType(type)) {
118         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
119             return Location::MakeStackArgument(currentStackOffset_++);
120         }
121         return Location::MakeFpRegister(currentVectorNumber_++);
122     }
123     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
124         return Location::MakeStackArgument(currentStackOffset_++);
125     }
126     Target target(Arch::AARCH64);
127     return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
128 }
129 
InitFlagsReg(bool hasFloatRegs)130 Reg Aarch64CallingConvention::InitFlagsReg(bool hasFloatRegs)
131 {
132     auto flags {static_cast<uint64_t>(hasFloatRegs) << CFrameLayout::HasFloatRegsFlag::START_BIT};
133     auto flagsReg {GetTarget().GetZeroReg()};
134     if (flags != 0U) {
135         flagsReg = GetTarget().GetLinkReg();
136         GetEncoder()->EncodeMov(flagsReg, Imm(flags));
137     }
138     return flagsReg;
139 }
140 
141 using vixl::aarch64::CPURegList, vixl::aarch64::CPURegister, vixl::aarch64::MemOperand;
142 
/**
 * Stores the used callee-saved scalar and vector registers into their CFrame
 * slots, addressed at negative offsets below the current SP.
 *
 * @param frameInfo     frame description; GetSaveUnusedCalleeRegs() decides whether
 *                      unused callee-saved registers are spilled as well
 * @param fl            CFrame layout used to compute slot offsets
 * @param spToRegsSlots distance (in slots) from the current SP down to the register area
 * @param isNative      true when called from the native prologue; only then may the
 *                      Irtoc-optimized (reduced) register set be used
 */
void Aarch64CallingConvention::SaveCalleeSavedRegs(const FrameInfo &frameInfo, const CFrameLayout &fl,
                                                   size_t spToRegsSlots, bool isNative)
{
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    // The reduced (Irtoc-optimized) callee set is only applicable to native frames.
    bool irtocOptimized = isNative ? GetMode().IsOptIrtoc() : false;

    regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                           irtocOptimized);
    // Record the saved sets for CFI (debug-info) emission.
    SET_CFI_CALLEE_REGS(calleeRegsMask);
    SET_CFI_CALLEE_VREGS(calleeVregsMask);
    // Vector slots lie a full scalar-callee area deeper than the scalar slots,
    // hence the extra fl.GetCalleeRegistersCount(false) term.
    auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
    auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());
    auto sp = GetTarget().GetStackReg();
    // Offsets are negative: the register area is below the current SP.
    GetMasm()->StoreCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
    GetMasm()->StoreCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    SET_CFI_OFFSET(pushCallees, GetEncoder()->GetCursorOffset());
}
164 
/**
 * Pushes the FP/LR register pair when the frame requires it (or when CFI must
 * be provided), recording the CFI offset of the push.
 *
 * @return number of stack slots consumed (the FPLR slot count), or 0 when
 *         nothing was pushed
 */
size_t Aarch64CallingConvention::SaveFpLr(const FrameInfo &frameInfo, [[maybe_unused]] Encoder *encoder,
                                          [[maybe_unused]] Reg fp, [[maybe_unused]] Reg lr)
{
    if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        // The FPLR pair must fit inside the top-of-frame slot area.
        static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
        GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
        SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
        return CFrameLayout::GetFpLrSlotsCount();
    }
    return 0;
}
176 
EncodeDynCallMode(const FrameInfo & frameInfo,Encoder * encoder)177 void Aarch64CallingConvention::EncodeDynCallMode([[maybe_unused]] const FrameInfo &frameInfo, Encoder *encoder)
178 {
179     static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
180     static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);
181 
182     ASSERT(frameInfo.GetSaveFrameAndLinkRegs());
183 
184     constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
185     constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
186     auto numExpected = GetDynInfo().GetNumExpectedArgs();
187 
188     auto expandDone = encoder->CreateLabel();
189     encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
190     encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));
191 
192     MemRef expandEntrypoint(Reg(GetThreadReg(Arch::AARCH64), GetTarget().GetPtrRegType()),
193                             GetDynInfo().GetExpandEntrypointTlsOffset());
194     GetEncoder()->MakeCall(expandEntrypoint);
195     encoder->BindLabel(expandDone);
196 }
197 
/**
 * Generates the standard (managed) method prologue:
 *   1. save FP/LR,
 *   2. set up FP,
 *   3. perform the dynamic-call argument check when required,
 *   4. store the frame flags word and the Method pointer,
 *   5. spill callee-saved registers,
 *   6. adjust SP down to the frame bottom.
 */
void Aarch64CallingConvention::GeneratePrologue(const FrameInfo &frameInfo)
{
    // Locals count must be even so the 8-byte slot area stays pair-aligned.
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    // Slots remaining between SP and the register area; shrinks as data is pushed.
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // Setup FP
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        // If SetupFrame flag is set, then SaveFrameAndLinkRegs must be set also.
        // These are separate flags as it looks like Irtoc does not need frame setup
        // but requires to save frame and link regs.
        ASSERT(!frameInfo.GetSetupFrame() || frameInfo.GetSaveFrameAndLinkRegs());
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Reset flags and setup method
    if (frameInfo.GetSetupFrame()) {
        // Flags and Method slots must be adjacent so one STP can write both.
        static_assert(CFrameMethod::End() == CFrameFlags::Start());
        constexpr int64_t SLOTS_COUNT = CFrameMethod::GetSize() + CFrameFlags::GetSize();

        GetMasm()->Stp(VixlReg(InitFlagsReg(frameInfo.GetHasFloatRegs())),  // Reset OSR flag and set HasFloatRegsFlag
                       VixlReg(GetTarget().GetParamReg(0)),                 // Set Method pointer
                       vixl::aarch64::MemOperand(VixlReg(sp), VixlImm(-SLOTS_COUNT * fl.GetSlotSize()),
                                                 vixl::aarch64::AddrMode::PreIndex));
        spToRegsSlots -= SLOTS_COUNT;
    }

    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, false);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}
245 
/**
 * Shared epilogue generation for managed (IS_NATIVE == false) and native
 * (IS_NATIVE == true) frames.  The template parameter only affects which
 * callee-saved register set is restored: native frames may use the
 * Irtoc-optimized (reduced) set.
 *
 * @param frameInfo frame description driving which steps are emitted
 * @param postJob   optional callback executed before registers are restored
 */
template <bool IS_NATIVE>
void Aarch64CallingConvention::GenerateEpilogueImpl(const FrameInfo &frameInfo, const std::function<void()> &postJob)
{
    const CFrameLayout &fl = GetEncoder()->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();

    if (postJob) {
        postJob();
    }

    // Restore callee-registers
    RegMask calleeRegsMask;
    VRegMask calleeVregsMask;
    if constexpr (IS_NATIVE) {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
                                               GetMode().IsOptIrtoc());
    } else {
        regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs());
    }

    auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
    auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());

    // The load offsets depend on where SP currently points, which is decided
    // by the AdjustSpReg flag (mirrors the prologue's final SP adjustment).
    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto lastCalleeReg = fl.GetRegsSlotsCount() - calleeRegsMask.Count();
        auto lastCalleeVreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(lastCalleeVreg * fl.GetSlotSize())));
    } else {
        // SP either points to the frame's top or frame's top + FPLR slot
        auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // Adjust for FPLR slot
            spToRegsSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
        auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
        GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
    }
    SET_CFI_OFFSET(popCallees, GetEncoder()->GetCursorOffset());

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto spToFrameTopSlots =
            fl.GetRegsSlotsCount() + static_cast<size_t>(CFrameRegs::Start() - CFrameReturnAddr::Start());
        if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            spToFrameTopSlots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto spToFrameTopOffset = spToFrameTopSlots * fl.GetSlotSize();
        GetEncoder()->EncodeAdd(sp, sp, Imm(spToFrameTopOffset));
    }

    // Restore FP and LR
    if (IsOsrMode()) {
        // In OSR mode FP/LR are reloaded relative to FP at the OSR-specific offset.
        GetEncoder()->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
        GetEncoder()->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
    } else if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
    }
    SET_CFI_OFFSET(popFplr, GetEncoder()->GetCursorOffset());

    GetMasm()->Ret();
}
315 
GenerateEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)316 void Aarch64CallingConvention::GenerateEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
317 {
318     GenerateEpilogueImpl<false>(frameInfo, postJob);
319 }
320 
/**
 * Generates the prologue for 'native' (FastPath/IRtoC) frames.  Differs from
 * GeneratePrologue in that no frame flags/Method pointer are stored and the
 * callee-saved spill may use the Irtoc-optimized register set (isNative=true).
 */
void Aarch64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    // Locals count must be even so the 8-byte slot area stays pair-aligned.
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    // Slots remaining between SP and the register area; shrinks as data is pushed.
    auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);

    // 'Native' calling convention requires setting up FP for FastPath calls from IRtoC Interpreter entrypoint
    if (frameInfo.GetSetupFrame() || ProvideCFI()) {
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    }

    if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
        EncodeDynCallMode(frameInfo, encoder);
    }

    // Save callee-saved registers
    SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, true);

    // Adjust SP
    if (frameInfo.GetAdjustSpReg()) {
        auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
    }
}
353 
GenerateNativeEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)354 void Aarch64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
355 {
356     GenerateEpilogueImpl<true>(frameInfo, postJob);
357 }
358 }  // namespace ark::compiler::aarch64
359