/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
Low-level calling convention
*/
18 #include "target/aarch64/target.h"
19 
20 namespace ark::compiler::aarch64 {
21 
22 constexpr int32_t IMM_2 = 2;
23 
Aarch64CallingConvention(ArenaAllocator * allocator,Encoder * enc,RegistersDescription * descr,CallConvMode mode)24 Aarch64CallingConvention::Aarch64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
25                                                    CallConvMode mode)
26     : CallingConvention(allocator, enc, descr, mode)
27 {
28 }
29 
GetTarget()30 constexpr auto Aarch64CallingConvention::GetTarget()
31 {
32     return ark::compiler::Target(Arch::AARCH64);
33 }
34 
IsValid() const35 bool Aarch64CallingConvention::IsValid() const
36 {
37     return true;
38 }
39 
GetMasm()40 vixl::aarch64::MacroAssembler *Aarch64CallingConvention::GetMasm()
41 {
42     return (static_cast<Aarch64Encoder *>(GetEncoder()))->GetMasm();
43 }
44 
GetParameterInfo(uint8_t regsOffset)45 ParameterInfo *Aarch64CallingConvention::GetParameterInfo(uint8_t regsOffset)
46 {
47     auto paramInfo = GetAllocator()->New<aarch64::Aarch64ParameterInfo>();
48     for (int i = 0; i < regsOffset; ++i) {
49         paramInfo->GetNativeParam(INT64_TYPE);
50     }
51     return paramInfo;
52 }
53 
GetCodeEntry()54 void *Aarch64CallingConvention::GetCodeEntry()
55 {
56     return reinterpret_cast<void *>(GetMasm()->GetInstructionAt(0));
57 }
58 
GetCodeSize()59 uint32_t Aarch64CallingConvention::GetCodeSize()
60 {
61     return GetMasm()->GetSizeOfCodeGenerated();
62 }
63 
PrepareToPushPopRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool isCallee)64 void Aarch64CallingConvention::PrepareToPushPopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
65                                                     bool isCallee)
66 {
67     if ((regs.GetCount() % IMM_2) == 1) {
68         ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
69         regs.Combine(vixl::aarch64::xzr);
70     }
71     if ((vregs.GetCount() % IMM_2) == 1) {
72         auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
73         uint8_t allignmentVreg = regdescr->GetAlignmentVreg(isCallee);
74         ASSERT((vregs.GetList() & (UINT64_C(1) << allignmentVreg)) == 0);
75         vregs.Combine(allignmentVreg);
76     }
77 }
78 
PushRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool isCallee)79 size_t Aarch64CallingConvention::PushRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
80                                           bool isCallee)
81 {
82     PrepareToPushPopRegs(regs, vregs, isCallee);
83     GetMasm()->PushCPURegList(vregs);
84     GetMasm()->PushCPURegList(regs);
85     return vregs.GetCount() + regs.GetCount();
86 }
87 
PopRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool isCallee)88 size_t Aarch64CallingConvention::PopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs, bool isCallee)
89 {
90     PrepareToPushPopRegs(regs, vregs, isCallee);
91     GetMasm()->PopCPURegList(regs);
92     GetMasm()->PopCPURegList(vregs);
93     return vregs.GetCount() + regs.GetCount();
94 }
95 
GetNativeParam(const TypeInfo & type)96 std::variant<Reg, uint8_t> Aarch64ParameterInfo::GetNativeParam(const TypeInfo &type)
97 {
98     if (type.IsFloat()) {
99         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
100             return currentStackOffset_++;
101         }
102         return Reg(currentVectorNumber_++, type);
103     }
104     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
105         return currentStackOffset_++;
106     }
107     auto ret = Reg(currentScalarNumber_++, type);
108     if (type.GetSize() > DOUBLE_WORD_SIZE) {
109         currentScalarNumber_++;
110     }
111     return ret;
112 }
113 
GetNextLocation(DataType::Type type)114 Location Aarch64ParameterInfo::GetNextLocation(DataType::Type type)
115 {
116     if (DataType::IsFloatType(type)) {
117         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
118             return Location::MakeStackArgument(currentStackOffset_++);
119         }
120         return Location::MakeFpRegister(currentVectorNumber_++);
121     }
122     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
123         return Location::MakeStackArgument(currentStackOffset_++);
124     }
125     Target target(Arch::AARCH64);
126     return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
127 }
128 
InitFlagsReg(bool hasFloatRegs)129 Reg Aarch64CallingConvention::InitFlagsReg(bool hasFloatRegs)
130 {
131     auto flags {static_cast<uint64_t>(hasFloatRegs) << CFrameLayout::HasFloatRegsFlag::START_BIT};
132     auto flagsReg {GetTarget().GetZeroReg()};
133     if (flags != 0U) {
134         flagsReg = GetTarget().GetLinkReg();
135         GetEncoder()->EncodeMov(flagsReg, Imm(flags));
136     }
137     return flagsReg;
138 }
139 
140 using vixl::aarch64::CPURegList, vixl::aarch64::CPURegister, vixl::aarch64::MemOperand;
141 
SaveCalleeSavedRegs(const FrameInfo & frameInfo,const CFrameLayout & fl,size_t spToRegsSlots,bool isNative)142 void Aarch64CallingConvention::SaveCalleeSavedRegs(const FrameInfo &frameInfo, const CFrameLayout &fl,
143                                                    size_t spToRegsSlots, bool isNative)
144 {
145     RegMask calleeRegsMask;
146     VRegMask calleeVregsMask;
147     auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
148     bool irtocOptimized = isNative ? GetMode().IsOptIrtoc() : false;
149 
150     regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
151                                            irtocOptimized);
152     SET_CFI_CALLEE_REGS(calleeRegsMask);
153     SET_CFI_CALLEE_VREGS(calleeVregsMask);
154     auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
155     auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
156     auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
157     auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());
158     auto sp = GetTarget().GetStackReg();
159     GetMasm()->StoreCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
160     GetMasm()->StoreCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
161     SET_CFI_OFFSET(pushCallees, GetEncoder()->GetCursorOffset());
162 }
163 
SaveFpLr(const FrameInfo & frameInfo,Encoder * encoder,Reg fp,Reg lr)164 size_t Aarch64CallingConvention::SaveFpLr(const FrameInfo &frameInfo, [[maybe_unused]] Encoder *encoder,
165                                           [[maybe_unused]] Reg fp, [[maybe_unused]] Reg lr)
166 {
167     if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
168         static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
169         GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
170         SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
171         return CFrameLayout::GetFpLrSlotsCount();
172     }
173     return 0;
174 }
175 
EncodeDynCallMode(const FrameInfo & frameInfo,Encoder * encoder)176 void Aarch64CallingConvention::EncodeDynCallMode([[maybe_unused]] const FrameInfo &frameInfo, Encoder *encoder)
177 {
178     static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
179     static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);
180 
181     ASSERT(frameInfo.GetSaveFrameAndLinkRegs());
182 
183     constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
184     constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
185     auto numExpected = GetDynInfo().GetNumExpectedArgs();
186 
187     auto expandDone = encoder->CreateLabel();
188     encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
189     encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));
190 
191     MemRef expandEntrypoint(Reg(GetThreadReg(Arch::AARCH64), GetTarget().GetPtrRegType()),
192                             GetDynInfo().GetExpandEntrypointTlsOffset());
193     GetEncoder()->MakeCall(expandEntrypoint);
194     encoder->BindLabel(expandDone);
195 }
196 
GeneratePrologue(const FrameInfo & frameInfo)197 void Aarch64CallingConvention::GeneratePrologue(const FrameInfo &frameInfo)
198 {
199     static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
200     auto encoder = GetEncoder();
201     const CFrameLayout &fl = encoder->GetFrameLayout();
202     auto sp = GetTarget().GetStackReg();
203     auto fp = GetTarget().GetFrameReg();
204     auto lr = GetTarget().GetLinkReg();
205     auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
206 
207     // Save FP and LR
208     spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);
209 
210     // Setup FP
211     if (frameInfo.GetSetupFrame() || ProvideCFI()) {
212         // If SetupFrame flag is set, then SaveFrameAndLinkRegs must be set also.
213         // These are separate flags as it looks like Irtoc does not need frame setup
214         // but requires to save frame and link regs.
215         ASSERT(!frameInfo.GetSetupFrame() || frameInfo.GetSaveFrameAndLinkRegs());
216         encoder->EncodeMov(fp, sp);
217         SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
218     }
219 
220     if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
221         EncodeDynCallMode(frameInfo, encoder);
222     }
223 
224     // Reset flags and setup method
225     if (frameInfo.GetSetupFrame()) {
226         static_assert(CFrameMethod::End() == CFrameFlags::Start());
227         constexpr int64_t SLOTS_COUNT = CFrameMethod::GetSize() + CFrameFlags::GetSize();
228 
229         GetMasm()->Stp(VixlReg(InitFlagsReg(frameInfo.GetHasFloatRegs())),  // Reset OSR flag and set HasFloatRegsFlag
230                        VixlReg(GetTarget().GetParamReg(0)),                 // Set Method pointer
231                        vixl::aarch64::MemOperand(VixlReg(sp), VixlImm(-SLOTS_COUNT * fl.GetSlotSize()),
232                                                  vixl::aarch64::AddrMode::PreIndex));
233         spToRegsSlots -= SLOTS_COUNT;
234     }
235 
236     SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, false);
237 
238     // Adjust SP
239     if (frameInfo.GetAdjustSpReg()) {
240         auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
241         encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
242     }
243 }
244 
245 template <bool IS_NATIVE>
GenerateEpilogueImpl(const FrameInfo & frameInfo,const std::function<void ()> & postJob)246 void Aarch64CallingConvention::GenerateEpilogueImpl(const FrameInfo &frameInfo, const std::function<void()> &postJob)
247 {
248     const CFrameLayout &fl = GetEncoder()->GetFrameLayout();
249     auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
250     auto sp = GetTarget().GetStackReg();
251     auto fp = GetTarget().GetFrameReg();
252     auto lr = GetTarget().GetLinkReg();
253 
254     if (postJob) {
255         postJob();
256     }
257 
258     // Restore callee-registers
259     RegMask calleeRegsMask;
260     VRegMask calleeVregsMask;
261     if constexpr (IS_NATIVE) {
262         regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs(),
263                                                GetMode().IsOptIrtoc());
264     } else {
265         regdescr->FillUsedCalleeSavedRegisters(&calleeRegsMask, &calleeVregsMask, frameInfo.GetSaveUnusedCalleeRegs());
266     }
267 
268     auto calleeRegs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, calleeRegsMask.GetValue());
269     auto calleeVregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, calleeVregsMask.GetValue());
270 
271     if (frameInfo.GetAdjustSpReg()) {
272         // SP points to the frame's bottom
273         auto lastCalleeReg = fl.GetRegsSlotsCount() - calleeRegsMask.Count();
274         auto lastCalleeVreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - calleeVregsMask.Count();
275         GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(lastCalleeReg * fl.GetSlotSize())));
276         GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(lastCalleeVreg * fl.GetSlotSize())));
277     } else {
278         // SP either points to the frame's top or frame's top + FPLR slot
279         auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
280         if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
281             // Adjust for FPLR slot
282             spToRegsSlots -= CFrameLayout::GetFpLrSlotsCount();
283         }
284         auto lastCalleeReg = spToRegsSlots + calleeRegsMask.Count();
285         auto lastCalleeVreg = spToRegsSlots + fl.GetCalleeRegistersCount(false) + calleeVregsMask.Count();
286         GetMasm()->LoadCPURegList(calleeRegs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeReg * fl.GetSlotSize())));
287         GetMasm()->LoadCPURegList(calleeVregs, MemOperand(VixlReg(sp), VixlImm(-lastCalleeVreg * fl.GetSlotSize())));
288     }
289     SET_CFI_OFFSET(popCallees, GetEncoder()->GetCursorOffset());
290 
291     // Adjust SP
292     if (frameInfo.GetAdjustSpReg()) {
293         // SP points to the frame's bottom
294         auto spToFrameTopSlots = fl.GetRegsSlotsCount() + CFrameRegs::Start() - CFrameReturnAddr::Start();
295         if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
296             spToFrameTopSlots -= CFrameLayout::GetFpLrSlotsCount();
297         }
298         auto spToFrameTopOffset = spToFrameTopSlots * fl.GetSlotSize();
299         GetEncoder()->EncodeAdd(sp, sp, Imm(spToFrameTopOffset));
300     }
301 
302     // Restore FP and LR
303     if (IsOsrMode()) {
304         GetEncoder()->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
305         GetEncoder()->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
306     } else if (frameInfo.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
307         GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
308     }
309     SET_CFI_OFFSET(popFplr, GetEncoder()->GetCursorOffset());
310 
311     GetMasm()->Ret();
312 }
313 
GenerateEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)314 void Aarch64CallingConvention::GenerateEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
315 {
316     GenerateEpilogueImpl<false>(frameInfo, postJob);
317 }
318 
GenerateNativePrologue(const FrameInfo & frameInfo)319 void Aarch64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
320 {
321     static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
322     auto encoder = GetEncoder();
323     const CFrameLayout &fl = encoder->GetFrameLayout();
324     auto sp = GetTarget().GetStackReg();
325     auto fp = GetTarget().GetFrameReg();
326     auto lr = GetTarget().GetLinkReg();
327     auto spToRegsSlots = CFrameLayout::GetTopToRegsSlotsCount();
328 
329     // Save FP and LR
330     spToRegsSlots -= SaveFpLr(frameInfo, encoder, fp, lr);
331 
332     // 'Native' calling convention requires setting up FP for FastPath calls from IRtoC Interpreter entrypoint
333     if (frameInfo.GetSetupFrame() || ProvideCFI()) {
334         encoder->EncodeMov(fp, sp);
335         SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
336     }
337 
338     if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
339         EncodeDynCallMode(frameInfo, encoder);
340     }
341 
342     // Save callee-saved registers
343     SaveCalleeSavedRegs(frameInfo, fl, spToRegsSlots, true);
344 
345     // Adjust SP
346     if (frameInfo.GetAdjustSpReg()) {
347         auto spToFrameEndOffset = (spToRegsSlots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
348         encoder->EncodeSub(sp, sp, Imm(spToFrameEndOffset));
349     }
350 }
351 
GenerateNativeEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)352 void Aarch64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
353 {
354     GenerateEpilogueImpl<true>(frameInfo, postJob);
355 }
}  // namespace ark::compiler::aarch64