1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Low-level calling convention
17 */
18 #include "target/aarch64/target.h"
19
20 namespace panda::compiler::aarch64 {
21
// Parity divisor: PushRegs/PopRegs pad register lists to an even count so that
// paired push/pop operations keep SP 16-byte aligned (AAPCS64 requirement).
constexpr int32_t IMM_2 = 2;
23
Aarch64CallingConvention(ArenaAllocator * allocator,Encoder * enc,RegistersDescription * descr,CallConvMode mode)24 Aarch64CallingConvention::Aarch64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
25 CallConvMode mode)
26 : CallingConvention(allocator, enc, descr, mode)
27 {
28 }
29
GetParameterInfo(uint8_t regs_offset)30 ParameterInfo *Aarch64CallingConvention::GetParameterInfo(uint8_t regs_offset)
31 {
32 auto param_info = GetAllocator()->New<aarch64::Aarch64ParameterInfo>();
33 for (int i = 0; i < regs_offset; ++i) {
34 param_info->GetNativeParam(INT64_TYPE);
35 }
36 return param_info;
37 }
38
GetCodeEntry()39 void *Aarch64CallingConvention::GetCodeEntry()
40 {
41 return reinterpret_cast<void *>(GetMasm()->GetInstructionAt(0));
42 }
43
GetCodeSize()44 uint32_t Aarch64CallingConvention::GetCodeSize()
45 {
46 return GetMasm()->GetSizeOfCodeGenerated();
47 }
48
PushRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool is_callee)49 size_t Aarch64CallingConvention::PushRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
50 bool is_callee)
51 {
52 if ((regs.GetCount() % IMM_2) == 1) {
53 ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
54 regs.Combine(vixl::aarch64::xzr);
55 }
56 if ((vregs.GetCount() % IMM_2) == 1) {
57 auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
58 uint8_t allignment_vreg = regdescr->GetAlignmentVreg(is_callee);
59 ASSERT((vregs.GetList() & (UINT64_C(1) << allignment_vreg)) == 0);
60 vregs.Combine(allignment_vreg);
61 }
62 GetMasm()->PushCPURegList(vregs);
63 GetMasm()->PushCPURegList(regs);
64 return vregs.GetCount() + regs.GetCount();
65 }
66
PopRegs(vixl::aarch64::CPURegList regs,vixl::aarch64::CPURegList vregs,bool is_callee)67 size_t Aarch64CallingConvention::PopRegs(vixl::aarch64::CPURegList regs, vixl::aarch64::CPURegList vregs,
68 bool is_callee)
69 {
70 if ((regs.GetCount() % IMM_2) == 1) {
71 ASSERT((regs.GetList() & (UINT64_C(1) << vixl::aarch64::xzr.GetCode())) == 0);
72 regs.Combine(vixl::aarch64::xzr);
73 }
74 if ((vregs.GetCount() % IMM_2) == 1) {
75 auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
76 uint8_t allignment_vreg = regdescr->GetAlignmentVreg(is_callee);
77 ASSERT((vregs.GetList() & (UINT64_C(1) << allignment_vreg)) == 0);
78 vregs.Combine(allignment_vreg);
79 }
80 GetMasm()->PopCPURegList(regs);
81 GetMasm()->PopCPURegList(vregs);
82 return vregs.GetCount() + regs.GetCount();
83 }
84
GetNativeParam(const TypeInfo & type)85 std::variant<Reg, uint8_t> Aarch64ParameterInfo::GetNativeParam(const TypeInfo &type)
86 {
87 if (type.IsFloat()) {
88 if (current_vector_number_ > MAX_VECTOR_PARAM_ID) {
89 return current_stack_offset_++;
90 }
91 return Reg(current_vector_number_++, type);
92 }
93 if (current_scalar_number_ > MAX_SCALAR_PARAM_ID) {
94 return current_stack_offset_++;
95 }
96 auto ret = Reg(current_scalar_number_++, type);
97 if (type.GetSize() > DOUBLE_WORD_SIZE) {
98 current_scalar_number_++;
99 }
100 return ret;
101 }
102
GetNextLocation(DataType::Type type)103 Location Aarch64ParameterInfo::GetNextLocation(DataType::Type type)
104 {
105 if (DataType::IsFloatType(type)) {
106 if (current_vector_number_ > MAX_VECTOR_PARAM_ID) {
107 return Location::MakeStackArgument(current_stack_offset_++);
108 }
109 return Location::MakeFpRegister(current_vector_number_++);
110 }
111 if (current_scalar_number_ > MAX_SCALAR_PARAM_ID) {
112 return Location::MakeStackArgument(current_stack_offset_++);
113 }
114 Target target(Arch::AARCH64);
115 return Location::MakeRegister(target.GetParamRegId(current_scalar_number_++));
116 }
117
InitFlagsReg(bool has_float_regs)118 Reg Aarch64CallingConvention::InitFlagsReg(bool has_float_regs)
119 {
120 auto flags {static_cast<uint64_t>(has_float_regs) << CFrameLayout::HasFloatRegsFlag::START_BIT};
121 auto flags_reg {GetTarget().GetZeroReg()};
122 if (flags != 0U) {
123 flags_reg = GetTarget().GetLinkReg();
124 GetEncoder()->EncodeMov(flags_reg, Imm(flags));
125 }
126 return flags_reg;
127 }
128
129 using vixl::aarch64::CPURegList, vixl::aarch64::CPURegister, vixl::aarch64::MemOperand;
130
/// Emits the managed-frame prologue. Depending on frame_info flags it:
///  1. pushes FP/LR,
///  2. sets FP = SP,
///  3. stores the flags word and the Method pointer (pre-indexed Stp),
///  4. spills the used callee-saved scalar/vector registers,
///  5. moves SP down to the frame's bottom.
/// CFI bookkeeping offsets are recorded after each step via SET_CFI_*.
void Aarch64CallingConvention::GeneratePrologue(const FrameInfo &frame_info)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    // Remaining distance (in slots) from the current SP down to the register
    // save area; decremented as the steps below consume slots.
    auto sp_to_regs_slots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
        GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
        SET_CFI_OFFSET(push_fplr, encoder->GetCursorOffset());
        sp_to_regs_slots -= CFrameLayout::GetFpLrSlotsCount();
    }

    // Setup FP
    if (frame_info.GetSetupFrame() || ProvideCFI()) {
        // If SetupFrame flag is set, then SaveFrameAndLinkRegs must be set also.
        // These are separate flags as it looks like Irtoc does not need frame setup
        // but requires to save frame and link regs.
        ASSERT(!frame_info.GetSetupFrame() || frame_info.GetSaveFrameAndLinkRegs());
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(set_fp, encoder->GetCursorOffset());
    }

    // Reset flags and setup method
    if (frame_info.GetSetupFrame()) {
        static_assert(CFrameMethod::End() == CFrameFlags::Start());
        constexpr int64_t slots_count = CFrameMethod::GetSize() + CFrameFlags::GetSize();

        // One pre-indexed store pair writes both fields and decrements SP.
        GetMasm()->Stp(VixlReg(InitFlagsReg(frame_info.GetHasFloatRegs())),  // Reset OSR flag and set HasFloatRegsFlag
                       VixlReg(GetTarget().GetParamReg(0)),                  // Set Method pointer
                       vixl::aarch64::MemOperand(VixlReg(sp), VixlImm(-slots_count * fl.GetSlotSize()),
                                                 vixl::aarch64::AddrMode::PreIndex));
        sp_to_regs_slots -= slots_count;
    }

    // Save callee-saved registers
    RegMask callee_regs_mask;
    VRegMask callee_vregs_mask;
    regdescr->FillUsedCalleeSavedRegisters(&callee_regs_mask, &callee_vregs_mask, frame_info.GetSaveUnusedCalleeRegs());
    SET_CFI_CALLEE_REGS(callee_regs_mask);
    SET_CFI_CALLEE_VREGS(callee_vregs_mask);
    // Slot offsets (from current SP, negative direction) of the last scalar
    // and vector callee-saved register; vregs sit below the full scalar area.
    auto last_callee_reg = sp_to_regs_slots + callee_regs_mask.Count();
    auto last_callee_vreg = sp_to_regs_slots + fl.GetCalleeRegistersCount(false) + callee_vregs_mask.Count();
    auto callee_regs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, callee_regs_mask.GetValue());
    auto callee_vregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, callee_vregs_mask.GetValue());
    GetMasm()->StoreCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(-last_callee_reg * fl.GetSlotSize())));
    GetMasm()->StoreCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(-last_callee_vreg * fl.GetSlotSize())));
    SET_CFI_OFFSET(push_callees, encoder->GetCursorOffset());

    // Adjust SP
    if (frame_info.GetAdjustSpReg()) {
        // Drop SP to the frame's bottom in a single subtraction.
        auto sp_to_frame_end_offset = (sp_to_regs_slots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(sp_to_frame_end_offset));
    }
}
192
/// Emits the managed-frame epilogue, mirroring GeneratePrologue:
/// runs the optional post_job, reloads the callee-saved registers, moves SP
/// back to the frame's top, restores FP/LR and emits Ret.
/// The addressing of the saved registers depends on whether SP was adjusted
/// in the prologue (frame_info.GetAdjustSpReg()).
void Aarch64CallingConvention::GenerateEpilogue(const FrameInfo &frame_info, std::function<void()> post_job)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();

    // Caller-provided work that must run before any register is restored.
    if (post_job) {
        post_job();
    }

    // Restore callee-registers
    RegMask callee_regs_mask;
    VRegMask callee_vregs_mask;
    // Must compute the same masks as the prologue, so the same
    // GetSaveUnusedCalleeRegs flag is passed.
    regdescr->FillUsedCalleeSavedRegisters(&callee_regs_mask, &callee_vregs_mask, frame_info.GetSaveUnusedCalleeRegs());

    auto callee_regs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, callee_regs_mask.GetValue());
    auto callee_vregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, callee_vregs_mask.GetValue());

    if (frame_info.GetAdjustSpReg()) {
        // SP points to the frame's bottom: saved registers are reached with
        // positive offsets from SP.
        auto last_callee_reg = fl.GetRegsSlotsCount() - callee_regs_mask.Count();
        auto last_callee_vreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - callee_vregs_mask.Count();
        GetMasm()->LoadCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(last_callee_reg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(last_callee_vreg * fl.GetSlotSize())));
    } else {
        // SP either points to the frame's top or frame's top + FPLR slot
        auto sp_to_regs_slots = CFrameLayout::GetTopToRegsSlotsCount();
        if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // Adjust for FPLR slot
            sp_to_regs_slots -= CFrameLayout::GetFpLrSlotsCount();
        }
        // Negative offsets from SP, symmetric to the prologue's stores.
        auto last_callee_reg = sp_to_regs_slots + callee_regs_mask.Count();
        auto last_callee_vreg = sp_to_regs_slots + fl.GetCalleeRegistersCount(false) + callee_vregs_mask.Count();
        GetMasm()->LoadCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(-last_callee_reg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(-last_callee_vreg * fl.GetSlotSize())));
    }
    SET_CFI_OFFSET(pop_callees, encoder->GetCursorOffset());

    // Adjust SP
    if (frame_info.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto sp_to_frame_top_slots = fl.GetRegsSlotsCount() + CFrameRegs::Start() - CFrameReturnAddr::Start();
        if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // FP/LR are popped separately below, so stop short of their slots.
            sp_to_frame_top_slots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto sp_to_frame_top_offset = sp_to_frame_top_slots * fl.GetSlotSize();
        encoder->EncodeAdd(sp, sp, Imm(sp_to_frame_top_offset));
    }

    // Restore FP and LR
    if (IsOsrMode()) {
        // In OSR mode FP/LR are reloaded FP-relative instead of popped from SP.
        encoder->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
        encoder->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
    } else if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
    }
    SET_CFI_OFFSET(pop_fplr, encoder->GetCursorOffset());

    GetMasm()->Ret();
}
256
/// Emits the prologue for the 'Native' calling convention. Same shape as
/// GeneratePrologue, except it never stores the flags word / Method pointer
/// (SetupFrame is asserted off) and sets FP only when CFI is requested.
/// NOTE(review): the FP/LR save, callee-saved spill and SP-adjust sections
/// duplicate GeneratePrologue verbatim - candidates for a shared helper.
void Aarch64CallingConvention::GenerateNativePrologue(const FrameInfo &frame_info)
{
    static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();
    // Remaining distance (in slots) from the current SP down to the register
    // save area; decremented as slots are consumed below.
    auto sp_to_regs_slots = CFrameLayout::GetTopToRegsSlotsCount();

    // Save FP and LR
    if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        static_assert(CFrameLayout::GetTopToRegsSlotsCount() > CFrameLayout::GetFpLrSlotsCount());
        GetMasm()->PushCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
        SET_CFI_OFFSET(push_fplr, encoder->GetCursorOffset());
        sp_to_regs_slots -= CFrameLayout::GetFpLrSlotsCount();
    }

    // Note that 'Native' calling convention does not reqire setting up FP.
    // We have to setup FP iff CFI is requested.
    ASSERT(!frame_info.GetSetupFrame());
    if (ProvideCFI()) {
        encoder->EncodeMov(fp, sp);
        SET_CFI_OFFSET(set_fp, encoder->GetCursorOffset());
    }

    // Save callee-saved registers
    RegMask callee_regs_mask;
    VRegMask callee_vregs_mask;
    regdescr->FillUsedCalleeSavedRegisters(&callee_regs_mask, &callee_vregs_mask, frame_info.GetSaveUnusedCalleeRegs());
    SET_CFI_CALLEE_REGS(callee_regs_mask);
    SET_CFI_CALLEE_VREGS(callee_vregs_mask);
    // Slot offsets (negative from SP) of the last scalar/vector callee-saved
    // register; vregs sit below the full scalar callee area.
    auto last_callee_reg = sp_to_regs_slots + callee_regs_mask.Count();
    auto last_callee_vreg = sp_to_regs_slots + fl.GetCalleeRegistersCount(false) + callee_vregs_mask.Count();
    auto callee_regs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, callee_regs_mask.GetValue());
    auto callee_vregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, callee_vregs_mask.GetValue());
    GetMasm()->StoreCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(-last_callee_reg * fl.GetSlotSize())));
    GetMasm()->StoreCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(-last_callee_vreg * fl.GetSlotSize())));
    SET_CFI_OFFSET(push_callees, encoder->GetCursorOffset());

    // Adjust SP
    if (frame_info.GetAdjustSpReg()) {
        // Drop SP to the frame's bottom in a single subtraction.
        auto sp_to_frame_end_offset = (sp_to_regs_slots + fl.GetRegsSlotsCount()) * fl.GetSlotSize();
        encoder->EncodeSub(sp, sp, Imm(sp_to_frame_end_offset));
    }
}
304
/// Emits the epilogue for the 'Native' calling convention: optional post_job,
/// reload of callee-saved registers, SP restore, FP/LR restore and Ret.
/// NOTE(review): this body is currently line-for-line identical to
/// GenerateEpilogue - consider delegating to it or extracting shared helpers.
void Aarch64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frame_info, std::function<void()> post_job)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto regdescr = static_cast<Aarch64RegisterDescription *>(GetRegfile());
    auto sp = GetTarget().GetStackReg();
    auto fp = GetTarget().GetFrameReg();
    auto lr = GetTarget().GetLinkReg();

    // Caller-provided work that must run before any register is restored.
    if (post_job) {
        post_job();
    }

    // Restore callee-registers
    RegMask callee_regs_mask;
    VRegMask callee_vregs_mask;
    // Must compute the same masks as the prologue, so the same
    // GetSaveUnusedCalleeRegs flag is passed.
    regdescr->FillUsedCalleeSavedRegisters(&callee_regs_mask, &callee_vregs_mask, frame_info.GetSaveUnusedCalleeRegs());

    auto callee_regs = CPURegList(CPURegister::kRegister, vixl::aarch64::kXRegSize, callee_regs_mask.GetValue());
    auto callee_vregs = CPURegList(CPURegister::kVRegister, vixl::aarch64::kXRegSize, callee_vregs_mask.GetValue());

    if (frame_info.GetAdjustSpReg()) {
        // SP points to the frame's bottom: positive offsets from SP.
        auto last_callee_reg = fl.GetRegsSlotsCount() - callee_regs_mask.Count();
        auto last_callee_vreg = fl.GetRegsSlotsCount() - fl.GetCalleeRegistersCount(false) - callee_vregs_mask.Count();
        GetMasm()->LoadCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(last_callee_reg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(last_callee_vreg * fl.GetSlotSize())));
    } else {
        // SP either points to the frame's top or frame's top + FPLR slot
        auto sp_to_regs_slots = CFrameLayout::GetTopToRegsSlotsCount();
        if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // Adjust for FPLR slot
            sp_to_regs_slots -= CFrameLayout::GetFpLrSlotsCount();
        }
        // Negative offsets from SP, symmetric to the prologue's stores.
        auto last_callee_reg = sp_to_regs_slots + callee_regs_mask.Count();
        auto last_callee_vreg = sp_to_regs_slots + fl.GetCalleeRegistersCount(false) + callee_vregs_mask.Count();
        GetMasm()->LoadCPURegList(callee_regs, MemOperand(VixlReg(sp), VixlImm(-last_callee_reg * fl.GetSlotSize())));
        GetMasm()->LoadCPURegList(callee_vregs, MemOperand(VixlReg(sp), VixlImm(-last_callee_vreg * fl.GetSlotSize())));
    }
    SET_CFI_OFFSET(pop_callees, encoder->GetCursorOffset());

    // Adjust SP
    if (frame_info.GetAdjustSpReg()) {
        // SP points to the frame's bottom
        auto sp_to_frame_top_slots = fl.GetRegsSlotsCount() + CFrameRegs::Start() - CFrameReturnAddr::Start();
        if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
            // FP/LR are popped separately below, so stop short of their slots.
            sp_to_frame_top_slots -= CFrameLayout::GetFpLrSlotsCount();
        }
        auto sp_to_frame_top_offset = sp_to_frame_top_slots * fl.GetSlotSize();
        encoder->EncodeAdd(sp, sp, Imm(sp_to_frame_top_offset));
    }

    // Restore FP and LR
    if (IsOsrMode()) {
        // In OSR mode FP/LR are reloaded FP-relative instead of popped from SP.
        encoder->EncodeAdd(sp, sp, Imm(CFrameLayout::GetFpLrSlotsCount() * fl.GetSlotSize()));
        encoder->EncodeLdp(fp, lr, false, MemRef(fp, -fl.GetOsrFpLrOffset()));
    } else if (frame_info.GetSaveFrameAndLinkRegs() || ProvideCFI()) {
        GetMasm()->PopCPURegList(vixl::aarch64::CPURegList(VixlReg(fp), VixlReg(lr)));
    }
    SET_CFI_OFFSET(pop_fplr, encoder->GetCursorOffset());

    GetMasm()->Ret();
}
368 } // namespace panda::compiler::aarch64
369