/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
Low-level calling convention
*/
#include <cmath>
#include "target/aarch32/target.h"

namespace panda::compiler::aarch32 {
using vixl::aarch32::RegisterList;
using vixl::aarch32::SRegister;
using vixl::aarch32::SRegisterList;

Aarch32CallingConvention::Aarch32CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}

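// Creates a parameter-info walker and consumes the first 'regs_offset' scalar
// parameter slots, so subsequent parameters are assigned locations after them.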
ParameterInfo *Aarch32CallingConvention::GetParameterInfo(uint8_t regs_offset)
{
    auto param_info = GetAllocator()->New<aarch32::Aarch32ParameterInfo>();
    for (int i = 0; i < regs_offset; ++i) {
        param_info->GetNativeParam(INT32_TYPE);
    }
    return param_info;
}

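// Returns the start address of the generated code buffer.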
void *Aarch32CallingConvention::GetCodeEntry()
{
    auto res = GetMasm()->GetBuffer()->GetOffsetAddress<uint32_t *>(0);
    return reinterpret_cast<void *>(res);
}

// Returns the size in bytes of the code generated so far.
uint32_t Aarch32CallingConvention::GetCodeSize()
{
    return GetMasm()->GetSizeOfCodeGenerated();
}

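// Pushes (is_push == true) or pops the S registers selected in 'vregs'.
// VPUSH/VPOP can only address a consecutive run of S registers, so a single
// instruction is emitted when the mask is contiguous; otherwise the registers
// are transferred one at a time. Returns the number of 32-bit words moved.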
uint8_t Aarch32CallingConvention::PushPopVRegs(VRegMask vregs, bool is_push = true)
{
    int8_t first = -1;
    uint8_t size = 0;
    bool is_sequential = true;
    // Find the first set register and check whether the mask is one contiguous run
    for (size_t i = 0; i < vregs.size(); ++i) {
        if (first == -1 && vregs.test(i)) {
            first = i;
            ++size;
            continue;
        }
        if (vregs.test(i)) {
            if (!vregs.test(i - 1)) {
                is_sequential = false;
                break;
            }
            ++size;
        }
    }
    if (first == -1) {
        ASSERT(size == 0);
        return 0;
    }

    if (is_sequential) {
        auto reg_list = vixl::aarch32::SRegisterList(vixl::aarch32::SRegister(first), size);
        if (is_push) {
            GetMasm()->Vpush(reg_list);
        } else {
            GetMasm()->Vpop(reg_list);
        }
        return size;
    }

    // Non-contiguous mask: transfer the registers one at a time. Pushes walk the
    // mask from the highest register down so that pops (lowest up) mirror them.
    uint32_t real_offset = 0;
    if (is_push) {
        for (int32_t i = vregs.size() - 1; i >= 0; --i) {
            if (vregs.test(i)) {
                GetMasm()->PushRegister(VixlVReg(Reg(i, FLOAT32_TYPE)).S());
                ++real_offset;
            }
        }
    } else {
        constexpr auto VREG_SIZE = 1;
        for (size_t i = 0; i < vregs.size(); ++i) {
            if (vregs.test(i)) {
                GetMasm()->Vpop(vixl::aarch32::SRegisterList(VixlVReg(Reg(i, FLOAT32_TYPE)).S(), VREG_SIZE));
                ++real_offset;
            }
        }
    }
    return real_offset;
}

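// Saves the core registers in 'regs' and the vector registers in 'vregs' on the
// stack. FP and LR are excluded because the prologue saves them separately.
// When the total count of saved words is odd, an extra padding register is
// pushed to keep SP 8-byte aligned, as the AAPCS requires at call boundaries.
// Returns the number of 32-bit words pushed.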
uint8_t Aarch32CallingConvention::PushRegs(RegMask regs, VRegMask vregs, bool is_callee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());
    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t real_offset = 0;
    uint32_t saved_registers_mask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            saved_registers_mask |= 1UL << i;
            ++real_offset;
        }
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        // TODO(igorban) move them to Sub(sp)
        uint8_t align_reg = regdescr->GetAligmentReg(is_callee);
        GetMasm()->PushRegister(vixl::aarch32::Register(align_reg));
        ++real_offset;
    }

    if (saved_registers_mask != 0) {
        GetMasm()->Push(vixl::aarch32::RegisterList(saved_registers_mask));
    }
    real_offset += PushPopVRegs(vregs, true);
    ASSERT((real_offset & 1U) == 0);

    return real_offset;
}

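// Restores the registers saved by PushRegs, in reverse order: vector registers
// first, then the core register list, then the alignment padding register if
// one was pushed. Returns the number of 32-bit words popped.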
uint8_t Aarch32CallingConvention::PopRegs(RegMask regs, VRegMask vregs, bool is_callee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());

    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t real_offset = 0;
    real_offset += PushPopVRegs(vregs, false);

    uint32_t saved_registers_mask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            saved_registers_mask |= 1UL << i;
            ++real_offset;
        }
    }

    if (saved_registers_mask != 0) {
        GetMasm()->Pop(vixl::aarch32::RegisterList(saved_registers_mask));
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        uint8_t align_reg = regdescr->GetAligmentReg(is_callee);
        GetMasm()->Pop(vixl::aarch32::Register(align_reg));
        ++real_offset;
    }
    ASSERT((real_offset & 1U) == 0);

    return real_offset;
}

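// Assigns the next parameter of the given type to its native location following
// the AAPCS: scalar arguments go to r0-r3, with 64-bit values placed in an
// even-aligned register pair; under the hard-float ABI, floats go to s0-s15 and
// doubles to d0-d7. Once registers are exhausted, a word-sized stack slot index
// is returned instead of a Reg. For example, under softfp an (int64_t, float)
// signature maps to the r0/r1 pair and r2.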
std::variant<Reg, uint8_t> Aarch32ParameterInfo::GetNativeParam(const TypeInfo &type)
{
    constexpr int32_t STEP = 2;
#if (PANDA_TARGET_ARM32_ABI_HARD)
    // Use vector registers
    if (type == FLOAT32_TYPE) {
        if (current_vector_number_ > MAX_VECTOR_SINGLE_PARAM_ID) {
            return current_stack_offset_++;
        }
        return Reg(current_vector_number_++, FLOAT32_TYPE);
    }
    if (type == FLOAT64_TYPE) {
        // Alignment for 8 bytes (in stack and registers)
        if ((current_vector_number_ & 1U) == 1) {
            ++current_vector_number_;
        }
        if ((current_vector_number_ >> 1U) > MAX_VECTOR_DOUBLE_PARAM_ID) {
            if ((current_stack_offset_ & 1U) == 1) {
                ++current_stack_offset_;
            }
            auto stack_offset = current_stack_offset_;
            current_stack_offset_ += STEP;
            return stack_offset;
        }
        auto vector_number = current_vector_number_;
        current_vector_number_ += STEP;
        return Reg(vector_number, FLOAT64_TYPE);
    }
#endif // PANDA_TARGET_ARM32_ABI_HARD
    if (type.GetSize() == DOUBLE_WORD_SIZE) {
        // Alignment for 8 bytes (in stack and registers)
        if ((current_scalar_number_ & 1U) == 1) {
            ++current_scalar_number_;
        }
        if (current_scalar_number_ > MAX_SCALAR_PARAM_ID) {
            if ((current_stack_offset_ & 1U) == 1) {
                ++current_stack_offset_;
            }
            auto stack_offset = current_stack_offset_;
            current_stack_offset_ += STEP;
            return stack_offset;
        }
        auto scalar_number = current_scalar_number_;
        current_scalar_number_ += STEP;
        return Reg(scalar_number, INT64_TYPE);
    }
    if (current_scalar_number_ > MAX_SCALAR_PARAM_ID) {
        return current_stack_offset_++;
    }
    ASSERT(!type.IsFloat() || type == FLOAT32_TYPE);
    return Reg(current_scalar_number_++, type.IsFloat() ? INT32_TYPE : type);
}

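// Wraps GetNativeParam into a Location. Under the soft and softfp ABIs a float
// argument lives in a core register, so its location is created without the
// float type.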
Location Aarch32ParameterInfo::GetNextLocation(DataType::Type type)
{
    auto res = GetNativeParam(TypeInfo::FromDataType(type, Arch::AARCH32));
    if (std::holds_alternative<Reg>(res)) {
        auto reg = std::get<Reg>(res);
#if (PANDA_TARGET_ARM32_ABI_SOFT || PANDA_TARGET_ARM32_ABI_SOFTFP)
        if (DataType::IsFloatType(type)) {
            return Location::MakeRegister(reg.GetId());
        }
#endif
        return Location::MakeRegister(reg.GetId(), type);
    }
    return Location::MakeStackArgument(std::get<uint8_t>(res));
}

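// Emits the method prologue: saves FP and LR, sets up the frame pointer,
// reserves the method and flags slots and stores the incoming method pointer,
// allocates the locals area, saves callee-saved core and VFP registers, writes
// the frame flags, and finally reserves the spill and caller-saved areas.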
void Aarch32CallingConvention::GeneratePrologue(const FrameInfo &frame_info)
{
    auto encoder = GetEncoder();
    ASSERT(encoder->IsValid());
    // InitMasm() has a side effect, so it must not live inside ASSERT(),
    // which is compiled out in release builds
    [[maybe_unused]] bool masm_inited = encoder->InitMasm();
    ASSERT(masm_inited);
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto fp_reg = GetTarget().GetFrameReg();
    auto sp_reg = GetTarget().GetStackReg();

    GetMasm()->Push(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(push_fplr, encoder->GetCursorOffset());

    encoder->EncodeMov(fp_reg, sp_reg);
    SET_CFI_OFFSET(set_fp, encoder->GetCursorOffset());
    // Reserve two slots (method and flags) and store the method pointer
    constexpr auto IMM_2 = 2;
    encoder->EncodeSub(sp_reg, sp_reg, Imm(WORD_SIZE_BYTE * IMM_2));
    encoder->EncodeStr(GetTarget().GetParamReg(0), MemRef(sp_reg, WORD_SIZE_BYTE));

    // Allocate space for locals
    auto locals_size = (CFrameSlots::Start() - CFrameData::Start()) * WORD_SIZE_BYTE;
    encoder->EncodeSub(sp_reg, sp_reg, Imm(locals_size));

    SET_CFI_CALLEE_REGS(GetCalleeRegsMask(Arch::AARCH32, false));
    SET_CFI_CALLEE_VREGS(GetCalleeRegsMask(Arch::AARCH32, true));
    GetMasm()->Push(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    GetMasm()->Vpush(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    SET_CFI_OFFSET(push_callees, encoder->GetCursorOffset());

    // Reset OSR flag and set HasFloatRegsFlag
    auto callee_regs_size =
        (GetCalleeRegsCount(Arch::AARCH32, true) + GetCalleeRegsCount(Arch::AARCH32, false)) * WORD_SIZE_BYTE;
    auto flags {static_cast<uint32_t>(frame_info.GetHasFloatRegs()) << CFrameLayout::HasFloatRegsFlag::START_BIT};
    encoder->EncodeSti(Imm(flags), MemRef(sp_reg, callee_regs_size + locals_size));

    // Reserve the spill area and the caller-saved register area
    encoder->EncodeSub(
        sp_reg, sp_reg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTE));
}

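// Emits the method epilogue, unwinding the prologue in reverse: releases the
// spill and caller-saved areas, restores callee-saved VFP and core registers,
// drops the method/flags and locals slots, restores FP and LR, and returns.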
void Aarch32CallingConvention::GenerateEpilogue([[maybe_unused]] const FrameInfo &frame_info,
                                                std::function<void()> /* post_job */)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto sp_reg = GetTarget().GetStackReg();

    encoder->EncodeAdd(
        sp_reg, sp_reg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTE));

    GetMasm()->Vpop(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    GetMasm()->Pop(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    SET_CFI_OFFSET(pop_callees, encoder->GetCursorOffset());

    // ARM32 doesn't support OSR mode
    ASSERT(!IsOsrMode());
    // Support restoring of LR and FP registers once OSR is supported in arm32
    static_assert(!ArchTraits<Arch::AARCH32>::SUPPORT_OSR);
    constexpr auto IMM_2 = 2;
    encoder->EncodeAdd(sp_reg, sp_reg, Imm(WORD_SIZE_BYTE * IMM_2));
    encoder->EncodeAdd(sp_reg, sp_reg, Imm(WORD_SIZE_BYTE * (CFrameSlots::Start() - CFrameData::Start())));

    GetMasm()->Pop(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(pop_fplr, encoder->GetCursorOffset());

    encoder->EncodeReturn();
}
}  // namespace panda::compiler::aarch32