/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Low-level calling convention for AArch32.
 */
#include "target/aarch32/target.h"
#include <cmath>

namespace ark::compiler::aarch32 {
using vixl::aarch32::RegisterList;
using vixl::aarch32::SRegister;
using vixl::aarch32::SRegisterList;

constexpr size_t MAX_SCALAR_PARAM_ID = 3;                           // r0-r3
[[maybe_unused]] constexpr size_t MAX_VECTOR_SINGLE_PARAM_ID = 15;  // s0-s15
[[maybe_unused]] constexpr size_t MAX_VECTOR_DOUBLE_PARAM_ID = 7;   // d0-d7

Aarch32CallingConvention::Aarch32CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}

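// Creates a fresh parameter-assignment state and pre-consumes the first
// |regsOffset| scalar argument registers (as INT32 parameters), so that the
// real parameters are laid out starting after the reserved registers.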
ParameterInfo *Aarch32CallingConvention::GetParameterInfo(uint8_t regsOffset)
{
    auto paramInfo = GetAllocator()->New<aarch32::Aarch32ParameterInfo>();
    for (int i = 0; i < regsOffset; ++i) {
        paramInfo->GetNativeParam(INT32_TYPE);
    }
    return paramInfo;
}

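// Returns the address of the first generated instruction in the assembler buffer.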
void *Aarch32CallingConvention::GetCodeEntry()
{
    auto res = GetMasm()->GetBuffer()->GetOffsetAddress<uint32_t *>(0);
    return reinterpret_cast<void *>(res);
}

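// Returns the number of code bytes generated so far.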
uint32_t Aarch32CallingConvention::GetCodeSize()
{
    return GetMasm()->GetSizeOfCodeGenerated();
}

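// Pushes (or pops) the S registers selected by |vregs| and returns the number
// of single-word stack slots moved. If the selected registers form one
// contiguous run, a single VPUSH/VPOP of an SRegisterList is emitted;
// otherwise the registers are transferred one at a time.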
uint8_t Aarch32CallingConvention::PushPopVRegs(VRegMask vregs, bool isPush /* = true */)
{
    int8_t first = -1;
    uint8_t size = 0;
    bool isSequential = true;
    for (size_t i = 0; i < vregs.size(); ++i) {
        if (-1 == first && vregs.test(i)) {
            first = i;
            ++size;
            continue;
        }
        if (vregs.test(i)) {
            if (!vregs.test(i - 1)) {
                isSequential = false;
                break;
            }
            ++size;
        }
    }
    if (first == -1) {
        ASSERT(size == 0);
        return 0;
    }

    if (isSequential) {
        auto regList = vixl::aarch32::SRegisterList(vixl::aarch32::SRegister(first), size);
        if (isPush) {
            GetMasm()->Vpush(regList);
        } else {
            GetMasm()->Vpop(regList);
        }
        return size;
    }

    uint32_t realOffset = 0;
    if (isPush) {
        // Push in descending register order so that a later ascending pop restores correctly
        for (int32_t i = vregs.size() - 1; i >= 0; --i) {
            if (vregs.test(i)) {
                GetMasm()->PushRegister(VixlVReg(Reg(i, FLOAT32_TYPE)).S());
                ++realOffset;
            }
        }
    } else {
        constexpr auto VREG_SIZE = 1;
        for (size_t i = 0; i < vregs.size(); ++i) {
            if (vregs.test(i)) {
                GetMasm()->Vpop(vixl::aarch32::SRegisterList(VixlVReg(Reg(i, FLOAT32_TYPE)).S(), VREG_SIZE));
                ++realOffset;
            }
        }
    }
    return realOffset;
}

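// Shortcut to the vixl macro-assembler owned by the underlying Aarch32Encoder.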
vixl::aarch32::MacroAssembler *Aarch32CallingConvention::GetMasm()
{
    return (static_cast<Aarch32Encoder *>(GetEncoder()))->GetMasm();
}

constexpr auto Aarch32CallingConvention::GetTarget()
{
    return ark::compiler::Target(Arch::AARCH32);
}

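// Saves the registers selected by |regs| and |vregs| on the stack and returns
// the number of single-word slots used. FP and LR are excluded because the
// prologue saves them separately. If the total register count is odd, one
// extra "alignment" register is pushed first, keeping SP 8-byte aligned as the
// AAPCS requires; the final ASSERT checks that the slot count is even.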
uint8_t Aarch32CallingConvention::PushRegs(RegMask regs, VRegMask vregs, bool isCallee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());
    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t realOffset = 0;
    uint32_t savedRegistersMask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            savedRegistersMask |= 1UL << i;
            ++realOffset;
        }
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        // NOTE(igorban) move them to Sub(sp)
        uint8_t alignReg = regdescr->GetAligmentReg(isCallee);
        GetMasm()->PushRegister(vixl::aarch32::Register(alignReg));
        ++realOffset;
    }

    if (savedRegistersMask != 0) {
        GetMasm()->Push(vixl::aarch32::RegisterList(savedRegistersMask));
    }
    realOffset += PushPopVRegs(vregs, true);
    ASSERT((realOffset & 1U) == 0);

    return realOffset;
}

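// Restores the registers saved by PushRegs and returns the number of
// single-word slots released. Operations are emitted in exact reverse order
// of PushRegs: vector registers first, then core registers, then the
// alignment register, if one was pushed.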
uint8_t Aarch32CallingConvention::PopRegs(RegMask regs, VRegMask vregs, bool isCallee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());

    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t realOffset = 0;
    realOffset += PushPopVRegs(vregs, false);

    uint32_t savedRegistersMask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            savedRegistersMask |= 1UL << i;
            ++realOffset;
        }
    }

    if (savedRegistersMask != 0) {
        GetMasm()->Pop(vixl::aarch32::RegisterList(savedRegistersMask));
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        uint8_t alignReg = regdescr->GetAligmentReg(isCallee);
        GetMasm()->Pop(vixl::aarch32::Register(alignReg));
        ++realOffset;
    }
    ASSERT((realOffset & 1U) == 0);

    return realOffset;
}

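// Assigns the next parameter location following the AAPCS. Under the
// hard-float ABI (PANDA_TARGET_ARM32_ABI_HARD), FP arguments go to s0-s15 /
// d0-d7; otherwise they travel in core registers like integers. 64-bit values
// occupy an even/odd register pair (r0:r1 or r2:r3) or an 8-byte-aligned pair
// of stack slots. The result is either a Reg or a word-sized stack slot index.
//
// Example (soft-float): for f(int32_t, int64_t, int32_t) the assignments are
// r0, then r2:r3 (r1 is skipped for 8-byte alignment), then stack slot 0.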
std::variant<Reg, uint8_t> Aarch32ParameterInfo::GetNativeParam(const TypeInfo &type)
{
    constexpr int32_t STEP = 2;
#if (PANDA_TARGET_ARM32_ABI_HARD)
    // Use vector registers
    if (type == FLOAT32_TYPE) {
        if (currentVectorNumber_ > MAX_VECTOR_SINGLE_PARAM_ID) {
            return currentStackOffset_++;
        }
        return Reg(currentVectorNumber_++, FLOAT32_TYPE);
    }
    if (type == FLOAT64_TYPE) {
        // Alignment for 8 bytes (in stack and registers)
        if ((currentVectorNumber_ & 1U) == 1) {
            ++currentVectorNumber_;
        }
        if ((currentVectorNumber_ >> 1U) > MAX_VECTOR_DOUBLE_PARAM_ID) {
            if ((currentStackOffset_ & 1U) == 1) {
                ++currentStackOffset_;
            }
            auto stackOffset = currentStackOffset_;
            currentStackOffset_ += STEP;
            return stackOffset;
        }
        auto vectorNumber = currentVectorNumber_;
        currentVectorNumber_ += STEP;
        return Reg(vectorNumber, FLOAT64_TYPE);
    }
#endif  // PANDA_TARGET_ARM32_ABI_HARD
    if (type.GetSize() == DOUBLE_WORD_SIZE) {
        // Alignment for 8 bytes (in stack and registers)
        if ((currentScalarNumber_ & 1U) == 1) {
            ++currentScalarNumber_;
        }
        if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
            if ((currentStackOffset_ & 1U) == 1) {
                ++currentStackOffset_;
            }
            auto stackOffset = currentStackOffset_;
            currentStackOffset_ += STEP;
            return stackOffset;
        }
        auto scalarNumber = currentScalarNumber_;
        currentScalarNumber_ += STEP;
        return Reg(scalarNumber, INT64_TYPE);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return currentStackOffset_++;
    }
    ASSERT(!type.IsFloat() || type == FLOAT32_TYPE);
    return Reg(currentScalarNumber_++, type.IsFloat() ? INT32_TYPE : type);
}

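// Maps the native parameter assignment to an IR Location: a register location
// when a register was allocated, a stack-argument slot otherwise. Under the
// soft/softfp ABIs a float value still travels in a core register, so the
// register location is created without the float type.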
Location Aarch32ParameterInfo::GetNextLocation(DataType::Type type)
{
    auto res = GetNativeParam(TypeInfo::FromDataType(type, Arch::AARCH32));
    if (std::holds_alternative<Reg>(res)) {
        auto reg = std::get<Reg>(res);
#if (PANDA_TARGET_ARM32_ABI_SOFT || PANDA_TARGET_ARM32_ABI_SOFTFP)
        if (DataType::IsFloatType(type)) {
            return Location::MakeRegister(reg.GetId());
        }
#endif
        return Location::MakeRegister(reg.GetId(), type);
    }
    return Location::MakeStackArgument(std::get<uint8_t>(res));
}

bool Aarch32CallingConvention::IsValid() const
{
    return true;
}

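// On AArch32 the native prologue and epilogue are identical to the regular ones.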
void Aarch32CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    GeneratePrologue(frameInfo);
}

void Aarch32CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogue(frameInfo, postJob);
}

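// Emits the method prologue:
//   push {fp, lr}; fp = sp                     -- establish the frame chain
//   reserve two words and spill the first argument register (keeps SP 8-byte aligned)
//   allocate the locals area
//   push callee-saved core and VFP registers
//   store the frame flags word (sets HasFloatRegs, clears the OSR flag)
//   reserve the spill slots and caller-saved register areas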
void Aarch32CallingConvention::GeneratePrologue(const FrameInfo &frameInfo)
{
    auto encoder = GetEncoder();
    ASSERT(encoder->IsValid());
    // Initialize the masm outside of ASSERT so the call is not compiled out in release builds
    [[maybe_unused]] bool masmInitialized = encoder->InitMasm();
    ASSERT(masmInitialized);
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto fpReg = GetTarget().GetFrameReg();
    auto spReg = GetTarget().GetStackReg();

    GetMasm()->Push(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());

    ASSERT(!IsDynCallMode());

    encoder->EncodeMov(fpReg, spReg);
    SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    constexpr auto IMM_2 = 2;
    encoder->EncodeSub(spReg, spReg, Imm(WORD_SIZE_BYTES * IMM_2));
    encoder->EncodeStr(GetTarget().GetParamReg(0), MemRef(spReg, WORD_SIZE_BYTES));

    // Allocate space for locals
    auto localsSize = (CFrameSlots::Start() - CFrameData::Start()) * WORD_SIZE_BYTES;
    encoder->EncodeSub(spReg, spReg, Imm(localsSize));

    SET_CFI_CALLEE_REGS(GetCalleeRegsMask(Arch::AARCH32, false));
    SET_CFI_CALLEE_VREGS(GetCalleeRegsMask(Arch::AARCH32, true));
    GetMasm()->Push(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    GetMasm()->Vpush(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    SET_CFI_OFFSET(pushCallees, encoder->GetCursorOffset());

    // Reset the OSR flag and set HasFloatRegsFlag
    auto calleeRegsSize =
        (GetCalleeRegsCount(Arch::AARCH32, true) + GetCalleeRegsCount(Arch::AARCH32, false)) * WORD_SIZE_BYTES;
    auto flags {static_cast<uint32_t>(frameInfo.GetHasFloatRegs()) << CFrameLayout::HasFloatRegsFlag::START_BIT};
    encoder->EncodeSti(flags, sizeof(flags), MemRef(spReg, calleeRegsSize + localsSize));

    encoder->EncodeSub(
        spReg, spReg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTES));
}

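// Emits the method epilogue, mirroring GeneratePrologue in reverse: release
// the spill and caller-saved areas, restore callee-saved VFP and core
// registers, drop the locals and the two argument-spill words, pop {fp, lr},
// and return. The postJob callback is not used on this target.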
void Aarch32CallingConvention::GenerateEpilogue([[maybe_unused]] const FrameInfo &frameInfo,
                                                std::function<void()> /* postJob */)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto spReg = GetTarget().GetStackReg();

    encoder->EncodeAdd(
        spReg, spReg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTES));

    GetMasm()->Vpop(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    GetMasm()->Pop(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    SET_CFI_OFFSET(popCallees, encoder->GetCursorOffset());

    // ARM32 doesn't support OSR mode
    ASSERT(!IsOsrMode());
    // Support restoring of LR and FP registers once OSR is supported in arm32
    static_assert(!ArchTraits<Arch::AARCH32>::SUPPORT_OSR);
    constexpr auto IMM_2 = 2;
    encoder->EncodeAdd(spReg, spReg, Imm(WORD_SIZE_BYTES * IMM_2));
    encoder->EncodeAdd(spReg, spReg, Imm(WORD_SIZE_BYTES * (CFrameSlots::Start() - CFrameData::Start())));

    GetMasm()->Pop(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(popFplr, encoder->GetCursorOffset());

    encoder->EncodeReturn();
}
}  // namespace ark::compiler::aarch32