/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
Low-level calling convention
*/
#include "target/aarch32/target.h"
#include <cmath>

namespace ark::compiler::aarch32 {
using vixl::aarch32::RegisterList;
using vixl::aarch32::SRegister;
using vixl::aarch32::SRegisterList;

constexpr size_t MAX_SCALAR_PARAM_ID = 3;                           // r0-r3
[[maybe_unused]] constexpr size_t MAX_VECTOR_SINGLE_PARAM_ID = 15;  // s0-s15
[[maybe_unused]] constexpr size_t MAX_VECTOR_DOUBLE_PARAM_ID = 7;   // d0-d7

Aarch32CallingConvention::Aarch32CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                                   CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}

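// Creates the parameter-info object, pre-consuming the first regsOffset
// scalar registers so that subsequent parameters are assigned after them.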
ParameterInfo *Aarch32CallingConvention::GetParameterInfo(uint8_t regsOffset)
{
    auto paramInfo = GetAllocator()->New<aarch32::Aarch32ParameterInfo>();
    for (int i = 0; i < regsOffset; ++i) {
        paramInfo->GetNativeParam(INT32_TYPE);
    }
    return paramInfo;
}

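// Returns a pointer to the beginning of the generated code buffer.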
void *Aarch32CallingConvention::GetCodeEntry()
{
    auto res = GetMasm()->GetBuffer()->GetOffsetAddress<uint32_t *>(0);
    return reinterpret_cast<void *>(res);
}

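// Returns the size in bytes of the code generated so far.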
uint32_t Aarch32CallingConvention::GetCodeSize()
{
    return GetMasm()->GetSizeOfCodeGenerated();
}

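// Saves (isPush) or restores the S-registers selected by vregs. A contiguous
// mask is handled with a single vpush/vpop of an SRegisterList; otherwise the
// registers are transferred one at a time. Returns the number of registers moved.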
uint8_t Aarch32CallingConvention::PushPopVRegs(VRegMask vregs, bool isPush = true)
{
    int8_t first = -1;
    uint8_t size = 0;
    bool isSequential = true;
    // Find the first set bit and check whether all set bits form one contiguous run.
    for (size_t i = 0; i < vregs.size(); ++i) {
        if (-1 == first && vregs.test(i)) {
            first = i;
            ++size;
            continue;
        }
        if (vregs.test(i)) {
            if (!vregs.test(i - 1)) {
                isSequential = false;
                break;
            }
            ++size;
        }
    }
    if (first == -1) {
        ASSERT(size == 0);
        return 0;
    }

    if (isSequential) {
        // One vpush/vpop covers the whole contiguous range.
        auto regList = vixl::aarch32::SRegisterList(vixl::aarch32::SRegister(first), size);
        if (isPush) {
            GetMasm()->Vpush(regList);
        } else {
            GetMasm()->Vpop(regList);
        }
        return size;
    }

    uint32_t realOffset = 0;
    if (isPush) {
        // Push from the highest register down so the layout matches the ascending pops below.
        for (int32_t i = vregs.size() - 1; i >= 0; --i) {
            if (vregs.test(i)) {
                GetMasm()->PushRegister(VixlVReg(Reg(i, FLOAT32_TYPE)).S());
                ++realOffset;
            }
        }
    } else {
        constexpr auto VREG_SIZE = 1;
        for (size_t i = 0; i < vregs.size(); ++i) {
            if (vregs.test(i)) {
                GetMasm()->Vpop(vixl::aarch32::SRegisterList(VixlVReg(Reg(i, FLOAT32_TYPE)).S(), VREG_SIZE));
                ++realOffset;
            }
        }
    }
    return realOffset;
}

vixl::aarch32::MacroAssembler *Aarch32CallingConvention::GetMasm()
{
    return (static_cast<Aarch32Encoder *>(GetEncoder()))->GetMasm();
}

constexpr auto Aarch32CallingConvention::GetTarget()
{
    return ark::compiler::Target(Arch::AARCH32);
}

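// Pushes the given core and vector registers onto the stack. FP and LR are
// excluded (the prologue saves them separately), and an extra alignment
// register is pushed when the total count is odd, keeping SP 8-byte aligned
// as the AAPCS requires. Returns the number of slots pushed.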
uint8_t Aarch32CallingConvention::PushRegs(RegMask regs, VRegMask vregs, bool isCallee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());
    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t realOffset = 0;
    uint32_t savedRegistersMask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            savedRegistersMask |= 1UL << i;
            ++realOffset;
        }
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        // NOTE(igorban) move them to Sub(sp)
        uint8_t alignReg = regdescr->GetAligmentReg(isCallee);
        GetMasm()->PushRegister(vixl::aarch32::Register(alignReg));
        ++realOffset;
    }

    if (savedRegistersMask != 0) {
        GetMasm()->Push(vixl::aarch32::RegisterList(savedRegistersMask));
    }
    realOffset += PushPopVRegs(vregs, true);
    ASSERT((realOffset & 1U) == 0);

    return realOffset;
}

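// Mirror of PushRegs: restores the vector registers first, then the core
// registers, then pops the alignment register if padding was pushed.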
uint8_t Aarch32CallingConvention::PopRegs(RegMask regs, VRegMask vregs, bool isCallee)
{
    auto regdescr = static_cast<Aarch32RegisterDescription *>(GetRegfile());

    auto fp = GetTarget().GetFrameReg().GetId();
    if (regs.test(fp)) {
        regs.reset(fp);
    }
    auto lr = GetTarget().GetLinkReg().GetId();
    if (regs.test(lr)) {
        regs.reset(lr);
    }

    uint8_t realOffset = 0;
    realOffset += PushPopVRegs(vregs, false);

    uint32_t savedRegistersMask = 0;

    for (size_t i = 0; i < regs.size(); ++i) {
        if (regs.test(i)) {
            savedRegistersMask |= 1UL << i;
            ++realOffset;
        }
    }

    if (savedRegistersMask != 0) {
        GetMasm()->Pop(vixl::aarch32::RegisterList(savedRegistersMask));
    }

    if (((regs.count() + vregs.count()) & 1U) == 1) {
        uint8_t alignReg = regdescr->GetAligmentReg(isCallee);
        GetMasm()->Pop(vixl::aarch32::Register(alignReg));
        ++realOffset;
    }
    ASSERT((realOffset & 1U) == 0);

    return realOffset;
}

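// Assigns the next parameter location per the AAPCS: scalar arguments use
// r0-r3; with the hard-float ABI, FP arguments use s0-s15 / d0-d7. 64-bit
// values start at an even register number and an 8-byte-aligned stack offset.
// Returns either a register or a stack slot index.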
std::variant<Reg, uint8_t> Aarch32ParameterInfo::GetNativeParam(const TypeInfo &type)
{
    constexpr int32_t STEP = 2;
#if (PANDA_TARGET_ARM32_ABI_HARD)
    // Use vector registers
    if (type == FLOAT32_TYPE) {
        if (currentVectorNumber_ > MAX_VECTOR_SINGLE_PARAM_ID) {
            return currentStackOffset_++;
        }
        return Reg(currentVectorNumber_++, FLOAT32_TYPE);
    }
    if (type == FLOAT64_TYPE) {
        // Alignment to 8 bytes (both on the stack and in registers)
        if ((currentVectorNumber_ & 1U) == 1) {
            ++currentVectorNumber_;
        }
        if ((currentVectorNumber_ >> 1U) > MAX_VECTOR_DOUBLE_PARAM_ID) {
            if ((currentStackOffset_ & 1U) == 1) {
                ++currentStackOffset_;
            }
            auto stackOffset = currentStackOffset_;
            currentStackOffset_ += STEP;
            return stackOffset;
        }
        auto vectorNumber = currentVectorNumber_;
        currentVectorNumber_ += STEP;
        return Reg(vectorNumber, FLOAT64_TYPE);
    }
#endif  // PANDA_TARGET_ARM32_ABI_HARD
    if (type.GetSize() == DOUBLE_WORD_SIZE) {
        // Alignment to 8 bytes (both on the stack and in registers)
        if ((currentScalarNumber_ & 1U) == 1) {
            ++currentScalarNumber_;
        }
        if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
            if ((currentStackOffset_ & 1U) == 1) {
                ++currentStackOffset_;
            }
            auto stackOffset = currentStackOffset_;
            currentStackOffset_ += STEP;
            return stackOffset;
        }
        auto scalarNumber = currentScalarNumber_;
        currentScalarNumber_ += STEP;
        return Reg(scalarNumber, INT64_TYPE);
    }
    if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
        return currentStackOffset_++;
    }
    ASSERT(!type.IsFloat() || type == FLOAT32_TYPE);
    return Reg(currentScalarNumber_++, type.IsFloat() ? INT32_TYPE : type);
}

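// Converts the native parameter assignment into a Location. Under the soft
// and softfp ABIs a floating-point argument arrives in a core register, so
// its location is created without the floating-point type.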
Location Aarch32ParameterInfo::GetNextLocation(DataType::Type type)
{
    auto res = GetNativeParam(TypeInfo::FromDataType(type, Arch::AARCH32));
    if (std::holds_alternative<Reg>(res)) {
        auto reg = std::get<Reg>(res);
#if (PANDA_TARGET_ARM32_ABI_SOFT || PANDA_TARGET_ARM32_ABI_SOFTFP)
        if (DataType::IsFloatType(type)) {
            return Location::MakeRegister(reg.GetId());
        }
#endif
        return Location::MakeRegister(reg.GetId(), type);
    }
    return Location::MakeStackArgument(std::get<uint8_t>(res));
}

bool Aarch32CallingConvention::IsValid() const
{
    return true;
}

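// On AArch32 the native prologue and epilogue coincide with the regular ones.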
void Aarch32CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    GeneratePrologue(frameInfo);
}

void Aarch32CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
{
    GenerateEpilogue(frameInfo, postJob);
}

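// Builds the compiled-code frame: saves FP/LR, sets the new FP, reserves two
// slots (the first parameter register and the frame-flags word are stored
// there), allocates locals, saves callee-saved core and FP registers, and
// finally reserves the spill and caller-saved areas.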
void Aarch32CallingConvention::GeneratePrologue([[maybe_unused]] const FrameInfo &frameInfo)
{
    auto encoder = GetEncoder();
    ASSERT(encoder->IsValid());
    ASSERT(encoder->InitMasm());
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto fpReg = GetTarget().GetFrameReg();
    auto spReg = GetTarget().GetStackReg();

    GetMasm()->Push(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());

    ASSERT(!IsDynCallMode());

    encoder->EncodeMov(fpReg, spReg);
    SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
    constexpr auto IMM_2 = 2;
    encoder->EncodeSub(spReg, spReg, Imm(WORD_SIZE_BYTES * IMM_2));
    encoder->EncodeStr(GetTarget().GetParamReg(0), MemRef(spReg, WORD_SIZE_BYTES));

    // Allocate space for locals
    auto localsSize = (CFrameSlots::Start() - CFrameData::Start()) * WORD_SIZE_BYTES;
    encoder->EncodeSub(spReg, spReg, Imm(localsSize));

    SET_CFI_CALLEE_REGS(GetCalleeRegsMask(Arch::AARCH32, false));
    SET_CFI_CALLEE_VREGS(GetCalleeRegsMask(Arch::AARCH32, true));
    GetMasm()->Push(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    GetMasm()->Vpush(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    SET_CFI_OFFSET(pushCallees, encoder->GetCursorOffset());

    // Reset OSR flag and set HasFloatRegsFlag
    auto calleeRegsSize =
        (GetCalleeRegsCount(Arch::AARCH32, true) + GetCalleeRegsCount(Arch::AARCH32, false)) * WORD_SIZE_BYTES;
    auto flags {static_cast<uint32_t>(frameInfo.GetHasFloatRegs()) << CFrameLayout::HasFloatRegsFlag::START_BIT};
    encoder->EncodeSti(flags, sizeof(flags), MemRef(spReg, calleeRegsSize + localsSize));

    encoder->EncodeSub(
        spReg, spReg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTES));
}

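// Unwinds the frame in the reverse order of GeneratePrologue: releases the
// spill and caller-saved areas, restores callee-saved registers, releases the
// locals and the two reserved slots, restores FP/LR, and returns.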
void Aarch32CallingConvention::GenerateEpilogue([[maybe_unused]] const FrameInfo &frameInfo,
                                                std::function<void()> /* post_job */)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto spReg = GetTarget().GetStackReg();

    encoder->EncodeAdd(
        spReg, spReg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            WORD_SIZE_BYTES));

    GetMasm()->Vpop(
        SRegisterList(SRegister(GetFirstCalleeReg(Arch::AARCH32, true)), GetCalleeRegsCount(Arch::AARCH32, true)));
    GetMasm()->Pop(RegisterList(GetCalleeRegsMask(Arch::AARCH32, false).GetValue()));
    SET_CFI_OFFSET(popCallees, encoder->GetCursorOffset());

    // ARM32 doesn't support OSR mode
    ASSERT(!IsOsrMode());
    // NOTE: restore LR and FP here once OSR is supported on arm32
    static_assert(!ArchTraits<Arch::AARCH32>::SUPPORT_OSR);
    constexpr auto IMM_2 = 2;
    encoder->EncodeAdd(spReg, spReg, Imm(WORD_SIZE_BYTES * IMM_2));
    encoder->EncodeAdd(spReg, spReg, Imm(WORD_SIZE_BYTES * (CFrameSlots::Start() - CFrameData::Start())));

    GetMasm()->Pop(RegisterList(vixl::aarch32::r11, vixl::aarch32::lr));
    SET_CFI_OFFSET(popFplr, encoder->GetCursorOffset());

    encoder->EncodeReturn();
}
}  // namespace ark::compiler::aarch32