/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Low-level calling convention for the amd64 (x86_64) backend.
 */
18 #include "target/amd64/target.h"
19
20 namespace ark::compiler::amd64 {
21
Amd64CallingConvention(ArenaAllocator * allocator,Encoder * enc,RegistersDescription * descr,CallConvMode mode)22 Amd64CallingConvention::Amd64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
23 CallConvMode mode)
24 : CallingConvention(allocator, enc, descr, mode)
25 {
26 }
27
GetTarget()28 constexpr auto Amd64CallingConvention::GetTarget()
29 {
30 return ark::compiler::Target(Arch::X86_64);
31 }
32
IsValid() const33 bool Amd64CallingConvention::IsValid() const
34 {
35 return true;
36 }
37
GetParameterInfo(uint8_t regsOffset)38 ParameterInfo *Amd64CallingConvention::GetParameterInfo(uint8_t regsOffset)
39 {
40 auto paramInfo = GetAllocator()->New<amd64::Amd64ParameterInfo>();
41 // reserve first parameter to method pointer
42 for (int i = 0; i < regsOffset; ++i) {
43 paramInfo->GetNativeParam(INT64_TYPE);
44 }
45 return paramInfo;
46 }
47
GetCodeEntry()48 void *Amd64CallingConvention::GetCodeEntry()
49 {
50 auto code = static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->code();
51 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
52 return reinterpret_cast<void *>(code->baseAddress());
53 }
54
GetCodeSize()55 uint32_t Amd64CallingConvention::GetCodeSize()
56 {
57 return static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->code()->codeSize();
58 }
59
PushRegs(RegList regs,RegList vregs)60 size_t Amd64CallingConvention::PushRegs(RegList regs, RegList vregs)
61 {
62 size_t regsCount {0};
63 size_t vregsCount {0};
64
65 for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
66 uint32_t ii {MAX_NUM_REGS - i - 1};
67 if (vregs.Has(ii)) {
68 ++vregsCount;
69 GetMasm()->sub(asmjit::x86::rsp, asmjit::imm(DOUBLE_WORD_SIZE_BYTES));
70 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), asmjit::x86::xmm(ii));
71 }
72 }
73
74 for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
75 uint32_t ii {MAX_NUM_REGS - i - 1};
76 if (regs.Has(ii)) {
77 ++regsCount;
78 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(ii)));
79 }
80 }
81
82 return vregsCount + regsCount;
83 }
84
PopRegs(RegList regs,RegList vregs)85 size_t Amd64CallingConvention::PopRegs(RegList regs, RegList vregs)
86 {
87 size_t regsCount {0};
88 size_t vregsCount {0};
89
90 for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
91 if (regs.Has(i)) {
92 ++regsCount;
93 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
94 }
95 }
96
97 for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
98 if (vregs.Has(i)) {
99 ++vregsCount;
100 GetMasm()->movsd(asmjit::x86::xmm(i), asmjit::x86::ptr(asmjit::x86::rsp));
101 GetMasm()->add(asmjit::x86::rsp, asmjit::imm(DOUBLE_WORD_SIZE_BYTES));
102 }
103 }
104
105 return vregsCount + regsCount;
106 }
107
GetNativeParam(const TypeInfo & type)108 std::variant<Reg, uint8_t> Amd64ParameterInfo::GetNativeParam(const TypeInfo &type)
109 {
110 if (type.IsFloat()) {
111 if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
112 return currentStackOffset_++;
113 }
114 return Reg(currentVectorNumber_++, type);
115 }
116 if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
117 return currentStackOffset_++;
118 }
119
120 return Target(Arch::X86_64).GetParamReg(currentScalarNumber_++, type);
121 }
122
GetNextLocation(DataType::Type type)123 Location Amd64ParameterInfo::GetNextLocation(DataType::Type type)
124 {
125 if (DataType::IsFloatType(type)) {
126 if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
127 return Location::MakeStackArgument(currentStackOffset_++);
128 }
129 return Location::MakeFpRegister(currentVectorNumber_++);
130 }
131 if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
132 return Location::MakeStackArgument(currentStackOffset_++);
133 }
134 Target target(Arch::X86_64);
135 return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
136 }
137
GeneratePrologue(const FrameInfo & frameInfo)138 void Amd64CallingConvention::GeneratePrologue([[maybe_unused]] const FrameInfo &frameInfo)
139 {
140 auto encoder = GetEncoder();
141 const CFrameLayout &fl = encoder->GetFrameLayout();
142 auto fpReg = GetTarget().GetFrameReg();
143 auto spReg = GetTarget().GetStackReg();
144
145 // we do not push return address, because in amd64 call instruction already pushed it
146 GetMasm()->push(asmjit::x86::rbp); // frame pointer
147 SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
148
149 encoder->EncodeMov(fpReg, spReg);
150 SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
151
152 if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
153 static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
154 static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);
155
156 constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
157 constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
158 auto numExpected = GetDynInfo().GetNumExpectedArgs();
159
160 auto expandDone = encoder->CreateLabel();
161 encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
162 encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));
163
164 MemRef expandEntrypoint(Reg(GetThreadReg(Arch::X86_64), GetTarget().GetPtrRegType()),
165 GetDynInfo().GetExpandEntrypointTlsOffset());
166 GetEncoder()->MakeCall(expandEntrypoint);
167 encoder->BindLabel(expandDone);
168 }
169
170 encoder->EncodeSub(spReg, spReg, Imm(2U * DOUBLE_WORD_SIZE_BYTES));
171 encoder->EncodeStr(GetTarget().GetParamReg(0), MemRef(spReg, DOUBLE_WORD_SIZE_BYTES));
172
173 // Reset OSR flag and set HasFloatRegsFlag
174 auto flags {static_cast<uint64_t>(frameInfo.GetHasFloatRegs()) << CFrameLayout::HasFloatRegsFlag::START_BIT};
175 encoder->EncodeSti(flags, sizeof(flags), MemRef(spReg));
176 // Allocate space for locals
177 encoder->EncodeSub(spReg, spReg, Imm(DOUBLE_WORD_SIZE_BYTES * (CFrameSlots::Start() - CFrameData::Start())));
178 static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
179
180 RegList calleeRegs {GetCalleeRegsMask(Arch::X86_64, false).GetValue()};
181 RegList calleeVregs {GetCalleeRegsMask(Arch::X86_64, true).GetValue()};
182 SET_CFI_CALLEE_REGS(RegMask(static_cast<size_t>(calleeRegs)));
183 SET_CFI_CALLEE_VREGS(VRegMask(static_cast<size_t>(calleeVregs)));
184 PushRegs(calleeRegs, calleeVregs);
185 SET_CFI_OFFSET(pushCallees, encoder->GetCursorOffset());
186
187 encoder->EncodeSub(
188 spReg, spReg,
189 Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
190 DOUBLE_WORD_SIZE_BYTES));
191 }
192
GenerateEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)193 void Amd64CallingConvention::GenerateEpilogue([[maybe_unused]] const FrameInfo &frameInfo,
194 std::function<void()> postJob)
195 {
196 auto encoder = GetEncoder();
197 const CFrameLayout &fl = encoder->GetFrameLayout();
198 auto spReg = GetTarget().GetStackReg();
199
200 if (postJob) {
201 postJob();
202 }
203
204 encoder->EncodeAdd(
205 spReg, spReg,
206 Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
207 DOUBLE_WORD_SIZE_BYTES));
208
209 PopRegs(RegList(GetCalleeRegsMask(Arch::X86_64, false).GetValue()),
210 RegList(GetCalleeRegsMask(Arch::X86_64, true).GetValue()));
211 SET_CFI_OFFSET(popCallees, encoder->GetCursorOffset());
212
213 // X86_64 doesn't support OSR mode
214 ASSERT(!IsOsrMode());
215 // Support restoring of LR and FP registers once OSR is supported in x86_64
216 static_assert(!ArchTraits<Arch::X86_64>::SUPPORT_OSR);
217 constexpr auto SHIFT = DOUBLE_WORD_SIZE_BYTES * (2 + CFrameSlots::Start() - CFrameData::Start());
218 encoder->EncodeAdd(spReg, spReg, Imm(SHIFT));
219
220 GetMasm()->pop(asmjit::x86::rbp); // frame pointer
221 SET_CFI_OFFSET(popFplr, encoder->GetCursorOffset());
222 GetMasm()->ret();
223 }
224
GenerateNativePrologue(const FrameInfo & frameInfo)225 void Amd64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
226 {
227 GeneratePrologue(frameInfo);
228 }
229
GenerateNativeEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)230 void Amd64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
231 {
232 GenerateEpilogue(frameInfo, postJob);
233 }
234
GetMasm()235 asmjit::x86::Assembler *Amd64CallingConvention::GetMasm()
236 {
237 return (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
238 }
239
240 } // namespace ark::compiler::amd64
241