• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Low-level calling convention
17 */
18 #include "target/amd64/target.h"
19 
20 namespace ark::compiler::amd64 {
21 
/// Construct the amd64 calling convention.
/// All state handling is delegated to the generic CallingConvention base;
/// this class only supplies the x86-64-specific code-generation behavior.
Amd64CallingConvention::Amd64CallingConvention(ArenaAllocator *allocator, Encoder *enc, RegistersDescription *descr,
                                               CallConvMode mode)
    : CallingConvention(allocator, enc, descr, mode)
{
}
27 
GetTarget()28 constexpr auto Amd64CallingConvention::GetTarget()
29 {
30     return ark::compiler::Target(Arch::X86_64);
31 }
32 
/// The amd64 calling convention holds no state that can become invalid,
/// so validity is unconditional.
bool Amd64CallingConvention::IsValid() const
{
    return true;
}
37 
GetParameterInfo(uint8_t regsOffset)38 ParameterInfo *Amd64CallingConvention::GetParameterInfo(uint8_t regsOffset)
39 {
40     auto paramInfo = GetAllocator()->New<amd64::Amd64ParameterInfo>();
41     // reserve first parameter to method pointer
42     for (int i = 0; i < regsOffset; ++i) {
43         paramInfo->GetNativeParam(INT64_TYPE);
44     }
45     return paramInfo;
46 }
47 
GetCodeEntry()48 void *Amd64CallingConvention::GetCodeEntry()
49 {
50     auto code = static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->code();
51     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
52     return reinterpret_cast<void *>(code->baseAddress());
53 }
54 
GetCodeSize()55 uint32_t Amd64CallingConvention::GetCodeSize()
56 {
57     return static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->code()->codeSize();
58 }
59 
PushRegs(RegList regs,RegList vregs)60 size_t Amd64CallingConvention::PushRegs(RegList regs, RegList vregs)
61 {
62     size_t regsCount {0};
63     size_t vregsCount {0};
64 
65     for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
66         uint32_t ii {MAX_NUM_REGS - i - 1};
67         if (vregs.Has(ii)) {
68             ++vregsCount;
69             GetMasm()->sub(asmjit::x86::rsp, asmjit::imm(DOUBLE_WORD_SIZE_BYTES));
70             GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), asmjit::x86::xmm(ii));
71         }
72     }
73 
74     for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
75         uint32_t ii {MAX_NUM_REGS - i - 1};
76         if (regs.Has(ii)) {
77             ++regsCount;
78             GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(ii)));
79         }
80     }
81 
82     return vregsCount + regsCount;
83 }
84 
PopRegs(RegList regs,RegList vregs)85 size_t Amd64CallingConvention::PopRegs(RegList regs, RegList vregs)
86 {
87     size_t regsCount {0};
88     size_t vregsCount {0};
89 
90     for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
91         if (regs.Has(i)) {
92             ++regsCount;
93             GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
94         }
95     }
96 
97     for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
98         if (vregs.Has(i)) {
99             ++vregsCount;
100             GetMasm()->movsd(asmjit::x86::xmm(i), asmjit::x86::ptr(asmjit::x86::rsp));
101             GetMasm()->add(asmjit::x86::rsp, asmjit::imm(DOUBLE_WORD_SIZE_BYTES));
102         }
103     }
104 
105     return vregsCount + regsCount;
106 }
107 
GetNativeParam(const TypeInfo & type)108 std::variant<Reg, uint8_t> Amd64ParameterInfo::GetNativeParam(const TypeInfo &type)
109 {
110     if (type.IsFloat()) {
111         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
112             return currentStackOffset_++;
113         }
114         return Reg(currentVectorNumber_++, type);
115     }
116     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
117         return currentStackOffset_++;
118     }
119 
120     return Target(Arch::X86_64).GetParamReg(currentScalarNumber_++, type);
121 }
122 
GetNextLocation(DataType::Type type)123 Location Amd64ParameterInfo::GetNextLocation(DataType::Type type)
124 {
125     if (DataType::IsFloatType(type)) {
126         if (currentVectorNumber_ > MAX_VECTOR_PARAM_ID) {
127             return Location::MakeStackArgument(currentStackOffset_++);
128         }
129         return Location::MakeFpRegister(currentVectorNumber_++);
130     }
131     if (currentScalarNumber_ > MAX_SCALAR_PARAM_ID) {
132         return Location::MakeStackArgument(currentStackOffset_++);
133     }
134     Target target(Arch::X86_64);
135     return Location::MakeRegister(target.GetParamRegId(currentScalarNumber_++));
136 }
137 
GeneratePrologue(const FrameInfo & frameInfo)138 void Amd64CallingConvention::GeneratePrologue([[maybe_unused]] const FrameInfo &frameInfo)
139 {
140     auto encoder = GetEncoder();
141     const CFrameLayout &fl = encoder->GetFrameLayout();
142     auto fpReg = GetTarget().GetFrameReg();
143     auto spReg = GetTarget().GetStackReg();
144 
145     // we do not push return address, because in amd64 call instruction already pushed it
146     GetMasm()->push(asmjit::x86::rbp);  // frame pointer
147     SET_CFI_OFFSET(pushFplr, encoder->GetCursorOffset());
148 
149     encoder->EncodeMov(fpReg, spReg);
150     SET_CFI_OFFSET(setFp, encoder->GetCursorOffset());
151 
152     if (IsDynCallMode() && GetDynInfo().IsCheckRequired()) {
153         static_assert(CallConvDynInfo::REG_NUM_ARGS == 1);
154         static_assert(CallConvDynInfo::REG_COUNT == CallConvDynInfo::REG_NUM_ARGS + 1);
155 
156         constexpr auto NUM_ACTUAL_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_NUM_ARGS);
157         constexpr auto NUM_EXPECTED_REG = GetTarget().GetParamReg(CallConvDynInfo::REG_COUNT);
158         auto numExpected = GetDynInfo().GetNumExpectedArgs();
159 
160         auto expandDone = encoder->CreateLabel();
161         encoder->EncodeJump(expandDone, NUM_ACTUAL_REG, Imm(numExpected), Condition::GE);
162         encoder->EncodeMov(NUM_EXPECTED_REG, Imm(numExpected));
163 
164         MemRef expandEntrypoint(Reg(GetThreadReg(Arch::X86_64), GetTarget().GetPtrRegType()),
165                                 GetDynInfo().GetExpandEntrypointTlsOffset());
166         GetEncoder()->MakeCall(expandEntrypoint);
167         encoder->BindLabel(expandDone);
168     }
169 
170     encoder->EncodeSub(spReg, spReg, Imm(2U * DOUBLE_WORD_SIZE_BYTES));
171     encoder->EncodeStr(GetTarget().GetParamReg(0), MemRef(spReg, DOUBLE_WORD_SIZE_BYTES));
172 
173     // Reset OSR flag and set HasFloatRegsFlag
174     auto flags {static_cast<uint64_t>(frameInfo.GetHasFloatRegs()) << CFrameLayout::HasFloatRegsFlag::START_BIT};
175     encoder->EncodeSti(flags, sizeof(flags), MemRef(spReg));
176     // Allocate space for locals
177     encoder->EncodeSub(spReg, spReg, Imm(DOUBLE_WORD_SIZE_BYTES * (CFrameSlots::Start() - CFrameData::Start())));
178     static_assert((CFrameLayout::GetLocalsCount() & 1U) == 0);
179 
180     RegList calleeRegs {GetCalleeRegsMask(Arch::X86_64, false).GetValue()};
181     RegList calleeVregs {GetCalleeRegsMask(Arch::X86_64, true).GetValue()};
182     SET_CFI_CALLEE_REGS(RegMask(static_cast<size_t>(calleeRegs)));
183     SET_CFI_CALLEE_VREGS(VRegMask(static_cast<size_t>(calleeVregs)));
184     PushRegs(calleeRegs, calleeVregs);
185     SET_CFI_OFFSET(pushCallees, encoder->GetCursorOffset());
186 
187     encoder->EncodeSub(
188         spReg, spReg,
189         Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
190             DOUBLE_WORD_SIZE_BYTES));
191 }
192 
/// Emit the managed-frame epilogue: undo GeneratePrologue in reverse order —
/// release the spill/caller area, restore callee-saved registers, drop the
/// method-pointer/flags/locals area, restore rbp, and return.
/// `postJob` (if any) runs before the frame is torn down.
void Amd64CallingConvention::GenerateEpilogue([[maybe_unused]] const FrameInfo &frameInfo,
                                              std::function<void()> postJob)
{
    auto encoder = GetEncoder();
    const CFrameLayout &fl = encoder->GetFrameLayout();
    auto spReg = GetTarget().GetStackReg();

    if (postJob) {
        postJob();
    }

    // Release the spill area and both caller register-class save areas
    // (mirrors the final EncodeSub in GeneratePrologue).
    encoder->EncodeAdd(
        spReg, spReg,
        Imm((fl.GetSpillsCount() + fl.GetCallerRegistersCount(false) + fl.GetCallerRegistersCount(true)) *
            DOUBLE_WORD_SIZE_BYTES));

    PopRegs(RegList(GetCalleeRegsMask(Arch::X86_64, false).GetValue()),
            RegList(GetCalleeRegsMask(Arch::X86_64, true).GetValue()));
    SET_CFI_OFFSET(popCallees, encoder->GetCursorOffset());

    // X86_64 doesn't support OSR mode
    ASSERT(!IsOsrMode());
    // Support restoring of LR and FP registers once OSR is supported in x86_64
    static_assert(!ArchTraits<Arch::X86_64>::SUPPORT_OSR);
    // 2 slots (method pointer + flags) plus the locals area.
    constexpr auto SHIFT = DOUBLE_WORD_SIZE_BYTES * (2 + CFrameSlots::Start() - CFrameData::Start());
    encoder->EncodeAdd(spReg, spReg, Imm(SHIFT));

    GetMasm()->pop(asmjit::x86::rbp);  // frame pointer
    SET_CFI_OFFSET(popFplr, encoder->GetCursorOffset());
    GetMasm()->ret();
}
224 
/// Native (C ABI) prologue is identical to the managed one on amd64;
/// forward directly.
void Amd64CallingConvention::GenerateNativePrologue(const FrameInfo &frameInfo)
{
    GeneratePrologue(frameInfo);
}
229 
GenerateNativeEpilogue(const FrameInfo & frameInfo,std::function<void ()> postJob)230 void Amd64CallingConvention::GenerateNativeEpilogue(const FrameInfo &frameInfo, std::function<void()> postJob)
231 {
232     GenerateEpilogue(frameInfo, postJob);
233 }
234 
GetMasm()235 asmjit::x86::Assembler *Amd64CallingConvention::GetMasm()
236 {
237     return (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
238 }
239 
240 }  // namespace ark::compiler::amd64
241