/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_fastpath.h"
#include "optimizer/ir/inst.h"
#include "relocations.h"

namespace ark::compiler {

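/*
 * Spill the requested caller-saved registers into the caller-saved slots of the CFrame.
 * The mask is restricted to the caller-saved set of the target architecture, and the slots
 * are addressed relative to the frame register.
 */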
static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->SaveRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

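/*
 * Counterpart of SaveCallerRegistersInFrame: reload the given caller-saved registers from
 * the caller-saved slots of the CFrame.
 */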
static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->LoadRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

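/*
 * Check whether a single instruction may call into the runtime. Stores that need a write
 * barrier, dynamic loads/stores and dynamic casts count as runtime calls; the SLOW_PATH_ENTRY
 * and TAIL_CALL intrinsics are deliberately excluded.
 */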
static bool InstHasRuntimeCall(const Inst *inst)
{
    switch (inst->GetOpcode()) {
        case Opcode::Store:
            if (inst->CastToStore()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreI:
            if (inst->CastToStoreI()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreArray:
            if (inst->CastToStoreArray()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreObject:
            if (inst->CastToStoreObject()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::LoadObjectDynamic:
        case Opcode::StoreObjectDynamic:
            return true;
        case Opcode::Cast:
            if (inst->CastToCast()->IsDynamicCast()) {
                return true;
            }
            break;
        default:
            break;
    }
    if (inst->IsRuntimeCall()) {
        if (!inst->IsIntrinsic()) {
            return true;
        }
        auto intrinsicId = inst->CastToIntrinsic()->GetIntrinsicId();
        if (intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY &&
            intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL) {
            return true;
        }
    }
    return false;
}

/*
 * We determine runtime calls manually, instead of using MethodProperties::HasRuntimeCalls,
 * because we need to ignore the SLOW_PATH_ENTRY intrinsic: it doesn't require LR to be preserved.
 */
static bool HasRuntimeCalls(const Graph &graph)
{
    for (auto bb : graph.GetBlocksRPO()) {
        for (auto inst : bb->Insts()) {
            if (InstHasRuntimeCall(inst)) {
                return true;
            }
        }
    }
    return false;
}

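/*
 * FastPath prologue: spill the used caller-saved registers that are not argument registers
 * into the frame, push the used callee-saved registers (plus LR when the method contains
 * runtime calls on a target with a link register), and reserve the spill area on the stack.
 */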
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(argsNum);
    SaveCallerRegistersInFrame(callerRegs, GetEncoder(), GetFrameLayout(), false);

    auto hasRuntimeCalls = HasRuntimeCalls(*GetGraph());

    savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
        savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
        GetEncoder()->EnableLrAsTempReg(true);
    }

    if (GetUsedVRegs().Any()) {
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        savedFpRegisters_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
    }

    GetEncoder()->PushRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}

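/*
 * Caller-saved registers that the epilogue has to reload: the used caller-saved set minus
 * the argument registers and minus the return-value register, whose value must survive
 * the restore.
 */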
RegMask CodegenFastPath::GetCallerRegistersToRestore() const
{
    RegMask callerRegs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));

    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

    if (auto retType {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())};
        retType != DataType::VOID && retType != DataType::NO_TYPE) {
        ASSERT(!DataType::IsFloatType(retType));
        callerRegs.reset(GetTarget().GetReturnRegId());
    }
    return callerRegs;
}

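/*
 * FastPath epilogue: release the spill area, reload the caller-saved registers from the frame,
 * pop the registers pushed by the prologue and return.
 */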
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
    }

    GetEncoder()->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}

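/*
 * Build the FrameInfo for a FastPath method: record the spill-slot count and the offsets of
 * the caller- and callee-saved register areas within the CFrame.
 */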
void CodegenFastPath::CreateFrameInfo()
{
    auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
        FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
        FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false) |
        FrameInfo::PushCallers::Encode(true));
    frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
    CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount());

    frame->SetCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
    frame->SetFpCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
    frame->SetCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
    frame->SetFpCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));

    SetFrameInfo(frame);
}

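/*
 * SLOW_PATH_ENTRY intrinsic: leave the fast path by tail-calling the slow-path entrypoint
 * (isFastpath == false, so the unused caller-saved registers are additionally spilled into
 * the frame for the callee).
 */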
void CodegenFastPath::IntrinsicSlowPathEntry(IntrinsicInst *inst)
{
    CreateTailCall(inst, false);
}

/*
 * Safe call of a C++ function from Irtoc code: every register the callee might clobber is
 * saved around the call.
 */
void CodegenFastPath::IntrinsicSaveTlabStatsSafe([[maybe_unused]] IntrinsicInst *inst, Reg src1, Reg src2, Reg tmp)
{
    ASSERT(!inst->HasUsers());
    ASSERT(tmp.IsValid());
    ASSERT(tmp != GetRegfile()->GetZeroReg());

    auto regs = GetCallerRegsMask(GetArch(), false) | GetCalleeRegsMask(GetArch(), false);
    auto vregs = GetCallerRegsMask(GetArch(), true);
    GetEncoder()->PushRegisters(regs, vregs);

    FillCallParams(src1, src2);

    auto id = RuntimeInterface::EntrypointId::WRITE_TLAB_STATS_NO_BRIDGE;
    MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id));
    GetEncoder()->EncodeLdr(tmp, false, entry);
    GetEncoder()->MakeCall(tmp);

    GetEncoder()->PopRegisters(regs, vregs);
}

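/*
 * SAVE_REGISTERS intrinsic: push the registers that a subsequent call could clobber (all
 * caller-saved registers plus the used callee-saved ones), excluding the registers that hold
 * the intrinsic's inputs. LR is added on targets with a link register, and the return register
 * is saved only when the intrinsic's result is not used.
 */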
void CodegenFastPath::IntrinsicSaveRegisters(IntrinsicInst *inst)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to save all caller regs, since the caller doesn't care about registers at all (except parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    if (!inst->HasUsers()) {
        callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
    }
    GetEncoder()->PushRegisters(callerRegs | calleeRegs, callerVregs);
}

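/*
 * RESTORE_REGISTERS intrinsic: pop the registers pushed by IntrinsicSaveRegisters; the mask
 * is computed in exactly the same way so that both handlers agree.
 */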
void CodegenFastPath::IntrinsicRestoreRegisters(IntrinsicInst *inst)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to restore all caller regs, since the caller doesn't care about registers at all (except parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    if (!inst->HasUsers()) {
        callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
    }
    GetEncoder()->PopRegisters(callerRegs | calleeRegs, callerVregs);
}

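/*
 * TAIL_CALL intrinsic: tail-call another entrypoint, restoring the caller-saved registers
 * first (isFastpath == true).
 */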
void CodegenFastPath::IntrinsicTailCall(IntrinsicInst *inst)
{
    CreateTailCall(inst, true);
}

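/*
 * Emit a tail call: drop the spill area, release the temp registers, restore (fast path) or
 * spill (slow path) the caller-saved registers, pop the registers pushed by the prologue and
 * jump to the target, either through a relocation or through an entrypoint loaded from the
 * thread register.
 */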
void CodegenFastPath::CreateTailCall(IntrinsicInst *inst, bool isFastpath)
{
    auto encoder = GetEncoder();

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since the slow path terminates execution */
    auto tempsMask = GetTarget().GetTempRegsMask();
    for (size_t reg = tempsMask.GetMinRegister(); reg <= tempsMask.GetMaxRegister(); reg++) {
        if (tempsMask.Test(reg)) {
            GetEncoder()->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    if (isFastpath) {
        RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), encoder, GetFrameLayout(), false);
        if (GetUsedVRegs().Any()) {
            RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), encoder,
                                            GetFrameLayout(), true);
        }
    } else {
        RegMask callerRegs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
        auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
        callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

        if (GetUsedVRegs().Any()) {
            VRegMask fpCallerRegs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
            SaveCallerRegistersInFrame(fpCallerRegs, encoder, GetFrameLayout(), true);
        }

        SaveCallerRegistersInFrame(callerRegs, encoder, GetFrameLayout(), false);
    }
    encoder->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    /* The first Imm is the offset of the runtime entrypoint for Ark Irtoc */
    /* The second Imm is necessary for proper LLVM Irtoc FastPath compilation */
    CHECK_LE(inst->GetImms().size(), 2U);
    if (inst->GetRelocate()) {
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}

}  // namespace ark::compiler