• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "codegen_fastpath.h"
17 #include "optimizer/ir/inst.h"
18 #include "relocations.h"
19 
20 namespace ark::compiler {
21 
SaveCallerRegistersInFrame(RegMask mask,Encoder * encoder,const CFrameLayout & fl,bool isFp)22 static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
23 {
24     if (mask.none()) {
25         return;
26     }
27     auto fpReg = Target(fl.GetArch()).GetFrameReg();
28 
29     mask &= GetCallerRegsMask(fl.GetArch(), isFp);
30     auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
31     encoder->SaveRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
32 }
33 
RestoreCallerRegistersFromFrame(RegMask mask,Encoder * encoder,const CFrameLayout & fl,bool isFp)34 static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
35 {
36     if (mask.none()) {
37         return;
38     }
39     auto fpReg = Target(fl.GetArch()).GetFrameReg();
40 
41     mask &= GetCallerRegsMask(fl.GetArch(), isFp);
42     auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
43     encoder->LoadRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
44 }
45 
InstHasRuntimeCall(const Inst * inst)46 static bool InstHasRuntimeCall(const Inst *inst)
47 {
48     switch (inst->GetOpcode()) {
49         case Opcode::Store:
50             if (inst->CastToStore()->GetNeedBarrier()) {
51                 return true;
52             }
53             break;
54         case Opcode::StoreI:
55             if (inst->CastToStoreI()->GetNeedBarrier()) {
56                 return true;
57             }
58             break;
59         case Opcode::StoreArray:
60             if (inst->CastToStoreArray()->GetNeedBarrier()) {
61                 return true;
62             }
63             break;
64         case Opcode::StoreObject:
65             if (inst->CastToStoreObject()->GetNeedBarrier()) {
66                 return true;
67             }
68             break;
69         case Opcode::LoadObjectDynamic:
70         case Opcode::StoreObjectDynamic:
71             return true;
72         case Opcode::Cast:
73             if (inst->CastToCast()->IsDynamicCast()) {
74                 return true;
75             }
76             break;
77         default:
78             break;
79     }
80     if (inst->IsRuntimeCall()) {
81         if (!inst->IsIntrinsic()) {
82             return true;
83         }
84         auto intrinsicId = inst->CastToIntrinsic()->GetIntrinsicId();
85         if (intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY &&
86             intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL) {
87             return true;
88         }
89     }
90     return false;
91 }
92 /*
93  * We determine runtime calls manually, not using MethodProperties::HasRuntimeCalls, because we need to ignore
94  * SLOW_PATH_ENTRY intrinsic, since it doesn't require LR to be preserved.
95  */
HasRuntimeCalls(const Graph & graph)96 static bool HasRuntimeCalls(const Graph &graph)
97 {
98     for (auto bb : graph.GetBlocksRPO()) {
99         for (auto inst : bb->Insts()) {
100             if (InstHasRuntimeCall(inst)) {
101                 return true;
102             }
103         }
104     }
105     return false;
106 }
107 
/*
 * Emit the FastPath prologue: spill used caller-saved registers to the CFrame,
 * push used callee-saved registers (plus LR when runtime calls are present),
 * and reserve the spill area on the stack.
 */
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    // Spill the used caller-saved GPRs into their CFrame slots, except the
    // registers that carry the method's arguments.
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(argsNum);
    SaveCallerRegistersInFrame(callerRegs, GetEncoder(), GetFrameLayout(), false);

    auto hasRuntimeCalls = HasRuntimeCalls(*GetGraph());

    // Used callee-saved GPRs are preserved via the PushRegisters below; the
    // mask is remembered so the epilogue/tail-call paths can pop the same set.
    savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
        // Runtime calls clobber LR, so save it; once saved, LR can also be
        // handed to the encoder as an extra temp register.
        savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
        GetEncoder()->EnableLrAsTempReg(true);
    }

    if (GetUsedVRegs().Any()) {
        // Same treatment for vector/FP registers: spill used caller-saved ones
        // to the frame, remember used callee-saved ones for the push.
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        savedFpRegisters_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
    }

    GetEncoder()->PushRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    // Reserve the spill area, keeping SP at the target's alignment.
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}
139 
GetCallerRegistersToRestore() const140 RegMask CodegenFastPath::GetCallerRegistersToRestore() const
141 {
142     RegMask callerRegs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
143 
144     auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
145     callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);
146 
147     if (auto retType {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())};
148         retType != DataType::VOID && retType != DataType::NO_TYPE) {
149         ASSERT(!DataType::IsFloatType(retType));
150         callerRegs.reset(GetTarget().GetReturnRegId());
151     }
152     return callerRegs;
153 }
154 
/*
 * Emit the FastPath epilogue, mirroring GeneratePrologue: release the spill
 * area, reload caller-saved registers from the CFrame, pop the callee-saved
 * set and return.
 */
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    // Release the spill area reserved in the prologue (same rounded size).
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
    }

    // Pop exactly the masks remembered by the prologue.
    GetEncoder()->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}
176 
/*
 * Build the FrameInfo describing the FastPath stack layout: caller-saved
 * slots are positioned and addressed relative to FP, callee-saved registers
 * are pushed (addressed relative to SP), and spill slots come on top.
 */
void CodegenFastPath::CreateFrameInfo()
{
    auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
        FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
        FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false) |
        FrameInfo::PushCallers::Encode(true));
    frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
    CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount());

    // Caller offsets are taken SP-relative, callee offsets FP-relative.
    // NOTE(review): the callee offsets are negated — presumably because the
    // FP-relative offset grows downwards; confirm against CFrameLayout.
    frame->SetCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
    frame->SetFpCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
    frame->SetCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
    frame->SetFpCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));

    SetFrameInfo(frame);
}
197 
/*
 * Emit a tail call to the slow-path entrypoint. isFastpath == false: the
 * registers this method did not touch are spilled so the slow path sees a
 * fully saved caller context (see CreateTailCall).
 */
void CodegenFastPath::IntrinsicSlowPathEntry(IntrinsicInst *inst)
{
    CreateTailCall(inst, false);
}
202 
/*
 * Safely call a C++ function from Irtoc-generated code.
 */
void CodegenFastPath::IntrinsicSaveTlabStatsSafe([[maybe_unused]] IntrinsicInst *inst, Reg src1, Reg src2, Reg tmp)
{
    // `inst` is referenced only inside ASSERTs, hence [[maybe_unused]].
    ASSERT(!inst->HasUsers());
    ASSERT(tmp.IsValid());
    ASSERT(tmp != GetRegfile()->GetZeroReg());

    // Preserve all caller- and callee-saved GPRs plus the caller-saved FP
    // registers around the call — presumably to keep the surrounding Irtoc
    // code safe regardless of what the C++ callee clobbers (TODO confirm).
    auto regs = GetCallerRegsMask(GetArch(), false) | GetCalleeRegsMask(GetArch(), false);
    auto vregs = GetCallerRegsMask(GetArch(), true);
    GetEncoder()->PushRegisters(regs, vregs);

    // Move src1/src2 into the call's parameter registers.
    FillCallParams(src1, src2);

    // Load the entrypoint address from the per-thread entrypoint table and call it.
    auto id = RuntimeInterface::EntrypointId::WRITE_TLAB_STATS_NO_BRIDGE;
    MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id));
    GetEncoder()->EncodeLdr(tmp, false, entry);
    GetEncoder()->MakeCall(tmp);

    GetEncoder()->PopRegisters(regs, vregs);
}
225 
IntrinsicSaveRegisters(IntrinsicInst * inst)226 void CodegenFastPath::IntrinsicSaveRegisters([[maybe_unused]] IntrinsicInst *inst)
227 {
228     RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
229     // We need to save all caller regs, since caller doesn't care about registers at all (except parameters)
230     auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
231     auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
232     for (auto &input : inst->GetInputs()) {
233         calleeRegs.reset(input.GetInst()->GetDstReg());
234         callerRegs.reset(input.GetInst()->GetDstReg());
235     }
236     if (GetTarget().SupportLinkReg()) {
237         callerRegs.set(GetTarget().GetLinkReg().GetId());
238     }
239     if (!inst->HasUsers()) {
240         callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
241     }
242     GetEncoder()->PushRegisters(callerRegs | calleeRegs, callerVregs);
243 }
244 
IntrinsicRestoreRegisters(IntrinsicInst * inst)245 void CodegenFastPath::IntrinsicRestoreRegisters([[maybe_unused]] IntrinsicInst *inst)
246 {
247     RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
248     // We need to restore all caller regs, since caller doesn't care about registers at all (except parameters)
249     auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
250     auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
251     for (auto &input : inst->GetInputs()) {
252         calleeRegs.reset(input.GetInst()->GetDstReg());
253         callerRegs.reset(input.GetInst()->GetDstReg());
254     }
255     if (GetTarget().SupportLinkReg()) {
256         callerRegs.set(GetTarget().GetLinkReg().GetId());
257     }
258     if (!inst->HasUsers()) {
259         callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
260     }
261     GetEncoder()->PopRegisters(callerRegs | calleeRegs, callerVregs);
262 }
263 
/*
 * Emit a tail call to another fastpath. isFastpath == true: caller-saved
 * registers are restored from the frame before the jump (see CreateTailCall).
 */
void CodegenFastPath::IntrinsicTailCall(IntrinsicInst *inst)
{
    CreateTailCall(inst, true);
}
268 
/*
 * Emit the common tail-call sequence: unwind the fast-path frame state and
 * jump to the target entrypoint without returning here.
 * - isFastpath == true: the target is another fastpath, so restore the
 *   caller-saved registers exactly as the epilogue would.
 * - isFastpath == false: the target is the slow path; the registers this
 *   method did NOT use are spilled into the frame (the used ones were already
 *   spilled by the prologue), giving the runtime a fully saved context.
 */
void CodegenFastPath::CreateTailCall(IntrinsicInst *inst, bool isFastpath)
{
    auto encoder = GetEncoder();

    // Release the spill area first (same rounded size the prologue reserved).
    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since slow path terminates execution */
    auto tempsMask = GetTarget().GetTempRegsMask();
    for (size_t reg = tempsMask.GetMinRegister(); reg <= tempsMask.GetMaxRegister(); reg++) {
        if (tempsMask.Test(reg)) {
            GetEncoder()->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    if (isFastpath) {
        // Epilogue-like restore of the caller-saved GPRs and FP registers.
        RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), encoder, GetFrameLayout(), false);
        if (GetUsedVRegs().Any()) {
            RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), encoder,
                                            GetFrameLayout(), true);
        }
    } else {
        // Spill the complement of the used caller-saved regs (minus argument
        // registers) — together with the prologue's spills this covers the
        // whole caller-saved set for the slow path.
        RegMask callerRegs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
        auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
        callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

        if (GetUsedVRegs().Any()) {
            VRegMask fpCallerRegs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
            SaveCallerRegistersInFrame(fpCallerRegs, encoder, GetFrameLayout(), true);
        }

        SaveCallerRegistersInFrame(callerRegs, encoder, GetFrameLayout(), false);
    }
    // Pop the callee-saved set pushed by the prologue before leaving.
    encoder->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    /* First Imm is offset of the runtime entrypoint for Ark Irtoc */
    /* Second Imm is necessary for proper LLVM Irtoc FastPath compilation */
    CHECK_LE(inst->GetImms().size(), 2U);
    if (inst->GetRelocate()) {
        // Target is resolved later via relocation.
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        // Load the entrypoint address from the per-thread table and jump to it.
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}
321 
322 }  // namespace ark::compiler
323