/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_fastpath.h"
#include "optimizer/ir/inst.h"
#include "relocations.h"

namespace panda::compiler {

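/*
 * Spill the given caller-saved registers into their dedicated CFrame slots, addressing them relative to the frame
 * register. Bits outside the target's caller-saved set are ignored.
 */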
static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->SaveRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

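/*
 * Reload caller-saved registers from the CFrame slots that SaveCallerRegistersInFrame fills.
 */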
static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->LoadRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

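/*
 * Check whether a single instruction may call into the runtime: either an explicit runtime call (excluding the
 * SLOW_PATH_ENTRY and TAIL_CALL intrinsics) or an instruction that expands to one, such as a store that needs a
 * GC barrier or a dynamic cast.
 */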
static bool InsnHasRuntimeCall(const Inst *inst)
{
    switch (inst->GetOpcode()) {
        case Opcode::StoreArray:
            if (inst->CastToStoreArray()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreObject:
            if (inst->CastToStoreObject()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::LoadObjectDynamic:
        case Opcode::StoreObjectDynamic:
            return true;
        case Opcode::Cast:
            if (inst->CastToCast()->IsDynamicCast()) {
                return true;
            }
            break;
        default:
            break;
    }
    if (inst->IsRuntimeCall()) {
        if (!inst->IsIntrinsic()) {
            return true;
        }
        auto intrinsicId = inst->CastToIntrinsic()->GetIntrinsicId();
        if (intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY &&
            intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL) {
            return true;
        }
    }
    return false;
}

/*
 * We determine runtime calls manually, instead of using MethodProperties::HasRuntimeCalls, because we need to
 * ignore the SLOW_PATH_ENTRY and TAIL_CALL intrinsics, since they don't require LR to be preserved.
 */
static bool HasRuntimeCalls(const Graph &graph)
{
    for (auto bb : graph.GetBlocksRPO()) {
        for (auto inst : bb->Insts()) {
            if (InsnHasRuntimeCall(inst)) {
                return true;
            }
        }
    }
    return false;
}

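/*
 * FastPath prologue: save the used caller-saved registers (except the incoming argument registers) into the frame,
 * push the used callee-saved registers (plus LR when the code contains real runtime calls), and reserve the spill
 * area on the stack.
 */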
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(argsNum);
    SaveCallerRegistersInFrame(callerRegs, GetEncoder(), GetFrameLayout(), false);

    auto hasRuntimeCalls = HasRuntimeCalls(*GetGraph());

    savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
        savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
    }

    if (GetUsedVRegs().Any()) {
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        savedFpRegisters_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
    }

    GetEncoder()->PushRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}

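/*
 * Caller-saved registers that must be reloaded on exit: the used ones, minus the argument registers and the
 * register that holds the return value.
 */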
RegMask CodegenFastPath::GetCallerRegistersToRestore() const
{
    RegMask callerRegs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));

    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

    if (auto retType {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())};
        retType != DataType::VOID && retType != DataType::NO_TYPE) {
        ASSERT(!DataType::IsFloatType(retType));
        callerRegs.reset(GetTarget().GetReturnRegId());
    }
    return callerRegs;
}

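/*
 * FastPath epilogue: release the spill area, reload the caller-saved registers, pop the callee-saved registers
 * (and LR if it was pushed), and return.
 */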
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
    }

    GetEncoder()->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}

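/*
 * Describe the FastPath frame for the rest of the codegen: how many spill slots are needed and at which offsets
 * the caller- and callee-saved register slots live in the CFrame.
 */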
void CodegenFastPath::CreateFrameInfo()
{
    auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
        FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
        FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false) |
        FrameInfo::PushCallers::Encode(true));
    frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
    CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount());

    frame->SetCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
    frame->SetFpCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
    frame->SetCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
    frame->SetFpCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));

    SetFrameInfo(frame);
}

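/* Tail call into the runtime slow path: unused caller-saved registers are saved to the frame before the jump. */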
void CodegenFastPath::IntrinsicSlowPathEntry(IntrinsicInst *inst)
{
    CreateTailCall(inst, false);
}

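/*
 * SaveRegisters intrinsic: push every register the following code may clobber, except the registers that already
 * hold this intrinsic's inputs.
 */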
void CodegenFastPath::IntrinsicSaveRegisters(IntrinsicInst *inst)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to save all caller-saved regs, since the caller does not preserve any registers (except the parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));

    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    GetEncoder()->PushRegisters(callerRegs | calleeRegs, callerVregs);
}

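/*
 * RestoreRegisters intrinsic: pop the same register set that IntrinsicSaveRegisters pushes.
 */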
void CodegenFastPath::IntrinsicRestoreRegisters(IntrinsicInst *inst)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to restore all caller-saved regs, since the caller does not preserve any registers (except the
    // parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    GetEncoder()->PopRegisters(callerRegs | calleeRegs, callerVregs);
}

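/* Tail call that stays on the fast path: the caller-saved registers are restored before the jump. */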
void CodegenFastPath::IntrinsicTailCall(IntrinsicInst *inst)
{
    CreateTailCall(inst, true);
}

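/*
 * Common tail-call sequence: drop the spill area, restore (fast path) or save (slow path) the caller-saved
 * registers, pop the callee-saved registers, and jump either through a relocation or through an entrypoint loaded
 * from the thread register.
 */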
void CodegenFastPath::CreateTailCall(IntrinsicInst *inst, bool isFastpath)
{
    auto encoder = GetEncoder();

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since the slow path terminates execution */
    auto tempsMask = GetTarget().GetTempRegsMask();
    for (size_t reg = tempsMask.GetMinRegister(); reg <= tempsMask.GetMaxRegister(); reg++) {
        if (tempsMask.Test(reg)) {
            GetEncoder()->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    if (isFastpath) {
        RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), encoder, GetFrameLayout(), false);
        if (GetUsedVRegs().Any()) {
            RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), encoder,
                                            GetFrameLayout(), true);
        }
    } else {
        RegMask callerRegs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
        auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
        callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

        if (GetUsedVRegs().Any()) {
            VRegMask fpCallerRegs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
            SaveCallerRegistersInFrame(fpCallerRegs, encoder, GetFrameLayout(), true);
        }

        SaveCallerRegistersInFrame(callerRegs, encoder, GetFrameLayout(), false);
    }
    encoder->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    /* The first Imm is the offset of the runtime entrypoint for Ark Irtoc */
    /* The second Imm is necessary for proper LLVM Irtoc FastPath compilation */
    CHECK_LE(inst->GetImms().size(), 2U);
    if (inst->GetRelocate()) {
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}

}  // namespace panda::compiler