/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_fastpath.h"
#include "optimizer/ir/inst.h"
#include "relocations.h"

namespace ark::compiler {

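/*
 * Spill the given caller-saved registers into their dedicated CFrame slots,
 * addressed relative to the frame pointer.
 */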
static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->SaveRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

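/*
 * Reload the given caller-saved registers from their CFrame slots, mirroring
 * SaveCallerRegistersInFrame.
 */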
static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->LoadRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

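/*
 * Check whether a single instruction may end up calling into the runtime: stores that
 * need a GC barrier, dynamic object loads/stores, dynamic casts, and runtime-call
 * instructions other than the SLOW_PATH_ENTRY and TAIL_CALL intrinsics.
 */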
static bool InstHasRuntimeCall(const Inst *inst)
{
    switch (inst->GetOpcode()) {
        case Opcode::Store:
            if (inst->CastToStore()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreI:
            if (inst->CastToStoreI()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreArray:
            if (inst->CastToStoreArray()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreObject:
            if (inst->CastToStoreObject()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::LoadObjectDynamic:
        case Opcode::StoreObjectDynamic:
            return true;
        case Opcode::Cast:
            if (inst->CastToCast()->IsDynamicCast()) {
                return true;
            }
            break;
        default:
            break;
    }
    if (inst->IsRuntimeCall()) {
        if (!inst->IsIntrinsic()) {
            return true;
        }
        auto intrinsicId = inst->CastToIntrinsic()->GetIntrinsicId();
        if (intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY &&
            intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL) {
            return true;
        }
    }
    return false;
}

/*
 * We determine runtime calls manually, not using MethodProperties::HasRuntimeCalls, because we need to ignore
 * SLOW_PATH_ENTRY intrinsic, since it doesn't require LR to be preserved.
 */
static bool HasRuntimeCalls(const Graph &graph)
{
    for (auto bb : graph.GetBlocksRPO()) {
        for (auto inst : bb->Insts()) {
            if (InstHasRuntimeCall(inst)) {
                return true;
            }
        }
    }
    return false;
}

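/*
 * FastPath prologue: spill the used caller-saved registers (except argument registers)
 * into the frame, push the used callee-saved registers (plus LR when the method contains
 * runtime calls), and reserve the spill area on the stack.
 */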
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(argsNum);
    SaveCallerRegistersInFrame(callerRegs, GetEncoder(), GetFrameLayout(), false);

    auto hasRuntimeCalls = HasRuntimeCalls(*GetGraph());

    savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
        savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
        GetEncoder()->EnableLrAsTempReg(true);
    }

    if (GetUsedVRegs().Any()) {
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        savedFpRegisters_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
    }

    GetEncoder()->PushRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}

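/*
 * Caller-saved registers the epilogue has to reload from the frame: the used caller
 * registers minus the argument registers and minus the return register, which must keep
 * the value produced by the fast path.
 */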
RegMask CodegenFastPath::GetCallerRegistersToRestore() const
{
    RegMask callerRegs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));

    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

    if (auto retType {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())};
        retType != DataType::VOID && retType != DataType::NO_TYPE) {
        ASSERT(!DataType::IsFloatType(retType));
        callerRegs.reset(GetTarget().GetReturnRegId());
    }
    return callerRegs;
}

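/*
 * FastPath epilogue: undo the prologue in reverse order, i.e. release the spill area,
 * reload caller-saved registers from the frame, pop the saved registers, and return.
 */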
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
    }

    GetEncoder()->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}

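/*
 * Describe the FastPath frame: where caller- and callee-saved register slots live in the
 * CFrame layout and how many spill slots the graph needs.
 */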
void CodegenFastPath::CreateFrameInfo()
{
    auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
        FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
        FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false) |
        FrameInfo::PushCallers::Encode(true));
    frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
    CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount(), false);

    frame->SetCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
    frame->SetFpCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
    frame->SetCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
    frame->SetFpCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));

    SetFrameInfo(frame);
}

/*
 * Generate a tail call out of the fast path: tear down the frame and jump either to a
 * relocated target or to a runtime entrypoint loaded from the thread register.
 */
void CodegenFastPath::CreateTailCall(IntrinsicInst *inst, bool isFastpath)
{
    auto encoder = GetEncoder();

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since slow path terminates execution */
    auto tempsMask = GetTarget().GetTempRegsMask();
    for (size_t reg = tempsMask.GetMinRegister(); reg <= tempsMask.GetMaxRegister(); reg++) {
        if (tempsMask.Test(reg)) {
            encoder->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    if (isFastpath) {
        RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), encoder, GetFrameLayout(), false);
        if (GetUsedVRegs().Any()) {
            RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), encoder,
                                            GetFrameLayout(), true);
        }
    } else {
        RegMask callerRegs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
        auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
        callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

        if (GetUsedVRegs().Any()) {
            VRegMask fpCallerRegs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
            SaveCallerRegistersInFrame(fpCallerRegs, encoder, GetFrameLayout(), true);
        }

        SaveCallerRegistersInFrame(callerRegs, encoder, GetFrameLayout(), false);
    }
    encoder->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    /* First Imm is offset of the runtime entrypoint for Ark Irtoc */
    /* Second Imm is necessary for proper LLVM Irtoc FastPath compilation */
    CHECK_LE(inst->GetImms().size(), 2U);
    if (inst->GetRelocate()) {
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}

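/*
 * Dispatch SIMD intrinsics (UTF-16 to UTF-8 compression and memchar searches) to the
 * corresponding encoder primitives.
 */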
void CodegenFastPath::EmitSimdIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPRESS_EIGHT_UTF16_TO_UTF8_CHARS_USING_SIMD) {
        GetEncoder()->EncodeCompressEightUtf16ToUtf8CharsUsingSimd(src[FIRST_OPERAND], src[SECOND_OPERAND]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPRESS_SIXTEEN_UTF16_TO_UTF8_CHARS_USING_SIMD) {
        GetEncoder()->EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(src[FIRST_OPERAND], src[SECOND_OPERAND]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U8_X32_USING_SIMD) {
        GetEncoder()->EncodeMemCharU8X32UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U16_X16_USING_SIMD) {
        GetEncoder()->EncodeMemCharU16X16UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                   ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U8_X16_USING_SIMD) {
        GetEncoder()->EncodeMemCharU8X16UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U16_X8_USING_SIMD) {
        GetEncoder()->EncodeMemCharU16X8UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else {
        UNREACHABLE();
    }
}

void CodegenFastPath::EmitReverseIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_BYTES_U64 ||
        intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_BYTES_U32) {
        GetEncoder()->EncodeReverseBytes(dst, src[0]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_HALF_WORDS) {
        GetEncoder()->EncodeReverseHalfWords(dst, src[0]);
    } else {
        UNREACHABLE();
    }
}

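/*
 * Exclusive and atomic accesses to the object mark word; the mark word is expected to be
 * at offset 0 in the object header, which the asserts below verify.
 */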
void CodegenFastPath::EmitMarkWordIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_LOAD_ACQUIRE_MARK_WORD_EXCLUSIVE) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeLdrExclusive(dst, src[0], true);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_STORE_RELEASE_MARK_WORD_EXCLUSIVE) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeStrExclusive(dst, src[SECOND_OPERAND], src[0], true);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPARE_AND_SET_MARK_WORD) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeCompareAndSwap(dst, src[0], src[SECOND_OPERAND], src[THIRD_OPERAND]);
    } else {
        UNREACHABLE();
    }
}

void CodegenFastPath::EmitDataMemoryBarrierFullIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                         [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_DATA_MEMORY_BARRIER_FULL);
    GetEncoder()->EncodeMemoryBarrier(memory_order::FULL);
}

/*
 * Safe call of a C++ function from Irtoc code: the caller- and callee-saved registers
 * are preserved around the native call.
 */
void CodegenFastPath::EmitWriteTlabStatsSafeIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                      SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_WRITE_TLAB_STATS_SAFE);
    ASSERT(!inst->HasUsers());

    auto src1 = src[FIRST_OPERAND];
    auto src2 = src[SECOND_OPERAND];
    auto tmp = src[THIRD_OPERAND];

    ASSERT(tmp.IsValid());
    ASSERT(tmp != GetRegfile()->GetZeroReg());

    auto regs = GetCallerRegsMask(GetArch(), false) | GetCalleeRegsMask(GetArch(), false);
    auto vregs = GetCallerRegsMask(GetArch(), true);
    GetEncoder()->PushRegisters(regs, vregs);

    FillCallParams(src1, src2);

    auto id = RuntimeInterface::EntrypointId::WRITE_TLAB_STATS_NO_BRIDGE;
    MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id));
    GetEncoder()->EncodeLdr(tmp, false, entry);
    GetEncoder()->MakeCall(tmp);

    GetEncoder()->PopRegisters(regs, vregs);
}

void CodegenFastPath::EmitExpandU8ToU16Intrinsic([[maybe_unused]] IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_EXPAND_U8_TO_U16);
    GetEncoder()->EncodeUnsignedExtendBytesToShorts(dst, src[0]);
}

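/*
 * Atomic byte OR: request the fast encoding unless we target AArch64 without the ATOMICS
 * CPU feature.
 */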
void CodegenFastPath::EmitAtomicByteOrIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_ATOMIC_BYTE_OR);
    bool fastEncoding = true;
    if (GetArch() == Arch::AARCH64 && !g_options.IsCpuFeatureEnabled(CpuFeature::ATOMICS)) {
        fastEncoding = false;
    }
    GetEncoder()->EncodeAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND], fastEncoding);
}

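/*
 * SAVE_REGISTERS_EP / RESTORE_REGISTERS_EP: push or pop the caller-saved registers plus
 * the used callee-saved registers around an entrypoint call, skipping registers that hold
 * the intrinsic's inputs. LR is included when the target has a link register, and the
 * return register is included when the intrinsic's result is unused.
 */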
void CodegenFastPath::EmitSaveOrRestoreRegsEpIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                       [[maybe_unused]] SRCREGS src)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to restore all caller regs, since caller doesn't care about registers at all (except parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    if (!inst->HasUsers()) {
        callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
    }
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_SAVE_REGISTERS_EP) {
        GetEncoder()->PushRegisters(callerRegs | calleeRegs, callerVregs);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_RESTORE_REGISTERS_EP) {
        GetEncoder()->PopRegisters(callerRegs | calleeRegs, callerVregs);
    } else {
        UNREACHABLE();
    }
}

void CodegenFastPath::EmitTailCallIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst, [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL);
    CreateTailCall(inst, true);
}

void CodegenFastPath::EmitSlowPathEntryIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                 [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY);
    CreateTailCall(inst, false);
}
} // namespace ark::compiler