/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_fastpath.h"
#include "optimizer/ir/inst.h"
#include "relocations.h"

namespace ark::compiler {

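/*
 * Store the requested caller-saved registers into their reserved CFrame slots, addressed
 * relative to the frame register. The mask is narrowed to registers that are actually
 * caller-saved on the target architecture.
 */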
static void SaveCallerRegistersInFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->SaveRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

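/* The inverse of SaveCallerRegistersInFrame: reload the masked caller-saved registers from their CFrame slots. */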
static void RestoreCallerRegistersFromFrame(RegMask mask, Encoder *encoder, const CFrameLayout &fl, bool isFp)
{
    if (mask.none()) {
        return;
    }
    auto fpReg = Target(fl.GetArch()).GetFrameReg();

    mask &= GetCallerRegsMask(fl.GetArch(), isFp);
    auto startSlot = fl.GetStackStartSlot() + fl.GetCallerLastSlot(isFp);
    encoder->LoadRegisters(mask, isFp, -startSlot, fpReg, GetCallerRegsMask(fl.GetArch(), isFp));
}

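/*
 * Check whether a single instruction may call into the runtime: stores that need a GC write
 * barrier, dynamic loads/stores, dynamic casts, and any runtime-call instruction other than the
 * SLOW_PATH_ENTRY and TAIL_CALL intrinsics.
 */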
static bool InstHasRuntimeCall(const Inst *inst)
{
    switch (inst->GetOpcode()) {
        case Opcode::Store:
            if (inst->CastToStore()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreI:
            if (inst->CastToStoreI()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreArray:
            if (inst->CastToStoreArray()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::StoreObject:
            if (inst->CastToStoreObject()->GetNeedBarrier()) {
                return true;
            }
            break;
        case Opcode::LoadObjectDynamic:
        case Opcode::StoreObjectDynamic:
            return true;
        case Opcode::Cast:
            if (inst->CastToCast()->IsDynamicCast()) {
                return true;
            }
            break;
        default:
            break;
    }
    if (inst->IsRuntimeCall()) {
        if (!inst->IsIntrinsic()) {
            return true;
        }
        auto intrinsicId = inst->CastToIntrinsic()->GetIntrinsicId();
        if (intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY &&
            intrinsicId != RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL) {
            return true;
        }
    }
    return false;
}

/*
 * We determine runtime calls manually, not via MethodProperties::HasRuntimeCalls, because we need
 * to ignore the SLOW_PATH_ENTRY and TAIL_CALL intrinsics: they don't require LR to be preserved.
 */
static bool HasRuntimeCalls(const Graph &graph)
{
    for (auto bb : graph.GetBlocksRPO()) {
        for (auto inst : bb->Insts()) {
            if (InstHasRuntimeCall(inst)) {
                return true;
            }
        }
    }
    return false;
}

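/*
 * Prologue of a fastpath: spill the live caller-saved registers (minus the incoming argument
 * registers) into the CFrame, push the used callee-saved registers (plus LR when a runtime call
 * may clobber it), and reserve the stack spill area.
 */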
void CodegenFastPath::GeneratePrologue()
{
    SCOPED_DISASM_STR(this, "FastPath Prologue");

    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= GetUsedRegs() & ~GetTarget().GetParamRegsMask(argsNum);
    SaveCallerRegistersInFrame(callerRegs, GetEncoder(), GetFrameLayout(), false);

    auto hasRuntimeCalls = HasRuntimeCalls(*GetGraph());

    savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
        savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
        GetEncoder()->EnableLrAsTempReg(true);
    }

    if (GetUsedVRegs().Any()) {
        SaveCallerRegistersInFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(), GetFrameLayout(),
                                   true);
        savedFpRegisters_ = GetUsedVRegs() & VRegMask(GetCalleeRegsMask(GetArch(), true));
    }

    GetEncoder()->PushRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeSub(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }
}

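/*
 * Caller-saved registers the epilogue has to reload: everything the prologue spilled except the
 * return-value register, which must keep the fastpath's result.
 */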
RegMask CodegenFastPath::GetCallerRegistersToRestore() const
{
    RegMask callerRegs = GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));

    auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
    callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

    if (auto retType {GetRuntime()->GetMethodReturnType(GetGraph()->GetMethod())};
        retType != DataType::VOID && retType != DataType::NO_TYPE) {
        ASSERT(!DataType::IsFloatType(retType));
        callerRegs.reset(GetTarget().GetReturnRegId());
    }
    return callerRegs;
}

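/* Epilogue of a fastpath: undo the prologue in reverse order, then return. */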
void CodegenFastPath::GenerateEpilogue()
{
    SCOPED_DISASM_STR(this, "FastPath Epilogue");

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        GetEncoder()->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), GetEncoder(), GetFrameLayout(), false);

    if (GetUsedVRegs().Any()) {
        RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), GetEncoder(),
                                        GetFrameLayout(), true);
    }

    GetEncoder()->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    GetEncoder()->EncodeReturn();
}

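/*
 * Build the FrameInfo for a fastpath: encode how caller- and callee-saved slots are positioned
 * and addressed (relative to FP or SP), and record the slot offsets from the CFrame layout.
 */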
void CodegenFastPath::CreateFrameInfo()
{
    auto frame = GetGraph()->GetLocalAllocator()->New<FrameInfo>(
        FrameInfo::PositionedCallers::Encode(true) | FrameInfo::PositionedCallees::Encode(false) |
        FrameInfo::CallersRelativeFp::Encode(true) | FrameInfo::CalleesRelativeFp::Encode(false) |
        FrameInfo::PushCallers::Encode(true));
    frame->SetSpillsCount(GetGraph()->GetStackSlotsCount());
    CFrameLayout fl(GetGraph()->GetArch(), GetGraph()->GetStackSlotsCount(), false);

    frame->SetCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(false)));
    frame->SetFpCallersOffset(fl.GetOffset<CFrameLayout::OffsetOrigin::SP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCallerLastSlot(true)));
    frame->SetCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(false)));
    frame->SetFpCalleesOffset(-fl.GetOffset<CFrameLayout::OffsetOrigin::FP, CFrameLayout::OffsetUnit::SLOTS>(
        fl.GetStackStartSlot() + fl.GetCalleeLastSlot(true)));

    SetFrameInfo(frame);
}

/*
 * Emit a tail call that leaves the fastpath: unwind the spill area, restore (for a fastpath
 * target) or save (for a slow-path target) the caller-saved registers, pop the callee-saved
 * registers, and jump to the entrypoint.
 */
void CodegenFastPath::CreateTailCall(IntrinsicInst *inst, bool isFastpath)
{
    auto encoder = GetEncoder();

    if (GetFrameInfo()->GetSpillsCount() != 0) {
        encoder->EncodeAdd(
            GetTarget().GetStackReg(), GetTarget().GetStackReg(),
            Imm(RoundUp(GetFrameInfo()->GetSpillsCount() * GetTarget().WordSize(), GetTarget().GetSpAlignment())));
    }

    /* Once we reach the slow path, we can release all temp registers, since the slow path terminates execution */
    auto tempsMask = GetTarget().GetTempRegsMask();
    for (size_t reg = tempsMask.GetMinRegister(); reg <= tempsMask.GetMaxRegister(); reg++) {
        if (tempsMask.Test(reg)) {
            encoder->ReleaseScratchRegister(Reg(reg, INT32_TYPE));
        }
    }

    if (isFastpath) {
        RestoreCallerRegistersFromFrame(GetCallerRegistersToRestore(), encoder, GetFrameLayout(), false);
        if (GetUsedVRegs().Any()) {
            RestoreCallerRegistersFromFrame(GetUsedVRegs() & GetCallerRegsMask(GetArch(), true), encoder,
                                            GetFrameLayout(), true);
        }
    } else {
        RegMask callerRegs = ~GetUsedRegs() & RegMask(GetCallerRegsMask(GetArch(), false));
        auto argsNum = GetRuntime()->GetMethodArgumentsCount(GetGraph()->GetMethod());
        callerRegs &= ~GetTarget().GetParamRegsMask(argsNum);

        if (GetUsedVRegs().Any()) {
            VRegMask fpCallerRegs = ~GetUsedVRegs() & RegMask(GetCallerRegsMask(GetArch(), true));
            SaveCallerRegistersInFrame(fpCallerRegs, encoder, GetFrameLayout(), true);
        }

        SaveCallerRegistersInFrame(callerRegs, encoder, GetFrameLayout(), false);
    }
    encoder->PopRegisters(savedRegisters_, savedFpRegisters_, GetTarget().SupportLinkReg());

    /* The first Imm is the offset of the runtime entrypoint for Ark Irtoc */
    /* The second Imm is necessary for proper LLVM Irtoc FastPath compilation */
    CHECK_LE(inst->GetImms().size(), 2U);
    if (inst->GetRelocate()) {
        RelocationInfo relocation;
        encoder->EncodeJump(&relocation);
        GetGraph()->GetRelocationHandler()->AddRelocation(relocation);
    } else {
        ScopedTmpReg tmp(encoder);
        auto offset = inst->GetImms()[0];
        encoder->EncodeLdr(tmp, false, MemRef(ThreadReg(), offset));
        encoder->EncodeJump(tmp);
    }
}

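/* Dispatch the SIMD string intrinsics (UTF-16 compression and memchar searches) to their encoder implementations. */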
void CodegenFastPath::EmitSimdIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPRESS_EIGHT_UTF16_TO_UTF8_CHARS_USING_SIMD) {
        GetEncoder()->EncodeCompressEightUtf16ToUtf8CharsUsingSimd(src[FIRST_OPERAND], src[SECOND_OPERAND]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPRESS_SIXTEEN_UTF16_TO_UTF8_CHARS_USING_SIMD) {
        GetEncoder()->EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(src[FIRST_OPERAND], src[SECOND_OPERAND]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U8_X32_USING_SIMD) {
        GetEncoder()->EncodeMemCharU8X32UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U16_X16_USING_SIMD) {
        GetEncoder()->EncodeMemCharU16X16UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                   ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U8_X16_USING_SIMD) {
        GetEncoder()->EncodeMemCharU8X16UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_MEM_CHAR_U16_X8_USING_SIMD) {
        GetEncoder()->EncodeMemCharU16X8UsingSimd(dst, src[FIRST_OPERAND], src[SECOND_OPERAND],
                                                  ConvertInstTmpReg(inst, DataType::FLOAT64));
    } else {
        UNREACHABLE();
    }
}

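/* Byte- and halfword-reversal intrinsics map directly onto single encoder operations. */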
void CodegenFastPath::EmitReverseIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_BYTES_U64 ||
        intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_BYTES_U32) {
        GetEncoder()->EncodeReverseBytes(dst, src[0]);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_REVERSE_HALF_WORDS) {
        GetEncoder()->EncodeReverseHalfWords(dst, src[0]);
    } else {
        UNREACHABLE();
    }
}

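/*
 * Exclusive and atomic accesses to the object's mark word. Every variant relies on the mark
 * word residing at offset 0 of the object header, so the object pointer itself is the address.
 */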
void CodegenFastPath::EmitMarkWordIntrinsic(IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_LOAD_ACQUIRE_MARK_WORD_EXCLUSIVE) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeLdrExclusive(dst, src[0], true);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_STORE_RELEASE_MARK_WORD_EXCLUSIVE) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeStrExclusive(dst, src[SECOND_OPERAND], src[0], true);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_COMPARE_AND_SET_MARK_WORD) {
        ASSERT(GetRuntime()->GetObjMarkWordOffset(GetArch()) == 0);
        GetEncoder()->EncodeCompareAndSwap(dst, src[0], src[SECOND_OPERAND], src[THIRD_OPERAND]);
    } else {
        UNREACHABLE();
    }
}

void CodegenFastPath::EmitDataMemoryBarrierFullIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                         [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_DATA_MEMORY_BARRIER_FULL);
    GetEncoder()->EncodeMemoryBarrier(memory_order::FULL);
}

/*
 * Safe call of a C++ function from Irtoc: every register the callee could clobber is pushed
 * before the call and popped afterwards, so the surrounding fastpath keeps its state.
 */
void CodegenFastPath::EmitWriteTlabStatsSafeIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                      SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_WRITE_TLAB_STATS_SAFE);
    ASSERT(!inst->HasUsers());

    auto src1 = src[FIRST_OPERAND];
    auto src2 = src[SECOND_OPERAND];
    auto tmp = src[THIRD_OPERAND];

    ASSERT(tmp.IsValid());
    ASSERT(tmp != GetRegfile()->GetZeroReg());

    auto regs = GetCallerRegsMask(GetArch(), false) | GetCalleeRegsMask(GetArch(), false);
    auto vregs = GetCallerRegsMask(GetArch(), true);
    GetEncoder()->PushRegisters(regs, vregs);

    FillCallParams(src1, src2);

    auto id = RuntimeInterface::EntrypointId::WRITE_TLAB_STATS_NO_BRIDGE;
    MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id));
    GetEncoder()->EncodeLdr(tmp, false, entry);
    GetEncoder()->MakeCall(tmp);

    GetEncoder()->PopRegisters(regs, vregs);
}

void CodegenFastPath::EmitExpandU8ToU16Intrinsic([[maybe_unused]] IntrinsicInst *inst, Reg dst, SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_EXPAND_U8_TO_U16);
    GetEncoder()->EncodeUnsignedExtendBytesToShorts(dst, src[0]);
}

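/*
 * Atomic byte-wise OR. On AArch64 the fast single-instruction encoding is only available when
 * the ATOMICS CPU feature (LSE atomics) is enabled; otherwise the encoder has to emit a fallback
 * sequence.
 */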
void CodegenFastPath::EmitAtomicByteOrIntrinsic([[maybe_unused]] IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_ATOMIC_BYTE_OR);
    bool fastEncoding = true;
    if (GetArch() == Arch::AARCH64 && !g_options.IsCpuFeatureEnabled(CpuFeature::ATOMICS)) {
        fastEncoding = false;
    }
    GetEncoder()->EncodeAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND], fastEncoding);
}

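/*
 * SAVE/RESTORE_REGISTERS_EP preserve the register state around a call made from an entrypoint:
 * the full caller-saved set plus the live callee-saved registers are pushed or popped, minus the
 * registers that carry this intrinsic's inputs (the call parameters).
 */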
void CodegenFastPath::EmitSaveOrRestoreRegsEpIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                       [[maybe_unused]] SRCREGS src)
{
    RegMask calleeRegs = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
    // We need to save/restore all caller regs, since the caller doesn't care about registers at all (except parameters)
    auto callerRegs = RegMask(GetCallerRegsMask(GetArch(), false));
    auto callerVregs = RegMask(GetCallerRegsMask(GetArch(), true));
    for (auto &input : inst->GetInputs()) {
        calleeRegs.reset(input.GetInst()->GetDstReg());
        callerRegs.reset(input.GetInst()->GetDstReg());
    }
    if (GetTarget().SupportLinkReg()) {
        callerRegs.set(GetTarget().GetLinkReg().GetId());
    }
    if (!inst->HasUsers()) {
        callerRegs.set(GetTarget().GetReturnReg(GetPtrRegType()).GetId());
    }
    auto intrinsic = inst->GetIntrinsicId();
    if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_SAVE_REGISTERS_EP) {
        GetEncoder()->PushRegisters(callerRegs | calleeRegs, callerVregs);
    } else if (intrinsic == RuntimeInterface::IntrinsicId::INTRINSIC_RESTORE_REGISTERS_EP) {
        GetEncoder()->PopRegisters(callerRegs | calleeRegs, callerVregs);
    } else {
        UNREACHABLE();
    }
}

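/*
 * TAIL_CALL jumps to another fastpath, so the spilled caller-saved registers are restored first;
 * SLOW_PATH_ENTRY jumps to the slow-path bridge, so the remaining caller-saved registers are
 * saved into the frame instead (see CreateTailCall).
 */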
void CodegenFastPath::EmitTailCallIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst, [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_TAIL_CALL);
    CreateTailCall(inst, true);
}

void CodegenFastPath::EmitSlowPathEntryIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst,
                                                 [[maybe_unused]] SRCREGS src)
{
    ASSERT(inst->GetIntrinsicId() == RuntimeInterface::IntrinsicId::INTRINSIC_SLOW_PATH_ENTRY);
    CreateTailCall(inst, false);
}
}  // namespace ark::compiler