1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "aarch64_proepilog.h"
17 #include "aarch64_cg.h"
18 #include "cg_option.h"
19 #include "cgfunc.h"
20
21 #define PROEPILOG_DUMP CG_DEBUG_FUNC(cgFunc)
22 namespace maplebe {
23 using namespace maple;
24
25 namespace {
26 constexpr int32 kSoeChckOffset = 8192;
27
28 enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };
29
30 enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };
31
32 MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {{
33 /* push */
34 {0}, /* undef */
35 {
36 /* kRegTyInt */
37 MOP_xstr, /* single */
38 MOP_xstp, /* pair */
39 },
40 {
41 /* kRegTyFloat */
42 MOP_dstr, /* single */
43 MOP_dstp, /* pair */
44 },
45 },
46 {
47 /* pop */
48 {0}, /* undef */
49 {
50 /* kRegTyInt */
51 MOP_xldr, /* single */
52 MOP_xldp, /* pair */
53 },
54 {
55 /* kRegTyFloat */
56 MOP_dldr, /* single */
57 MOP_dldp, /* pair */
58 },
59 }};
60
61 inline void AppendInstructionTo(Insn &insn, CGFunc &func)
62 {
63 func.GetCurBB()->AppendInsn(insn);
64 }
65 } // namespace
66
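// Decide whether this function needs a full prologue/epilogue. Non-C source, varargs,
// VLA/alloca, any call instruction, callee-saved registers beyond LR, stack load/store,
// locals, a cold section, caller-sensitive attributes or stack-protector-all all force one.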
67 bool AArch64GenProEpilog::NeedProEpilog()
68 {
69 if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) {
70 return true;
71 } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) {
72 return true;
73 }
74
75 FOR_ALL_BB(bb, &cgFunc)
76 {
77 FOR_BB_INSNS_REV(insn, bb)
78 {
79 if (insn->IsMachineInstruction() && (insn->IsCall() || insn->IsSpecialCall())) {
80 return true;
81 }
82 }
83 }
84 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
85 const MapleVector<AArch64reg> ®sToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
86 ? aarchCGFunc.GetCalleeSavedRegs()
87 : aarchCGFunc.GetProEpilogSavedRegs();
88 size_t calleeSavedRegSize = kOneRegister;
89 CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot LR ?");
90 if (regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() ||
91 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 ||
92 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfCold() > 0 ||
93 cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) {
94 return true;
95 }
96 if (cgFunc.GetCG()->IsStackProtectorAll()) {
97 return true;
98 }
99 return false;
100 }
101
102 // Find an idle register; defaults to R30.
103 AArch64reg AArch64GenProEpilog::GetStackGuardRegister(const BB &bb) const
104 {
105 if (Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel0) {
106 return R30;
107 }
108 for (regno_t reg = R9; reg < R29; ++reg) {
109 if (bb.GetLiveInRegNO().count(reg) == 0 && reg != R16) {
110 if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg))) {
111 return static_cast<AArch64reg>(reg);
112 }
113 }
114 }
115 return R30;
116 }
117
118 // Find two idle registers; defaults are R30 and R16.
119 std::pair<AArch64reg, AArch64reg> AArch64GenProEpilog::GetStackGuardCheckRegister(const BB &bb) const
120 {
121 AArch64reg stGuardReg = R30;
122 AArch64reg stCheckReg = R16;
123 if (Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel0) {
124 return {stGuardReg, stCheckReg};
125 }
126 for (regno_t reg = R9; reg < R29; ++reg) {
127 if (bb.GetLiveOutRegNO().count(reg) == 0 && reg != R16) {
128 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg))) {
129 continue;
130 }
131 if (stGuardReg == R30) {
132 stGuardReg = static_cast<AArch64reg>(reg);
133 } else {
134 stCheckReg = static_cast<AArch64reg>(reg);
135 break;
136 }
137 }
138 }
139 return {stGuardReg, stCheckReg};
140 }
141
142 // RealStackFrameSize - [GR,16] - [VR,16] - 8 (from fp to stack protect area)
143 // We allocate 16 bytes for the stack protect area.
144 MemOperand *AArch64GenProEpilog::GetDownStack()
145 {
146 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
147 uint64 vArea = 0;
148 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
149 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
150 if (ml->GetSizeOfGRSaveArea() > 0) {
151 vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
152 }
153 if (ml->GetSizeOfVRSaveArea() > 0) {
154 vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
155 }
156 }
157
158 int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
159 if (useFP) {
160 stkSize -= static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass() +
161 cgFunc.GetFunction().GetFrameReseverdSlot());
162 }
163 int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
164 MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerBitSize());
165 if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
166 downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R16);
167 }
168 return downStk;
169 }
170
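// Load the current value of __stack_chk_guard into the given register. The load is
// built inside the cleared dummy BB so that callers can splice it wherever it is needed.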
171 RegOperand &AArch64GenProEpilog::GenStackGuard(AArch64reg regNO)
172 {
173 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
174 aarchCGFunc.GetDummyBB()->ClearInsns();
175
176 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
177
178 MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
179 GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
180 DEBUG_ASSERT(stkGuardSym != nullptr, "nullptr check");
181 StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
182 RegOperand &stAddrOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(regNO, GetPointerBitSize(), kRegTyInt);
183 aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);
184
185 MemOperand *guardMemOp = aarchCGFunc.CreateMemOperand(GetPointerBitSize(), stAddrOpnd,
186 aarchCGFunc.CreateImmOperand(0, k32BitSize, false), false);
187 MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
188 Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
189 insn.SetDoNotRemove(true);
190 cgFunc.GetCurBB()->AppendInsn(insn);
191 return stAddrOpnd;
192 }
193
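// Prologue half of stack protection: store the canary produced by GenStackGuard into the
// frame slot returned by GetDownStack, then splice the generated insns at the start of bb.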
194 void AArch64GenProEpilog::AddStackGuard(BB &bb)
195 {
196 if (!cgFunc.GetNeedStackProtect()) {
197 return;
198 }
199 BB *formerCurBB = cgFunc.GetCurBB();
200 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
201 auto &stAddrOpnd = GenStackGuard(GetStackGuardRegister(bb));
202 auto mOp = aarchCGFunc.PickStInsn(GetPointerBitSize(), PTY_u64);
203 Insn &tmpInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *GetDownStack());
204 tmpInsn.SetDoNotRemove(true);
205 cgFunc.GetCurBB()->AppendInsn(tmpInsn);
206
207 bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
208 cgFunc.SetCurBB(*formerCurBB);
209 }
210
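// Lazily create the no-return BB that calls __stack_chk_fail; it is shared by every
// stack-guard check emitted for this function.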
211 BB &AArch64GenProEpilog::GetOrGenStackGuardCheckFailBB(BB &bb)
212 {
213 if (stackChkFailBB != nullptr) {
214 return *stackChkFailBB;
215 }
216 BB *formerCurBB = cgFunc.GetCurBB();
217 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
218
219 // create new check fail BB
220 auto failLabel = aarchCGFunc.CreateLabel();
221 stackChkFailBB = aarchCGFunc.CreateNewBB(failLabel, bb.IsUnreachable(), BB::kBBNoReturn, bb.GetFrequency());
222 cgFunc.SetCurBB(*stackChkFailBB);
223 MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
224 GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail")));
225 DEBUG_ASSERT(failFunc != nullptr, "nullptr check");
226 ListOperand *srcOpnds = aarchCGFunc.CreateListOpnd(*cgFunc.GetFuncScopeAllocator());
227 Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds);
228 callInsn.SetDoNotRemove(true);
229 ASSERT_NOT_NULL(cgFunc.GetLastBB());
230 cgFunc.GetLastBB()->PrependBB(*stackChkFailBB);
231
232 cgFunc.SetCurBB(*formerCurBB);
233 return *stackChkFailBB;
234 }
235
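// Epilogue half of stack protection: reload the canary and the saved copy, XOR them and
// branch to the __stack_chk_fail BB on mismatch. bb is split so that the check runs
// before its trailing return, branch or tail call.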
236 void AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb)
237 {
238 if (!cgFunc.GetNeedStackProtect()) {
239 return;
240 }
241
242 BB *formerCurBB = cgFunc.GetCurBB();
243 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
244 auto [stGuardReg, stCheckReg] = GetStackGuardCheckRegister(bb);
245 auto &stAddrOpnd = GenStackGuard(stGuardReg);
246 RegOperand &checkOp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stCheckReg, GetPointerBitSize(), kRegTyInt);
247 auto mOp = aarchCGFunc.PickLdInsn(GetPointerBitSize(), PTY_u64);
248 Insn &newInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, checkOp, *GetDownStack());
249 newInsn.SetDoNotRemove(true);
250 cgFunc.GetCurBB()->AppendInsn(newInsn);
251
252 cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64);
253 auto &failBB = GetOrGenStackGuardCheckFailBB(bb);
254 aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failBB.GetLabIdx()), OP_brtrue, OP_ne, stAddrOpnd,
255 aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64, false);
256
257 auto chkBB = cgFunc.CreateNewBB(bb.GetLabIdx(), bb.IsUnreachable(), BB::kBBIf, bb.GetFrequency());
258 chkBB->AppendBBInsns(bb);
259 bb.ClearInsns();
260 auto *lastInsn = chkBB->GetLastMachineInsn();
261 if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
262 chkBB->RemoveInsn(*lastInsn);
263 bb.AppendInsn(*lastInsn);
264 }
265 if (&bb == cgFunc.GetFirstBB()) {
266 cgFunc.SetFirstBB(*chkBB);
267 }
268 chkBB->AppendBBInsns(*(cgFunc.GetCurBB()));
269 bb.PrependBB(*chkBB);
270 chkBB->PushBackSuccs(bb);
271 auto &originPreds = bb.GetPreds();
272 for (auto pred : originPreds) {
273 pred->ReplaceSucc(bb, *chkBB);
274 chkBB->PushBackPreds(*pred);
275 }
276 LabelIdx nextLabel = aarchCGFunc.CreateLabel();
277 bb.SetLabIdx(nextLabel);
278 cgFunc.SetLab2BBMap(nextLabel, bb);
279 bb.ClearPreds();
280 bb.PushBackPreds(*chkBB);
281 chkBB->PushBackSuccs(failBB);
282 failBB.PushBackPreds(*chkBB);
283
284 cgFunc.SetCurBB(*formerCurBB);
285 }
286
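// stp/ldp immediates are limited to [-512, 504]; when a callee-save slot lies farther
// away, materialize baseReg + offset with an add into baseRegNum and address the pair
// relative to that register instead.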
287 MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
288 uint32 bitLen, AArch64reg baseRegNum)
289 {
290 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
291 CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
292 OfstOperand *ofstOp = mo.GetOffsetImmediate();
293 int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
294 CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
295 CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
296 /*
297 * Offset adjustment due to FP/SP has already been done
298 * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
299 */
300 RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
301 ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
302 RegOperand *origBaseReg = mo.GetBaseRegister();
303 aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);
304
305 return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, 0);
306 }
307
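// Save one callee-saved register pair at [base + offset], e.g. "stp x19, x20, [sp, #32]"
// (a sketch only; the actual base register and offset come from the frame layout).
// Offsets beyond the stp range are first split through R16.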
308 void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
309 int32 offset)
310 {
311 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
312 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
313 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
314 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
315 Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
316
317 uint32 dataSize = GetPointerBitSize();
318 CHECK_FATAL(offset >= 0, "offset must >= 0");
319 if (offset > kStpLdpImm64UpperBound) {
320 o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
321 }
322 Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
323 // Mark that this instruction does not alias with any other memory instruction.
324 auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
325 memDefUse->SetIndependent();
326 pushInsn.SetReferenceOsts(memDefUse);
327 std::string comment = "SAVE CALLEE REGISTER PAIR";
328 pushInsn.SetComment(comment);
329 AppendInstructionTo(pushInsn, cgFunc);
330 }
331
332 void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
333 {
334 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
335 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
336 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
337 Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
338
339 MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
340 uint32 dataSize = GetPointerBitSize();
341 if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
342 o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
343 }
344
345 Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
346 // Mark that this instruction does not alias with any other memory instruction.
347 auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
348 memDefUse->SetIndependent();
349 pushInsn.SetReferenceOsts(memDefUse);
350 std::string comment = "SAVE CALLEE REGISTER";
351 pushInsn.SetComment(comment);
352 AppendInstructionTo(pushInsn, cgFunc);
353 }
354
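// Save or restore FP/LR when the FP-to-SP distance does not fit a single stp/ldp:
// either two str/ldr instructions with immediate offsets or, beyond the str/ldr range,
// register-offset addressing through R9.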
355 Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
356 AArch64reg reg1, RegType rty,
357 bool isAllocate)
358 {
359 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
360 MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
361 uint8 size;
362 if (CGOptions::IsArm64ilp32()) {
363 size = k8ByteSize;
364 } else {
365 size = GetPointerSize();
366 }
367 if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
368 mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
369 MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
370 if (storeFP) {
371 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
372 Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
373 AppendInstructionTo(insn1, cgFunc);
374 }
375 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
376 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
377 Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
378 AppendInstructionTo(insn2, cgFunc);
379 return insn2;
380 } else {
381 RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
382 ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
383 aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
384 RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
385 MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
386 if (storeFP) {
387 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
388 Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
389 AppendInstructionTo(insn1, cgFunc);
390 }
391 ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
392 aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
393 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
394 mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
395 Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
396 AppendInstructionTo(insn2, cgFunc);
397 return insn2;
398 }
399 }
400
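// After the SP adjustment, store FP/LR at [sp, #fpToSpDistance]; a pair store is used
// when both registers are saved and the offset fits, otherwise the split path above.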
401 Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
402 AArch64reg reg1, RegType rty)
403 {
404 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
405 CG *currCG = cgFunc.GetCG();
406 MOperator mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
407 ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
408 : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
409 Insn *allocInsn = nullptr;
410 if (fpToSpDistance > kStpLdpImm64UpperBound) {
411 allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
412 } else {
413 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
414 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
415 Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
416 allocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
417 ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
418 : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
419 AppendInstructionTo(*allocInsn, cgFunc);
420 }
421 if (currCG->InstrumentWithDebugTraceCall()) {
422 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
423 }
424 return *allocInsn;
425 }
426
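// Allocate the activation frame on the optimized (non-debug) path. A small frame is
// typically a single pre-indexed store such as
//   stp x29, x30, [sp, #-framesize]!
// (a sketch only; large frames fall back to an explicit sub of SP plus a separate store).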
427 void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
428 {
429 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
430 CG *currCG = cgFunc.GetCG();
431 if (currCG->GenerateVerboseCG()) {
432 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
433 }
434
435 Insn *ipoint = nullptr;
436 /*
437 * stackFrameSize includes the size of args to stack-pass
438 * if a function has neither VLA nor alloca.
439 */
440 int32 stackFrameSize =
441 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
442 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
443 /*
444 * ldp/stp's imm should be within -512 and 504;
445 * if stp's imm > 504, we fall back to the stp-sub version
446 */
447 bool useStpSub = false;
448 int64 offset = 0;
449 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
450 /*
451 * stack_frame_size == size of formal parameters + callee-saved (including FP/LR)
452 * + size of local vars
453 * + size of actuals
454 * (when passing more than 8 args, it is the caller's responsibility to
455 * allocate space for them; the size of actuals represents the largest such size in the function.)
456 */
457 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
458 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
459 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
460 ipoint = cgFunc.GetCurBB()->GetLastInsn();
461 } else {
462 if (stackFrameSize > kStpLdpImm64UpperBound) {
463 useStpSub = true;
464 offset = kOffset16MemPos;
465 stackFrameSize -= offset;
466 } else {
467 offset = stackFrameSize;
468 }
469 MOperator mOp = (storeFP || offset > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
470 : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
471 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
472 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
473 MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerBitSize());
474 ipoint = (storeFP || offset > kStrLdrPerPostUpperBound) ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
475 : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
476 AppendInstructionTo(*ipoint, cgFunc);
477 if (currCG->InstrumentWithDebugTraceCall()) {
478 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
479 }
480 }
481
482 ipoint->SetStackDef(true);
483
484 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
485 CHECK_FATAL(!useStpSub, "Invalid assumption");
486 ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
487 }
488
489 CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
490 if (useStpSub) {
491 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
492 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
493 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
494 ipoint = cgFunc.GetCurBB()->GetLastInsn();
495 aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
496 ipoint->SetStackDef(true);
497 }
498 }
499
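// Debug-friendly variant: keep the SP adjustment and the FP/LR store as separate
// instructions so that the frame setup stays easy to unwind and single-step.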
500 void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
501 {
502 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
503 CG *currCG = cgFunc.GetCG();
504 if (currCG->GenerateVerboseCG()) {
505 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
506 }
507
508 int32 stackFrameSize =
509 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
510 int64 fpToSpDistance =
511 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
512
513 Insn *ipoint = nullptr;
514
515 if (fpToSpDistance > 0) {
516 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
517 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
518 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
519 ipoint = cgFunc.GetCurBB()->GetLastInsn();
520 ipoint->SetStackDef(true);
521 ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
522 CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
523 } else {
524 bool useStpSub = false;
525
526 if (stackFrameSize > kStpLdpImm64UpperBound) {
527 useStpSub = true;
528 RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
529 ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
530 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
531 ipoint = cgFunc.GetCurBB()->GetLastInsn();
532 ipoint->SetStackDef(true);
533 } else {
534 MOperator mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
535 ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
536 : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
537 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
538 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
539 MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerBitSize());
540 ipoint = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
541 ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
542 : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
543 AppendInstructionTo(*ipoint, cgFunc);
544 ipoint->SetStackDef(true);
545 }
546
547 if (useStpSub) {
548 MOperator mOp =
549 storeFP ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
550 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
551 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
552 MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerBitSize());
553 ipoint = storeFP ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
554 : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
555 AppendInstructionTo(*ipoint, cgFunc);
556 }
557
558 if (currCG->InstrumentWithDebugTraceCall()) {
559 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
560 }
561 }
562 }
563
564 /*
565 * From AArch64 Reference Manual
566 * C1.3.3 Load/Store Addressing Mode
567 * ...
568 * When stack alignment checking is enabled by system software and
569 * the base register is the SP, the current stack pointer must be
570 * initially quadword aligned, that is aligned to 16 bytes. Misalignment
571 * generates a Stack Alignment fault. The offset does not have to
572 * be a multiple of 16 bytes unless the specific Load/Store instruction
573 * requires this. SP cannot be used as a register offset.
574 */
575 void AArch64GenProEpilog::GeneratePushRegs()
576 {
577 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
578 CG *currCG = cgFunc.GetCG();
579 const MapleVector<AArch64reg> ®sToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
580 ? aarchCGFunc.GetCalleeSavedRegs()
581 : aarchCGFunc.GetProEpilogSavedRegs();
582
583 CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");
584
585 AArch64reg intRegFirstHalf = kRinvalid;
586 AArch64reg fpRegFirstHalf = kRinvalid;
587
588 if (currCG->GenerateVerboseCG()) {
589 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
590 }
591
592 /*
593 * Even if we don't use RFP, since we push a pair of registers in one instruction
594 * and the stack needs to be aligned on a 16-byte boundary, push RFP as well if the function has a call.
595 * Make sure this is reflected when computing callee_saved_regs.size()
596 */
597 if (!currCG->GenerateDebugFriendlyCode()) {
598 AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
599 } else {
600 AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
601 }
602
603 if (useFP) {
604 if (currCG->GenerateVerboseCG()) {
605 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
606 }
607 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
608 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
609 int64 fpToSpDistance =
610 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
611 bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
612 if ((fpToSpDistance > 0) || isLmbc) {
613 Operand *immOpnd;
614 if (isLmbc) {
615 int32 size =
616 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
617 immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
618 } else {
619 immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
620 }
621 if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
622 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
623 }
624 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
625 } else {
626 aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
627 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
628 }
629 }
630
631 MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
632 // skip the RFP & RLR
633 if (*it == RFP) {
634 ++it;
635 }
636 CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
637 ++it;
638
639 // callee save offset
640 // fp - callee save base = RealStackFrameSize - [GR,16] - [VR,16] - [cold,16] - [callee] - stack protect + 16(fplr)
641 AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
642 int32 offset = 0;
643 if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
644 offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
645 memLayout->GetSizeOfLocals());
646 } else {
647 offset = (static_cast<int32>(memLayout->RealStackFrameSize()) -
648 static_cast<int32>(static_cast<int32>(aarchCGFunc.SizeOfCalleeSaved()) -
649 (kDivide2 * static_cast<int32>(kAarch64IntregBytelen))) - /* for FP/LR */
650 static_cast<int32>(memLayout->SizeOfArgsToStackPass()) -
651 static_cast<int32>(cgFunc.GetFunction().GetFrameReseverdSlot()));
652 }
653
654 if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
655 offset -= kAarch64StackPtrAlignment;
656 }
657
658 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) &&
659 cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
660 /* GR/VR save areas are above the callee save area */
661 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
662 auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
663 RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
664 offset -= saveareasize;
665 }
666 offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));
667
668 std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
669 for (; it != regsToSave.end(); ++it) {
670 AArch64reg reg = *it;
671 // skip the RFP
672 if (reg == RFP) {
673 continue;
674 }
675 CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
676 RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
677 AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
678 if (firstHalf == kRinvalid) {
679 /* remember it */
680 firstHalf = reg;
681 } else {
682 uint16 reg0NO = (regType == kRegTyInt) ?
683 static_cast<uint16>(firstHalf - 1) : static_cast<uint16>(firstHalf - V8 + 72);
684 uint16 reg1NO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
685 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg0NO, offset));
686 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg1NO, offset + k8ByteSize));
687 AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
688 AArch64isa::GetNextOffsetCalleeSaved(offset);
689 firstHalf = kRinvalid;
690 }
691 }
692
693 if (intRegFirstHalf != kRinvalid) {
694 uint16 regNO = static_cast<uint16>(intRegFirstHalf - 1);
695 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
696 AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
697 AArch64isa::GetNextOffsetCalleeSaved(offset);
698 }
699
700 if (fpRegFirstHalf != kRinvalid) {
701 uint16 regNO = static_cast<uint16>(fpRegFirstHalf - V8 + 72);
702 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
703 AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
704 AArch64isa::GetNextOffsetCalleeSaved(offset);
705 }
706
707 const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
708 if (emitMemoryManager.codeSpace != nullptr) {
709 emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
710 int64 fpToCurSpDistance =
711 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
712 int32 fp2PrevFrameSPDelta =
713 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize()) -
714 fpToCurSpDistance;
715 emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
716 }
717 }
718
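// For C varargs functions, spill the unnamed argument registers (x0-x7 and, unless
// general-register-only code is requested, q0-q7) into the GR/VR save areas so that
// va_arg can find them on the stack.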
719 void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs()
720 {
721 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
722 uint32 offset;
723 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
724 AArch64MemLayout *memlayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
725 uint8 size;
726 if (CGOptions::IsArm64ilp32()) {
727 size = k8ByteSize;
728 } else {
729 size = GetPointerSize();
730 }
731 uint32 dataSizeBits = size * kBitsPerByte;
732 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
733 offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */
734 } else {
735 offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()) + memlayout->SizeOfArgsToStackPass();
736 }
737 if ((memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) != 0) {
738 offset += size; /* End of area should be aligned. Hole between VR and GR area */
739 }
740 CHECK_FATAL(size != 0, "Divisor cannot be zero");
741 uint32 startRegno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
742 DEBUG_ASSERT(startRegno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
743 for (uint32 i = startRegno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
744 uint32 tmpOffset = 0;
745 if (CGOptions::IsBigEndian()) {
746 if ((dataSizeBits >> k8BitShift) < k8BitSize) {
747 tmpOffset += k8BitSize - (dataSizeBits >> k8BitShift);
748 }
749 }
750 Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
751 RegOperand ® =
752 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyInt);
753 Insn &inst =
754 cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, *stackLoc);
755 cgFunc.GetCurBB()->AppendInsn(inst);
756 offset += size;
757 }
758 if (!CGOptions::UseGeneralRegOnly()) {
759 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
760 offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());
761 } else {
762 offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc()) + memlayout->SizeOfArgsToStackPass();
763 }
764 startRegno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
765 DEBUG_ASSERT(startRegno <= k8BitSize, "Incorrect starting VR regno for VR Save Area");
766 dataSizeBits = k128BitSize;
767 for (uint32 i = startRegno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
768 uint32 tmpOffset = 0;
769 if (CGOptions::IsBigEndian()) {
770 if ((dataSizeBits >> k8BitShift) < k16BitSize) {
771 tmpOffset += k16BitSize - (dataSizeBits >> k8BitShift);
772 }
773 }
774 Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
775 RegOperand ® = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i),
776 dataSizeBits, kRegTyFloat);
777 Insn &inst =
778 cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_f128), reg, *stackLoc);
779 cgFunc.GetCurBB()->AppendInsn(inst);
780 offset += (size * k2BitSize);
781 }
782 }
783 }
784 }
785
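// Stack-overflow probe: "sub x16, sp, #offset; ldr wzr, [x16]" faults early if the
// stack has grown into the guard region.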
786 void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset)
787 {
788 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
789 CG *currCG = cgFunc.GetCG();
790 /* sub x16, sp, #0x2000 */
791 auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty);
792 auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty);
793 auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true);
794 aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64);
795
796 /* ldr wzr, [x16] */
797 auto &wzr = cgFunc.GetZeroOpnd(k32BitSize);
798 auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize);
799 auto &soeInstr = cgFunc.GetInsnBuilder()->BuildInsn(MOP_wldr, wzr, refX16);
800 if (currCG->GenerateVerboseCG()) {
801 soeInstr.SetComment("soerror");
802 }
803 soeInstr.SetDoNotRemove(true);
804 AppendInstructionTo(soeInstr, cgFunc);
805 }
806
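// Build the whole prologue inside the dummy BB (optional .loc, callee saves or a bare
// SP adjustment, vararg spills, optional stack-overflow check) and splice it at the
// beginning of bb.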
807 void AArch64GenProEpilog::GenerateProlog(BB &bb)
808 {
809 if (!cgFunc.GetHasProEpilogue()) {
810 return;
811 }
812 if (PROEPILOG_DUMP) {
813 LogInfo::MapleLogger() << "generate prolog at BB " << bb.GetId() << "\n";
814 }
815
816 AddStackGuard(bb);
817 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
818 CG *currCG = cgFunc.GetCG();
819 BB *formerCurBB = cgFunc.GetCurBB();
820 aarchCGFunc.GetDummyBB()->ClearInsns();
821 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
822
823 // insert .loc for function
824 if (currCG->GetCGOptions().WithLoc() && (!currCG->GetMIRModule()->IsCModule())) {
825 MIRFunction *func = &cgFunc.GetFunction();
826 MIRSymbol *fSym = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx());
827 if (currCG->GetCGOptions().WithSrc()) {
828 uint32 tempmaxsize = static_cast<uint32>(currCG->GetMIRModule()->GetSrcFileInfo().size());
829 CHECK_FATAL(tempmaxsize >= 1, "value overflow");
830 uint32 endfilenum = currCG->GetMIRModule()->GetSrcFileInfo()[tempmaxsize - 1].second;
831 if (fSym->GetSrcPosition().FileNum() != 0 && fSym->GetSrcPosition().FileNum() <= endfilenum) {
832 int64_t lineNum = fSym->GetSrcPosition().LineNum();
833 if (lineNum == 0) {
834 if (cgFunc.GetFunction().GetAttr(FUNCATTR_native)) {
835 lineNum = 0xffffe;
836 } else {
837 lineNum = 0xffffd;
838 }
839 }
840 Insn &loc =
841 cgFunc.BuildLocInsn(fSym->GetSrcPosition().FileNum(), lineNum, fSym->GetSrcPosition().Column());
842 cgFunc.GetCurBB()->AppendInsn(loc);
843 }
844 } else {
845 cgFunc.GetCurBB()->AppendInsn(cgFunc.BuildLocInsn(1, fSym->GetSrcPosition().MplLineNum(), 0));
846 }
847 }
848
849 const MapleVector<AArch64reg> ®sToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
850 ? aarchCGFunc.GetCalleeSavedRegs()
851 : aarchCGFunc.GetProEpilogSavedRegs();
852 if (!regsToSave.empty()) {
853 /*
854 * Among other things, push the FP & LR pair.
855 * FP/LR are added to the callee-saved list in AllocateRegisters()
856 * We add them to the callee-saved list regardless of UseFP() being true/false.
857 * Activation Frame is allocated as part of pushing FP/LR pair
858 */
859 GeneratePushRegs();
860 } else {
861 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
862 int32 stackFrameSize =
863 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
864 if (stackFrameSize > 0) {
865 if (currCG->GenerateVerboseCG()) {
866 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
867 }
868 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
869 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
870 cgFunc.GetCurBB()->GetLastInsn()->SetStackDef(true);
871 }
872 if (currCG->GenerateVerboseCG()) {
873 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
874 }
875 if (useFP) {
876 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
877 bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
878 int64 fpToSpDistance =
879 cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
880 if ((fpToSpDistance > 0) || isLmbc) {
881 Operand *immOpnd;
882 if (isLmbc) {
883 int32 size = static_cast<int32>(
884 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
885 immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
886 } else {
887 immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
888 }
889 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
890 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
891 } else {
892 aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
893 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
894 }
895 }
896 }
897 GeneratePushUnnamedVarargRegs();
898 if (currCG->DoCheckSOE()) {
899 AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset);
900 }
901 bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
902 cgFunc.SetCurBB(*formerCurBB);
903 }
904
905 void AArch64GenProEpilog::GenerateRet(BB &bb)
906 {
907 auto *lastInsn = bb.GetLastMachineInsn();
908 if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
909 return;
910 }
911 /* Insert the loc insn before the ret insn
912 so that a breakpoint can stop at the line of the block's closing brace. */
913 SrcPosition pos = cgFunc.GetFunction().GetScope()->GetRangeHigh();
914 if (cgFunc.GetCG()->GetCGOptions().WithDwarf() && cgFunc.GetWithSrc() && cgFunc.GetMirModule().IsCModule() &&
915 pos.FileNum() != 0) {
916 bb.AppendInsn(cgFunc.BuildLocInsn(pos.FileNum(), pos.LineNum(), pos.Column()));
917 }
918 bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
919 }
920
921 /*
922 * If all the preds of exitBB performed tail-call optimization (replacing blr/bl with br/b), return true and do not create a ret insn.
923 * Otherwise, return false, create the ret insn.
924 */
925 bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB)
926 {
927 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
928 if (cgFunc.GetMirModule().IsCModule() &&
929 (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) {
930 return false;
931 }
932 const Insn *lastInsn = exitBB.GetLastInsn();
933 while (lastInsn != nullptr && (!lastInsn->IsMachineInstruction() || lastInsn->IsPseudo())) {
934 lastInsn = lastInsn->GetPrev();
935 }
936 bool isTailCall = lastInsn == nullptr ? false : lastInsn->IsTailCall();
937 return isTailCall;
938 }
939
940 void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
941 {
942 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
943 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
944 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
945 Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
946 MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
947 uint32 dataSize = GetPointerBitSize();
948 if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
949 o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
950 }
951
952 Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
953 // Mark that this instruction does not alias with any other memory instruction.
954 auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
955 memDefUse->SetIndependent();
956 popInsn.SetReferenceOsts(memDefUse);
957 popInsn.SetComment("RESTORE");
958 cgFunc.GetCurBB()->AppendInsn(popInsn);
959 }
960
961 void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
962 int32 offset)
963 {
964 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
965 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
966 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
967 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
968 Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
969
970 uint32 dataSize = GetPointerBitSize();
971 CHECK_FATAL(offset >= 0, "offset must >= 0");
972 if (offset > kStpLdpImm64UpperBound) {
973 o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
974 }
975 Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
976 // Mark that this instruction does not alias with any other memory instruction.
977 auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
978 memDefUse->SetIndependent();
979 popInsn.SetReferenceOsts(memDefUse);
980 popInsn.SetComment("RESTORE RESTORE");
981 cgFunc.GetCurBB()->AppendInsn(popInsn);
982 }
983
984 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
985 {
986 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
987 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
988 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
989 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
990 int32 stackFrameSize =
991 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
992 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
993 /*
994 * ldp/stp's imm should be within -512 and 504;
995 * if ldp's imm > 504, we fall back to the ldp-add version
996 */
997 bool useLdpAdd = false;
998 int32 offset = 0;
999
1000 Operand *o2 = nullptr;
1001 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1002 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
1003 } else {
1004 if (stackFrameSize > kStpLdpImm64UpperBound) {
1005 useLdpAdd = true;
1006 offset = kOffset16MemPos;
1007 stackFrameSize -= offset;
1008 } else {
1009 offset = stackFrameSize;
1010 }
1011 o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerBitSize());
1012 }
1013
1014 if (useLdpAdd) {
1015 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1016 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1017 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1018 }
1019
1020 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1021 CHECK_FATAL(!useLdpAdd, "Invalid assumption");
1022 if (fpToSpDistance > kStpLdpImm64UpperBound) {
1023 (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1024 } else {
1025 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1026 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1027 }
1028 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1029 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1030 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1031 } else {
1032 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1033 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1034 }
1035 cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
1036 }
1037
1038 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1039 {
1040 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1041 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1042 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
1043 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
1044 int32 stackFrameSize =
1045 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1046 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1047 /*
1048 * ldp/stp's imm should be within -512 and 504;
1049 * if ldp's imm > 504, we fall back to the ldp-add version
1050 */
1051 bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
1052 if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
1053 int32 lmbcOffset = 0;
1054 if (!isLmbc) {
1055 stackFrameSize -= fpToSpDistance;
1056 } else {
1057 lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
1058 }
1059 if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
1060 Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerBitSize());
1061 mOp = storeFP ? pushPopOps[kRegsPopOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1062 Insn &deallocInsn = storeFP ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
1063 : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
1064 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1065 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1066 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1067 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1068 } else {
1069 MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerBitSize());
1070 mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
1071 : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1072 Insn &deallocInsn = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
1073 ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
1074 : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
1075 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1076 }
1077 } else {
1078 Operand *o2 =
1079 aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
1080 if (fpToSpDistance > kStpLdpImm64UpperBound) {
1081 (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1082 } else {
1083 mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
1084 : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1085 Insn &deallocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
1086 ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
1087 : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
1088 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1089 }
1090
1091 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1092 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1093 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1094 }
1095 cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
1096 }
1097
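// Mirror of GeneratePushRegs: restore callee-saved pairs and singles from the same
// offsets, then deallocate the frame and restore FP/LR.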
1098 void AArch64GenProEpilog::GeneratePopRegs()
1099 {
1100 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1101 CG *currCG = cgFunc.GetCG();
1102
1103 const MapleVector<AArch64reg> ®sToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
1104 ? aarchCGFunc.GetCalleeSavedRegs()
1105 : aarchCGFunc.GetProEpilogSavedRegs();
1106
1107 CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");
1108
1109 AArch64reg intRegFirstHalf = kRinvalid;
1110 AArch64reg fpRegFirstHalf = kRinvalid;
1111
1112 if (currCG->GenerateVerboseCG()) {
1113 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
1114 }
1115
1116 MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
1117 /*
1118 * Even if we don't use FP, since we push a pair of registers
1119 * in a single instruction (i.e., stp) and the stack needs to be aligned
1120 * on a 16-byte boundary, push FP as well if the function has a call.
1121 * Make sure this is reflected when computing calleeSavedRegs.size()
1122 * skip the first two registers
1123 */
1124 // skip the RFP & RLR
1125 if (*it == RFP) {
1126 ++it;
1127 }
1128 CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1129 ++it;
1130
1131 AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1132 int32 offset;
1133 if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1134 offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1135 memLayout->GetSizeOfLocals());
1136 } else {
1137 offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
1138 (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kAarch64IntregBytelen))) -
1139 memLayout->SizeOfArgsToStackPass() -
1140 cgFunc.GetFunction().GetFrameReseverdSlot();
1141 }
1142
1143 if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1144 offset -= kAarch64StackPtrAlignment;
1145 }
1146
1147 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1148 /* GR/VR save areas are above the callee save area */
1149 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1150 auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1151 RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1152 offset -= saveareasize;
1153 }
1154
1155 offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));
1156
1157 /*
1158 * We are using a cleared dummy block, so insertPoint cannot be ret;
1159 * see GenerateEpilog()
1160 */
1161 for (; it != regsToRestore.end(); ++it) {
1162 AArch64reg reg = *it;
1163 if (reg == RFP) {
1164 continue;
1165 }
1166 CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1167
1168 RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1169 AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1170 if (firstHalf == kRinvalid) {
1171 /* remember it */
1172 firstHalf = reg;
1173 } else {
1174 /* flush the pair */
1175 AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
1176 AArch64isa::GetNextOffsetCalleeSaved(offset);
1177 firstHalf = kRinvalid;
1178 }
1179 }
1180
1181 if (intRegFirstHalf != kRinvalid) {
1182 AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1183 AArch64isa::GetNextOffsetCalleeSaved(offset);
1184 }
1185
1186 if (fpRegFirstHalf != kRinvalid) {
1187 AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1188 AArch64isa::GetNextOffsetCalleeSaved(offset);
1189 }
1190
1191 if (!currCG->GenerateDebugFriendlyCode()) {
1192 AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
1193 } else {
1194 AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
1195 }
1196 }
1197
1198 void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol)
1199 {
1200 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1201 Operand &targetOpnd = aarchCGFunc.GetOrCreateFuncNameOpnd(funcSymbol);
1202 cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1203 }
1204
1205 void AArch64GenProEpilog::AppendBBtoEpilog(BB &epilogBB, BB &newBB)
1206 {
1207 FOR_BB_INSNS(insn, &newBB)
1208 {
1209 insn->SetDoNotRemove(true);
1210 }
1211 auto *lastInsn = epilogBB.GetLastMachineInsn();
1212 if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
1213 epilogBB.RemoveInsn(*lastInsn);
1214 epilogBB.AppendBBInsns(newBB);
1215 epilogBB.AppendInsn(*lastInsn);
1216 } else {
1217 epilogBB.AppendBBInsns(newBB);
1218 }
1219 }
1220
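// Build the epilogue inside the dummy BB (stack-guard check, SP restore from FP for
// VLA/alloca, register restores or a bare SP adjustment) and append it to bb, keeping
// any trailing branch or tail call as the last instruction.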
1221 void AArch64GenProEpilog::GenerateEpilog(BB &bb)
1222 {
1223 if (!cgFunc.GetHasProEpilogue()) {
1224 return;
1225 }
1226 if (PROEPILOG_DUMP) {
1227 LogInfo::MapleLogger() << "generate epilog at BB " << bb.GetId() << "\n";
1228 }
1229
1230 /* generate stack protected instruction */
1231 GenStackGuardCheckInsn(bb);
1232
1233 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1234 CG *currCG = cgFunc.GetCG();
1235 BB *formerCurBB = cgFunc.GetCurBB();
1236 aarchCGFunc.GetDummyBB()->ClearInsns();
1237 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1238
1239 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1240 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1241
1242 if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1243 aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
1244 }
1245
1246 const MapleVector<AArch64reg> ®sToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
1247 ? aarchCGFunc.GetCalleeSavedRegs()
1248 : aarchCGFunc.GetProEpilogSavedRegs();
1249 if (!regsToSave.empty()) {
1250 GeneratePopRegs();
1251 } else {
1252 auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
1253 if (stackFrameSize > 0) {
1254 if (currCG->GenerateVerboseCG()) {
1255 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
1256 }
1257
1258 if (cgFunc.HasVLAOrAlloca()) {
1259 auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
1260 stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
1261 }
1262
1263 if (stackFrameSize > 0) {
1264 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1265 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1266 aarchCGFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
1267 }
1268 }
1269 }
1270
1271 if (currCG->InstrumentWithDebugTraceCall()) {
1272 AppendJump(*(currCG->GetDebugTraceExitFunction()));
1273 }
1274
1275 AppendBBtoEpilog(bb, *cgFunc.GetCurBB());
1276 if (cgFunc.GetCurBB()->GetHasCfi()) {
1277 bb.SetHasCfi();
1278 }
1279
1280 cgFunc.SetCurBB(*formerCurBB);
1281 }
1282
1283 void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb)
1284 {
1285 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1286 CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!");
1287 if (cgFunc.GetExitBB(0)->IsUnreachable()) {
1288 /* if exitbb is unreachable then exitbb can not be generated */
1289 GenerateEpilog(bb);
1290 } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */
1291 LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx());
1292 bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1293 }
1294 }
1295
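// Driver: record exit BBs if none were collected, decide whether a prologue/epilogue is
// needed, emit the prologue at the first BB, an epilogue at every exit BB, and a ret
// where one is still required.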
1296 void AArch64GenProEpilog::Run()
1297 {
1298 CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
1299 "The first statement should be a label");
1300 // update exitBB
1301 if (cgFunc.IsExitBBsVecEmpty()) {
1302 if (cgFunc.GetCleanupBB() != nullptr && cgFunc.GetCleanupBB()->GetPrev() != nullptr) {
1303 cgFunc.PushBackExitBBsVec(*cgFunc.GetCleanupBB()->GetPrev());
1304 } else if (!cgFunc.GetMirModule().IsCModule()) {
1305 cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
1306 }
1307 }
1308 cgFunc.SetHasProEpilogue(NeedProEpilog());
1309
1310 // not run proepilog analysis or analysis failed, insert proepilog at firstBB and exitBB
1311 GenerateProlog(*(cgFunc.GetFirstBB()));
1312 for (auto *exitBB : cgFunc.GetExitBBsVec()) {
1313 GenerateEpilog(*exitBB);
1314 }
1315
1316 // insert ret insn for exitBB
1317 for (auto *exitBB : cgFunc.GetExitBBsVec()) {
1318 if (cgFunc.GetHasProEpilogue() || (!exitBB->GetPreds().empty() && !TestPredsOfRetBB(*exitBB))) {
1319 GenerateRet(*exitBB);
1320 }
1321 }
1322 }
1323 } /* namespace maplebe */
1324