/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_proepilog.h"
#include "aarch64_cg.h"
#include "cg_option.h"
#include "cgfunc.h"

#define PROEPILOG_DUMP CG_DEBUG_FUNC(cgFunc)
namespace maplebe {
using namespace maple;

namespace {
constexpr int32 kSoeChckOffset = 8192;

enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };

enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };

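// pushPopOps[op][regType][form] maps {push, pop} x {int, float} x {single, pair}
// to the str/stp/ldr/ldp MOperator used to save or restore callee-saved registers.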
MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {
    {
        /* push */
        {0}, /* undef */
        {
            /* kRegTyInt */
            MOP_xstr, /* single */
            MOP_xstp, /* pair   */
        },
        {
            /* kRegTyFloat */
            MOP_dstr, /* single */
            MOP_dstp, /* pair   */
        },
    },
    {
        /* pop */
        {0}, /* undef */
        {
            /* kRegTyInt */
            MOP_xldr, /* single */
            MOP_xldp, /* pair   */
        },
        {
            /* kRegTyFloat */
            MOP_dldr, /* single */
            MOP_dldp, /* pair   */
        },
    }};

inline void AppendInstructionTo(Insn &insn, CGFunc &func)
{
    func.GetCurBB()->AppendInsn(insn);
}
}  // namespace

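// Returns true when the function needs a full prologue/epilogue: non-C source
// languages, varargs, VLA/alloca, any call, callee-saved registers beyond LR,
// stack loads/stores, non-empty locals or cold section, the caller-sensitive
// attribute, or --stack-protector-all.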
bool AArch64GenProEpilog::NeedProEpilog()
{
    if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) {
        return true;
    } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) {
        return true;
    }

    FOR_ALL_BB(bb, &cgFunc)
    {
        FOR_BB_INSNS_REV(insn, bb)
        {
            if (insn->IsMachineInstruction() && (insn->IsCall() || insn->IsSpecialCall())) {
                return true;
            }
        }
    }
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    const MapleVector<AArch64reg> &regsToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                       ? aarchCGFunc.GetCalleeSavedRegs()
                                                       : aarchCGFunc.GetProEpilogSavedRegs();
    size_t calleeSavedRegSize = kOneRegister;
    CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot LR ?");
    if (regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() ||
        static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 ||
        static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfCold() > 0 ||
        cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) {
        return true;
    }
    if (cgFunc.GetCG()->IsStackProtectorAll()) {
        return true;
    }
    return false;
}

// Find an idle register; the default is R30.
AArch64reg AArch64GenProEpilog::GetStackGuardRegister(const BB &bb) const
{
    if (Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel0) {
        return R30;
    }
    for (regno_t reg = R9; reg < R29; ++reg) {
        if (bb.GetLiveInRegNO().count(reg) == 0 && reg != R16) {
            if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg))) {
                return static_cast<AArch64reg>(reg);
            }
        }
    }
    return R30;
}

// Find two idle registers; the defaults are R30 and R16.
std::pair<AArch64reg, AArch64reg> AArch64GenProEpilog::GetStackGuardCheckRegister(const BB &bb) const
{
    AArch64reg stGuardReg = R30;
    AArch64reg stCheckReg = R16;
    if (Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel0) {
        return {stGuardReg, stCheckReg};
    }
    for (regno_t reg = R9; reg < R29; ++reg) {
        if (bb.GetLiveOutRegNO().count(reg) == 0 && reg != R16) {
            if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg))) {
                continue;
            }
            if (stGuardReg == R30) {
                stGuardReg = static_cast<AArch64reg>(reg);
            } else {
                stCheckReg = static_cast<AArch64reg>(reg);
                break;
            }
        }
    }
    return {stGuardReg, stCheckReg};
}

// RealStackFrameSize - [GR,16] - [VR,16] - 8 (from fp to the stack protect area);
// 16 bytes are allocated for the stack protect area.
MemOperand *AArch64GenProEpilog::GetDownStack()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    uint64 vArea = 0;
    if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
        AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
        if (ml->GetSizeOfGRSaveArea() > 0) {
            vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
        }
        if (ml->GetSizeOfVRSaveArea() > 0) {
            vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
        }
    }

    int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    if (useFP) {
        stkSize -= static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass() +
                                      cgFunc.GetFunction().GetFrameReseverdSlot());
    }
    int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
    MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerBitSize());
    if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
        downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R16);
    }
    return downStk;
}

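// Load the current value of __stack_chk_guard into the physical register regNO.
// The instructions are built in the dummy BB, which the caller later splices
// into the target BB.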
RegOperand &AArch64GenProEpilog::GenStackGuard(AArch64reg regNO)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    aarchCGFunc.GetDummyBB()->ClearInsns();

    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
        GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
    DEBUG_ASSERT(stkGuardSym != nullptr, "nullptr check");
    StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
    RegOperand &stAddrOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(regNO, GetPointerBitSize(), kRegTyInt);
    aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);

    MemOperand *guardMemOp = aarchCGFunc.CreateMemOperand(GetPointerBitSize(), stAddrOpnd,
                                                          aarchCGFunc.CreateImmOperand(0, k32BitSize, false), false);
    MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
    Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
    insn.SetDoNotRemove(true);
    cgFunc.GetCurBB()->AppendInsn(insn);
    return stAddrOpnd;
}

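// Prologue side of stack protection: store the guard value into the frame slot
// computed by GetDownStack() at the beginning of bb.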
void AArch64GenProEpilog::AddStackGuard(BB &bb)
{
    if (!cgFunc.GetNeedStackProtect()) {
        return;
    }
    BB *formerCurBB = cgFunc.GetCurBB();
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    auto &stAddrOpnd = GenStackGuard(GetStackGuardRegister(bb));
    auto mOp = aarchCGFunc.PickStInsn(GetPointerBitSize(), PTY_u64);
    Insn &tmpInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *GetDownStack());
    tmpInsn.SetDoNotRemove(true);
    cgFunc.GetCurBB()->AppendInsn(tmpInsn);

    bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
    cgFunc.SetCurBB(*formerCurBB);
}

BB &AArch64GenProEpilog::GetOrGenStackGuardCheckFailBB(BB &bb)
{
    if (stackChkFailBB != nullptr) {
        return *stackChkFailBB;
    }
    BB *formerCurBB = cgFunc.GetCurBB();
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);

    // create new check fail BB
    auto failLabel = aarchCGFunc.CreateLabel();
    stackChkFailBB = aarchCGFunc.CreateNewBB(failLabel, bb.IsUnreachable(), BB::kBBNoReturn, bb.GetFrequency());
    cgFunc.SetCurBB(*stackChkFailBB);
    MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
        GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail")));
    DEBUG_ASSERT(failFunc != nullptr, "nullptr check");
    ListOperand *srcOpnds = aarchCGFunc.CreateListOpnd(*cgFunc.GetFuncScopeAllocator());
    Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds);
    callInsn.SetDoNotRemove(true);
    ASSERT_NOT_NULL(cgFunc.GetLastBB());
    cgFunc.GetLastBB()->PrependBB(*stackChkFailBB);

    cgFunc.SetCurBB(*formerCurBB);
    return *stackChkFailBB;
}

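// Epilogue side of stack protection: reload the saved canary from the frame,
// XOR it with the current guard value, and branch to the shared
// __stack_chk_fail BB if they differ. bb is split so the check runs before
// its return or tail call.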
void AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb)
{
    if (!cgFunc.GetNeedStackProtect()) {
        return;
    }

    BB *formerCurBB = cgFunc.GetCurBB();
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    auto [stGuardReg, stCheckReg] = GetStackGuardCheckRegister(bb);
    auto &stAddrOpnd = GenStackGuard(stGuardReg);
    RegOperand &checkOp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stCheckReg, GetPointerBitSize(), kRegTyInt);
    auto mOp = aarchCGFunc.PickLdInsn(GetPointerBitSize(), PTY_u64);
    Insn &newInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, checkOp, *GetDownStack());
    newInsn.SetDoNotRemove(true);
    cgFunc.GetCurBB()->AppendInsn(newInsn);

    cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64);
    auto &failBB = GetOrGenStackGuardCheckFailBB(bb);
    aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failBB.GetLabIdx()), OP_brtrue, OP_ne, stAddrOpnd,
                               aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64, false);

    auto chkBB = cgFunc.CreateNewBB(bb.GetLabIdx(), bb.IsUnreachable(), BB::kBBIf, bb.GetFrequency());
    chkBB->AppendBBInsns(bb);
    bb.ClearInsns();
    auto *lastInsn = chkBB->GetLastMachineInsn();
    if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
        chkBB->RemoveInsn(*lastInsn);
        bb.AppendInsn(*lastInsn);
    }
    if (&bb == cgFunc.GetFirstBB()) {
        cgFunc.SetFirstBB(*chkBB);
    }
    chkBB->AppendBBInsns(*(cgFunc.GetCurBB()));
    bb.PrependBB(*chkBB);
    chkBB->PushBackSuccs(bb);
    auto &originPreds = bb.GetPreds();
    for (auto pred : originPreds) {
        pred->ReplaceSucc(bb, *chkBB);
        chkBB->PushBackPreds(*pred);
    }
    LabelIdx nextLabel = aarchCGFunc.CreateLabel();
    bb.SetLabIdx(nextLabel);
    cgFunc.SetLab2BBMap(nextLabel, bb);
    bb.ClearPreds();
    bb.PushBackPreds(*chkBB);
    chkBB->PushBackSuccs(failBB);
    failBB.PushBackPreds(*chkBB);

    cgFunc.SetCurBB(*formerCurBB);
}

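// stp/ldp immediates are limited to [-512, 504], so for larger callee-save
// offsets the offset is first added into baseRegNum and a zero-offset memory
// operand based on that register is returned.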
MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
                                                                                   uint32 bitLen, AArch64reg baseRegNum)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
    OfstOperand *ofstOp = mo.GetOffsetImmediate();
    int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
    CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
    CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
    /*
     * Offset adjustment due to FP/SP has already been done
     * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
     */
    RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
    ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
    RegOperand *origBaseReg = mo.GetBaseRegister();
    aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);

    return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, 0);
}

void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
                                                    int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    uint32 dataSize = GetPointerBitSize();
    CHECK_FATAL(offset >= 0, "offset must >= 0");
    if (offset > kStpLdpImm64UpperBound) {
        o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
    }
    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
    // Mark that the instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    pushInsn.SetReferenceOsts(memDefUse);
    std::string comment = "SAVE CALLEE REGISTER PAIR";
    pushInsn.SetComment(comment);
    AppendInstructionTo(pushInsn, cgFunc);
}

void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
    Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
        o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
    }

    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
    // Mark that the instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    pushInsn.SetReferenceOsts(memDefUse);
    std::string comment = "SAVE CALLEE REGISTER";
    pushInsn.SetComment(comment);
    AppendInstructionTo(pushInsn, cgFunc);
}

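// Saves or restores FP/LR around a large outgoing-args area. If fpToSpDistance
// fits a str/ldr immediate, two single memory ops are used; otherwise the
// offset is materialized into R9 and register-offset addressing is used.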
Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
                                                                             AArch64reg reg1, RegType rty,
                                                                             bool isAllocate)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
    uint8 size;
    if (CGOptions::IsArm64ilp32()) {
        size = k8ByteSize;
    } else {
        size = GetPointerSize();
    }
    if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
        mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
        MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
            AppendInstructionTo(insn1, cgFunc);
        }
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    } else {
        RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
        ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
        aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
        RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
        MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
            AppendInstructionTo(insn1, cgFunc);
        }
        ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
        aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    }
}

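// Stores FP/LR at fpToSpDistance above SP after the frame has been allocated,
// falling back to the large-offset helper when the distance exceeds the stp
// immediate range.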
Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
                                                                          AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    MOperator mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                        ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                        : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
    Insn *allocInsn = nullptr;
    if (fpToSpDistance > kStpLdpImm64UpperBound) {
        allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
    } else {
        Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
        Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
        Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
        allocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                        ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                        : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
        AppendInstructionTo(*allocInsn, cgFunc);
    }
    if (currCG->InstrumentWithDebugTraceCall()) {
        aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
    }
    return *allocInsn;
}

void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
    }

    Insn *ipoint = nullptr;
    /*
     * stackFrameSize includes the size of args to stack-pass
     * if a function has neither VLA nor alloca.
     */
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if stp's imm > 504, we fall back to the stp-sub version
     */
    bool useStpSub = false;
    int64 offset = 0;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        /*
         * stack_frame_size == size of formal parameters + callee-saved (including FP/LR)
         *                     + size of local vars
         *                     + size of actuals
         * (When passing more than 8 args, it is the caller's responsibility to allocate
         *  space for them; the size of actuals represents the largest such size in the function.)
         */
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useStpSub = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        MOperator mOp = (storeFP || offset > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                                                       : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
        RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
        MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerBitSize());
        ipoint = (storeFP || offset > kStrLdrPerPostUpperBound) ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                                                                : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
        AppendInstructionTo(*ipoint, cgFunc);
        if (currCG->InstrumentWithDebugTraceCall()) {
            aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
        }
    }

    ipoint->SetStackDef(true);

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useStpSub, "Invalid assumption");
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
    }

    CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    if (useStpSub) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
        ipoint->SetStackDef(true);
    }
}

void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
    }

    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance =
        (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());

    Insn *ipoint = nullptr;

    if (fpToSpDistance > 0) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        ipoint->SetStackDef(true);
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
        CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    } else {
        bool useStpSub = false;

        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useStpSub = true;
            RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
            ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            ipoint = cgFunc.GetCurBB()->GetLastInsn();
            ipoint->SetStackDef(true);
        } else {
            MOperator mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                                ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerBitSize());
            ipoint = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                         ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                         : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
            AppendInstructionTo(*ipoint, cgFunc);
            ipoint->SetStackDef(true);
        }

        if (useStpSub) {
            MOperator mOp =
                storeFP ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerBitSize());
            ipoint = storeFP ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                             : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            AppendInstructionTo(*ipoint, cgFunc);
        }

        if (currCG->InstrumentWithDebugTraceCall()) {
            aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
        }
    }
}

/*
 *  From the AArch64 Reference Manual
 *  C1.3.3 Load/Store Addressing Mode
 *  ...
 *  When stack alignment checking is enabled by system software and
 *  the base register is the SP, the current stack pointer must be
 *  initially quadword aligned, that is aligned to 16 bytes. Misalignment
 *  generates a Stack Alignment fault.  The offset does not have to
 *  be a multiple of 16 bytes unless the specific Load/Store instruction
 *  requires this. SP cannot be used as a register offset.
 */
void AArch64GenProEpilog::GeneratePushRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();

    CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");

    AArch64reg intRegFirstHalf = kRinvalid;
    AArch64reg fpRegFirstHalf = kRinvalid;

    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
    }

    /*
     * Even if we don't use RFP, since we push a pair of registers in one instruction
     * and the stack needs to be aligned on a 16-byte boundary, push RFP as well if the
     * function has a call. Make sure this is reflected when computing callee_saved_regs.size().
     */
    if (!currCG->GenerateDebugFriendlyCode()) {
        AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
    } else {
        AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
    }

    if (useFP) {
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
        int64 fpToSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
        if ((fpToSpDistance > 0) || isLmbc) {
            Operand *immOpnd;
            if (isLmbc) {
                int32 size =
                    static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
            } else {
                immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
            }
            if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
            }
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        } else {
            aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        }
    }

    MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
    // skip the RFP & RLR
    if (*it == RFP) {
        ++it;
    }
    CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
    ++it;

    // callee save offset
    // fp - callee save base = RealStackFrameSize - [GR,16] - [VR,16] - [cold,16] - [callee] - stack protect + 16(fplr)
    AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    int32 offset = 0;
    if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
        offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
                                    memLayout->GetSizeOfLocals());
    } else {
        offset = (static_cast<int32>(memLayout->RealStackFrameSize()) -
                  static_cast<int32>(static_cast<int32>(aarchCGFunc.SizeOfCalleeSaved()) -
                                     (kDivide2 * static_cast<int32>(kAarch64IntregBytelen))) - /* for FP/LR */
                  static_cast<int32>(memLayout->SizeOfArgsToStackPass()) -
                  static_cast<int32>(cgFunc.GetFunction().GetFrameReseverdSlot()));
    }

    if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
        offset -= kAarch64StackPtrAlignment;
    }

    if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) &&
        cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
        /* GR/VR save areas are above the callee save area */
        AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
        auto saveAreaSize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
                                               RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
        offset -= saveAreaSize;
    }
    offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));

    std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
    for (; it != regsToSave.end(); ++it) {
        AArch64reg reg = *it;
        // skip the RFP
        if (reg == RFP) {
            continue;
        }
        CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
        RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
        AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
        if (firstHalf == kRinvalid) {
            /* remember it */
            firstHalf = reg;
        } else {
            uint16 reg0NO = (regType == kRegTyInt) ?
                static_cast<uint16>(firstHalf - 1) : static_cast<uint16>(firstHalf - V8 + 72);
            uint16 reg1NO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg0NO, offset));
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg1NO, offset + k8ByteSize));
            AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
            AArch64isa::GetNextOffsetCalleeSaved(offset);
            firstHalf = kRinvalid;
        }
    }

    if (intRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(intRegFirstHalf - 1);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    if (fpRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(fpRegFirstHalf - V8 + 72);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
    if (emitMemoryManager.codeSpace != nullptr) {
        emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
        int64 fpToCurSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        int32 fp2PrevFrameSPDelta =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize()) -
            fpToCurSpDistance;
        emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
    }
}

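// For C varargs functions, spill the unnamed GP argument registers (up to x7)
// into the GR save area and, unless general registers only is requested, the
// unnamed SIMD/FP argument registers (up to v7) into the VR save area, so that
// va_arg can later walk them.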
void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    uint32 offset;
    if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
        AArch64MemLayout *memlayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
        uint8 size;
        if (CGOptions::IsArm64ilp32()) {
            size = k8ByteSize;
        } else {
            size = GetPointerSize();
        }
        uint32 dataSizeBits = size * kBitsPerByte;
        if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
            offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */
        } else {
            offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()) + memlayout->SizeOfArgsToStackPass();
        }
        if ((memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) != 0) {
            offset += size; /* End of area should be aligned. Hole between VR and GR area */
        }
        CHECK_FATAL(size != 0, "Divisor cannot be zero");
        uint32 startRegno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
        DEBUG_ASSERT(startRegno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
        for (uint32 i = startRegno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
            uint32 tmpOffset = 0;
            if (CGOptions::IsBigEndian()) {
                if ((dataSizeBits >> k8BitShift) < k8BitSize) {
                    tmpOffset += k8BitSize - (dataSizeBits >> k8BitShift);
                }
            }
            Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
            RegOperand &reg =
                aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyInt);
            Insn &inst =
                cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, *stackLoc);
            cgFunc.GetCurBB()->AppendInsn(inst);
            offset += size;
        }
        if (!CGOptions::UseGeneralRegOnly()) {
            if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
                offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());
            } else {
                offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc()) + memlayout->SizeOfArgsToStackPass();
            }
            startRegno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
            DEBUG_ASSERT(startRegno <= k8BitSize, "Incorrect starting VR regno for VR Save Area");
            dataSizeBits = k128BitSize;
            for (uint32 i = startRegno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
                uint32 tmpOffset = 0;
                if (CGOptions::IsBigEndian()) {
                    if ((dataSizeBits >> k8BitShift) < k16BitSize) {
                        tmpOffset += k16BitSize - (dataSizeBits >> k8BitShift);
                    }
                }
                Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
                RegOperand &reg = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i),
                                                                                 dataSizeBits, kRegTyFloat);
                Insn &inst =
                    cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_f128), reg, *stackLoc);
                cgFunc.GetCurBB()->AppendInsn(inst);
                offset += (size * k2BitSize);
            }
        }
    }
}

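// Stack-overflow check: probe the address 'offset' bytes below SP with a
// dummy load so that an overflowing frame faults early.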
void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    /* sub x16, sp, #0x2000 */
    auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty);
    auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty);
    auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true);
    aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64);

    /* ldr wzr, [x16] */
    auto &wzr = cgFunc.GetZeroOpnd(k32BitSize);
    auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize);
    auto &soeInstr = cgFunc.GetInsnBuilder()->BuildInsn(MOP_wldr, wzr, refX16);
    if (currCG->GenerateVerboseCG()) {
        soeInstr.SetComment("soerror");
    }
    soeInstr.SetDoNotRemove(true);
    AppendInstructionTo(soeInstr, cgFunc);
}

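// Build the prologue in the dummy BB (stack guard, frame allocation, FP setup,
// vararg register spills, optional SOE check) and splice it to the beginning
// of bb.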
void AArch64GenProEpilog::GenerateProlog(BB &bb)
{
    if (!cgFunc.GetHasProEpilogue()) {
        return;
    }
    if (PROEPILOG_DUMP) {
        LogInfo::MapleLogger() << "generate prolog at BB " << bb.GetId() << "\n";
    }

    AddStackGuard(bb);
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    BB *formerCurBB = cgFunc.GetCurBB();
    aarchCGFunc.GetDummyBB()->ClearInsns();
    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    // insert .loc for function
    if (currCG->GetCGOptions().WithLoc() && (!currCG->GetMIRModule()->IsCModule())) {
        MIRFunction *func = &cgFunc.GetFunction();
        MIRSymbol *fSym = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx());
        if (currCG->GetCGOptions().WithSrc()) {
            uint32 tempmaxsize = static_cast<uint32>(currCG->GetMIRModule()->GetSrcFileInfo().size());
            CHECK_FATAL(tempmaxsize >= 1, "value overflow");
            uint32 endfilenum = currCG->GetMIRModule()->GetSrcFileInfo()[tempmaxsize - 1].second;
            if (fSym->GetSrcPosition().FileNum() != 0 && fSym->GetSrcPosition().FileNum() <= endfilenum) {
                int64_t lineNum = fSym->GetSrcPosition().LineNum();
                if (lineNum == 0) {
                    if (cgFunc.GetFunction().GetAttr(FUNCATTR_native)) {
                        lineNum = 0xffffe;
                    } else {
                        lineNum = 0xffffd;
                    }
                }
                Insn &loc =
                    cgFunc.BuildLocInsn(fSym->GetSrcPosition().FileNum(), lineNum, fSym->GetSrcPosition().Column());
                cgFunc.GetCurBB()->AppendInsn(loc);
            }
        } else {
            cgFunc.GetCurBB()->AppendInsn(cgFunc.BuildLocInsn(1, fSym->GetSrcPosition().MplLineNum(), 0));
        }
    }

    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();
    if (!regsToSave.empty()) {
        /*
         * Among other things, push the FP & LR pair.
         * FP/LR are added to the callee-saved list in AllocateRegisters().
         * We add them to the callee-saved list regardless of UseFP() being true/false.
         * The activation frame is allocated as part of pushing the FP/LR pair.
         */
        GeneratePushRegs();
    } else {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        int32 stackFrameSize =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
        if (stackFrameSize > 0) {
            if (currCG->GenerateVerboseCG()) {
                cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
            }
            Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            cgFunc.GetCurBB()->GetLastInsn()->SetStackDef(true);
        }
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        if (useFP) {
            Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
            bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
            int64 fpToSpDistance =
                cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
            if ((fpToSpDistance > 0) || isLmbc) {
                Operand *immOpnd;
                if (isLmbc) {
                    int32 size = static_cast<int32>(
                        static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                    immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
                } else {
                    immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
                }
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            } else {
                aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            }
        }
    }
    GeneratePushUnnamedVarargRegs();
    if (currCG->DoCheckSOE()) {
        AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset);
    }
    bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
    cgFunc.SetCurBB(*formerCurBB);
}

void AArch64GenProEpilog::GenerateRet(BB &bb)
{
    auto *lastInsn = bb.GetLastMachineInsn();
    if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
        return;
    }
    /* Insert the loc insn before the ret insn so that a breakpoint
       can stop on the line of the function's closing brace. */
    SrcPosition pos = cgFunc.GetFunction().GetScope()->GetRangeHigh();
    if (cgFunc.GetCG()->GetCGOptions().WithDwarf() && cgFunc.GetWithSrc() && cgFunc.GetMirModule().IsCModule() &&
        pos.FileNum() != 0) {
        bb.AppendInsn(cgFunc.BuildLocInsn(pos.FileNum(), pos.LineNum(), pos.Column()));
    }
    bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
}

/*
 * If every pred of exitBB was tail-call optimized (blr/bl replaced with br/b),
 * return true and no ret insn is created; otherwise return false and create one.
 */
bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB)
{
    AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    if (cgFunc.GetMirModule().IsCModule() &&
        (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) {
        return false;
    }
    const Insn *lastInsn = exitBB.GetLastInsn();
    while (lastInsn != nullptr && (!lastInsn->IsMachineInstruction() || lastInsn->IsPseudo())) {
        lastInsn = lastInsn->GetPrev();
    }
    bool isTailCall = lastInsn == nullptr ? false : lastInsn->IsTailCall();
    return isTailCall;
}

void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
    Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
    MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
        o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
    }

    Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
    // Mark that the instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    popInsn.SetReferenceOsts(memDefUse);
    popInsn.SetComment("RESTORE");
    cgFunc.GetCurBB()->AppendInsn(popInsn);
}

void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
                                                   int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    uint32 dataSize = GetPointerBitSize();
    CHECK_FATAL(offset >= 0, "offset must >= 0");
    if (offset > kStpLdpImm64UpperBound) {
        o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
    }
    Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
    // Mark that the instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    popInsn.SetReferenceOsts(memDefUse);
    popInsn.SetComment("RESTORE RESTORE");
    cgFunc.GetCurBB()->AppendInsn(popInsn);
}

void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if ldp's imm > 504, we fall back to the ldp-add version
     */
    bool useLdpAdd = false;
    int32 offset = 0;

    Operand *o2 = nullptr;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useLdpAdd = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerBitSize());
    }

    if (useLdpAdd) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    }

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useLdpAdd, "Invalid assumption");
        if (fpToSpDistance > kStpLdpImm64UpperBound) {
            (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
        } else {
            Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    } else {
        Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
        cgFunc.GetCurBB()->AppendInsn(deallocInsn);
    }
    cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
}

1038 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1039 {
1040     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1041     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1042     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
1043     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
1044     int32 stackFrameSize =
1045         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1046     int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1047     /*
1048      * ldp/stp's immediate must be within the range [-512, 504];
1049      * if ldp's immediate exceeds 504, we fall back to the ldp-add version.
1050      */
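    // A minimal sketch of the small-frame restore chosen below (assuming a
    // 112-byte frame, no outgoing stack args, reg0 = R29 and reg1 = RLR):
    //   storeFP == true :  ldp x29, x30, [sp], #112   // restore the FP/LR pair
    //   storeFP == false:  ldr x30, [sp], #112        // only LR was saved
    // Larger frames (and lmbc) reload from [sp, #off] first and then add the
    // frame size back to SP.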
1051     bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
1052     if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
1053         int32 lmbcOffset = 0;
1054         if (!isLmbc) {
1055             stackFrameSize -= fpToSpDistance;
1056         } else {
1057             lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
1058         }
1059         if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
1060             Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerBitSize());
1061             mOp = storeFP ? pushPopOps[kRegsPopOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1062             Insn &deallocInsn = storeFP ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
1063                                         : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
1064             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1065             Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1066             Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1067             aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1068         } else {
1069             MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerBitSize());
1070             mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
1071                                                                          : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1072             Insn &deallocInsn = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
1073                                     ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
1074                                     : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
1075             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1076         }
1077     } else {
1078         Operand *o2 =
1079             aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
1080         if (fpToSpDistance > kStpLdpImm64UpperBound) {
1081             (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1082         } else {
1083             mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
1084                                                                          : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1085             Insn &deallocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
1086                                     ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
1087                                     : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
1088             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1089         }
1090 
1091         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1092         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1093         aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1094     }
1095     cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
1096 }
1097 
1098 void AArch64GenProEpilog::GeneratePopRegs()
1099 {
1100     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1101     CG *currCG = cgFunc.GetCG();
1102 
1103     const MapleVector<AArch64reg> &regsToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
1104                                                        ? aarchCGFunc.GetCalleeSavedRegs()
1105                                                        : aarchCGFunc.GetProEpilogSavedRegs();
1106 
1107     CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");
1108 
1109     AArch64reg intRegFirstHalf = kRinvalid;
1110     AArch64reg fpRegFirstHalf = kRinvalid;
1111 
1112     if (currCG->GenerateVerboseCG()) {
1113         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
1114     }
1115 
1116     MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
1117     /*
1118      * Even if we don't use FP, since we push a pair of registers
1119      * in a single instruction (i.e., stp) and the stack needs to be aligned
1120      * on a 16-byte boundary, we push FP as well if the function has a call.
1121      * Make sure this is reflected when computing calleeSavedRegs.size();
1122      * the first two registers are skipped here.
1123      */
1124     // skip RFP & RLR
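    // A sketch of the iteration set up here (assuming the callee-saved list is
    // {RFP, RLR, R19, R20, D8}): RFP/RLR are skipped and restored later by
    // AppendInstructionDeallocateCallFrame*, R19/R20 are popped as a pair
    // (ldp x19, x20, [sp, #offset]), and the odd leftover D8 is popped singly
    // (ldr d8, [sp, #nextOffset]).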
1125     if (*it == RFP) {
1126         ++it;
1127     }
1128     CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1129     ++it;
1130 
1131     AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1132     int32 offset;
1133     if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1134         offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1135                                     memLayout->GetSizeOfLocals());
1136     } else {
1137         offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
1138                   (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kAarch64IntregBytelen))) -
1139                  memLayout->SizeOfArgsToStackPass() -
1140                  cgFunc.GetFunction().GetFrameReseverdSlot();
1141     }
1142 
1143     if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1144         offset -= kAarch64StackPtrAlignment;
1145     }
1146 
1147     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1148         /* GR/VR save areas are above the callee save area */
1149         AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1150         auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1151                                                RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1152         offset -= saveareasize;
1153     }
1154 
1155     offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));
1156 
1157     /*
1158      * We are using a cleared dummy block, so insertPoint cannot be ret;
1159      * see GenerateEpilog()
1160      */
1161     for (; it != regsToRestore.end(); ++it) {
1162         AArch64reg reg = *it;
1163         if (reg == RFP) {
1164             continue;
1165         }
1166         CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1167 
1168         RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1169         AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1170         if (firstHalf == kRinvalid) {
1171             /* remember it */
1172             firstHalf = reg;
1173         } else {
1174             /* flush the pair */
1175             AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
1176             AArch64isa::GetNextOffsetCalleeSaved(offset);
1177             firstHalf = kRinvalid;
1178         }
1179     }
1180 
1181     if (intRegFirstHalf != kRinvalid) {
1182         AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1183         AArch64isa::GetNextOffsetCalleeSaved(offset);
1184     }
1185 
1186     if (fpRegFirstHalf != kRinvalid) {
1187         AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1188         AArch64isa::GetNextOffsetCalleeSaved(offset);
1189     }
1190 
1191     if (!currCG->GenerateDebugFriendlyCode()) {
1192         AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
1193     } else {
1194         AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
1195     }
1196 }
1197 
1198 void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol)
1199 {
1200     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1201     Operand &targetOpnd = aarchCGFunc.GetOrCreateFuncNameOpnd(funcSymbol);
1202     cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1203 }
1204 
1205 void AArch64GenProEpilog::AppendBBtoEpilog(BB &epilogBB, BB &newBB)
1206 {
1207     FOR_BB_INSNS(insn, &newBB)
1208     {
1209         insn->SetDoNotRemove(true);
1210     }
1211     auto *lastInsn = epilogBB.GetLastMachineInsn();
1212     if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
1213         epilogBB.RemoveInsn(*lastInsn);
1214         epilogBB.AppendBBInsns(newBB);
1215         epilogBB.AppendInsn(*lastInsn);
1216     } else {
1217         epilogBB.AppendBBInsns(newBB);
1218     }
1219 }
1220 
1221 void AArch64GenProEpilog::GenerateEpilog(BB &bb)
1222 {
1223     if (!cgFunc.GetHasProEpilogue()) {
1224         return;
1225     }
1226     if (PROEPILOG_DUMP) {
1227         LogInfo::MapleLogger() << "generate epilog at BB " << bb.GetId() << "\n";
1228     }
1229 
1230     /* generate the stack-protector check instructions */
1231     GenStackGuardCheckInsn(bb);
1232 
1233     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1234     CG *currCG = cgFunc.GetCG();
1235     BB *formerCurBB = cgFunc.GetCurBB();
1236     aarchCGFunc.GetDummyBB()->ClearInsns();
1237     cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1238 
1239     Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1240     Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1241 
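    // With VLA/alloca the frame size is not a compile-time constant, so SP is
    // first recovered from the frame pointer (effectively mov sp, x29, assuming
    // stackBaseReg is R29) before the saved registers are restored.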
1242     if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1243         aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
1244     }
1245 
1246     const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
1247                                                     ? aarchCGFunc.GetCalleeSavedRegs()
1248                                                     : aarchCGFunc.GetProEpilogSavedRegs();
1249     if (!regsToSave.empty()) {
1250         GeneratePopRegs();
1251     } else {
1252         auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
1253         if (stackFrameSize > 0) {
1254             if (currCG->GenerateVerboseCG()) {
1255                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
1256             }
1257 
1258             if (cgFunc.HasVLAOrAlloca()) {
1259                 auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
1260                 stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
1261             }
1262 
1263             if (stackFrameSize > 0) {
1264                 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1265                 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1266                 aarchCGFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
1267             }
1268         }
1269     }
1270 
1271     if (currCG->InstrumentWithDebugTraceCall()) {
1272         AppendJump(*(currCG->GetDebugTraceExitFunction()));
1273     }
1274 
1275     AppendBBtoEpilog(bb, *cgFunc.GetCurBB());
1276     if (cgFunc.GetCurBB()->GetHasCfi()) {
1277         bb.SetHasCfi();
1278     }
1279 
1280     cgFunc.SetCurBB(*formerCurBB);
1281 }
1282 
1283 void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb)
1284 {
1285     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1286     CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!");
1287     if (cgFunc.GetExitBB(0)->IsUnreachable()) {
1288         /* if the exit BB is unreachable, the epilogue cannot be emitted there, so emit it here */
1289         GenerateEpilog(bb);
1290     } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */
1291         LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx());
1292         bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1293     }
1294 }
1295 
1296 void AArch64GenProEpilog::Run()
1297 {
1298     CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
1299                 "The first statement should be a label");
1300     // update exitBB
1301     if (cgFunc.IsExitBBsVecEmpty()) {
1302         if (cgFunc.GetCleanupBB() != nullptr && cgFunc.GetCleanupBB()->GetPrev() != nullptr) {
1303             cgFunc.PushBackExitBBsVec(*cgFunc.GetCleanupBB()->GetPrev());
1304         } else if (!cgFunc.GetMirModule().IsCModule()) {
1305             cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
1306         }
1307     }
1308     cgFunc.SetHasProEpilogue(NeedProEpilog());
1309 
1310     // proepilog analysis was not run or it failed; insert the prologue at firstBB and the epilogue at each exit BB
1311     GenerateProlog(*(cgFunc.GetFirstBB()));
1312     for (auto *exitBB : cgFunc.GetExitBBsVec()) {
1313         GenerateEpilog(*exitBB);
1314     }
1315 
1316     // insert ret insn for exitBB
1317     for (auto *exitBB : cgFunc.GetExitBBsVec()) {
1318         if (cgFunc.GetHasProEpilogue() || (!exitBB->GetPreds().empty() && !TestPredsOfRetBB(*exitBB))) {
1319             GenerateRet(*exitBB);
1320         }
1321     }
1322 }
1323 } /* namespace maplebe */
1324