1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "aarch64_proepilog.h"
17 #include "aarch64_cg.h"
18 #include "cg_option.h"
19 #include "cgfunc.h"
20 
21 namespace maplebe {
22 using namespace maple;
23 
24 namespace {
25 constexpr int32 kSoeChckOffset = 8192;
26 
27 enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };
28 
29 enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };
30 
31 MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {{
32                                                                                /* push */
33                                                                                {0}, /* undef */
34                                                                                {
35                                                                                    /* kRegTyInt */
36                                                                                    MOP_xstr, /* single */
37                                                                                    MOP_xstp, /* pair   */
38                                                                                },
39                                                                                {
40                                                                                    /* kRegTyFloat */
41                                                                                    MOP_dstr, /* single */
42                                                                                    MOP_dstp, /* pair   */
43                                                                                },
44                                                                            },
45                                                                            {
46                                                                                /* pop */
47                                                                                {0}, /* undef */
48                                                                                {
49                                                                                    /* kRegTyInt */
50                                                                                    MOP_xldr, /* single */
51                                                                                    MOP_xldp, /* pair   */
52                                                                                },
53                                                                                {
54                                                                                    /* kRegTyFloat */
55                                                                                    MOP_dldr, /* single */
56                                                                                    MOP_dldp, /* pair   */
57                                                                                },
58                                                                            }};
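/* pushPopOps is indexed as [push|pop][register type][single|pair]; for example,
 * pushPopOps[kRegsPushOp][kRegTyInt][kPushPopPair] yields MOP_xstp, and
 * pushPopOps[kRegsPopOp][kRegTyFloat][kPushPopSingle] yields MOP_dldr. */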
59 
60 inline void AppendInstructionTo(Insn &insn, CGFunc &func)
61 {
62     func.GetCurBB()->AppendInsn(insn);
63 }
64 }  // namespace
65 
66 bool AArch64GenProEpilog::HasLoop()
67 {
68     FOR_ALL_BB(bb, &cgFunc) {
69         if (bb->IsBackEdgeDest()) {
70             return true;
71         }
72         FOR_BB_INSNS_REV(insn, bb) {
73             if (!insn->IsMachineInstruction()) {
74                 continue;
75             }
76             if (insn->HasLoop()) {
77                 return true;
78             }
79         }
80     }
81     return false;
82 }
83 
84 /*
85  *  Remove redundant mov insns and mark optimizable bl/blr insns in the BB.
86  *  Return value: true means the caller should process this modified block again.
87  */
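/* For example, a trailing "mov x0, x0" copy is deleted, while a final "bl foo"
 * (or a "blr xN" whose target register is not callee-saved) is recorded in
 * callInsns as a tail-call candidate. */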
88 bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, MapleSet<Insn *> &callInsns, const BB &exitBB) const
89 {
90     if (bb.NumInsn() == 1 &&
91         (bb.GetLastInsn()->GetMachineOpcode() != MOP_xbr && bb.GetLastInsn()->GetMachineOpcode() != MOP_xblr &&
92          bb.GetLastInsn()->GetMachineOpcode() != MOP_xbl && bb.GetLastInsn()->GetMachineOpcode() != MOP_xuncond)) {
93         return false;
94     }
95     FOR_BB_INSNS_REV_SAFE(insn, &bb, prev_insn) {
96         if (!insn->IsMachineInstruction() || AArch64isa::IsPseudoInstruction(insn->GetMachineOpcode())) {
97             continue;
98         }
99         MOperator insnMop = insn->GetMachineOpcode();
100         switch (insnMop) {
101             case MOP_xldr:
102             case MOP_xldp:
103             case MOP_dldr:
104             case MOP_dldp: {
105                 if (bb.GetKind() == BB::kBBReturn) {
106                     RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
107                     if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg.GetRegisterNumber()))) {
108                         break; /* inserted restore from calleeregs-placement, ignore */
109                     }
110                 }
111                 return false;
112             }
113             case MOP_wmovrr:
114             case MOP_xmovrr: {
115                 CHECK_FATAL(insn->GetOperand(0).IsRegister(), "operand0 is not register");
116                 CHECK_FATAL(insn->GetOperand(1).IsRegister(), "operand1 is not register");
117                 auto &reg1 = static_cast<RegOperand &>(insn->GetOperand(0));
118                 auto &reg2 = static_cast<RegOperand &>(insn->GetOperand(1));
119 
120                 if (reg1.GetRegisterNumber() != R0 || reg2.GetRegisterNumber() != R0) {
121                     return false;
122                 }
123 
124                 bb.RemoveInsn(*insn);
125                 break;
126             }
127             case MOP_xblr: {
128                 if (insn->GetOperand(0).IsRegister()) {
129                     RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
130                     if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg.GetRegisterNumber()))) {
131                         return false; /* can't tailcall, register will be overwritten by restore */
132                     }
133                 }
134                 /* fall through */
135             }
136                 [[clang::fallthrough]];
137             case MOP_xbl: {
138                 callInsns.insert(insn);
139                 return false;
140             }
141             case MOP_xuncond: {
142                 LabelOperand &bLab = static_cast<LabelOperand &>(insn->GetOperand(0));
143                 if (exitBB.GetLabIdx() == bLab.GetLabelIndex()) {
144                     break;
145                 }
146                 return false;
147             }
148             default:
149                 return false;
150         }
151     }
152 
153     return true;
154 }
155 
156 /* Recursively invoke this function for all predecessors of exitBB */
157 void AArch64GenProEpilog::TailCallBBOpt(BB &bb, MapleSet<Insn *> &callInsns, BB &exitBB)
158 {
159     /* A callsite may also appear in the return block, as in "if () return; else foo();",
160        where the call is in the exit block. */
161     if (!bb.IsEmpty() && !OptimizeTailBB(bb, callInsns, exitBB)) {
162         return;
163     }
164 
165     for (auto tmpBB : bb.GetPreds()) {
166         if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() ||
167             (tmpBB->GetKind() != BB::kBBFallthru && tmpBB->GetKind() != BB::kBBGoto)) {
168             continue;
169         }
170 
171         if (OptimizeTailBB(*tmpBB, callInsns, exitBB)) {
172             TailCallBBOpt(*tmpBB, callInsns, exitBB);
173         }
174     }
175 }
176 
177 /*
178  *  If a function has no callee-saved registers and ends with a function call,
179  *  then the trailing bl/blr can be converted to b/br.
180  *  Return value: true if the function does not need a prologue/epilogue; false otherwise.
181  */
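/* Illustrative effect of the optimization described above (this function only records the
 * candidate call insns per exit BB in exitBB2CallSitesMap; the actual bl->b / blr->br
 * rewrite is applied later, outside this function):
 *     bl  callee     ->     b  callee
 *     blr x8         ->     br x8
 */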
182 bool AArch64GenProEpilog::TailCallOpt()
183 {
184     /* Count how many call insns in the whole function. */
185     uint32 nCount = 0;
186     bool hasGetStackClass = false;
187 
188     FOR_ALL_BB(bb, &cgFunc) {
189         FOR_BB_INSNS(insn, bb) {
190             if (insn->IsMachineInstruction() && insn->IsCall()) {
191                 ++nCount;
192             }
193         }
194     }
195     if ((nCount > 0 && cgFunc.GetFunction().GetAttr(FUNCATTR_interface)) || hasGetStackClass) {
196         return false;
197     }
198 
199     if (nCount == 0) {
200         // no bl instr in any bb
201         return true;
202     }
203 
204     size_t exitBBSize = cgFunc.GetExitBBsVec().size();
205     /* For now to reduce complexity */
206 
207     BB *exitBB = nullptr;
208     if (exitBBSize == 0) {
209         if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() &&
210             cgFunc.GetLastBB()->GetPrev()->GetPrev() != nullptr) {
211             exitBB = cgFunc.GetLastBB()->GetPrev()->GetPrev();
212         } else {
213             exitBB = cgFunc.GetLastBB()->GetPrev();
214         }
215     } else {
216         exitBB = cgFunc.GetExitBBsVec().front();
217     }
218     uint32 i = 1;
219     size_t optCount = 0;
220     do {
221         MapleSet<Insn *> callInsns(tmpAlloc.Adapter());
222         TailCallBBOpt(*exitBB, callInsns, *exitBB);
223         if (callInsns.size() != 0) {
224             optCount += callInsns.size();
225             (void)exitBB2CallSitesMap.emplace(exitBB, callInsns);
226         }
227         if (i < exitBBSize) {
228             exitBB = cgFunc.GetExitBBsVec()[i];
229             ++i;
230         } else {
231             break;
232         }
233     } while (1);
234 
235     /* regular calls exist in function */
236     if (nCount != optCount) {
237         return false;
238     }
239     return true;
240 }
241 
242 static bool IsAddOrSubOp(MOperator mOp)
243 {
244     switch (mOp) {
245         case MOP_xaddrrr:
246         case MOP_xaddrrrs:
247         case MOP_xxwaddrrre:
248         case MOP_xaddrri24:
249         case MOP_xaddrri12:
250         case MOP_xsubrrr:
251         case MOP_xsubrrrs:
252         case MOP_xxwsubrrre:
253         case MOP_xsubrri12:
254             return true;
255         default:
256             return false;
257     }
258 }
259 
260 /* Tail-call optimization cannot be used if the stack address of this function is taken and passed;
261    for now we only check whether the address is taken, not whether it is passed. */
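/* A typical pattern detected here is an add/sub that materializes a stack address,
 * e.g. "add x0, x29, #16" passing the address of a local variable to a callee. */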
262 static bool IsStackAddrTaken(CGFunc &cgFunc)
263 {
264     FOR_ALL_BB(bb, &cgFunc) {
265         FOR_BB_INSNS_REV(insn, bb) {
266             if (IsAddOrSubOp(insn->GetMachineOpcode())) {
267                 for (uint32 i = 0; i < insn->GetOperandSize(); i++) {
268                     if (insn->GetOperand(i).IsRegister()) {
269                         RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(i));
270                         if (reg.GetRegisterNumber() == R29 || reg.GetRegisterNumber() == R31 ||
271                             reg.GetRegisterNumber() == RSP) {
272                             return true;
273                         }
274                     }
275                 }
276             }
277         }
278     }
279     return false;
280 }
281 
282 bool AArch64GenProEpilog::NeedProEpilog()
283 {
284     if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) {
285         return true;
286     } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) {
287         return true;
288     }
289     bool funcHasCalls = false;
290     if (cgFunc.GetCG()->DoTailCall() && !IsStackAddrTaken(cgFunc) && !stackProtect) {
291         funcHasCalls = !TailCallOpt();  // TailCallOpt() returning true means "no call insns, or only tail calls"
292     } else {
293         FOR_ALL_BB(bb, &cgFunc) {
294             FOR_BB_INSNS_REV(insn, bb) {
295                 if (insn->IsMachineInstruction() && insn->IsCall()) {
296                     funcHasCalls = true;
297                 }
298             }
299         }
300     }
301     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
302     const MapleVector<AArch64reg> &regsToRestore =
303         (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
304     size_t calleeSavedRegSize = kTwoRegister;
305     CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?");
306     if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() ||
307         static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 ||
308         cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) {
309         return true;
310     }
311     return false;
312 }
313 
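/* GenStackGuard loads the value of __stack_chk_guard through R9 and stores it into the
 * frame's guard slot; the slot offset accounts for the vararg register save areas and,
 * when FP is used, for the args-to-stack-pass region. The generated insns are prepended
 * to bb and marked with SetDoNotRemove. Only emitted when stack protection is enabled. */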
314 void AArch64GenProEpilog::GenStackGuard(BB &bb)
315 {
316     if (!stackProtect) {
317         return;
318     }
319     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
320     BB *formerCurBB = cgFunc.GetCurBB();
321     aarchCGFunc.GetDummyBB()->ClearInsns();
322     aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
323     cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
324 
325     MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
326         GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
327     StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
328     RegOperand &stAddrOpnd =
329         aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, GetPointerSize() * kBitsPerByte, kRegTyInt);
330     aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);
331 
332     MemOperand *guardMemOp =
333         aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte, stAddrOpnd, nullptr,
334                                      &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym);
335     MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
336     Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
337     insn.SetDoNotRemove(true);
338     cgFunc.GetCurBB()->AppendInsn(insn);
339 
340     uint64 vArea = 0;
341     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
342         AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
343         if (ml->GetSizeOfGRSaveArea() > 0) {
344             vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
345         }
346         if (ml->GetSizeOfVRSaveArea() > 0) {
347             vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
348         }
349     }
350 
351     int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
352     if (useFP) {
353         stkSize -=
354             (static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()) +
355              cgFunc.GetFunction().GetFrameReseverdSlot());
356     }
357     int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
358     MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerSize() * kBitsPerByte);
359     if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
360         downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10);
361     }
362     mOp = aarchCGFunc.PickStInsn(GetPointerSize() * kBitsPerByte, PTY_u64);
363     Insn &tmpInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *downStk);
364     tmpInsn.SetDoNotRemove(true);
365     cgFunc.GetCurBB()->AppendInsn(tmpInsn);
366 
367     bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
368     aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
369     cgFunc.SetCurBB(*formerCurBB);
370 }
371 
372 BB &AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb)
373 {
374     if (!stackProtect) {
375         return bb;
376     }
377 
378     BB *formerCurBB = cgFunc.GetCurBB();
379     cgFunc.GetDummyBB()->ClearInsns();
380     cgFunc.SetCurBB(*(cgFunc.GetDummyBB()));
381     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
382 
383     const MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
384         GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
385     StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
386     RegOperand &stAddrOpnd =
387         aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, GetPointerSize() * kBitsPerByte, kRegTyInt);
388     aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);
389 
390     MemOperand *guardMemOp =
391         aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte, stAddrOpnd, nullptr,
392                                      &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym);
393     MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
394     Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
395     insn.SetDoNotRemove(true);
396     cgFunc.GetCurBB()->AppendInsn(insn);
397 
398     uint64 vArea = 0;
399     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
400         AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
401         if (ml->GetSizeOfGRSaveArea() > 0) {
402             vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
403         }
404         if (ml->GetSizeOfVRSaveArea() > 0) {
405             vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
406         }
407     }
408 
409     RegOperand &checkOp =
410         aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, GetPointerSize() * kBitsPerByte, kRegTyInt);
411     int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
412     if (useFP) {
413         stkSize -=
414             (static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()) +
415              cgFunc.GetFunction().GetFrameReseverdSlot());
416     }
417     int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
418     MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerSize() * kBitsPerByte);
419     if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
420         downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10);
421     }
422     mOp = aarchCGFunc.PickLdInsn(GetPointerSize() * kBitsPerByte, PTY_u64);
423     Insn &newInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, checkOp, *downStk);
424     newInsn.SetDoNotRemove(true);
425     cgFunc.GetCurBB()->AppendInsn(newInsn);
426 
427     cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64);
428     LabelIdx failLable = aarchCGFunc.CreateLabel();
429     aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failLable), OP_brtrue, OP_eq, stAddrOpnd,
430                                aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64, false);
431 
432     bb.AppendBBInsns(*(cgFunc.GetCurBB()));
433 
434     LabelIdx nextBBLableIdx = aarchCGFunc.CreateLabel();
435     BB *nextBB = aarchCGFunc.CreateNewBB(nextBBLableIdx, bb.IsUnreachable(), BB::kBBFallthru, bb.GetFrequency());
436     bb.AppendBB(*nextBB);
437     bb.PushBackSuccs(*nextBB);
438     nextBB->PushBackPreds(bb);
439     cgFunc.SetCurBB(*nextBB);
440     MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
441         GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail")));
442     ListOperand *srcOpnds = aarchCGFunc.CreateListOpnd(*cgFunc.GetFuncScopeAllocator());
443     Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds);
444     callInsn.SetDoNotRemove(true);
445 
446     BB *newBB = cgFunc.CreateNewBB(failLable, bb.IsUnreachable(), bb.GetKind(), bb.GetFrequency());
447     nextBB->AppendBB(*newBB);
448     if (cgFunc.GetLastBB() == &bb) {
449         cgFunc.SetLastBB(*newBB);
450     }
451     bb.PushBackSuccs(*newBB);
452     nextBB->PushBackSuccs(*newBB);
453     newBB->PushBackPreds(*nextBB);
454     newBB->PushBackPreds(bb);
455 
456     bb.SetKind(BB::kBBIf);
457     cgFunc.SetCurBB(*formerCurBB);
458     return *newBB;
459 }
460 
461 bool AArch64GenProEpilog::InsertOpndRegs(Operand &op, std::set<regno_t> &vecRegs) const
462 {
463     Operand *opnd = &op;
464     CHECK_FATAL(opnd != nullptr, "opnd is nullptr in InsertRegs");
465     if (opnd->IsList()) {
466         MapleList<RegOperand *> pregList = static_cast<ListOperand *>(opnd)->GetOperands();
467         for (auto *preg : pregList) {
468             if (preg != nullptr) {
469                 vecRegs.insert(preg->GetRegisterNumber());
470             }
471         }
472     }
473     if (opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complex to be detected */
474         RegOperand *baseOpnd = static_cast<MemOperand *>(opnd)->GetBaseRegister();
475         if (baseOpnd != nullptr) {
476             vecRegs.insert(baseOpnd->GetRegisterNumber());
477         }
478         RegOperand *indexOpnd = static_cast<MemOperand *>(opnd)->GetIndexRegister();
479         if (indexOpnd != nullptr) {
480             vecRegs.insert(indexOpnd->GetRegisterNumber());
481         }
482     }
483     if (opnd->IsRegister()) {
484         RegOperand *preg = static_cast<RegOperand *>(opnd);
485         if (preg != nullptr) {
486             vecRegs.insert(preg->GetRegisterNumber());
487         }
488     }
489     return true;
490 }
491 
492 bool AArch64GenProEpilog::InsertInsnRegs(Insn &insn, bool insertSource, std::set<regno_t> &vecSourceRegs,
493                                          bool insertTarget, std::set<regno_t> &vecTargetRegs)
494 {
495     Insn *curInsn = &insn;
496     for (uint32 o = 0; o < curInsn->GetOperandSize(); ++o) {
497         Operand &opnd = curInsn->GetOperand(o);
498         if (insertSource && curInsn->OpndIsUse(o)) {
499             InsertOpndRegs(opnd, vecSourceRegs);
500         }
501         if (insertTarget && curInsn->OpndIsDef(o)) {
502             InsertOpndRegs(opnd, vecTargetRegs);
503         }
504     }
505     return true;
506 }
507 
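/* FindRegs returns true when the operand refers to R29 or to any register already recorded
 * in vecRegs, i.e. when moving an insn that uses this operand would overwrite or reread one
 * of the tracked dependencies. */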
508 bool AArch64GenProEpilog::FindRegs(Operand &op, std::set<regno_t> &vecRegs) const
509 {
510     Operand *opnd = &op;
511     if (opnd == nullptr || vecRegs.empty()) {
512         return false;
513     }
514     if (opnd->IsList()) {
515         MapleList<RegOperand *> pregList = static_cast<ListOperand *>(opnd)->GetOperands();
516         for (auto *preg : pregList) {
517             if (preg->GetRegisterNumber() == R29 || vecRegs.find(preg->GetRegisterNumber()) != vecRegs.end()) {
518                 return true; /* the opReg will overwrite or reread the vecRegs */
519             }
520         }
521     }
522     if (opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complex to be detected */
523         RegOperand *baseOpnd = static_cast<MemOperand *>(opnd)->GetBaseRegister();
524         RegOperand *indexOpnd = static_cast<MemOperand *>(opnd)->GetIndexRegister();
525         if ((baseOpnd != nullptr && baseOpnd->GetRegisterNumber() == R29) ||
526             (indexOpnd != nullptr && indexOpnd->GetRegisterNumber() == R29)) {
527             return true; /* Avoid modifying data on the stack */
528         }
529         if ((baseOpnd != nullptr && vecRegs.find(baseOpnd->GetRegisterNumber()) != vecRegs.end()) ||
530             (indexOpnd != nullptr && vecRegs.find(indexOpnd->GetRegisterNumber()) != vecRegs.end())) {
531             return true;
532         }
533     }
534     if (opnd->IsRegister()) {
535         RegOperand *regOpnd = static_cast<RegOperand *>(opnd);
536         if (regOpnd->GetRegisterNumber() == R29 || vecRegs.find(regOpnd->GetRegisterNumber()) != vecRegs.end()) {
537             return true; /* dst is a target register, result_dst is a target register */
538         }
539     }
540     return false;
541 }
542 
543 bool AArch64GenProEpilog::BackwardFindDependency(BB &ifbb, std::set<regno_t> &vecReturnSourceRegs,
544                                                  std::list<Insn *> &existingInsns, std::list<Insn *> &moveInsns)
545 {
546     /*
547      * Pattern match: (*) instructions are moved down below the branch.
548      *   ********************
549      *   curInsn: <instruction> <target> <source>
550      *   <existingInsns> in predBB
551      *   <existingInsns> in ifBB
552      *   <existingInsns> in returnBB
553      *   *********************
554      *                        list: the insns can be moved into the coldBB
555      * (1) the instruction is neither a branch nor a call, except for the ifbb.GetLastInsn()
556      *     As long as a branch insn exists,
557      *     the fast path finding fails and the return value is false,
558      *     but the code sinking can be continued.
559      * (2) the predBB is not an ifBB,
560      *     As long as an ifBB in preds exists,
561      *     the code sinking fails,
562      *     but fast path finding can be continued.
563      * (3) the targetRegs of insns in existingInsns can neither be reread nor overwritten
564      * (4) the sourceRegs of insns in existingInsns cannot be overwritten
565      * (5) the sourceRegs of insns in returnBB can neither be reread nor overwritten
566      * (6) the targetRegs and sourceRegs cannot be R29 or R30, to protect the stack
567      * (7) the reg in existingInsns is replaced when:
568      *     --------------
569      *     curInsn: move R2,R1
570      *     <existingInsns>: <instruction>s <target>s <source>s
571      *                      <instruction>s <target>s <source-R2>s
572      *                      -> <instruction>s <target>s <source-R1>s
573      *     ------------
574      *     (a) all targets cannot be R1, all sources cannot be R1
575      *         all targets cannot be R2, all return sources cannot be R2
576      *     (b) the targetRegs and sourceRegs cannot be list or MemoryAccess
577      *     (c) no ifBB in preds, no branch insns
578      *     (d) the bits of source-R2 must be equal to the R2
579      *     (e) replace the R2 with R1
580      */
581     BB *pred = &ifbb;
582     std::set<regno_t> vecTargetRegs; /* the targetRegs of existingInsns */
583     std::set<regno_t> vecSourceRegs; /* the sourceRegs of existingInsns */
584     bool ifPred = false;             /* Indicates whether an ifBB exists in preds */
585     bool bl = false;                 /* Indicates whether a branch insn exists */
586     do {
587         FOR_BB_INSNS_REV(insn, pred) {
588             /* code sinking */
589             if (insn->IsImmaterialInsn()) {
590                 moveInsns.push_back(insn);
591                 continue;
592             }
593             /* code sinking */
594             if (!insn->IsMachineInstruction()) {
595                 moveInsns.push_back(insn);
596                 continue;
597             }
598             /* code sinking fails, the insns must be retained in the ifBB */
599             if (ifPred || insn == ifbb.GetLastInsn() || insn->IsBranch() || insn->IsCall() || insn->IsStore() ||
600                 insn->IsStorePair()) {
601                 /* fast path finding fails */
602                 if (insn != ifbb.GetLastInsn() &&
603                     (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair())) {
604                     bl = true;
605                 }
606                 InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs);
607                 existingInsns.push_back(insn);
608                 continue;
609             }
610             bool allow = true; /* whether this insn is allowed to move into the coldBB */
611             for (uint32 o = 0; allow && o < insn->GetOperandSize(); ++o) {
612                 Operand &opnd = insn->GetOperand(o);
613                 if (insn->OpndIsDef(o)) {
614                     allow = allow & !FindRegs(opnd, vecTargetRegs);
615                     allow = allow & !FindRegs(opnd, vecSourceRegs);
616                     allow = allow & !FindRegs(opnd, vecReturnSourceRegs);
617                 }
618                 if (insn->OpndIsUse(o)) {
619                     allow = allow & !FindRegs(opnd, vecTargetRegs);
620                 }
621             }
622             /* If a result_dst is not allowed, this insn can still be allowed provided it is a mov Rx,R0/R1
623              * and the existing insns contain no blr.
624              * RLR 31, RFP 32, RSP 33, RZR 34 */
625             if (!ifPred && !bl && !allow &&
626                 (insn->GetMachineOpcode() == MOP_xmovrr || insn->GetMachineOpcode() == MOP_wmovrr)) {
627                 Operand *resultOpnd = &(insn->GetOperand(0));
628                 Operand *srcOpnd = &(insn->GetOperand(1));
629                 regno_t resultNO = static_cast<RegOperand *>(resultOpnd)->GetRegisterNumber();
630                 regno_t srcNO = static_cast<RegOperand *>(srcOpnd)->GetRegisterNumber();
631                 if (!FindRegs(*resultOpnd, vecTargetRegs) && !FindRegs(*srcOpnd, vecTargetRegs) &&
632                     !FindRegs(*srcOpnd, vecSourceRegs) && !FindRegs(*srcOpnd, vecReturnSourceRegs) &&
633                     (srcNO < RLR || srcNO > RZR)) {
634                     allow = true; /* allow on the conditional mov Rx,Rxx */
635                     for (auto *exit : existingInsns) {
636                         /* the registers of kOpdMem are complex to be detected */
637                         for (uint32 o = 0; o < exit->GetOperandSize(); ++o) {
638                             if (!exit->OpndIsUse(o)) {
639                                 continue;
640                             }
641                             Operand *opd = &(exit->GetOperand(o));
642                             if (opd->IsList() || opd->IsMemoryAccessOperand()) {
643                                 allow = false;
644                                 break;
645                             }
646                             /* Distinguish between 32-bit regs and 64-bit regs */
647                             if (opd->IsRegister() && static_cast<RegOperand *>(opd)->GetRegisterNumber() == resultNO &&
648                                 opd != resultOpnd) {
649                                 allow = false;
650                                 break;
651                             }
652                         }
653                     }
654                 }
655                 /* replace the R2 with R1 */
656                 if (allow) {
657                     for (auto *exit : existingInsns) {
658                         for (uint32 o = 0; o < exit->GetOperandSize(); ++o) {
659                             if (!exit->OpndIsUse(o)) {
660                                 continue;
661                             }
662                             Operand *opd = &(exit->GetOperand(o));
663                             if (opd->IsRegister() && (opd == resultOpnd)) {
664                                 exit->SetOperand(o, *srcOpnd);
665                             }
666                         }
667                     }
668                 }
669             }
670             if (!allow) { /* the insn conflicts with the tracked regs and cannot be sunk */
671                 /* code sinking fails */
672                 InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs);
673                 existingInsns.push_back(insn);
674             } else {
675                 moveInsns.push_back(insn);
676             }
677         }
678         if (pred->GetPreds().empty()) {
679             break;
680         }
681         if (!ifPred) {
682             for (auto *tmPred : pred->GetPreds()) {
683                 pred = tmPred;
684                 /* try to find the BB without branch */
685                 if (tmPred->GetKind() == BB::kBBGoto || tmPred->GetKind() == BB::kBBFallthru) {
686                     ifPred = false;
687                     break;
688                 } else {
689                     ifPred = true;
690                 }
691             }
692         }
693     } while (pred != nullptr);
694     for (std::set<regno_t>::iterator it = vecTargetRegs.begin(); it != vecTargetRegs.end(); ++it) {
695         if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(*it))) { /* flag register */
696             return false;
697         }
698     }
699     return !bl;
700 }
701 
702 BB *AArch64GenProEpilog::IsolateFastPath(BB &bb)
703 {
704     /*
705      * Detect "if (cond) return" fast path, and move extra instructions
706      * to the slow path.
707      * Must match the following block structure. BB1 can be a series of
708      * single-pred/single-succ blocks.
709      *     BB1 ops1 cmp-br to BB3        BB1 cmp-br to BB3
710      *     BB2 ops2 br to retBB    ==>   BB2 ret
711      *     BB3 slow path                 BB3 ops1 ops2
712      * if the detection is successful, BB3 will be used to generate the prologue.
713      */
714     if (bb.GetPrev() != nullptr) {
715         return nullptr;
716     }
717     BB *ifBB = nullptr;
718     BB *returnBB = nullptr;
719     BB *coldBB = nullptr;
720     {
721         BB *curBB = &bb;
722         /* Look for straight line code */
723         while (1) {
724             if (!curBB->GetEhSuccs().empty()) {
725                 return nullptr;
726             }
727             if (curBB->GetSuccs().size() == 1) {
728                 if (curBB->HasCall()) {
729                     return nullptr;
730                 }
731                 BB *succ = curBB->GetSuccs().front();
732                 if (succ->GetPreds().size() != 1 || !succ->GetEhPreds().empty()) {
733                     return nullptr;
734                 }
735                 curBB = succ;
736             } else if (curBB->GetKind() == BB::kBBIf) {
737                 ifBB = curBB;
738                 break;
739             } else {
740                 return nullptr;
741             }
742         }
743     }
744     /* targets of if bb can only be reached by if bb */
745     {
746         CHECK_FATAL(!ifBB->GetSuccs().empty(), "null succs check!");
747         BB *first = ifBB->GetSuccs().front();
748         BB *second = ifBB->GetSuccs().back();
749         if (first->GetPreds().size() != 1 || !first->GetEhPreds().empty()) {
750             return nullptr;
751         }
752         if (second->GetPreds().size() != 1 || !second->GetEhPreds().empty()) {
753             return nullptr;
754         }
755         /* One target of the if bb jumps to a return bb */
756         if (first->GetKind() != BB::kBBGoto && first->GetKind() != BB::kBBFallthru) {
757             return nullptr;
758         }
759         if (first->GetSuccs().size() != 1) {
760             return nullptr;
761         }
762         if (first->GetSuccs().front()->GetKind() != BB::kBBReturn) {
763             return nullptr;
764         }
765         if (first->GetSuccs().front()->GetPreds().size() != 1) {
766             return nullptr;
767         }
768         if (first->GetSuccs().front()->NumInsn() > kInsnNum2) { /* bail out if extra (e.g. debug-only) insns are present */
769             return nullptr;
770         }
771         if (second->GetSuccs().empty()) {
772             return nullptr;
773         }
774         returnBB = first;
775         coldBB = second;
776     }
777     /* Search backward looking for dependencies for the cond branch */
778     std::list<Insn *> existingInsns; /* the insns must be retained in the ifBB (and the return BB) */
779     std::list<Insn *> moveInsns;     /* instructions to be moved to coldbb */
780     /*
781      * The control flow matches at this point.
782      * Make sure the sourceRegs of the insns in returnBB (vecReturnSourceRegs) cannot be overwritten.
783      * The regs in insns take three forms: list, MemoryAccess, or Register.
784      */
785     CHECK_FATAL(returnBB != nullptr, "null ptr check");
786     std::set<regno_t> vecReturnSourceRegs;
787     FOR_BB_INSNS_REV(insn, returnBB) {
788         if (!insn->IsMachineInstruction()) {
789             continue;
790         }
791         if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) {
792             return nullptr;
793         }
794         InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs);
795         existingInsns.push_back(insn);
796     }
797     FOR_BB_INSNS_REV(insn, returnBB->GetSuccs().front()) {
798         if (!insn->IsMachineInstruction()) {
799             continue;
800         }
801         if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) {
802             return nullptr;
803         }
804         InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs);
805         existingInsns.push_back(insn);
806     }
807     /*
808      * The mv is the 1st move using the parameter register leading to the branch
809      * The ld is the load using the parameter register indirectly for the branch
810      * The depMv is the move which preserves the result of the load but might
811      *    destroy a parameter register which will be moved below the branch.
812      */
813     bool fast = BackwardFindDependency(*ifBB, vecReturnSourceRegs, existingInsns, moveInsns);
814     /* move extra instructions to the slow path */
815     if (!fast) {
816         return nullptr;
817     }
818     for (auto in : moveInsns) {
819         in->GetBB()->RemoveInsn(*in);
820         CHECK_FATAL(coldBB != nullptr, "null ptr check");
821         static_cast<void>(coldBB->InsertInsnBegin(*in));
822     }
823     /* All instructions are in the right place; replace the branch to the ret bb with just a ret. */
824     /* Remove the lastInsn of gotoBB */
825     if (returnBB->GetKind() == BB::kBBGoto) {
826         returnBB->RemoveInsn(*returnBB->GetLastInsn());
827     }
828     BB *tgtBB = returnBB->GetSuccs().front();
829     CHECK_FATAL(tgtBB != nullptr, "null ptr check");
830     FOR_BB_INSNS(insn, tgtBB) {
831         returnBB->AppendInsn(*insn); /* add the insns such as MOP_xret */
832     }
833     returnBB->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
834     /* bb is now a retbb and has no succ. */
835     returnBB->SetKind(BB::kBBReturn);
836     auto predIt = std::find(tgtBB->GetPredsBegin(), tgtBB->GetPredsEnd(), returnBB);
837     tgtBB->ErasePreds(predIt);
838     tgtBB->ClearInsns();
839     returnBB->ClearSuccs();
840     if (tgtBB->GetPrev() != nullptr && tgtBB->GetNext() != nullptr) {
841         tgtBB->GetPrev()->SetNext(tgtBB->GetNext());
842         tgtBB->GetNext()->SetPrev(tgtBB->GetPrev());
843     }
844     SetFastPathReturnBB(tgtBB);
845     return coldBB;
846 }
847 
848 MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
849                                                                                    uint32 bitLen, AArch64reg baseRegNum)
850 {
851     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
852     CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
853     OfstOperand *ofstOp = mo.GetOffsetImmediate();
854     int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
855     CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
856     CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
857     /*
858      * Offset adjustment due to FP/SP has already been done
859      * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
860      */
861     RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
862     if (aarchCGFunc.GetSplitBaseOffset() == 0) {
863         aarchCGFunc.SetSplitBaseOffset(offsetVal); /* remember the offset; don't forget to clear it */
864         ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
865         RegOperand *origBaseReg = mo.GetBaseRegister();
866         aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);
867     }
868     offsetVal = offsetVal - aarchCGFunc.GetSplitBaseOffset();
869     return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal);
870 }
871 
872 void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
873                                                     int32 offset)
874 {
875     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
876     MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
877     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
878     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
879     Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
880 
881     uint32 dataSize = GetPointerSize() * kBitsPerByte;
882     CHECK_FATAL(offset >= 0, "offset must >= 0");
883     if (offset > kStpLdpImm64UpperBound) {
884         o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
885     }
886     Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
887     std::string comment = "SAVE CALLEE REGISTER PAIR";
888     pushInsn.SetComment(comment);
889     AppendInstructionTo(pushInsn, cgFunc);
890 
891     /* Append CFI code */
892     if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
893         int32 stackFrameSize =
894             static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
895         stackFrameSize -= (static_cast<int32>(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()) +
896                            cgFunc.GetFunction().GetFrameReseverdSlot());
897         int32 cfiOffset = stackFrameSize - offset;
898         BB *curBB = cgFunc.GetCurBB();
899         Insn *newInsn = curBB->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg0, -cfiOffset, k64BitSize));
900         curBB->InsertInsnAfter(*newInsn,
901                                aarchCGFunc.CreateCfiOffsetInsn(reg1, -cfiOffset + kOffset8MemPos, k64BitSize));
902     }
903 }
904 
905 void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
906 {
907     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
908     MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
909     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerSize() * kBitsPerByte, rty);
910     Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
911 
912     MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
913     uint32 dataSize = GetPointerSize() * kBitsPerByte;
914     if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
915         o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9);
916     }
917 
918     Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
919     std::string comment = "SAVE CALLEE REGISTER";
920     pushInsn.SetComment(comment);
921     AppendInstructionTo(pushInsn, cgFunc);
922 
923     /* Append CFI code */
924     if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
925         int32 stackFrameSize =
926             static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
927         stackFrameSize -= (static_cast<int32>(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()) +
928                            cgFunc.GetFunction().GetFrameReseverdSlot());
929         int32 cfiOffset = stackFrameSize - offset;
930         cgFunc.GetCurBB()->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg, -cfiOffset, k64BitSize));
931     }
932 }
933 
934 Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
935                                                                              AArch64reg reg1, RegType rty,
936                                                                              bool isAllocate)
937 {
938     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
939     MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
940     uint8 size;
941     if (CGOptions::IsArm64ilp32()) {
942         size = k8ByteSize;
943     } else {
944         size = GetPointerSize();
945     }
946     if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
947         mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
948         RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
949         MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
950         Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
951         AppendInstructionTo(insn1, cgFunc);
952         RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
953         o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
954         Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
955         AppendInstructionTo(insn2, cgFunc);
956         return insn2;
957     } else {
958         RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
959         ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
960         aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
961         RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
962         RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
963         MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
964         Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
965         AppendInstructionTo(insn1, cgFunc);
966         ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
967         aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
968         RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
969         mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
970         Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
971         AppendInstructionTo(insn2, cgFunc);
972         return insn2;
973     }
974 }
975 
976 Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
977                                                                           AArch64reg reg1, RegType rty)
978 {
979     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
980     CG *currCG = cgFunc.GetCG();
981     MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
982     Insn *allocInsn = nullptr;
983     if (fpToSpDistance > kStpLdpImm64UpperBound) {
984         allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
985     } else {
986         Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
987         Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
988         Operand *o2 =
989             aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
990         allocInsn = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
991         AppendInstructionTo(*allocInsn, cgFunc);
992     }
993     if (currCG->NeedInsertInstrumentationFunction()) {
994         aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
995     } else if (currCG->InstrumentWithDebugTraceCall()) {
996         aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
997     } else if (currCG->InstrumentWithProfile()) {
998         aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
999     }
1000     return *allocInsn;
1001 }
1002 
1003 void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
1004 {
1005     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1006     CG *currCG = cgFunc.GetCG();
1007     if (currCG->GenerateVerboseCG()) {
1008         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
1009     }
1010 
1011     Insn *ipoint = nullptr;
1012     /*
1013      * stackFrameSize includes the size of args to stack-pass
1014      * if a function has neither VLA nor alloca.
1015      */
1016     int32 stackFrameSize =
1017         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1018     int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1019     /*
1020      * ldp/stp's signed immediate must be within [-512, 504];
1021      * if the required offset exceeds that range, we fall back to the stp + sub version
1022      */
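    /* Schematic of the else path below (taken when there is a VLA/alloca or no stack-passed args),
     * assuming reg0/reg1 are FP/LR; the exact addressing modes come from CreateCallFrameOperand/SelectSub:
     *     frameSize <= kStpLdpImm64UpperBound:  stp x29, x30, [sp, #-frameSize]!
     *     frameSize >  kStpLdpImm64UpperBound:  stp x29, x30, [sp, #-16]!
     *                                           sub sp, sp, #(frameSize - 16)
     */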
1023     bool useStpSub = false;
1024     int64 offset = 0;
1025     int32 cfiOffset = 0;
1026     if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1027         /*
1028          * stack_frame_size == size of formal parameters + callee-saved (including FP/LR)
1029          *                     + size of local vars
1030          *                     + size of actuals
1031          * (when passing more than 8 args, it is the caller's responsibility to
1032          *  allocate space for them; size of actuals represents the largest such size in the function).
1033          */
1034         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1035         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1036         aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1037         ipoint = cgFunc.GetCurBB()->GetLastInsn();
1038         cfiOffset = stackFrameSize;
1039     } else {
1040         if (stackFrameSize > kStpLdpImm64UpperBound) {
1041             useStpSub = true;
1042             offset = kOffset16MemPos;
1043             stackFrameSize -= offset;
1044         } else {
1045             offset = stackFrameSize;
1046         }
1047         MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1048         RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1049         RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1050         MemOperand &o2 =
1051             aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerSize() * kBitsPerByte);
1052         ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1053         AppendInstructionTo(*ipoint, cgFunc);
1054         cfiOffset = offset;
1055         if (currCG->NeedInsertInstrumentationFunction()) {
1056             aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
1057         } else if (currCG->InstrumentWithDebugTraceCall()) {
1058             aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
1059         } else if (currCG->InstrumentWithProfile()) {
1060             aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
1061         }
1062     }
1063 
1064     ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1065 
1066     if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1067         CHECK_FATAL(!useStpSub, "Invalid assumption");
1068         ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
1069     }
1070 
1071     if (useStpSub) {
1072         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1073         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1074         aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1075         ipoint = cgFunc.GetCurBB()->GetLastInsn();
1076         aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
1077     }
1078 
1079     CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1080     int32 cfiOffsetSecond = 0;
1081     if (useStpSub) {
1082         cfiOffsetSecond = stackFrameSize;
1083         ipoint = InsertCFIDefCfaOffset(cfiOffsetSecond, *ipoint);
1084     }
1085     cfiOffsetSecond = GetOffsetFromCFA();
1086     if (!cgFunc.HasVLAOrAlloca()) {
1087         cfiOffsetSecond -= fpToSpDistance;
1088     }
1089     if (cgFunc.GenCfi()) {
1090         BB *curBB = cgFunc.GetCurBB();
1091         if (useFP) {
1092             ipoint = curBB->InsertInsnAfter(
1093                 *ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffsetSecond, k64BitSize));
1094         }
1095         curBB->InsertInsnAfter(*ipoint,
1096                                aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffsetSecond + kOffset8MemPos, k64BitSize));
1097     }
1098 }
1099 
1100 void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1101 {
1102     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1103     CG *currCG = cgFunc.GetCG();
1104     if (currCG->GenerateVerboseCG()) {
1105         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
1106     }
1107 
1108     int32 stackFrameSize =
1109         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1110     int64 fpToSpDistance =
1111         (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1112 
1113     Insn *ipoint = nullptr;
1114     int32 cfiOffset = 0;
1115 
1116     if (fpToSpDistance > 0) {
1117         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1118         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1119         aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1120         ipoint = cgFunc.GetCurBB()->GetLastInsn();
1121         cfiOffset = stackFrameSize;
1122         (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1123         if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1124             fpToSpDistance -= (kDivide2 * k8ByteSize);
1125         }
1126         ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
1127         CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1128         cfiOffset = GetOffsetFromCFA();
1129         cfiOffset -= fpToSpDistance;
1130     } else {
1131         bool useStpSub = false;
1132 
1133         if (stackFrameSize > kStpLdpImm64UpperBound) {
1134             useStpSub = true;
1135             RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1136             ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1137             aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1138             ipoint = cgFunc.GetCurBB()->GetLastInsn();
1139             cfiOffset = stackFrameSize;
1140             ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1141         } else {
1142             MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1143             RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1144             RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1145             MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerSize() * kBitsPerByte);
1146             ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1147             AppendInstructionTo(*ipoint, cgFunc);
1148             cfiOffset = stackFrameSize;
1149             ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1150         }
1151 
1152         if (useStpSub) {
1153             MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1154             RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1155             RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1156             MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerSize() * kBitsPerByte);
1157             ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1158             AppendInstructionTo(*ipoint, cgFunc);
1159         }
1160 
1161         if (currCG->NeedInsertInstrumentationFunction()) {
1162             aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
1163         } else if (currCG->InstrumentWithDebugTraceCall()) {
1164             aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
1165         } else if (currCG->InstrumentWithProfile()) {
1166             aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
1167         }
1168 
1169         CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1170         cfiOffset = GetOffsetFromCFA();
1171     }
1172     if (cgFunc.GenCfi()) {
1173         BB *curBB = cgFunc.GetCurBB();
1174         if (useFP) {
1175             ipoint =
1176                 curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffset, k64BitSize));
1177         }
1178         curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffset + kOffset8MemPos, k64BitSize));
1179     }
1180 }
1181 
1182 /*
1183  *  From AArch64 Reference Manual
1184  *  C1.3.3 Load/Store Addressing Mode
1185  *  ...
1186  *  When stack alignment checking is enabled by system software and
1187  *  the base register is the SP, the current stack pointer must be
1188  *  initially quadword aligned, that is aligned to 16 bytes. Misalignment
1189  *  generates a Stack Alignment fault.  The offset does not have to
1190  *  be a multiple of 16 bytes unless the specific Load/Store instruction
1191  *  requires this. SP cannot be used as a register offset.
1192  */
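/*
 * Illustrative sketch only (exact registers and offsets depend on the frame layout and on
 * whether outgoing stack arguments exist); a typical prologue arranged below looks like:
 *     stp x29, x30, [sp, #-frameSize]!   // allocate the frame and save FP/LR as a pair
 *     add x29, sp, #fpToSpDistance       // establish FP (a plain "mov x29, sp" when zero)
 *     stp x19, x20, [sp, #offset]        // save the remaining callee-saved pairs
 *     str d8, [sp, #offset2]             // plus a single store for an odd leftover register
 */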
1193 void AArch64GenProEpilog::GeneratePushRegs()
1194 {
1195     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1196     CG *currCG = cgFunc.GetCG();
1197     const MapleVector<AArch64reg> &regsToSave =
1198         (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1199 
1200     CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");
1201 
1202     AArch64reg intRegFirstHalf = kRinvalid;
1203     AArch64reg fpRegFirstHalf = kRinvalid;
1204 
1205     if (currCG->GenerateVerboseCG()) {
1206         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
1207     }
1208 
1209     /*
1210      * Even if we don't use RFP, since we push a pair of registers in one instruction
1211      * and the stack needs to be aligned on a 16-byte boundary, push RFP as well if the function has a call.
1212      * Make sure this is reflected when computing callee_saved_regs.size()
1213      */
1214     if (!currCG->GenerateDebugFriendlyCode()) {
1215         AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
1216     } else {
1217         AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
1218     }
1219 
1220     if (useFP) {
1221         if (currCG->GenerateVerboseCG()) {
1222             cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
1223         }
1224         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1225         Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1226         int64 fpToSpDistance =
1227             (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1228         bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
1229         if ((fpToSpDistance > 0) || isLmbc) {
1230             Operand *immOpnd;
1231             if (isLmbc) {
1232                 int32 size =
1233                     static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1234                 immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
1235             } else {
1236                 immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
1237             }
1238             if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1239                 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
1240             }
1241             cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1242             if (cgFunc.GenCfi()) {
1243                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(
1244                     stackBaseReg,
1245                     static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToSpDistance,
1246                     k64BitSize));
1247             }
1248         } else {
1249             aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
1250             cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1251             if (cgFunc.GenCfi()) {
1252                 cgFunc.GetCurBB()->AppendInsn(
1253                     cgFunc.GetInsnBuilder()
1254                         ->BuildCfiInsn(cfi::OP_CFI_def_cfa_register)
1255                         .AddOpndChain(aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize)));
1256             }
1257         }
1258     }
1259 
1260     MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
1261     /* skip the first two registers */
1262     CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP");
1263     ++it;
1264     CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1265     ++it;
1266 
1267     AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1268     int32 offset;
1269     if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1270         offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1271                                     memLayout->GetSizeOfLocals());
1272     } else {
1273         offset = (static_cast<int32>(memLayout->RealStackFrameSize() -
1274                                      (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) - /* for FP/LR */
1275                   memLayout->SizeOfArgsToStackPass() -
1276                   cgFunc.GetFunction().GetFrameReseverdSlot());
1277     }
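    /*
     * "offset" is now the SP-relative position where the remaining callee-saved registers
     * (FP/LR excluded) start to be stored; it is lowered further below for the stack-protector
     * slot and the vararg GR/VR save areas, then advanced as each register is pushed.
     */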
1278 
1279     if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1280         offset -= kAarch64StackPtrAlignment;
1281     }
1282 
1283     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1284         /* GR/VR save areas are above the callee save area */
1285         AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1286         auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1287                                                RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1288         offset -= saveareasize;
1289     }
1290 
1291     std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
1292     for (; it != regsToSave.end(); ++it) {
1293         AArch64reg reg = *it;
1294         CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?");
1295         CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1296         RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1297         AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1298         if (firstHalf == kRinvalid) {
1299             /* remember it */
1300             firstHalf = reg;
1301             /* for int callee-saved register: x19->19,x20->20 ...
1302                for float callee-saved register: d8->72, d9->73 ..., d15->79
1303             */
1304             uint16 regNO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
1305             calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
1306         } else {
1307             uint16 regNO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
1308             calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset + k8ByteSize));
1309             AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
1310             GetNextOffsetCalleeSaved(offset);
1311             firstHalf = kRinvalid;
1312         }
1313     }
1314 
1315     if (intRegFirstHalf != kRinvalid) {
1316         AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1317         GetNextOffsetCalleeSaved(offset);
1318     }
1319 
1320     if (fpRegFirstHalf != kRinvalid) {
1321         AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1322         GetNextOffsetCalleeSaved(offset);
1323     }
1324 
1325     /*
1326      * Reset the split base offset in case stp/ldp instructions were split,
1327      * so that a load-into-base-register instruction is generated
1328      * for pop pairs as well.
1329      */
1330     aarchCGFunc.SetSplitBaseOffset(0);
1331 
1332     const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
1333     if (emitMemoryManager.codeSpace != nullptr) {
1334         emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
1335         int64 fpToCurSpDistance =
1336             (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1337         int32 fp2PrevFrameSPDelta =
1338             static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToCurSpDistance;
1339         emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
1340     }
1341 }
1342 
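/*
 * For C varargs functions, spill the unnamed argument registers so that va_arg can read them
 * from the stack: the remaining x<n>..x7 are stored into the GR save area and, unless general
 * registers only are used, the remaining d<n>..d7 into the VR save area (16 bytes apart).
 * Offsets are SP-relative, or negative FP-relative offsets for the lmbc flavor.
 */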
1343 void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs()
1344 {
1345     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1346     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1347         AArch64MemLayout *memlayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1348         uint8 size;
1349         if (CGOptions::IsArm64ilp32()) {
1350             size = k8ByteSize;
1351         } else {
1352             size = GetPointerSize();
1353         }
1354         uint32 dataSizeBits = size * kBitsPerByte;
1355         uint32 offset;
1356         if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1357             offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */
1358             if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) {
1359                 offset += size; /* End of area should be aligned. Hole between VR and GR area */
1360             }
1361         } else {
1362             offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */
1363             if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) {
1364                 offset -= size;
1365             }
1366         }
1367         uint32 grSize = (UINT32_MAX - offset) + 1;
1368         uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
1369         DEBUG_ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
1370         for (uint32 i = start_regno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
1371             uint32 tmpOffset = 0;
1372             if (CGOptions::IsBigEndian()) {
1373                 if ((dataSizeBits >> k8BitShift) < k8BitSize) {
1374                     tmpOffset += k8BitSize - (dataSizeBits >> k8BitShift);
1375                 }
1376             }
1377             Operand *stackLoc;
1378             if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1379                 stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
1380             } else {
1381                 stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size);
1382             }
1383             RegOperand &reg =
1384                 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyInt);
1385             Insn &inst =
1386                 cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, *stackLoc);
1387             cgFunc.GetCurBB()->AppendInsn(inst);
1388             offset += size;
1389         }
1390         if (!CGOptions::UseGeneralRegOnly()) {
1391             if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1392                 offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());
1393             } else {
1394                 offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1;
1395             }
1396             start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
1397             DEBUG_ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area");
1398             for (uint32 i = start_regno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
1399                 uint32 tmpOffset = 0;
1400                 if (CGOptions::IsBigEndian()) {
1401                     if ((dataSizeBits >> k8BitShift) < k16BitSize) {
1402                         tmpOffset += k16BitSize - (dataSizeBits >> k8BitShift);
1403                     }
1404                 }
1405                 Operand *stackLoc;
1406                 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1407                     stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
1408                 } else {
1409                     stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size);
1410                 }
1411                 RegOperand &reg =
1412                     aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyFloat);
1413                 Insn &inst =
1414                     cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_f64), reg, *stackLoc);
1415                 cgFunc.GetCurBB()->AppendInsn(inst);
1416                 offset += (size * k2BitSize);
1417             }
1418         }
1419     }
1420 }
1421 
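/*
 * Stack-overflow probe: load from "offset" bytes (kSoeChckOffset, 8 KiB) below SP so that a
 * fault is raised early if the stack cannot grow by that amount, e.g.
 *     sub x16, sp, #0x2000
 *     ldr wzr, [x16]
 */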
1422 void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset)
1423 {
1424     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1425     CG *currCG = cgFunc.GetCG();
1426     /* sub x16, sp, #0x2000 */
1427     auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty);
1428     auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty);
1429     auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true);
1430     aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64);
1431 
1432     /* ldr wzr, [x16] */
1433     auto &wzr = cgFunc.GetZeroOpnd(k32BitSize);
1434     auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize);
1435     auto &soeInstr = cgFunc.GetInsnBuilder()->BuildInsn(MOP_wldr, wzr, refX16);
1436     if (currCG->GenerateVerboseCG()) {
1437         soeInstr.SetComment("soerror");
1438     }
1439     soeInstr.SetDoNotRemove(true);
1440     AppendInstructionTo(soeInstr, cgFunc);
1441 }
1442 
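/*
 * Prologue driver; summary of the steps below: emit a .loc directive when debug info is
 * requested, save the callee-saved registers (which also allocates the activation frame and
 * establishes FP), or simply adjust SP when nothing needs saving, spill unnamed vararg
 * registers, optionally insert the stack-overflow check, then splice the generated
 * instructions into the entry BB.
 */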
1443 void AArch64GenProEpilog::GenerateProlog(BB &bb)
1444 {
1445     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1446     CG *currCG = cgFunc.GetCG();
1447     BB *formerCurBB = cgFunc.GetCurBB();
1448     aarchCGFunc.GetDummyBB()->ClearInsns();
1449     aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
1450     cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1451     if (!cgFunc.GetHasProEpilogue()) {
1452         return;
1453     }
1454 
1455     // insert .loc for function
1456     if (currCG->GetCGOptions().WithLoc() &&
1457         (!currCG->GetMIRModule()->IsCModule() || currCG->GetMIRModule()->IsWithDbgInfo())) {
1458         MIRFunction *func = &cgFunc.GetFunction();
1459         MIRSymbol *fSym = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx());
1460         if (currCG->GetCGOptions().WithSrc()) {
1461             uint32 tempmaxsize = static_cast<uint32>(currCG->GetMIRModule()->GetSrcFileInfo().size());
1462             uint32 endfilenum = currCG->GetMIRModule()->GetSrcFileInfo()[tempmaxsize - 1].second;
1463             if (fSym->GetSrcPosition().FileNum() != 0 && fSym->GetSrcPosition().FileNum() <= endfilenum) {
1464                 Operand *o0 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().FileNum());
1465                 int64_t lineNum = fSym->GetSrcPosition().LineNum();
1466                 if (lineNum == 0) {
1467                     if (cgFunc.GetFunction().GetAttr(FUNCATTR_native)) {
1468                         lineNum = 0xffffe;
1469                     } else {
1470                         lineNum = 0xffffd;
1471                     }
1472                 }
1473                 Operand *o1 = cgFunc.CreateDbgImmOperand(lineNum);
1474                 Insn &loc =
1475                     cgFunc.GetInsnBuilder()->BuildDbgInsn(mpldbg::OP_DBG_loc).AddOpndChain(*o0).AddOpndChain(*o1);
1476                 cgFunc.GetCurBB()->AppendInsn(loc);
1477             }
1478         } else {
1479             Operand *o0 = cgFunc.CreateDbgImmOperand(1);
1480             Operand *o1 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().MplLineNum());
1481             Insn &loc = cgFunc.GetInsnBuilder()->BuildDbgInsn(mpldbg::OP_DBG_loc).AddOpndChain(*o0).AddOpndChain(*o1);
1482             cgFunc.GetCurBB()->AppendInsn(loc);
1483         }
1484     }
1485 
1486     const MapleVector<AArch64reg> &regsToSave =
1487         (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1488     if (!regsToSave.empty()) {
1489         /*
1490          * Among other things, push the FP & LR pair.
1491          * FP/LR are added to the callee-saved list in AllocateRegisters()
1492          * We add them to the callee-saved list regardless of UseFP() being true/false.
1493          * Activation Frame is allocated as part of pushing FP/LR pair
1494          */
1495         GeneratePushRegs();
1496     } else {
1497         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1498         int32 stackFrameSize =
1499             static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1500         if (stackFrameSize > 0) {
1501             if (currCG->GenerateVerboseCG()) {
1502                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
1503             }
1504             Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1505             aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1506 
1507             int32 offset = stackFrameSize;
1508             (void)InsertCFIDefCfaOffset(offset, *(cgFunc.GetCurBB()->GetLastInsn()));
1509         }
1510         if (currCG->GenerateVerboseCG()) {
1511             cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
1512         }
1513         if (useFP) {
1514             Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1515             bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
1516             int64 fpToSpDistance =
1517                 cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1518             if ((fpToSpDistance > 0) || isLmbc) {
1519                 Operand *immOpnd;
1520                 if (isLmbc) {
1521                     int32 size = static_cast<int32>(
1522                         static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1523                     immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
1524                 } else {
1525                     immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
1526                 }
1527                 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
1528                 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1529                 if (cgFunc.GenCfi()) {
1530                     cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(
1531                         stackBaseReg,
1532                         static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToSpDistance,
1533                         k64BitSize));
1534                 }
1535             } else {
1536                 aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
1537                 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1538                 if (cgFunc.GenCfi()) {
1539                     cgFunc.GetCurBB()->AppendInsn(
1540                         cgFunc.GetInsnBuilder()
1541                             ->BuildCfiInsn(cfi::OP_CFI_def_cfa_register)
1542                             .AddOpndChain(aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize)));
1543                 }
1544             }
1545         }
1546     }
1547     GeneratePushUnnamedVarargRegs();
1548     if (currCG->DoCheckSOE()) {
1549         AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset);
1550     }
1551     bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
1552     cgFunc.SetCurBB(*formerCurBB);
1553     aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
1554 }
1555 
1556 void AArch64GenProEpilog::GenerateRet(BB &bb)
1557 {
1558     bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
1559 }
1560 
1561 /*
1562  * If all the preds of exitBB performed the tail-call optimization (replaced blr/bl with br/b), return true
1563  * and do not create a ret insn. Otherwise, return false and create the ret insn.
1564  */
1565 bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB)
1566 {
1567     AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1568     if (cgFunc.GetMirModule().IsCModule() &&
1569         (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) {
1570         return false;
1571     }
1572     for (auto tmpBB : exitBB.GetPreds()) {
1573         Insn *firstInsn = tmpBB->GetFirstInsn();
1574         if ((firstInsn == nullptr || tmpBB->IsCommentBB()) && (!tmpBB->GetPreds().empty())) {
1575             if (!TestPredsOfRetBB(*tmpBB)) {
1576                 return false;
1577             }
1578         } else {
1579             Insn *lastInsn = tmpBB->GetLastInsn();
1580             if (lastInsn == nullptr) {
1581                 return false;
1582             }
1583             MOperator insnMop = lastInsn->GetMachineOpcode();
1584             if (insnMop != MOP_tail_call_opt_xbl && insnMop != MOP_tail_call_opt_xblr) {
1585                 return false;
1586             }
1587         }
1588     }
1589     return true;
1590 }
1591 
1592 void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
1593 {
1594     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1595     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1596     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerSize() * kBitsPerByte, rty);
1597     Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
1598     MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
1599     uint32 dataSize = GetPointerSize() * kBitsPerByte;
1600     if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
1601         o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9);
1602     }
1603 
1604     Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
1605     popInsn.SetComment("RESTORE");
1606     cgFunc.GetCurBB()->AppendInsn(popInsn);
1607 
1608     /* Append CFI code. */
1609     if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
1610         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg, k64BitSize));
1611     }
1612 }
1613 
1614 void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
1615                                                    int32 offset)
1616 {
1617     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1618     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1619     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1620     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1621     Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
1622 
1623     uint32 dataSize = GetPointerSize() * kBitsPerByte;
1624     CHECK_FATAL(offset >= 0, "offset must >= 0");
1625     if (offset > kStpLdpImm64UpperBound) {
1626         o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
1627     }
1628     Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1629     popInsn.SetComment("RESTORE RESTORE");
1630     cgFunc.GetCurBB()->AppendInsn(popInsn);
1631 
1632     /* Append CFI code */
1633     if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
1634         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg0, k64BitSize));
1635         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg1, k64BitSize));
1636     }
1637 }
1638 
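/*
 * Illustrative sketch only (offsets depend on the frame layout): the common epilogue emitted
 * here is "ldp x29, x30, [sp], #frameSize", restoring FP/LR while releasing the frame. When
 * frameSize exceeds the ldp immediate range (504), the frame is released in two steps, e.g.
 *     add sp, sp, #(frameSize - 16)
 *     ldp x29, x30, [sp], #16
 */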
1639 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
1640 {
1641     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1642     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1643     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1644     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1645     int32 stackFrameSize =
1646         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1647     int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1648     /*
1649      * ldp/stp's imm should be between -512 and 504;
1650      * if ldp's imm > 504, we fall back to the ldp-add version
1651      */
1652     bool useLdpAdd = false;
1653     int32 offset = 0;
1654 
1655     Operand *o2 = nullptr;
1656     if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1657         o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
1658     } else {
1659         if (stackFrameSize > kStpLdpImm64UpperBound) {
1660             useLdpAdd = true;
1661             offset = kOffset16MemPos;
1662             stackFrameSize -= offset;
1663         } else {
1664             offset = stackFrameSize;
1665         }
1666         o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerSize() * kBitsPerByte);
1667     }
1668 
1669     if (useLdpAdd) {
1670         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1671         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1672         aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1673         if (cgFunc.GenCfi()) {
1674             int64 cfiOffset = GetOffsetFromCFA();
1675             BB *curBB = cgFunc.GetCurBB();
1676             curBB->InsertInsnAfter(*(curBB->GetLastInsn()),
1677                                    aarchCGFunc.CreateCfiDefCfaInsn(RSP, cfiOffset - stackFrameSize, k64BitSize));
1678         }
1679     }
1680 
1681     if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1682         CHECK_FATAL(!useLdpAdd, "Invalid assumption");
1683         if (fpToSpDistance > kStpLdpImm64UpperBound) {
1684             (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1685         } else {
1686             Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1687             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1688         }
1689         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1690         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1691         aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1692     } else {
1693         Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1694         cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1695     }
1696 
1697     if (cgFunc.GenCfi()) {
1698         /* Append CFI restore */
1699         if (useFP) {
1700             cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1701         }
1702         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1703     }
1704 }
1705 
1706 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1707 {
1708     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1709     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1710     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1711     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1712     int32 stackFrameSize =
1713         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1714     int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1715     /*
1716      * ldp/stp's imm should be between -512 and 504;
1717      * if ldp's imm > 504, we fall back to the ldp-add version
1718      */
1719     bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
1720     if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
1721         int lmbcOffset = 0;
1722         if (!isLmbc) {
1723             stackFrameSize -= fpToSpDistance;
1724         } else {
1725             lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
1726         }
1727         if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
1728             Operand *o2;
1729             o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerSize() * kBitsPerByte);
1730             Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1731             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1732             if (cgFunc.GenCfi()) {
1733                 /* Append CFI restore */
1734                 if (useFP) {
1735                     cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1736                 }
1737                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1738             }
1739             Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1740             Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1741             aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1742         } else {
1743             MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerSize() * kBitsPerByte);
1744             Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1745             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1746             if (cgFunc.GenCfi()) {
1747                 if (useFP) {
1748                     cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1749                 }
1750                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1751             }
1752         }
1753     } else {
1754         Operand *o2;
1755         o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
1756         if (fpToSpDistance > kStpLdpImm64UpperBound) {
1757             (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1758         } else {
1759             Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1760             cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1761         }
1762 
1763         if (cgFunc.GenCfi()) {
1764             if (useFP) {
1765                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1766             }
1767             cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1768         }
1769         Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1770         Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1771         aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1772     }
1773 }
1774 
1775 void AArch64GenProEpilog::GeneratePopRegs()
1776 {
1777     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1778     CG *currCG = cgFunc.GetCG();
1779 
1780     const MapleVector<AArch64reg> &regsToRestore =
1781         (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1782 
1783     CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");
1784 
1785     AArch64reg intRegFirstHalf = kRinvalid;
1786     AArch64reg fpRegFirstHalf = kRinvalid;
1787 
1788     if (currCG->GenerateVerboseCG()) {
1789         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
1790     }
1791 
1792     MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
1793     /*
1794      * Even if we don't use FP, since we push a pair of registers
1795      * in a single instruction (i.e., stp) and the stack needs to be aligned
1796      * on a 16-byte boundary, push FP as well if the function has a call.
1797      * Make sure this is reflected when computing calleeSavedRegs.size()
1798      * skip the first two registers
1799      */
1800     CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP");
1801     ++it;
1802     CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1803     ++it;
1804 
1805     AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1806     int32 offset;
1807     if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1808         offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1809                                     memLayout->GetSizeOfLocals());
1810     } else {
1811         offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
1812                   (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) - /* for FP/LR */
1813                  memLayout->SizeOfArgsToStackPass() -
1814                  cgFunc.GetFunction().GetFrameReseverdSlot();
1815     }
1816 
1817     if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1818         offset -= kAarch64StackPtrAlignment;
1819     }
1820 
1821     if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1822         /* GR/VR save areas are above the callee save area */
1823         AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1824         auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1825                                                RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1826         offset -= saveareasize;
1827     }
1828 
1829     /*
1830      * We are using a cleared dummy block, so insertPoint cannot be ret;
1831      * see GenerateEpilog()
1832      */
1833     for (; it != regsToRestore.end(); ++it) {
1834         AArch64reg reg = *it;
1835         CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?");
1836         CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1837 
1838         RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1839         AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1840         if (firstHalf == kRinvalid) {
1841             /* remember it */
1842             firstHalf = reg;
1843         } else {
1844             /* flush the pair */
1845             AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
1846             GetNextOffsetCalleeSaved(offset);
1847             firstHalf = kRinvalid;
1848         }
1849     }
1850 
1851     if (intRegFirstHalf != kRinvalid) {
1852         AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1853         GetNextOffsetCalleeSaved(offset);
1854     }
1855 
1856     if (fpRegFirstHalf != kRinvalid) {
1857         AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1858         GetNextOffsetCalleeSaved(offset);
1859     }
1860 
1861     if (!currCG->GenerateDebugFriendlyCode()) {
1862         AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
1863     } else {
1864         AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
1865     }
1866 
1867     if (cgFunc.GenCfi()) {
1868         cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize));
1869     }
1870     /*
1871      * Reset the split base offset in case stp/ldp instructions were split,
1872      * so that a load-into-base-register instruction can be generated
1873      * for the next function if needed (possibly unnecessary, but harmless).
1874      */
1875     aarchCGFunc.SetSplitBaseOffset(0);
1876 }
1877 
1878 void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol)
1879 {
1880     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1881     Operand &targetOpnd = aarchCGFunc.GetOrCreateFuncNameOpnd(funcSymbol);
1882     cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1883 }
1884 
1885 void AArch64GenProEpilog::GenerateEpilog(BB &bb)
1886 {
1887     if (!cgFunc.GetHasProEpilogue()) {
1888         if (bb.GetPreds().empty() || !TestPredsOfRetBB(bb)) {
1889             GenerateRet(bb);
1890         }
1891         return;
1892     }
1893 
1894     /* generate stack protected instruction */
1895     BB &epilogBB = GenStackGuardCheckInsn(bb);
1896 
1897     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1898     CG *currCG = cgFunc.GetCG();
1899     BB *formerCurBB = cgFunc.GetCurBB();
1900     aarchCGFunc.GetDummyBB()->ClearInsns();
1901     aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
1902     cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1903 
1904     Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1905     Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1906 
1907     if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1908         aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
1909     }
1910 
1911     /* Hack: the exit bb should always be reachable, since we need its existence for ".cfi_remember_state" */
1912     if (&epilogBB != cgFunc.GetLastBB() && epilogBB.GetNext() != nullptr) {
1913         BB *nextBB = epilogBB.GetNext();
1914         do {
1915             if (nextBB == cgFunc.GetLastBB() || !nextBB->IsEmpty()) {
1916                 break;
1917             }
1918             nextBB = nextBB->GetNext();
1919         } while (nextBB != nullptr);
1920         if (nextBB != nullptr && !nextBB->IsEmpty() && cgFunc.GenCfi()) {
1921             cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildCfiInsn(cfi::OP_CFI_remember_state));
1922             cgFunc.GetCurBB()->SetHasCfi();
1923             nextBB->InsertInsnBefore(*nextBB->GetFirstInsn(),
1924                                      cgFunc.GetInsnBuilder()->BuildCfiInsn(cfi::OP_CFI_restore_state));
1925             nextBB->SetHasCfi();
1926         }
1927     }
1928 
1929     const MapleVector<AArch64reg> &regsToSave =
1930         (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1931     if (!regsToSave.empty()) {
1932         GeneratePopRegs();
1933     } else {
1934         auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
1935         if (stackFrameSize > 0) {
1936             if (currCG->GenerateVerboseCG()) {
1937                 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
1938             }
1939 
1940             if (cgFunc.HasVLAOrAlloca()) {
1941                 auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
1942                 stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
1943             }
1944 
1945             if (stackFrameSize > 0) {
1946                 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1947                 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1948                 if (cgFunc.GenCfi()) {
1949                     cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize));
1950                 }
1951             }
1952         }
1953     }
1954 
1955     if (currCG->InstrumentWithDebugTraceCall()) {
1956         AppendJump(*(currCG->GetDebugTraceExitFunction()));
1957     }
1958 
1959     GenerateRet(*(cgFunc.GetCurBB()));
1960     epilogBB.AppendBBInsns(*cgFunc.GetCurBB());
1961     if (cgFunc.GetCurBB()->GetHasCfi()) {
1962         epilogBB.SetHasCfi();
1963     }
1964 
1965     cgFunc.SetCurBB(*formerCurBB);
1966     aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
1967 }
1968 
1969 void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb)
1970 {
1971     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1972     CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!");
1973     if (cgFunc.GetExitBB(0)->IsUnreachable()) {
1974         /* if exitBB is unreachable then exitBB cannot be generated */
1975         GenerateEpilog(bb);
1976     } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */
1977         LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx());
1978         bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1979     }
1980 }
1981 
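/*
 * Tail-call conversion; summary of the code below: each recorded bl/blr is rewritten to its
 * tail-call form (MOP_tail_call_opt_xbl / MOP_tail_call_opt_xblr), the exit BB's epilogue
 * instructions (everything except the final ret) are copied in front of it, and the exit BB
 * is emptied once no non-tailcall predecessor still needs it.
 */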
1982 void AArch64GenProEpilog::ConvertToTailCalls(MapleSet<Insn *> &callInsnsMap)
1983 {
1984     BB *exitBB = GetCurTailcallExitBB();
1985 
1986     /* The exit BB is only fully populated by now. If it restores SP, extra stack space has been
1987        allocated (e.g. a call with more than 8 args or a large aggregate argument), so bail out. */
1988     FOR_BB_INSNS(insn, exitBB) {
1989         if (insn->GetMachineOpcode() == MOP_xaddrri12 || insn->GetMachineOpcode() == MOP_xaddrri24) {
1990             RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
1991             if (reg.GetRegisterNumber() == RSP) {
1992                 return;
1993             }
1994         }
1995     }
1996 
1997     /* Replace all of the call insns. */
1998     for (Insn *callInsn : callInsnsMap) {
1999         MOperator insnMop = callInsn->GetMachineOpcode();
2000         switch (insnMop) {
2001             case MOP_xbl: {
2002                 callInsn->SetMOP(AArch64CG::kMd[MOP_tail_call_opt_xbl]);
2003                 break;
2004             }
2005             case MOP_xblr: {
2006                 callInsn->SetMOP(AArch64CG::kMd[MOP_tail_call_opt_xblr]);
2007                 break;
2008             }
2009             default:
2010                 CHECK_FATAL(false, "Internal error.");
2011                 break;
2012         }
2013         BB *bb = callInsn->GetBB();
2014         if (bb->GetKind() == BB::kBBGoto) {
2015             bb->SetKind(BB::kBBFallthru);
2016             if (bb->GetLastInsn()->GetMachineOpcode() == MOP_xuncond) {
2017                 bb->RemoveInsn(*bb->GetLastInsn());
2018             }
2019         }
2020         for (auto sBB : bb->GetSuccs()) {
2021             bb->RemoveSuccs(*sBB);
2022             sBB->RemovePreds(*bb);
2023             break;
2024         }
2025     }
2026 
2027     /* copy instrs from exit block */
2028     for (Insn *callInsn : callInsnsMap) {
2029         BB *toBB = callInsn->GetBB();
2030         BB *fromBB = exitBB;
2031         if (toBB == fromBB) {
2032             /* callsite also in the return exit block, just change the return to branch */
2033             Insn *lastInsn = toBB->GetLastInsn();
2034             if (lastInsn->GetMachineOpcode() == MOP_xret) {
2035                 Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*callInsn);
2036                 toBB->ReplaceInsn(*lastInsn, *newInsn);
2037                 for (Insn *insn = callInsn->GetNextMachineInsn(); insn != newInsn; insn = insn->GetNextMachineInsn()) {
2038                     insn->SetDoNotRemove(true);
2039                 }
2040                 toBB->RemoveInsn(*callInsn);
2041                 return;
2042             }
2043             CHECK_FATAL(0, "Tailcall in incorrect block");
2044         }
2045         FOR_BB_INSNS_SAFE(insn, fromBB, next) {
2046             if (insn->IsCfiInsn() || (insn->IsMachineInstruction() && insn->GetMachineOpcode() != MOP_xret)) {
2047                 Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*insn);
2048                 newInsn->SetDoNotRemove(true);
2049                 toBB->InsertInsnBefore(*callInsn, *newInsn);
2050             }
2051         }
2052     }
2053 
2054     /* remove instrs in exit block */
2055     BB *bb = exitBB;
2056     if (bb->GetPreds().size() > 0) {
2057         return; /* exit block still needed by other non-tailcall blocks */
2058     }
2059     Insn &junk = cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_pseudo_none);
2060     bb->AppendInsn(junk);
2061     FOR_BB_INSNS_SAFE(insn, bb, next) {
2062         if (insn->GetMachineOpcode() != MOP_pseudo_none) {
2063             bb->RemoveInsn(*insn);
2064         }
2065     }
2066 }
2067 
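/*
 * Pass driver; summary of the steps below: decide whether a prologue/epilogue is needed, emit
 * the stack guard when it is, optionally isolate the fast path at -O2, generate the prologue
 * in the entry (or fast-path) BB and an epilogue in every exit BB, add the cleanup epilogue
 * for Java, and finally convert recorded call sites into tail calls for C modules.
 */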
2068 void AArch64GenProEpilog::Run()
2069 {
2070     CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
2071                 "The first statement should be a label");
2072     NeedStackProtect();
2073     cgFunc.SetHasProEpilogue(NeedProEpilog());
2074     if (cgFunc.GetHasProEpilogue()) {
2075         GenStackGuard(*(cgFunc.GetFirstBB()));
2076     }
2077     BB *proLog = nullptr;
2078     if (cgFunc.GetCG()->DoPrologueEpilogue() && Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel2) {
2079         /* There are some O2 dependent assumptions made */
2080         proLog = IsolateFastPath(*(cgFunc.GetFirstBB()));
2081     }
2082 
2083     if (cgFunc.IsExitBBsVecEmpty()) {
2084         if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() &&
2085             cgFunc.GetLastBB()->GetPrev()->GetPrev()) {
2086             cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()->GetPrev());
2087         } else {
2088             cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
2089         }
2090     }
2091 
2092     if (proLog != nullptr) {
2093         GenerateProlog(*proLog);
2094         proLog->SetFastPath(true);
2095         cgFunc.GetFirstBB()->SetFastPath(true);
2096     } else {
2097         GenerateProlog(*(cgFunc.GetFirstBB()));
2098     }
2099 
2100     for (auto *exitBB : cgFunc.GetExitBBsVec()) {
2101         if (GetFastPathReturnBB() != exitBB) {
2102             GenerateEpilog(*exitBB);
2103         }
2104     }
2105 
2106     if (cgFunc.GetFunction().IsJava()) {
2107         GenerateEpilogForCleanup(*(cgFunc.GetCleanupBB()));
2108     }
2109 
2110     if (cgFunc.GetMirModule().IsCModule() && !exitBB2CallSitesMap.empty()) {
2111         cgFunc.GetTheCFG()->InitInsnVisitor(cgFunc);
2112         for (auto pair : exitBB2CallSitesMap) {
2113             BB *curExitBB = pair.first;
2114             MapleSet<Insn *> &callInsnsMap = pair.second;
2115             SetCurTailcallExitBB(curExitBB);
2116             ConvertToTailCalls(callInsnsMap);
2117         }
2118     }
2119 }
2120 } /* namespace maplebe */
2121