• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "aarch64_proepilog.h"
17 #include "aarch64_cg.h"
18 
19 #define PROEPILOG_DUMP CG_DEBUG_FUNC(cgFunc)
20 namespace maplebe {
21 using namespace maple;
22 
23 namespace {
24 enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };
25 
26 enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };
27 
28 MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {{
29                                                                                /* push */
30                                                                                {0}, /* undef */
31                                                                                {
32                                                                                    /* kRegTyInt */
33                                                                                    MOP_xstr, /* single */
34                                                                                    MOP_xstp, /* pair   */
35                                                                                },
36                                                                                {
37                                                                                    /* kRegTyFloat */
38                                                                                    MOP_dstr, /* single */
39                                                                                    MOP_dstp, /* pair   */
40                                                                                },
41                                                                            },
42                                                                            {
43                                                                                /* pop */
44                                                                                {0}, /* undef */
45                                                                                {
46                                                                                    /* kRegTyInt */
47                                                                                    MOP_xldr, /* single */
48                                                                                    MOP_xldp, /* pair   */
49                                                                                },
50                                                                                {
51                                                                                    /* kRegTyFloat */
52                                                                                    MOP_dldr, /* single */
53                                                                                    MOP_dldp, /* pair   */
54                                                                                },
55                                                                            }};
56 
AppendInstructionTo(Insn & insn,CGFunc & func)57 inline void AppendInstructionTo(Insn &insn, CGFunc &func)
58 {
59     func.GetCurBB()->AppendInsn(insn);
60 }
61 }  // namespace
62 
SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc & cgFunc,const MemOperand & mo,uint32 bitLen,AArch64reg baseRegNum)63 MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
64                                                                                    uint32 bitLen, AArch64reg baseRegNum)
65 {
66     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
67     CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
68     OfstOperand *ofstOp = mo.GetOffsetImmediate();
69     int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
70     CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
71     CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
72     /*
73      * Offset adjustment due to FP/SP has already been done
74      * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
75      */
76     RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
77     ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
78     RegOperand *origBaseReg = mo.GetBaseRegister();
79     aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);
80 
81     return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, 0);
82 }
83 
AppendInstructionPushPair(CGFunc & cgFunc,AArch64reg reg0,AArch64reg reg1,RegType rty,int32 offset)84 void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
85                                                     int32 offset)
86 {
87     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
88     MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
89     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
90     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
91     Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
92 
93     uint32 dataSize = GetPointerBitSize();
94     CHECK_FATAL(offset >= 0, "offset must >= 0");
95     if (offset > kStpLdpImm64UpperBound) {
96         o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
97     }
98     Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
99     // Identify that the instruction is not alias with any other memory instructions.
100     auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
101     memDefUse->SetIndependent();
102     pushInsn.SetReferenceOsts(memDefUse);
103     std::string comment = "SAVE CALLEE REGISTER PAIR";
104     pushInsn.SetComment(comment);
105     AppendInstructionTo(pushInsn, cgFunc);
106 }
107 
AppendInstructionPushSingle(CGFunc & cgFunc,AArch64reg reg,RegType rty,int32 offset)108 void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
109 {
110     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
111     MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
112     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
113     Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
114 
115     MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
116     uint32 dataSize = GetPointerBitSize();
117     if (aarchMemO1 != nullptr) {
118         if (aarchMemO1->GetMemVaryType() == kNotVary &&
119             aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
120             o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
121         }
122     }
123 
124     Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
125     // Identify that the instruction is not alias with any other memory instructions.
126     auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
127     memDefUse->SetIndependent();
128     pushInsn.SetReferenceOsts(memDefUse);
129     std::string comment = "SAVE CALLEE REGISTER";
130     pushInsn.SetComment(comment);
131     AppendInstructionTo(pushInsn, cgFunc);
132 }
133 
/*
 * Emit the stores (isAllocate) or loads (!isAllocate) of the reg0/reg1 pair at
 * frame offset fpToSpDistance when that offset is too large for one stp/ldp.
 * reg0 is only transferred when storeFP is set; reg1 always is.
 * Returns the last instruction emitted (callers use it as the anchor point).
 */
Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
                                                                             AArch64reg reg1, RegType rty,
                                                                             bool isAllocate)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
    uint8 size;
    if (CGOptions::IsArm64ilp32()) {
        /* Under ILP32 an 8-byte slot is still used for each register. */
        size = k8ByteSize;
    } else {
        size = GetPointerSize();
    }
    if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
        /* Both slots reachable with plain str/ldr immediate addressing. */
        mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
        MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
            AppendInstructionTo(insn1, cgFunc);
        }
        /* reg1 goes into the slot immediately above reg0's. */
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    } else {
        /* Offset out of str/ldr immediate range: materialize it in R9 and use
         * register-offset addressing [SP, R9]. */
        RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
        ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
        aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
        RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
        MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
            AppendInstructionTo(insn1, cgFunc);
        }
        /* Advance R9 to the second slot before transferring reg1. */
        ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
        aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    }
}
179 
CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance,AArch64reg reg0,AArch64reg reg1,RegType rty)180 Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
181                                                                           AArch64reg reg1, RegType rty)
182 {
183     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
184     MOperator mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
185                         ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
186                         : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
187     Insn *allocInsn = nullptr;
188     if (fpToSpDistance > kStpLdpImm64UpperBound) {
189         allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
190     } else {
191         Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
192         Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
193         Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
194         allocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
195                         ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
196                         : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
197         AppendInstructionTo(*allocInsn, cgFunc);
198     }
199     return *allocInsn;
200 }
201 
/*
 * Allocate the activation frame and save the reg0/reg1 pair (FP/LR from the
 * caller in GeneratePushRegs) for non-debug-friendly code.  Three shapes:
 *   - frame has stack-passed args and no VLA/alloca: sub sp first, then store
 *     the pair above the args area (CreateAndAppendInstructionForAllocateCallFrame);
 *   - otherwise, a pre-indexed stp allocates and saves in one instruction;
 *   - if the frame is too large for that stp's immediate, allocate 16 bytes
 *     with the stp and emit a trailing sub for the remainder ("stp-sub").
 */
void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
    }

    Insn *ipoint = nullptr;
    /*
     * stackFrameSize includes the size of args to stack-pass
     * if a function has neither VLA nor alloca.
     */
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if stp's imm > 512, we fall back to the stp-sub version
     */
    bool useStpSub = false;
    int64 offset = 0;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        /*
         * stack_frame_size == size of formal parameters + callee-saved (including FP/RL)
         *                     + size of local vars
         *                     + size of actuals
         * (when passing more than 8 args, its caller's responsibility to
         *  allocate space for it. size of actuals represent largest such size in the function.
         */
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            /* Pre-index only 16 bytes now; the rest is subtracted below. */
            useStpSub = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        MOperator mOp = (storeFP || offset > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                                                       : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
        RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
        /* Negative offset: stp with pre-index writeback allocates the frame. */
        MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerBitSize());
        ipoint = (storeFP || offset > kStrLdrPerPostUpperBound) ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                                                                : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
        AppendInstructionTo(*ipoint, cgFunc);
    }

    /* The instruction that moved SP defines the stack frame. */
    ipoint->SetStackDef(true);

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useStpSub, "Invalid assumption");
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
    }

    CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    if (useStpSub) {
        /* Subtract the remainder of the frame that the stp did not cover. */
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
        ipoint->SetStackDef(true);
    }
}
271 
/*
 * Debug-friendly variant of frame allocation: SP is always moved with an
 * explicit sub (or a small pre-indexed stp), keeping each step a separate
 * instruction so the unwind state is simple to follow.
 */
void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
    }

    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance =
        (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());

    Insn *ipoint = nullptr;

    if (fpToSpDistance > 0) {
        /* Allocate the whole frame with a sub, then store FP/LR above the
         * stack-passed-args area. */
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        ipoint->SetStackDef(true);
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
        CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    } else {
        bool useStpSub = false;

        if (stackFrameSize > kStpLdpImm64UpperBound) {
            /* Frame too large for a pre-indexed stp: sub first, store after. */
            useStpSub = true;
            RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
            ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            ipoint = cgFunc.GetCurBB()->GetLastInsn();
            ipoint->SetStackDef(true);
        } else {
            /* Small frame: a single pre-indexed stp/str allocates and saves. */
            MOperator mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                                ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerBitSize());
            ipoint = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                         ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                         : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
            AppendInstructionTo(*ipoint, cgFunc);
            ipoint->SetStackDef(true);
        }

        if (useStpSub) {
            /* Store FP/LR at the (new) stack top after the sub above. */
            MOperator mOp =
                storeFP ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerBitSize());
            ipoint = storeFP ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                             : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            AppendInstructionTo(*ipoint, cgFunc);
        }
    }
}
331 
/*
 * Spill the frame-type tag (frameTypeInfo.idx) into its frame slot via scratch
 * register R10, when frameTypeInfo.shouldSave is set.
 * frameTypeInfo.offset must be negative; fpToSpDistance rebases it against the
 * current SP.  stackSize is currently unused in this function.
 */
void AArch64GenProEpilog::GenerateFrameTypeSave(SaveInfo& frameTypeInfo, int32 stackSize, int64 fpToSpDistance)
{
    if (!frameTypeInfo.shouldSave) {
        return;
    }
    CHECK_FATAL(frameTypeInfo.offset < 0, "must be!!");
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    // mov: materialize the frame-type index into x10
    auto &x10Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, k64BitSize, kRegTyInt);
    auto immOpnd = &aarchCGFunc.CreateImmOperand(frameTypeInfo.idx, k64BitSize, true);
    aarchCGFunc.SelectCopyImm(x10Opnd, *immOpnd, PTY_i64);
    // store x10 into [RSP, #(offset + fpToSpDistance)]
    // NOTE(review): the negative offset is cast to uint32 before the int64
    // addition, so the value passed only equals offset + fpToSpDistance via
    // 32-bit wrap-around inside CreateStackMemOpnd -- confirm this is intended.
    Operand *o1 = aarchCGFunc.CreateStackMemOpnd(RSP,
        static_cast<uint32>(frameTypeInfo.offset) + fpToSpDistance, GetPointerBitSize());
    MemOperand *mem = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    // Rebase through R16 when the displacement does not fit str's immediate.
    if (mem->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*mem, dataSize)) {
        o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*mem, dataSize, R16);
    }

    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xstr, x10Opnd, *o1);
    AppendInstructionTo(pushInsn, cgFunc);
}
355 
/*
 * Spill a function-identity value into its frame slot via scratch register
 * R10, when funcInfo.shouldSave is set.  The value is the formal parameter at
 * index funcInfo.idx: loaded from the caller's stack area if it was passed in
 * memory, otherwise copied from its argument register.
 */
void AArch64GenProEpilog::GenerateFunctionSave(SaveInfo& funcInfo, int32 stackSize, int64 fpToSpDistance)
{
    if (!funcInfo.shouldSave) {
        return;
    }
    CHECK_FATAL(funcInfo.offset < 0, "must be!!");
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    auto &mirFunc = aarchCGFunc.GetFunction();
    CCLocInfo ploc;
    CCImpl &parmlocator = *aarchCGFunc.GetOrCreateLocator(CCImpl::GetCallConvKind(aarchCGFunc.GetFunction()));
    auto &x10Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, k64BitSize, kRegTyInt);
    CHECK_FATAL(static_cast<size_t>(funcInfo.idx) <= mirFunc.GetFormalCount(), "should be in range");
    // Walk all formals in order: LocateNextParm must see every parameter to
    // compute the correct location for the one at funcInfo.idx.
    for (size_t i = 0; i < mirFunc.GetFormalCount(); ++i) {
        MIRType *ty = mirFunc.GetNthParamType(i);
        parmlocator.LocateNextParm(*ty, ploc, (i == 0), mirFunc.GetMIRFuncType());
        if (i != static_cast<size_t>(funcInfo.idx)) {
            continue;
        }
        if (ploc.reg0 == kRinvalid) {
            // Parameter was passed on the stack: load it into x10.
            Operand* o1 = aarchCGFunc.CreateStackMemOpnd(RSP, ploc.memOffset + stackSize, k64BitSize);
            uint32 dataSize = GetPointerBitSize();
            if (ploc.memOffset + stackSize > kStpLdpImm64UpperBound) {
                o1 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc,
                    static_cast<MemOperand &>(*o1), dataSize, R16);
            }
            Insn &ldrInsn = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xldr, x10Opnd, *o1);
            AppendInstructionTo(ldrInsn, cgFunc);
        } else {
            // Parameter arrived in a register: copy it into x10.
            auto &funcOpnd =
                aarchCGFunc.GetOrCreatePhysicalRegisterOperand((AArch64reg)ploc.GetReg0(), k64BitSize, kRegTyInt);
            aarchCGFunc.SelectCopy(x10Opnd, ploc.GetPrimTypeOfReg0(), funcOpnd, ploc.GetPrimTypeOfReg0());
        }
    }
    // Store x10 into [RSP, #(offset + fpToSpDistance)].
    // NOTE(review): as in GenerateFrameTypeSave, the negative offset is cast
    // to uint32 before the int64 addition and relies on 32-bit wrap-around in
    // CreateStackMemOpnd -- confirm intended.
    Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP,
        static_cast<uint32>(funcInfo.offset) + fpToSpDistance, GetPointerBitSize());
    MemOperand *mem2 = static_cast<MemOperand *>(o2);
    uint32 dataSize = GetPointerBitSize();
    if (mem2->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*mem2, dataSize)) {
        o2 = &aarchCGFunc.SplitOffsetWithAddInstruction(*mem2, dataSize, R16);
    }
    Insn &pushInsn2 = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xstr, x10Opnd, *o2);
    AppendInstructionTo(pushInsn2, cgFunc);
}
399 
GenerateSave()400 void AArch64GenProEpilog::GenerateSave()
401 {
402     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
403     auto &mirFunc = aarchCGFunc.GetFunction();
404     auto &frameTypeInfo =  mirFunc.GetFrameTypeInfo();
405     auto &funcInfo = mirFunc.GetFuncInfo();
406 
407     int32 stackFrameSize =
408         static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
409     int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
410     GenerateFrameTypeSave(frameTypeInfo, stackFrameSize, fpToSpDistance);
411     GenerateFunctionSave(funcInfo, stackFrameSize, fpToSpDistance);
412 }
413 
414 /*
415  *  From AArch64 Reference Manual
416  *  C1.3.3 Load/Store Addressing Mode
417  *  ...
418  *  When stack alignment checking is enabled by system software and
419  *  the base register is the SP, the current stack pointer must be
420  *  initially quadword aligned, that is aligned to 16 bytes. Misalignment
421  *  generates a Stack Alignment fault.  The offset does not have to
422  *  be a multiple of 16 bytes unless the specific Load/Store instruction
423  *  requires this. SP cannot be used as a register offset.
424  */
/*
 * Generate the whole callee-save sequence of the prolog:
 *   1. allocate the frame and save FP/LR (debug or optimized variant);
 *   2. optional frame-type/function spills (GenerateSave);
 *   3. set up FP (copy of SP, or SP + distance to stack-passed args);
 *   4. push the remaining callee-saved registers, pairing same-type
 *      registers into stp where possible;
 *   5. report register/offset pairs and the FP delta to the emit memory
 *      manager when one is configured.
 */
void AArch64GenProEpilog::GeneratePushRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    // Prefer the explicit pro/epilog set when present; otherwise the full
    // callee-saved list.
    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();

    CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");

    // Pending unpaired register of each type, waiting for a partner to form
    // an stp; flushed as a single str at the end.
    AArch64reg intRegFirstHalf = kRinvalid;
    AArch64reg fpRegFirstHalf = kRinvalid;

    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
    }

    /*
     * Even if we don't use RFP, since we push a pair of registers in one instruction
     * and the stack needs be aligned on a 16-byte boundary, push RFP as well if function has a call
     * Make sure this is reflected when computing callee_saved_regs.size()
     */
    if (!currCG->GenerateDebugFriendlyCode()) {
        AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
    } else {
        AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
    }
    GenerateSave();
    if (useFP) {
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
        int64 fpToSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
        if ((fpToSpDistance > 0) || isLmbc) {
            Operand *immOpnd;
            if (isLmbc) {
                // For lmbc flavor, FP points to the whole-frame top instead.
                int32 size =
                    static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
            } else {
                immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
            }
            if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
            }
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        } else {
            aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        }
    }

    MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
    // skip the RFP & RLR
    if (*it == RFP) {
        ++it;
    }
    CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
    ++it;

    // callee save offset
    // fp - callee save base = RealStackFrameSize - [GR,16] - [VR,16] - [cold,16] - [callee] - stack protect + 16(fplr)
    AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    int32 offset = 0;
    if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
        offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
                                    memLayout->GetSizeOfLocals());
    } else {
        offset = (static_cast<int32>(memLayout->RealStackFrameSize()) -
                  static_cast<int32>(static_cast<int32>(aarchCGFunc.SizeOfCalleeSaved()) -
                                     (kDivide2 * static_cast<int32>(kAarch64IntregBytelen))) - /* for FP/LR */
                  static_cast<int32>(memLayout->SizeOfArgsToStackPass()) -
                  static_cast<int32>(cgFunc.GetFunction().GetFrameReseverdSlot()));
    }

    offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));

    // (regNo, frame offset) pairs reported to the emit memory manager below.
    // NOTE(review): "reg - 1" / "reg - V8 + 72" translate the internal enums
    // into the numbering funcCalleeOffsetSaver expects -- presumably DWARF
    // register numbers; confirm against the consumer.
    std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
    for (; it != regsToSave.end(); ++it) {
        AArch64reg reg = *it;
        // skip the RFP
        if (reg == RFP) {
            continue;
        }
        CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
        RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
        AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
        if (firstHalf == kRinvalid) {
            /* remember it */
            firstHalf = reg;
        } else {
            // Second register of the same type: emit an stp for the pair.
            uint16 reg0NO = (regType == kRegTyInt) ?
                static_cast<uint16>(firstHalf - 1) : static_cast<uint16>(firstHalf - V8 + 72);
            uint16 reg1NO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg0NO, offset));
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg1NO, offset + k8ByteSize));
            AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
            AArch64isa::GetNextOffsetCalleeSaved(offset);
            firstHalf = kRinvalid;
        }
    }

    // Flush a leftover unpaired integer register with a single str.
    if (intRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(intRegFirstHalf - 1);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    // Flush a leftover unpaired floating-point register with a single str.
    if (fpRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(fpRegFirstHalf - V8 + 72);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    // Report callee-save locations and the FP-to-previous-frame delta to an
    // external code-space consumer, when configured.
    const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
    if (emitMemoryManager.codeSpace != nullptr) {
        emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
        int64 fpToCurSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        int32 fp2PrevFrameSPDelta =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize()) -
            fpToCurSpDistance;
        emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
    }
}
556 
/// Emit the function prologue and splice it at the beginning of @bb.
/// If any callee-saved registers must be preserved, the whole job (frame
/// allocation + FP/LR push + callee-saved stores) is delegated to
/// GeneratePushRegs(); otherwise only the frame allocation (sub sp) and the
/// optional FP setup are emitted here.
void AArch64GenProEpilog::GenerateProlog(BB &bb)
{
    if (!cgFunc.GetHasProEpilogue()) {
        return;
    }
    if (PROEPILOG_DUMP) {
        LogInfo::MapleLogger() << "generate prolog at BB " << bb.GetId() << "\n";
    }

    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    // Build the prologue into the (cleared) dummy BB, then insert the whole
    // sequence in front of bb; current-BB is restored at the end.
    BB *formerCurBB = cgFunc.GetCurBB();
    aarchCGFunc.GetDummyBB()->ClearInsns();
    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    // Prefer the pro/epilog-analysis result when available; otherwise fall
    // back to the full callee-saved list.
    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();
    if (!regsToSave.empty()) {
        /*
         * Among other things, push the FP & LR pair.
         * FP/LR are added to the callee-saved list in AllocateRegisters()
         * We add them to the callee-saved list regardless of UseFP() being true/false.
         * Activation Frame is allocated as part of pushing FP/LR pair
         */
        GeneratePushRegs();
    } else {
        // No callee-saved registers: just allocate the activation frame and,
        // when FP is in use, materialize the frame pointer from SP.
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        int32 stackFrameSize =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
        if (stackFrameSize > 0) {
            if (currCG->GenerateVerboseCG()) {
                cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
            }
            Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            // Mark the sub as the frame-establishing instruction.
            cgFunc.GetCurBB()->GetLastInsn()->SetStackDef(true);
        }
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        if (useFP) {
            Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
            bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
            int64 fpToSpDistance =
                cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
            if ((fpToSpDistance > 0) || isLmbc) {
                Operand *immOpnd;
                if (isLmbc) {
                    // Lmbc flavor: FP is set past the whole real frame size.
                    int32 size = static_cast<int32>(
                        static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                    immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
                } else {
                    // FP = SP + outgoing-args area (+ reserved slot).
                    immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
                }
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            } else {
                aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            }
        }
    }
    bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
    cgFunc.SetCurBB(*formerCurBB);
}
623 
GenerateRet(BB & bb)624 void AArch64GenProEpilog::GenerateRet(BB &bb)
625 {
626     auto *lastInsn = bb.GetLastMachineInsn();
627     if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
628         return;
629     }
630     bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
631 }
632 
AppendInstructionPopSingle(CGFunc & cgFunc,AArch64reg reg,RegType rty,int32 offset)633 void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
634 {
635     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
636     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
637     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
638     Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
639     MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
640     uint32 dataSize = GetPointerBitSize();
641     if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
642         o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
643     }
644 
645     Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
646     // Identify that the instruction is not alias with any other memory instructions.
647     auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
648     memDefUse->SetIndependent();
649     popInsn.SetReferenceOsts(memDefUse);
650     popInsn.SetComment("RESTORE");
651     cgFunc.GetCurBB()->AppendInsn(popInsn);
652 }
653 
AppendInstructionPopPair(CGFunc & cgFunc,AArch64reg reg0,AArch64reg reg1,RegType rty,int32 offset)654 void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
655                                                    int32 offset)
656 {
657     auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
658     MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
659     Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
660     Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
661     Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
662 
663     uint32 dataSize = GetPointerBitSize();
664     CHECK_FATAL(offset >= 0, "offset must >= 0");
665     if (offset > kStpLdpImm64UpperBound) {
666         o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
667     }
668     Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
669     // Identify that the instruction is not alias with any other memory instructions.
670     auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
671     memDefUse->SetIndependent();
672     popInsn.SetReferenceOsts(memDefUse);
673     popInsn.SetComment("RESTORE RESTORE");
674     cgFunc.GetCurBB()->AppendInsn(popInsn);
675 }
676 
/// Restore the FP/LR pair (@reg0/@reg1) and deallocate the activation frame
/// (optimized, non-debug variant).
/// Two shapes are emitted:
///  - no VLA/alloca and an outgoing-args area exists: plain ldp at the
///    FP/LR save slot, then "add sp, sp, #frameSize";
///  - otherwise a post-indexed ldp that pops the frame in one go, falling
///    back to "add sp" + small post-index when the frame size exceeds the
///    ldp immediate range (within -512 and 504).
void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if ldp's imm > 504, we fall back to the ldp-add version
     */
    bool useLdpAdd = false;
    int32 offset = 0;

    Operand *o2 = nullptr;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        // FP/LR were saved above the outgoing-args area: address them directly.
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            // Frame too big for a single post-indexed ldp: pre-add most of it,
            // leaving a 16-byte post-index for the pair itself.
            useLdpAdd = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerBitSize());
    }

    if (useLdpAdd) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    }

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useLdpAdd, "Invalid assumption");
        if (fpToSpDistance > kStpLdpImm64UpperBound) {
            // Save-slot offset itself exceeds the ldp range: use the helper.
            (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
        } else {
            Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }
        // Pop the whole frame after FP/LR have been reloaded.
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    } else {
        // Post-indexed ldp restores FP/LR and releases the frame at once.
        Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
        cgFunc.GetCurBB()->AppendInsn(deallocInsn);
    }
    cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
}
730 
/// Debug-friendly variant of frame deallocation: keeps SP adjustment and the
/// FP/LR reload as distinct instructions where possible so unwind info stays
/// simple. When storeFP is false only LR was saved, so a single ldr is used
/// instead of an ldp.
void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if ldp's imm > 504, we fall back to the ldp-add version
     */
    bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
    if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
        int32 lmbcOffset = 0;
        if (!isLmbc) {
            // With VLA/alloca SP was already rewound to FP; only the part of
            // the frame above the outgoing-args area remains to pop.
            stackFrameSize -= fpToSpDistance;
        } else {
            // Lmbc: FP/LR live below the outgoing-args area (2 slots).
            lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
        }
        if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
            // Reload FP/LR (or just LR) first, then release the frame with add.
            Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerBitSize());
            mOp = storeFP ? pushPopOps[kRegsPopOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = storeFP ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                                        : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
            Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
            Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
        } else {
            // Frame fits the post-index range: single pop-and-release.
            MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerBitSize());
            mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
                                                                         : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                                    ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                                    : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }
    } else {
        // FP/LR saved above the outgoing-args area: reload from that slot,
        // then pop the whole frame.
        Operand *o2 =
            aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
        if (fpToSpDistance > kStpLdpImm64UpperBound) {
            (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
        } else {
            mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
                                                                         : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                                    ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                                    : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }

        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    }
    cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
}
790 
/// Restore all callee-saved registers and deallocate the activation frame.
/// Mirrors GeneratePushRegs: registers are restored in pairs (ldp) where
/// possible, leftover singles via ldr, FP/LR last together with the frame
/// pop. Restore offsets must match the offsets used on the push path.
void AArch64GenProEpilog::GeneratePopRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();

    const MapleVector<AArch64reg> &regsToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                       ? aarchCGFunc.GetCalleeSavedRegs()
                                                       : aarchCGFunc.GetProEpilogSavedRegs();

    CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");

    // Pending first member of an int/fp pair, waiting for its partner.
    AArch64reg intRegFirstHalf = kRinvalid;
    AArch64reg fpRegFirstHalf = kRinvalid;

    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
    }

    MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
    /*
     * Even if we don't use FP, since we push a pair of registers
     * in a single instruction (i.e., stp) and the stack needs be aligned
     * on a 16-byte boundary, push FP as well if the function has a call.
     * Make sure this is reflected when computing calleeSavedRegs.size()
     * skip the first two registers
     */
    // skip the RFP & RLR
    if (*it == RFP) {
        ++it;
    }
    CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
    ++it;

    // Starting offset of the callee-saved area; must be computed exactly as
    // on the push path so each register is reloaded from its save slot.
    AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    int32 offset;
    if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
        offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
                                    memLayout->GetSizeOfLocals());
    } else {
        // kDivide2 * kAarch64IntregBytelen accounts for the FP/LR pair, which
        // is not restored here.
        offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
                  (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kAarch64IntregBytelen))) -
                 memLayout->SizeOfArgsToStackPass() -
                 cgFunc.GetFunction().GetFrameReseverdSlot();
    }
    // The cold section sits below the callee-saved area.
    offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));

    /*
     * We are using a cleared dummy block; so insertPoint cannot be ret;
     * see GenerateEpilog()
     */
    for (; it != regsToRestore.end(); ++it) {
        AArch64reg reg = *it;
        if (reg == RFP) {
            continue;
        }
        CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");

        RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
        AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
        if (firstHalf == kRinvalid) {
            /* remember it */
            firstHalf = reg;
        } else {
            /* flush the pair */
            AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
            AArch64isa::GetNextOffsetCalleeSaved(offset);
            firstHalf = kRinvalid;
        }
    }

    // Odd register counts leave one unpaired register per bank.
    if (intRegFirstHalf != kRinvalid) {
        AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    if (fpRegFirstHalf != kRinvalid) {
        AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    // Finally reload FP/LR and release the frame.
    if (!currCG->GenerateDebugFriendlyCode()) {
        AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
    } else {
        AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
    }
}
877 
AppendBBtoEpilog(BB & epilogBB,BB & newBB)878 void AArch64GenProEpilog::AppendBBtoEpilog(BB &epilogBB, BB &newBB)
879 {
880     FOR_BB_INSNS(insn, &newBB)
881     {
882         insn->SetDoNotRemove(true);
883     }
884     auto *lastInsn = epilogBB.GetLastMachineInsn();
885     if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
886         epilogBB.RemoveInsn(*lastInsn);
887         epilogBB.AppendBBInsns(newBB);
888         epilogBB.AppendInsn(*lastInsn);
889     } else {
890         epilogBB.AppendBBInsns(newBB);
891     }
892 }
893 
/// Emit the function epilogue into @bb: rewind SP (via FP when VLA/alloca
/// made SP dynamic), restore callee-saved registers, and release the frame.
/// The code is built in the dummy BB and spliced in with AppendBBtoEpilog.
void AArch64GenProEpilog::GenerateEpilog(BB &bb)
{
    if (!cgFunc.GetHasProEpilogue()) {
        return;
    }
    if (PROEPILOG_DUMP) {
        LogInfo::MapleLogger() << "generate epilog at BB " << bb.GetId() << "\n";
    }

    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    BB *formerCurBB = cgFunc.GetCurBB();
    aarchCGFunc.GetDummyBB()->ClearInsns();
    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
    Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);

    // With VLA/alloca SP no longer points at the static frame: recover it
    // from FP first (lmbc flavor manages this differently).
    if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
        aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
    }

    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();
    if (!regsToSave.empty()) {
        // Callee-saved registers exist; the full restore path also pops the frame.
        GeneratePopRegs();
    } else {
        // Nothing saved: just release the frame with "add sp".
        auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
        if (stackFrameSize > 0) {
            if (currCG->GenerateVerboseCG()) {
                cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
            }

            // SP was restored from FP above, which already sits past the
            // outgoing-args segment; don't pop that part twice.
            if (cgFunc.HasVLAOrAlloca()) {
                auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
                stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
            }

            if (stackFrameSize > 0) {
                Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
                aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
                aarchCGFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
            }
        }
    }

    AppendBBtoEpilog(bb, *cgFunc.GetCurBB());
    if (cgFunc.GetCurBB()->GetHasCfi()) {
        bb.SetHasCfi();
    }

    cgFunc.SetCurBB(*formerCurBB);
}
948 
Run()949 void AArch64GenProEpilog::Run()
950 {
951     CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
952                 "The first statement should be a label");
953     if (cgFunc.GetFunction().IsDeoptFunc()) {   // deopt function does not need prologue/epilogue
954         return;
955     }
956     // update exitBB
957     if (cgFunc.IsExitBBsVecEmpty()) {
958         cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
959     }
960     cgFunc.SetHasProEpilogue(true);
961 
962     // not run proepilog analysis or analysis failed, insert proepilog at firstBB and exitBB
963     GenerateProlog(*(cgFunc.GetFirstBB()));
964     for (auto *exitBB : cgFunc.GetExitBBsVec()) {
965         GenerateEpilog(*exitBB);
966     }
967 
968     // insert ret insn for exitBB
969     for (auto *exitBB : cgFunc.GetExitBBsVec()) {
970         if (cgFunc.GetHasProEpilogue()) {
971             GenerateRet(*exitBB);
972         }
973     }
974 }
975 } /* namespace maplebe */
976