/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_proepilog.h"
#include "aarch64_cg.h"

#define PROEPILOG_DUMP CG_DEBUG_FUNC(cgFunc)
namespace maplebe {
using namespace maple;

namespace {
enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };

enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };

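/*
 * pushPopOps[op][regType][type] maps a push/pop request to the matching
 * load/store mnemonic, e.g. pushPopOps[kRegsPushOp][kRegTyInt][kPushPopPair]
 * selects MOP_xstp (store a pair of 64-bit GPRs).
 */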
MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {{
    /* push */
    {0}, /* undef */
    {
        /* kRegTyInt */
        MOP_xstr, /* single */
        MOP_xstp, /* pair */
    },
    {
        /* kRegTyFloat */
        MOP_dstr, /* single */
        MOP_dstp, /* pair */
    },
},
{
    /* pop */
    {0}, /* undef */
    {
        /* kRegTyInt */
        MOP_xldr, /* single */
        MOP_xldp, /* pair */
    },
    {
        /* kRegTyFloat */
        MOP_dldr, /* single */
        MOP_dldp, /* pair */
    },
}};

inline void AppendInstructionTo(Insn &insn, CGFunc &func)
{
    func.GetCurBB()->AppendInsn(insn);
}
} // namespace

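/*
 * When the callee-saved offset no longer fits an stp/ldp immediate, materialize
 * base + offset into baseRegNum and rebase the access. A minimal sketch of the
 * emitted sequence, assuming baseRegNum is R16 and x19/x20 are being saved:
 *   add x16, sp, #offset
 *   stp x19, x20, [x16]    // rewritten to the new base with offset 0
 */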
MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
                                                                                   uint32 bitLen, AArch64reg baseRegNum)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
    OfstOperand *ofstOp = mo.GetOffsetImmediate();
    int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
    CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
    CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
    /*
     * Offset adjustment due to FP/SP has already been done
     * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
     */
    RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
    ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
    RegOperand *origBaseReg = mo.GetBaseRegister();
    aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);

    return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, 0);
}

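/*
 * Saves a callee-saved pair at sp + offset; for an in-range offset this emits,
 * e.g., "stp x19, x20, [sp, #offset]" (a sketch; the registers depend on the
 * caller), while out-of-range offsets are first split through R16, as above.
 */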
void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
                                                    int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    uint32 dataSize = GetPointerBitSize();
    CHECK_FATAL(offset >= 0, "offset must >= 0");
    if (offset > kStpLdpImm64UpperBound) {
        o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
    }
    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
    // Mark that this instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    pushInsn.SetReferenceOsts(memDefUse);
    std::string comment = "SAVE CALLEE REGISTER PAIR";
    pushInsn.SetComment(comment);
    AppendInstructionTo(pushInsn, cgFunc);
}

void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
    Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    if (aarchMemO1 != nullptr) {
        if (aarchMemO1->GetMemVaryType() == kNotVary &&
            aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
            o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
        }
    }

    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
    // Mark that this instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    pushInsn.SetReferenceOsts(memDefUse);
    std::string comment = "SAVE CALLEE REGISTER";
    pushInsn.SetComment(comment);
    AppendInstructionTo(pushInsn, cgFunc);
}

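/*
 * When fpToSpDistance exceeds the str/ldr immediate range, the offset is
 * materialized in R9 and FP/LR are accessed via register-offset addressing.
 * A sketch of the allocate path under that assumption:
 *   mov x9, #fpToSpDistance
 *   str x29, [sp, x9]      // only if storeFP
 *   add x9, x9, #8
 *   str x30, [sp, x9]
 */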
Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
                                                                             AArch64reg reg1, RegType rty,
                                                                             bool isAllocate)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
    uint8 size;
    if (CGOptions::IsArm64ilp32()) {
        size = k8ByteSize;
    } else {
        size = GetPointerSize();
    }
    if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
        mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
        MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
            AppendInstructionTo(insn1, cgFunc);
        }
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    } else {
        RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
        ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
        aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
        RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
        MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        if (storeFP) {
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
            Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
            AppendInstructionTo(insn1, cgFunc);
        }
        ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
        aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
        mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
        Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
        AppendInstructionTo(insn2, cgFunc);
        return insn2;
    }
}

Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
                                                                          AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                        ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                        : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
    Insn *allocInsn = nullptr;
    if (fpToSpDistance > kStpLdpImm64UpperBound) {
        allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
    } else {
        Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
        Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
        Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
        allocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                        ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                        : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
        AppendInstructionTo(*allocInsn, cgFunc);
    }
    return *allocInsn;
}

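/*
 * A typical small frame is allocated with one pre-indexed store pair, e.g.
 * "stp x29, x30, [sp, #-stackFrameSize]!" (sketch only); frames or FP-to-SP
 * distances beyond the stp immediate range fall back to an explicit
 * "sub sp, sp, #..." followed by separate stores of FP/LR.
 */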
void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
    }

    Insn *ipoint = nullptr;
    /*
     * stackFrameSize includes the size of args passed on the stack
     * if the function has neither VLA nor alloca.
     */
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if stp's imm > 504, we fall back to the stp-sub version
     */
    bool useStpSub = false;
    int64 offset = 0;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        /*
         * stack_frame_size == size of formal parameters + callee-saved (including FP/LR)
         *                     + size of local vars
         *                     + size of actuals
         * (When passing more than 8 args, it is the caller's responsibility to
         * allocate space for them; "size of actuals" is the largest such size in the function.)
         */
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useStpSub = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        MOperator mOp = (storeFP || offset > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                                                       : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
        RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
        RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
        MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerBitSize());
        ipoint = (storeFP || offset > kStrLdrPerPostUpperBound) ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                                                                : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
        AppendInstructionTo(*ipoint, cgFunc);
    }

    ipoint->SetStackDef(true);

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useStpSub, "Invalid assumption");
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
    }

    CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    if (useStpSub) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
        ipoint->SetStackDef(true);
    }
}

void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
    }

    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance =
        (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());

    Insn *ipoint = nullptr;

    if (fpToSpDistance > 0) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
        ipoint = cgFunc.GetCurBB()->GetLastInsn();
        ipoint->SetStackDef(true);
        ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
        CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
    } else {
        bool useStpSub = false;

        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useStpSub = true;
            RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
            ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            ipoint = cgFunc.GetCurBB()->GetLastInsn();
            ipoint->SetStackDef(true);
        } else {
            MOperator mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                                ? pushPopOps[kRegsPushOp][rty][kPushPopPair]
                                : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerBitSize());
            ipoint = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                         ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                         : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
            AppendInstructionTo(*ipoint, cgFunc);
            ipoint->SetStackDef(true);
        }

        if (useStpSub) {
            MOperator mOp =
                storeFP ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPushOp][rty][kPushPopSingle];
            RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
            RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
            MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerBitSize());
            ipoint = storeFP ? &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                             : &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            AppendInstructionTo(*ipoint, cgFunc);
        }
    }
}

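/*
 * Spills the frame-type index to its reserved slot. A sketch of the emitted
 * code, with "slot" standing for frameTypeInfo.offset + fpToSpDistance:
 *   mov x10, #idx
 *   str x10, [sp, #slot]
 */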
void AArch64GenProEpilog::GenerateFrameTypeSave(SaveInfo &frameTypeInfo, int32 stackSize, int64 fpToSpDistance)
{
    if (!frameTypeInfo.shouldSave) {
        return;
    }
    CHECK_FATAL(frameTypeInfo.offset < 0, "frameTypeInfo.offset must be negative");
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    // mov
    auto &x10Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, k64BitSize, kRegTyInt);
    auto immOpnd = &aarchCGFunc.CreateImmOperand(frameTypeInfo.idx, k64BitSize, true);
    aarchCGFunc.SelectCopyImm(x10Opnd, *immOpnd, PTY_i64);
    // store
    Operand *o1 = aarchCGFunc.CreateStackMemOpnd(RSP,
        static_cast<uint32>(frameTypeInfo.offset) + fpToSpDistance, GetPointerBitSize());
    MemOperand *mem = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    if (mem->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*mem, dataSize)) {
        o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*mem, dataSize, R16);
    }

    Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xstr, x10Opnd, *o1);
    AppendInstructionTo(pushInsn, cgFunc);
}

void AArch64GenProEpilog::GenerateFunctionSave(SaveInfo &funcInfo, int32 stackSize, int64 fpToSpDistance)
{
    if (!funcInfo.shouldSave) {
        return;
    }
    CHECK_FATAL(funcInfo.offset < 0, "funcInfo.offset must be negative");
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    auto &mirFunc = aarchCGFunc.GetFunction();
    CCLocInfo ploc;
    CCImpl &parmlocator = *aarchCGFunc.GetOrCreateLocator(CCImpl::GetCallConvKind(aarchCGFunc.GetFunction()));
    auto &x10Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, k64BitSize, kRegTyInt);
    CHECK_FATAL(static_cast<size_t>(funcInfo.idx) <= mirFunc.GetFormalCount(), "should be in range");
    for (size_t i = 0; i < mirFunc.GetFormalCount(); ++i) {
        MIRType *ty = mirFunc.GetNthParamType(i);
        parmlocator.LocateNextParm(*ty, ploc, (i == 0), mirFunc.GetMIRFuncType());
        if (i != static_cast<size_t>(funcInfo.idx)) {
            continue;
        }
        if (ploc.reg0 == kRinvalid) {
            Operand *o1 = aarchCGFunc.CreateStackMemOpnd(RSP, ploc.memOffset + stackSize, k64BitSize);
            uint32 dataSize = GetPointerBitSize();
            if (ploc.memOffset + stackSize > kStpLdpImm64UpperBound) {
                o1 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc,
                    static_cast<MemOperand &>(*o1), dataSize, R16);
            }
            Insn &ldrInsn = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xldr, x10Opnd, *o1);
            AppendInstructionTo(ldrInsn, cgFunc);
        } else {
            auto &funcOpnd =
                aarchCGFunc.GetOrCreatePhysicalRegisterOperand((AArch64reg)ploc.GetReg0(), k64BitSize, kRegTyInt);
            aarchCGFunc.SelectCopy(x10Opnd, ploc.GetPrimTypeOfReg0(), funcOpnd, ploc.GetPrimTypeOfReg0());
        }
    }
    Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP,
        static_cast<uint32>(funcInfo.offset) + fpToSpDistance, GetPointerBitSize());
    MemOperand *mem2 = static_cast<MemOperand *>(o2);
    uint32 dataSize = GetPointerBitSize();
    if (mem2->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*mem2, dataSize)) {
        o2 = &aarchCGFunc.SplitOffsetWithAddInstruction(*mem2, dataSize, R16);
    }
    Insn &pushInsn2 = cgFunc.GetInsnBuilder()->BuildInsn(MOP_xstr, x10Opnd, *o2);
    AppendInstructionTo(pushInsn2, cgFunc);
}

void AArch64GenProEpilog::GenerateSave()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    auto &mirFunc = aarchCGFunc.GetFunction();
    auto &frameTypeInfo = mirFunc.GetFrameTypeInfo();
    auto &funcInfo = mirFunc.GetFuncInfo();

    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    GenerateFrameTypeSave(frameTypeInfo, stackFrameSize, fpToSpDistance);
    GenerateFunctionSave(funcInfo, stackFrameSize, fpToSpDistance);
}

/*
 * From the AArch64 Reference Manual
 * C1.3.3 Load/Store Addressing Mode
 * ...
 * When stack alignment checking is enabled by system software and
 * the base register is the SP, the current stack pointer must be
 * initially quadword aligned, that is aligned to 16 bytes. Misalignment
 * generates a Stack Alignment fault. The offset does not have to
 * be a multiple of 16 bytes unless the specific Load/Store instruction
 * requires this. SP cannot be used as a register offset.
 */
void AArch64GenProEpilog::GeneratePushRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();

    CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");

    AArch64reg intRegFirstHalf = kRinvalid;
    AArch64reg fpRegFirstHalf = kRinvalid;

    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
    }

    /*
     * Even if we don't use RFP, since we push a pair of registers in one instruction
     * and the stack needs to be aligned on a 16-byte boundary, push RFP as well if the function has a call.
     * Make sure this is reflected when computing callee_saved_regs.size().
     */
    if (!currCG->GenerateDebugFriendlyCode()) {
        AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
    } else {
        AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
    }
    GenerateSave();
    if (useFP) {
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
        int64 fpToSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
        if ((fpToSpDistance > 0) || isLmbc) {
            Operand *immOpnd;
            if (isLmbc) {
                int32 size =
                    static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
            } else {
                immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
            }
            if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
            }
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        } else {
            aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
            cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
        }
    }

    MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
    // skip the RFP & RLR
    if (*it == RFP) {
        ++it;
    }
    CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
    ++it;

    // callee save offset
    // fp - callee save base = RealStackFrameSize - [GR,16] - [VR,16] - [cold,16] - [callee] - stack protect + 16(fplr)
    AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    int32 offset = 0;
    if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
        offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
                                    memLayout->GetSizeOfLocals());
    } else {
        offset = (static_cast<int32>(memLayout->RealStackFrameSize()) -
                  static_cast<int32>(static_cast<int32>(aarchCGFunc.SizeOfCalleeSaved()) -
                                     (kDivide2 * static_cast<int32>(kAarch64IntregBytelen))) - /* for FP/LR */
                  static_cast<int32>(memLayout->SizeOfArgsToStackPass()) -
                  static_cast<int32>(cgFunc.GetFunction().GetFrameReseverdSlot()));
    }

    offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));
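    // Worked example with hypothetical numbers (non-lmbc, no cold segment):
    // RealStackFrameSize = 112, SizeOfCalleeSaved = 32 (one GPR pair plus FP/LR),
    // no stack-passed args: offset = 112 - (32 - 16) - 0 - 0 = 96.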

    std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
    for (; it != regsToSave.end(); ++it) {
        AArch64reg reg = *it;
        // skip the RFP
        if (reg == RFP) {
            continue;
        }
        CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
        RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
        AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
        if (firstHalf == kRinvalid) {
            /* remember it */
            firstHalf = reg;
        } else {
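            // Record DWARF-style register numbers for the callee-offset table:
            // GPR Rn is assumed to encode as n (AArch64reg starts at 1, hence "- 1"),
            // and FP registers encode from 72 upward, 72 being v8's DWARF number.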
            uint16 reg0NO = (regType == kRegTyInt) ?
                static_cast<uint16>(firstHalf - 1) : static_cast<uint16>(firstHalf - V8 + 72);
            uint16 reg1NO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg0NO, offset));
            calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(reg1NO, offset + k8ByteSize));
            AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
            AArch64isa::GetNextOffsetCalleeSaved(offset);
            firstHalf = kRinvalid;
        }
    }

    if (intRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(intRegFirstHalf - 1);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    if (fpRegFirstHalf != kRinvalid) {
        uint16 regNO = static_cast<uint16>(fpRegFirstHalf - V8 + 72);
        calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
        AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
    if (emitMemoryManager.codeSpace != nullptr) {
        emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
        int64 fpToCurSpDistance =
            (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
        int32 fp2PrevFrameSPDelta =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize()) -
            fpToCurSpDistance;
        emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
    }
}

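/*
 * A sketch of the prolog this generates for a typical non-leaf function
 * (the exact sequence depends on frame size and the callee-saved set):
 *   stp x29, x30, [sp, #-framesize]!   // allocate frame, save FP/LR
 *   add x29, sp, #fpToSpDistance       // or "mov x29, sp" when the distance is 0
 *   stp x19, x20, [sp, #offset]        // remaining callee-saved registers
 */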
void AArch64GenProEpilog::GenerateProlog(BB &bb)
{
    if (!cgFunc.GetHasProEpilogue()) {
        return;
    }
    if (PROEPILOG_DUMP) {
        LogInfo::MapleLogger() << "generate prolog at BB " << bb.GetId() << "\n";
    }

    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    BB *formerCurBB = cgFunc.GetCurBB();
    aarchCGFunc.GetDummyBB()->ClearInsns();
    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();
    if (!regsToSave.empty()) {
        /*
         * Among other things, push the FP & LR pair.
         * FP/LR are added to the callee-saved list in AllocateRegisters().
         * We add them to the callee-saved list regardless of UseFP() being true/false.
         * The activation frame is allocated as part of pushing the FP/LR pair.
         */
        GeneratePushRegs();
    } else {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        int32 stackFrameSize =
            static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
        if (stackFrameSize > 0) {
            if (currCG->GenerateVerboseCG()) {
                cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
            }
            Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
            cgFunc.GetCurBB()->GetLastInsn()->SetStackDef(true);
        }
        if (currCG->GenerateVerboseCG()) {
            cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
        }
        if (useFP) {
            Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
            bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
            int64 fpToSpDistance =
                cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
            if ((fpToSpDistance > 0) || isLmbc) {
                Operand *immOpnd;
                if (isLmbc) {
                    int32 size = static_cast<int32>(
                        static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
                    immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
                } else {
                    immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
                }
                aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            } else {
                aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
                cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
            }
        }
    }
    bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
    cgFunc.SetCurBB(*formerCurBB);
}

void AArch64GenProEpilog::GenerateRet(BB &bb)
{
    auto *lastInsn = bb.GetLastMachineInsn();
    if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
        return;
    }
    bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
}

void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerBitSize(), rty);
    Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());
    MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
    uint32 dataSize = GetPointerBitSize();
    if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
        o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R16);
    }

    Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
    // Mark that this instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    popInsn.SetReferenceOsts(memDefUse);
    popInsn.SetComment("RESTORE CALLEE REGISTER");
    cgFunc.GetCurBB()->AppendInsn(popInsn);
}

void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
                                                   int32 offset)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerBitSize());

    uint32 dataSize = GetPointerBitSize();
    CHECK_FATAL(offset >= 0, "offset must >= 0");
    if (offset > kStpLdpImm64UpperBound) {
        o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
    }
    Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
    // Mark that this instruction does not alias with any other memory instruction.
    auto *memDefUse = cgFunc.GetFuncScopeAllocator()->New<MemDefUse>(*cgFunc.GetFuncScopeAllocator());
    memDefUse->SetIndependent();
    popInsn.SetReferenceOsts(memDefUse);
    popInsn.SetComment("RESTORE CALLEE REGISTER PAIR");
    cgFunc.GetCurBB()->AppendInsn(popInsn);
}

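/*
 * Mirror of AppendInstructionAllocateCallFrame: reload FP/LR and release the
 * frame. For a small frame this is typically the post-indexed
 * "ldp x29, x30, [sp], #stackFrameSize" (sketch); larger frames reload FP/LR
 * first and then emit "add sp, sp, #stackFrameSize".
 */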
void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if ldp's imm > 504, we fall back to the ldp-add version
     */
    bool useLdpAdd = false;
    int32 offset = 0;

    Operand *o2 = nullptr;
    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
    } else {
        if (stackFrameSize > kStpLdpImm64UpperBound) {
            useLdpAdd = true;
            offset = kOffset16MemPos;
            stackFrameSize -= offset;
        } else {
            offset = stackFrameSize;
        }
        o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerBitSize());
    }

    if (useLdpAdd) {
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    }

    if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
        CHECK_FATAL(!useLdpAdd, "Invalid assumption");
        if (fpToSpDistance > kStpLdpImm64UpperBound) {
            (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
        } else {
            Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }
        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    } else {
        Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
        cgFunc.GetCurBB()->AppendInsn(deallocInsn);
    }
    cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
}

void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
    Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerBitSize(), rty);
    Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerBitSize(), rty);
    int32 stackFrameSize =
        static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
    int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
    /*
     * ldp/stp's imm should be within -512 and 504;
     * if ldp's imm > 504, we fall back to the ldp-add version
     */
    bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
    if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
        int32 lmbcOffset = 0;
        if (!isLmbc) {
            stackFrameSize -= fpToSpDistance;
        } else {
            lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
        }
        if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
            Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerBitSize());
            mOp = storeFP ? pushPopOps[kRegsPopOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = storeFP ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                                        : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
            Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
            Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
            aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
        } else {
            MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerBitSize());
            mOp = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
                                                                         : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = (storeFP || stackFrameSize > kStrLdrPerPostUpperBound)
                                    ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2)
                                    : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }
    } else {
        Operand *o2 =
            aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerBitSize());
        if (fpToSpDistance > kStpLdpImm64UpperBound) {
            (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
        } else {
            mOp = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound) ? pushPopOps[kRegsPopOp][rty][kPushPopPair]
                                                                         : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
            Insn &deallocInsn = (storeFP || fpToSpDistance > kStrLdrPerPostUpperBound)
                                    ? cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2)
                                    : cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
            cgFunc.GetCurBB()->AppendInsn(deallocInsn);
        }

        Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
        Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
        aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
    }
    cgFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
}

void AArch64GenProEpilog::GeneratePopRegs()
{
    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();

    const MapleVector<AArch64reg> &regsToRestore = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                       ? aarchCGFunc.GetCalleeSavedRegs()
                                                       : aarchCGFunc.GetProEpilogSavedRegs();

    CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");

    AArch64reg intRegFirstHalf = kRinvalid;
    AArch64reg fpRegFirstHalf = kRinvalid;

    if (currCG->GenerateVerboseCG()) {
        cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
    }

    MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
    /*
     * Even if we don't use FP, since we push a pair of registers
     * in a single instruction (i.e., stp) and the stack needs to be aligned
     * on a 16-byte boundary, push FP as well if the function has a call.
     * Make sure this is reflected when computing calleeSavedRegs.size();
     * skip the first two registers.
     */
    // skip the RFP & RLR
    if (*it == RFP) {
        ++it;
    }
    CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
    ++it;

    AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
    int32 offset;
    if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
        offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
                                    memLayout->GetSizeOfLocals());
    } else {
        offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
                  (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kAarch64IntregBytelen))) -
                 memLayout->SizeOfArgsToStackPass() -
                 cgFunc.GetFunction().GetFrameReseverdSlot();
    }
    offset -= static_cast<int32>(RoundUp(memLayout->GetSizeOfSegCold(), k16BitSize));

    /*
     * We are using a cleared dummy block, so insertPoint cannot be ret;
     * see GenerateEpilog()
     */
    for (; it != regsToRestore.end(); ++it) {
        AArch64reg reg = *it;
        if (reg == RFP) {
            continue;
        }
        CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");

        RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
        AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
        if (firstHalf == kRinvalid) {
            /* remember it */
            firstHalf = reg;
        } else {
            /* flush the pair */
            AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
            AArch64isa::GetNextOffsetCalleeSaved(offset);
            firstHalf = kRinvalid;
        }
    }

    if (intRegFirstHalf != kRinvalid) {
        AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    if (fpRegFirstHalf != kRinvalid) {
        AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
        AArch64isa::GetNextOffsetCalleeSaved(offset);
    }

    if (!currCG->GenerateDebugFriendlyCode()) {
        AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
    } else {
        AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
    }
}

void AArch64GenProEpilog::AppendBBtoEpilog(BB &epilogBB, BB &newBB)
{
    FOR_BB_INSNS(insn, &newBB)
    {
        insn->SetDoNotRemove(true);
    }
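    // If the epilog BB already ends with a tail call or branch, splice the
    // restore sequence in before that instruction so it remains the terminator.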
    auto *lastInsn = epilogBB.GetLastMachineInsn();
    if (lastInsn != nullptr && (lastInsn->IsTailCall() || lastInsn->IsBranch())) {
        epilogBB.RemoveInsn(*lastInsn);
        epilogBB.AppendBBInsns(newBB);
        epilogBB.AppendInsn(*lastInsn);
    } else {
        epilogBB.AppendBBInsns(newBB);
    }
}

void AArch64GenProEpilog::GenerateEpilog(BB &bb)
{
    if (!cgFunc.GetHasProEpilogue()) {
        return;
    }
    if (PROEPILOG_DUMP) {
        LogInfo::MapleLogger() << "generate epilog at BB " << bb.GetId() << "\n";
    }

    auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
    CG *currCG = cgFunc.GetCG();
    BB *formerCurBB = cgFunc.GetCurBB();
    aarchCGFunc.GetDummyBB()->ClearInsns();
    cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());

    Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
    Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);

    if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
        aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
    }

    const MapleVector<AArch64reg> &regsToSave = (aarchCGFunc.GetProEpilogSavedRegs().empty())
                                                    ? aarchCGFunc.GetCalleeSavedRegs()
                                                    : aarchCGFunc.GetProEpilogSavedRegs();
    if (!regsToSave.empty()) {
        GeneratePopRegs();
    } else {
        auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
        if (stackFrameSize > 0) {
            if (currCG->GenerateVerboseCG()) {
                cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
            }

            if (cgFunc.HasVLAOrAlloca()) {
                auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
                stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
            }

            if (stackFrameSize > 0) {
                Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
                aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
                aarchCGFunc.GetCurBB()->GetLastInsn()->SetStackRevert(true);
            }
        }
    }

    AppendBBtoEpilog(bb, *cgFunc.GetCurBB());
    if (cgFunc.GetCurBB()->GetHasCfi()) {
        bb.SetHasCfi();
    }

    cgFunc.SetCurBB(*formerCurBB);
}

void AArch64GenProEpilog::Run()
{
    CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
                "The first statement should be a label");
    if (cgFunc.GetFunction().IsDeoptFunc()) { // deopt functions do not need a prologue/epilogue
        return;
    }
    // update exitBB
    if (cgFunc.IsExitBBsVecEmpty()) {
        cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
    }
    cgFunc.SetHasProEpilogue(true);

    // Proepilog analysis was not run (or it failed), so insert the prolog at the
    // first BB and an epilog at every exit BB.
    GenerateProlog(*(cgFunc.GetFirstBB()));
    for (auto *exitBB : cgFunc.GetExitBBsVec()) {
        GenerateEpilog(*exitBB);
    }

    // insert a ret insn for each exitBB
    for (auto *exitBB : cgFunc.GetExitBBsVec()) {
        if (cgFunc.GetHasProEpilogue()) {
            GenerateRet(*exitBB);
        }
    }
}
} /* namespace maplebe */