1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "aarch64_proepilog.h"
17 #include "aarch64_cg.h"
18 #include "cg_option.h"
19 #include "cgfunc.h"
20
21 namespace maplebe {
22 using namespace maple;
23
24 namespace {
25 constexpr int32 kSoeChckOffset = 8192;
26
27 enum RegsPushPop : uint8 { kRegsPushOp, kRegsPopOp };
28
29 enum PushPopType : uint8 { kPushPopSingle = 0, kPushPopPair = 1 };
30
31 MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = {{
32 /* push */
33 {0}, /* undef */
34 {
35 /* kRegTyInt */
36 MOP_xstr, /* single */
37 MOP_xstp, /* pair */
38 },
39 {
40 /* kRegTyFloat */
41 MOP_dstr, /* single */
42 MOP_dstp, /* pair */
43 },
44 },
45 {
46 /* pop */
47 {0}, /* undef */
48 {
49 /* kRegTyInt */
50 MOP_xldr, /* single */
51 MOP_xldp, /* pair */
52 },
53 {
54 /* kRegTyFloat */
55 MOP_dldr, /* single */
56 MOP_dldp, /* pair */
57 },
58 }};
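/*
 * Illustrative note (added sketch, not part of the generated code): the table is indexed as
 * pushPopOps[push/pop][register type][single/pair]. Assuming the enum layout shown above
 * (kRegTyInt == 1, kRegTyFloat == 2), a callee-save pair of integer registers is stored with
 *   pushPopOps[kRegsPushOp][kRegTyInt][kPushPopPair] == MOP_xstp   // e.g. stp x19, x20, [sp, #offset]
 * and restored with
 *   pushPopOps[kRegsPopOp][kRegTyInt][kPushPopPair]  == MOP_xldp   // e.g. ldp x19, x20, [sp, #offset]
 */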
59
60 inline void AppendInstructionTo(Insn &insn, CGFunc &func)
61 {
62 func.GetCurBB()->AppendInsn(insn);
63 }
64 } // namespace
65
66 bool AArch64GenProEpilog::HasLoop()
67 {
68 FOR_ALL_BB(bb, &cgFunc) {
69 if (bb->IsBackEdgeDest()) {
70 return true;
71 }
72 FOR_BB_INSNS_REV(insn, bb) {
73 if (!insn->IsMachineInstruction()) {
74 continue;
75 }
76 if (insn->HasLoop()) {
77 return true;
78 }
79 }
80 }
81 return false;
82 }
83
84 /*
85 * Remove redundant mov instructions and mark optimizable bl/blr insns in the BB.
86 * Return value: true if this modified block should be processed again.
87 */
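/*
 * A minimal sketch of the intent (illustration only, not taken from real output):
 *   mov  x0, x0     // redundant copy of the return value: removed
 *   bl   callee     // recorded in callInsns as a tail-call candidate
 */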
88 bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, MapleSet<Insn *> &callInsns, const BB &exitBB) const
89 {
90 if (bb.NumInsn() == 1 &&
91 (bb.GetLastInsn()->GetMachineOpcode() != MOP_xbr && bb.GetLastInsn()->GetMachineOpcode() != MOP_xblr &&
92 bb.GetLastInsn()->GetMachineOpcode() != MOP_xbl && bb.GetLastInsn()->GetMachineOpcode() != MOP_xuncond)) {
93 return false;
94 }
95 FOR_BB_INSNS_REV_SAFE(insn, &bb, prev_insn) {
96 if (!insn->IsMachineInstruction() || AArch64isa::IsPseudoInstruction(insn->GetMachineOpcode())) {
97 continue;
98 }
99 MOperator insnMop = insn->GetMachineOpcode();
100 switch (insnMop) {
101 case MOP_xldr:
102 case MOP_xldp:
103 case MOP_dldr:
104 case MOP_dldp: {
105 if (bb.GetKind() == BB::kBBReturn) {
106 RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
107 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg.GetRegisterNumber()))) {
108 break; /* inserted restore from calleeregs-placement, ignore */
109 }
110 }
111 return false;
112 }
113 case MOP_wmovrr:
114 case MOP_xmovrr: {
115 CHECK_FATAL(insn->GetOperand(0).IsRegister(), "operand0 is not register");
116 CHECK_FATAL(insn->GetOperand(1).IsRegister(), "operand1 is not register");
117 auto &reg1 = static_cast<RegOperand &>(insn->GetOperand(0));
118 auto &reg2 = static_cast<RegOperand &>(insn->GetOperand(1));
119
120 if (reg1.GetRegisterNumber() != R0 || reg2.GetRegisterNumber() != R0) {
121 return false;
122 }
123
124 bb.RemoveInsn(*insn);
125 break;
126 }
127 case MOP_xblr: {
128 if (insn->GetOperand(0).IsRegister()) {
129 RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
130 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg.GetRegisterNumber()))) {
131 return false; /* can't tailcall, register will be overwritten by restore */
132 }
133 }
134 /* flow through */
135 }
136 [[clang::fallthrough]];
137 case MOP_xbl: {
138 callInsns.insert(insn);
139 return false;
140 }
141 case MOP_xuncond: {
142 LabelOperand &bLab = static_cast<LabelOperand &>(insn->GetOperand(0));
143 if (exitBB.GetLabIdx() == bLab.GetLabelIndex()) {
144 break;
145 }
146 return false;
147 }
148 default:
149 return false;
150 }
151 }
152
153 return true;
154 }
155
156 /* Recursively invoke this function for all predecessors of exitBB */
157 void AArch64GenProEpilog::TailCallBBOpt(BB &bb, MapleSet<Insn *> &callInsns, BB &exitBB)
158 {
159 /* The callsite may also be in the return block, as in "if () return; else foo();",
160 where the call ends up in the exit block */
161 if (!bb.IsEmpty() && !OptimizeTailBB(bb, callInsns, exitBB)) {
162 return;
163 }
164
165 for (auto tmpBB : bb.GetPreds()) {
166 if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() ||
167 (tmpBB->GetKind() != BB::kBBFallthru && tmpBB->GetKind() != BB::kBBGoto)) {
168 continue;
169 }
170
171 if (OptimizeTailBB(*tmpBB, callInsns, exitBB)) {
172 TailCallBBOpt(*tmpBB, callInsns, exitBB);
173 }
174 }
175 }
176
177 /*
178 * If a function uses no callee-saved registers and ends with a function call,
179 * then convert the bl/blr into b/br.
180 * Return value: true if the function does not need a prologue/epilogue, false otherwise.
181 */
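/*
 * Illustrative rewrite (assumed shape, performed by the later tail-call transformation):
 *   bl   foo          ==>   b    foo
 *   ret
 * with the analogous blr -> br rewrite for indirect calls through a register.
 */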
182 bool AArch64GenProEpilog::TailCallOpt()
183 {
184 /* Count how many call insns in the whole function. */
185 uint32 nCount = 0;
186 bool hasGetStackClass = false;
187
188 FOR_ALL_BB(bb, &cgFunc) {
189 FOR_BB_INSNS(insn, bb) {
190 if (insn->IsMachineInstruction() && insn->IsCall()) {
191 ++nCount;
192 }
193 }
194 }
195 if ((nCount > 0 && cgFunc.GetFunction().GetAttr(FUNCATTR_interface)) || hasGetStackClass) {
196 return false;
197 }
198
199 if (nCount == 0) {
200 // no bl instr in any bb
201 return true;
202 }
203
204 size_t exitBBSize = cgFunc.GetExitBBsVec().size();
205 /* For now to reduce complexity */
206
207 BB *exitBB = nullptr;
208 if (exitBBSize == 0) {
209 if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() &&
210 cgFunc.GetLastBB()->GetPrev()->GetPrev() != nullptr) {
211 exitBB = cgFunc.GetLastBB()->GetPrev()->GetPrev();
212 } else {
213 exitBB = cgFunc.GetLastBB()->GetPrev();
214 }
215 } else {
216 exitBB = cgFunc.GetExitBBsVec().front();
217 }
218 uint32 i = 1;
219 size_t optCount = 0;
220 do {
221 MapleSet<Insn *> callInsns(tmpAlloc.Adapter());
222 TailCallBBOpt(*exitBB, callInsns, *exitBB);
223 if (callInsns.size() != 0) {
224 optCount += callInsns.size();
225 (void)exitBB2CallSitesMap.emplace(exitBB, callInsns);
226 }
227 if (i < exitBBSize) {
228 exitBB = cgFunc.GetExitBBsVec()[i];
229 ++i;
230 } else {
231 break;
232 }
233 } while (1);
234
235 /* regular calls exist in function */
236 if (nCount != optCount) {
237 return false;
238 }
239 return true;
240 }
241
242 static bool IsAddOrSubOp(MOperator mOp)
243 {
244 switch (mOp) {
245 case MOP_xaddrrr:
246 case MOP_xaddrrrs:
247 case MOP_xxwaddrrre:
248 case MOP_xaddrri24:
249 case MOP_xaddrri12:
250 case MOP_xsubrrr:
251 case MOP_xsubrrrs:
252 case MOP_xxwsubrrre:
253 case MOP_xsubrri12:
254 return true;
255 default:
256 return false;
257 }
258 }
259
260 /* tailcallopt cannot be used if the stack address of this function is taken and passed;
261 for now we only check whether the address is taken, not whether it is passed */
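/* For example (illustration only), an insn like "add x0, x29, #16", which forms the address
 * of a stack slot from FP/SP, makes IsStackAddrTaken() return true. */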
262 static bool IsStackAddrTaken(CGFunc &cgFunc)
263 {
264 FOR_ALL_BB(bb, &cgFunc) {
265 FOR_BB_INSNS_REV(insn, bb) {
266 if (IsAddOrSubOp(insn->GetMachineOpcode())) {
267 for (uint32 i = 0; i < insn->GetOperandSize(); i++) {
268 if (insn->GetOperand(i).IsRegister()) {
269 RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(i));
270 if (reg.GetRegisterNumber() == R29 || reg.GetRegisterNumber() == R31 ||
271 reg.GetRegisterNumber() == RSP) {
272 return true;
273 }
274 }
275 }
276 }
277 }
278 }
279 return false;
280 }
281
282 bool AArch64GenProEpilog::NeedProEpilog()
283 {
284 if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) {
285 return true;
286 } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) {
287 return true;
288 }
289 bool funcHasCalls = false;
290 if (cgFunc.GetCG()->DoTailCall() && !IsStackAddrTaken(cgFunc) && !stackProtect) {
291 funcHasCalls = !TailCallOpt(); // TailCallOpt() returns true when there is no call insn or every call is a tail-call candidate
292 } else {
293 FOR_ALL_BB(bb, &cgFunc) {
294 FOR_BB_INSNS_REV(insn, bb) {
295 if (insn->IsMachineInstruction() && insn->IsCall()) {
296 funcHasCalls = true;
297 }
298 }
299 }
300 }
301 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
302 const MapleVector<AArch64reg> &regsToRestore =
303 (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
304 size_t calleeSavedRegSize = kTwoRegister;
305 CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?");
306 if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() ||
307 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 ||
308 cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) {
309 return true;
310 }
311 return false;
312 }
313
314 void AArch64GenProEpilog::GenStackGuard(BB &bb)
315 {
316 if (!stackProtect) {
317 return;
318 }
319 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
320 BB *formerCurBB = cgFunc.GetCurBB();
321 aarchCGFunc.GetDummyBB()->ClearInsns();
322 aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
323 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
324
325 MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
326 GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
327 StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
328 RegOperand &stAddrOpnd =
329 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, GetPointerSize() * kBitsPerByte, kRegTyInt);
330 aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);
331
332 MemOperand *guardMemOp =
333 aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte, stAddrOpnd, nullptr,
334 &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym);
335 MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
336 Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
337 insn.SetDoNotRemove(true);
338 cgFunc.GetCurBB()->AppendInsn(insn);
339
340 uint64 vArea = 0;
341 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
342 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
343 if (ml->GetSizeOfGRSaveArea() > 0) {
344 vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
345 }
346 if (ml->GetSizeOfVRSaveArea() > 0) {
347 vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
348 }
349 }
350
351 int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
352 if (useFP) {
353 stkSize -=
354 (static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()) +
355 cgFunc.GetFunction().GetFrameReseverdSlot());
356 }
357 int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
358 MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerSize() * kBitsPerByte);
359 if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
360 downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10);
361 }
362 mOp = aarchCGFunc.PickStInsn(GetPointerSize() * kBitsPerByte, PTY_u64);
363 Insn &tmpInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *downStk);
364 tmpInsn.SetDoNotRemove(true);
365 cgFunc.GetCurBB()->AppendInsn(tmpInsn);
366
367 bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
368 aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
369 cgFunc.SetCurBB(*formerCurBB);
370 }
371
372 BB &AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb)
373 {
374 if (!stackProtect) {
375 return bb;
376 }
377
378 BB *formerCurBB = cgFunc.GetCurBB();
379 cgFunc.GetDummyBB()->ClearInsns();
380 cgFunc.SetCurBB(*(cgFunc.GetDummyBB()));
381 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
382
383 const MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
384 GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard")));
385 StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0);
386 RegOperand &stAddrOpnd =
387 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, GetPointerSize() * kBitsPerByte, kRegTyInt);
388 aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd);
389
390 MemOperand *guardMemOp =
391 aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte, stAddrOpnd, nullptr,
392 &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym);
393 MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64);
394 Insn &insn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, stAddrOpnd, *guardMemOp);
395 insn.SetDoNotRemove(true);
396 cgFunc.GetCurBB()->AppendInsn(insn);
397
398 uint64 vArea = 0;
399 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
400 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
401 if (ml->GetSizeOfGRSaveArea() > 0) {
402 vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment);
403 }
404 if (ml->GetSizeOfVRSaveArea() > 0) {
405 vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment);
406 }
407 }
408
409 RegOperand &checkOp =
410 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, GetPointerSize() * kBitsPerByte, kRegTyInt);
411 int32 stkSize = static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
412 if (useFP) {
413 stkSize -=
414 (static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()) +
415 cgFunc.GetFunction().GetFrameReseverdSlot());
416 }
417 int32 memSize = (stkSize - kOffset8MemPos) - static_cast<int32>(vArea);
418 MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, GetPointerSize() * kBitsPerByte);
419 if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) {
420 downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10);
421 }
422 mOp = aarchCGFunc.PickLdInsn(GetPointerSize() * kBitsPerByte, PTY_u64);
423 Insn &newInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, checkOp, *downStk);
424 newInsn.SetDoNotRemove(true);
425 cgFunc.GetCurBB()->AppendInsn(newInsn);
426
427 cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64);
428 LabelIdx failLable = aarchCGFunc.CreateLabel();
429 aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failLable), OP_brtrue, OP_eq, stAddrOpnd,
430 aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64, false);
431
432 bb.AppendBBInsns(*(cgFunc.GetCurBB()));
433
434 LabelIdx nextBBLableIdx = aarchCGFunc.CreateLabel();
435 BB *nextBB = aarchCGFunc.CreateNewBB(nextBBLableIdx, bb.IsUnreachable(), BB::kBBFallthru, bb.GetFrequency());
436 bb.AppendBB(*nextBB);
437 bb.PushBackSuccs(*nextBB);
438 nextBB->PushBackPreds(bb);
439 cgFunc.SetCurBB(*nextBB);
440 MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(
441 GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail")));
442 ListOperand *srcOpnds = aarchCGFunc.CreateListOpnd(*cgFunc.GetFuncScopeAllocator());
443 Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds);
444 callInsn.SetDoNotRemove(true);
445
446 BB *newBB = cgFunc.CreateNewBB(failLable, bb.IsUnreachable(), bb.GetKind(), bb.GetFrequency());
447 nextBB->AppendBB(*newBB);
448 if (cgFunc.GetLastBB() == &bb) {
449 cgFunc.SetLastBB(*newBB);
450 }
451 bb.PushBackSuccs(*newBB);
452 nextBB->PushBackSuccs(*newBB);
453 newBB->PushBackPreds(*nextBB);
454 newBB->PushBackPreds(bb);
455
456 bb.SetKind(BB::kBBIf);
457 cgFunc.SetCurBB(*formerCurBB);
458 return *newBB;
459 }
460
461 bool AArch64GenProEpilog::InsertOpndRegs(Operand &op, std::set<regno_t> &vecRegs) const
462 {
463 Operand *opnd = &op;
464 CHECK_FATAL(opnd != nullptr, "opnd is nullptr in InsertRegs");
465 if (opnd->IsList()) {
466 MapleList<RegOperand *> pregList = static_cast<ListOperand *>(opnd)->GetOperands();
467 for (auto *preg : pregList) {
468 if (preg != nullptr) {
469 vecRegs.insert(preg->GetRegisterNumber());
470 }
471 }
472 }
473 if (opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complicated to detect */
474 RegOperand *baseOpnd = static_cast<MemOperand *>(opnd)->GetBaseRegister();
475 if (baseOpnd != nullptr) {
476 vecRegs.insert(baseOpnd->GetRegisterNumber());
477 }
478 RegOperand *indexOpnd = static_cast<MemOperand *>(opnd)->GetIndexRegister();
479 if (indexOpnd != nullptr) {
480 vecRegs.insert(indexOpnd->GetRegisterNumber());
481 }
482 }
483 if (opnd->IsRegister()) {
484 RegOperand *preg = static_cast<RegOperand *>(opnd);
485 if (preg != nullptr) {
486 vecRegs.insert(preg->GetRegisterNumber());
487 }
488 }
489 return true;
490 }
491
492 bool AArch64GenProEpilog::InsertInsnRegs(Insn &insn, bool insertSource, std::set<regno_t> &vecSourceRegs,
493 bool insertTarget, std::set<regno_t> &vecTargetRegs)
494 {
495 Insn *curInsn = &insn;
496 for (uint32 o = 0; o < curInsn->GetOperandSize(); ++o) {
497 Operand &opnd = curInsn->GetOperand(o);
498 if (insertSource && curInsn->OpndIsUse(o)) {
499 InsertOpndRegs(opnd, vecSourceRegs);
500 }
501 if (insertTarget && curInsn->OpndIsDef(o)) {
502 InsertOpndRegs(opnd, vecTargetRegs);
503 }
504 }
505 return true;
506 }
507
508 bool AArch64GenProEpilog::FindRegs(Operand &op, std::set<regno_t> &vecRegs) const
509 {
510 Operand *opnd = &op;
511 if (opnd == nullptr || vecRegs.empty()) {
512 return false;
513 }
514 if (opnd->IsList()) {
515 MapleList<RegOperand *> pregList = static_cast<ListOperand *>(opnd)->GetOperands();
516 for (auto *preg : pregList) {
517 if (preg->GetRegisterNumber() == R29 || vecRegs.find(preg->GetRegisterNumber()) != vecRegs.end()) {
518 return true; /* the opReg will overwrite or reread the vecRegs */
519 }
520 }
521 }
522 if (opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complicated to detect */
523 RegOperand *baseOpnd = static_cast<MemOperand *>(opnd)->GetBaseRegister();
524 RegOperand *indexOpnd = static_cast<MemOperand *>(opnd)->GetIndexRegister();
525 if ((baseOpnd != nullptr && baseOpnd->GetRegisterNumber() == R29) ||
526 (indexOpnd != nullptr && indexOpnd->GetRegisterNumber() == R29)) {
527 return true; /* Avoid modifying data on the stack */
528 }
529 if ((baseOpnd != nullptr && vecRegs.find(baseOpnd->GetRegisterNumber()) != vecRegs.end()) ||
530 (indexOpnd != nullptr && vecRegs.find(indexOpnd->GetRegisterNumber()) != vecRegs.end())) {
531 return true;
532 }
533 }
534 if (opnd->IsRegister()) {
535 RegOperand *regOpnd = static_cast<RegOperand *>(opnd);
536 if (regOpnd->GetRegisterNumber() == R29 || vecRegs.find(regOpnd->GetRegisterNumber()) != vecRegs.end()) {
537 return true; /* dst is a target register, result_dst is a target register */
538 }
539 }
540 return false;
541 }
542
543 bool AArch64GenProEpilog::BackwardFindDependency(BB &ifbb, std::set<regno_t> &vecReturnSourceRegs,
544 std::list<Insn *> &existingInsns, std::list<Insn *> &moveInsns)
545 {
546 /*
547 * Pattern match,(*) instruction are moved down below branch.
548 * ********************
549 * curInsn: <instruction> <target> <source>
550 * <existingInsns> in predBB
551 * <existingInsns> in ifBB
552 * <existingInsns> in returnBB
553 * *********************
554 * list: the insns can be moved into the coldBB
555 * (1) the instruction is neither a branch nor a call, except for the ifbb.GetLastInsn()
556 * As long as a branch insn exists,
557 * the fast path finding fails and the return value is false,
558 * but the code sinking can be continued.
559 * (2) the predBB is not an ifBB,
560 * As long as an ifBB exists among the preds,
561 * the code sinking fails,
562 * but fast path finding can be continued.
563 * (3) the targetRegs of insns in existingInsns can neither be reread nor overwritten
564 * (4) the sourceRegs of insns in existingInsns cannot be overwritten
565 * (5) the sourceRegs of insns in returnBB can neither be reread nor overwritten
566 * (6) the targetRegs and sourceRegs cannot be R29 or R30, to protect the stack
567 * (7) the reg is renamed when:
568 * --------------
569 * curInsn: move R2,R1
570 * <existingInsns>: <instruction>s <target>s <source>s
571 * <instruction>s <target>s <source-R2>s
572 * -> <instruction>s <target>s <source-R1>s
573 * ------------
574 * (a) all targets cannot be R1, all sources cannot be R1
575 * all targets cannot be R2, all return sources cannot be R2
576 * (b) the targetRegs and sourceRegs cannot be list or MemoryAccess
577 * (c) no ifBB in preds, no branch insns
578 * (d) the bits of source-R2 must be equal to the R2
579 * (e) replace the R2 with R1
580 */
581 BB *pred = &ifbb;
582 std::set<regno_t> vecTargetRegs; /* the targetRegs of existingInsns */
583 std::set<regno_t> vecSourceRegs; /* the sourceRegs of existingInsns */
584 bool ifPred = false; /* Indicates whether an ifBB exists among the preds */
585 bool bl = false; /* Indicates whether a branch insn exists */
586 do {
587 FOR_BB_INSNS_REV(insn, pred) {
588 /* code sinking */
589 if (insn->IsImmaterialInsn()) {
590 moveInsns.push_back(insn);
591 continue;
592 }
593 /* code sinking */
594 if (!insn->IsMachineInstruction()) {
595 moveInsns.push_back(insn);
596 continue;
597 }
598 /* code sinking fails, the insns must be retained in the ifBB */
599 if (ifPred || insn == ifbb.GetLastInsn() || insn->IsBranch() || insn->IsCall() || insn->IsStore() ||
600 insn->IsStorePair()) {
601 /* fast path finding fails */
602 if (insn != ifbb.GetLastInsn() &&
603 (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair())) {
604 bl = true;
605 }
606 InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs);
607 existingInsns.push_back(insn);
608 continue;
609 }
610 bool allow = true; /* whether this insn is allowed to move into the coldBB */
611 for (uint32 o = 0; allow && o < insn->GetOperandSize(); ++o) {
612 Operand &opnd = insn->GetOperand(o);
613 if (insn->OpndIsDef(o)) {
614 allow = allow & !FindRegs(opnd, vecTargetRegs);
615 allow = allow & !FindRegs(opnd, vecSourceRegs);
616 allow = allow & !FindRegs(opnd, vecReturnSourceRegs);
617 }
618 if (insn->OpndIsUse(o)) {
619 allow = allow & !FindRegs(opnd, vecTargetRegs);
620 }
621 }
622 /* if a result_dst is not allowed, this insn can still be allowed on the condition of mov Rx,R0/R1,
623 * and the existing insns cannot be blr
624 * RLR 31, RFP 32, RSP 33, RZR 34 */
625 if (!ifPred && !bl && !allow &&
626 (insn->GetMachineOpcode() == MOP_xmovrr || insn->GetMachineOpcode() == MOP_wmovrr)) {
627 Operand *resultOpnd = &(insn->GetOperand(0));
628 Operand *srcOpnd = &(insn->GetOperand(1));
629 regno_t resultNO = static_cast<RegOperand *>(resultOpnd)->GetRegisterNumber();
630 regno_t srcNO = static_cast<RegOperand *>(srcOpnd)->GetRegisterNumber();
631 if (!FindRegs(*resultOpnd, vecTargetRegs) && !FindRegs(*srcOpnd, vecTargetRegs) &&
632 !FindRegs(*srcOpnd, vecSourceRegs) && !FindRegs(*srcOpnd, vecReturnSourceRegs) &&
633 (srcNO < RLR || srcNO > RZR)) {
634 allow = true; /* allow on the conditional mov Rx,Rxx */
635 for (auto *exit : existingInsns) {
636 /* the registers of kOpdMem are complicated to detect */
637 for (uint32 o = 0; o < exit->GetOperandSize(); ++o) {
638 if (!exit->OpndIsUse(o)) {
639 continue;
640 }
641 Operand *opd = &(exit->GetOperand(o));
642 if (opd->IsList() || opd->IsMemoryAccessOperand()) {
643 allow = false;
644 break;
645 }
646 /* Distinguish between 32-bit regs and 64-bit regs */
647 if (opd->IsRegister() && static_cast<RegOperand *>(opd)->GetRegisterNumber() == resultNO &&
648 opd != resultOpnd) {
649 allow = false;
650 break;
651 }
652 }
653 }
654 }
655 /* replace the R2 with R1 */
656 if (allow) {
657 for (auto *exit : existingInsns) {
658 for (uint32 o = 0; o < exit->GetOperandSize(); ++o) {
659 if (!exit->OpndIsUse(o)) {
660 continue;
661 }
662 Operand *opd = &(exit->GetOperand(o));
663 if (opd->IsRegister() && (opd == resultOpnd)) {
664 exit->SetOperand(o, *srcOpnd);
665 }
666 }
667 }
668 }
669 }
670 if (!allow) { /* all result_dsts are not target register */
671 /* code sinking fails */
672 InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs);
673 existingInsns.push_back(insn);
674 } else {
675 moveInsns.push_back(insn);
676 }
677 }
678 if (pred->GetPreds().empty()) {
679 break;
680 }
681 if (!ifPred) {
682 for (auto *tmPred : pred->GetPreds()) {
683 pred = tmPred;
684 /* try to find the BB without branch */
685 if (tmPred->GetKind() == BB::kBBGoto || tmPred->GetKind() == BB::kBBFallthru) {
686 ifPred = false;
687 break;
688 } else {
689 ifPred = true;
690 }
691 }
692 }
693 } while (pred != nullptr);
694 for (std::set<regno_t>::iterator it = vecTargetRegs.begin(); it != vecTargetRegs.end(); ++it) {
695 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(*it))) { /* flag register */
696 return false;
697 }
698 }
699 return !bl;
700 }
701
702 BB *AArch64GenProEpilog::IsolateFastPath(BB &bb)
703 {
704 /*
705 * Detect "if (cond) return" fast path, and move extra instructions
706 * to the slow path.
707 * Must match the following block structure. BB1 can be a series of
708 * single-pred/single-succ blocks.
709 * BB1 ops1 cmp-br to BB3 BB1 cmp-br to BB3
710 * BB2 ops2 br to retBB ==> BB2 ret
711 * BB3 slow path BB3 ops1 ops2
712 * if the detection is successful, BB3 will be used to generate the prolog.
713 */
714 if (bb.GetPrev() != nullptr) {
715 return nullptr;
716 }
717 BB *ifBB = nullptr;
718 BB *returnBB = nullptr;
719 BB *coldBB = nullptr;
720 {
721 BB *curBB = &bb;
722 /* Look for straight line code */
723 while (1) {
724 if (!curBB->GetEhSuccs().empty()) {
725 return nullptr;
726 }
727 if (curBB->GetSuccs().size() == 1) {
728 if (curBB->HasCall()) {
729 return nullptr;
730 }
731 BB *succ = curBB->GetSuccs().front();
732 if (succ->GetPreds().size() != 1 || !succ->GetEhPreds().empty()) {
733 return nullptr;
734 }
735 curBB = succ;
736 } else if (curBB->GetKind() == BB::kBBIf) {
737 ifBB = curBB;
738 break;
739 } else {
740 return nullptr;
741 }
742 }
743 }
744 /* targets of if bb can only be reached by if bb */
745 {
746 CHECK_FATAL(!ifBB->GetSuccs().empty(), "null succs check!");
747 BB *first = ifBB->GetSuccs().front();
748 BB *second = ifBB->GetSuccs().back();
749 if (first->GetPreds().size() != 1 || !first->GetEhPreds().empty()) {
750 return nullptr;
751 }
752 if (second->GetPreds().size() != 1 || !second->GetEhPreds().empty()) {
753 return nullptr;
754 }
755 /* One target of the if bb jumps to a return bb */
756 if (first->GetKind() != BB::kBBGoto && first->GetKind() != BB::kBBFallthru) {
757 return nullptr;
758 }
759 if (first->GetSuccs().size() != 1) {
760 return nullptr;
761 }
762 if (first->GetSuccs().front()->GetKind() != BB::kBBReturn) {
763 return nullptr;
764 }
765 if (first->GetSuccs().front()->GetPreds().size() != 1) {
766 return nullptr;
767 }
768 if (first->GetSuccs().front()->NumInsn() > kInsnNum2) { /* allow one extra insn used for debugging */
769 return nullptr;
770 }
771 if (second->GetSuccs().empty()) {
772 return nullptr;
773 }
774 returnBB = first;
775 coldBB = second;
776 }
777 /* Search backward looking for dependencies for the cond branch */
778 std::list<Insn *> existingInsns; /* the insns must be retained in the ifBB (and the return BB) */
779 std::list<Insn *> moveInsns; /* instructions to be moved to coldbb */
780 /*
781 * The control flow matches at this point.
782 * Make sure the SourceRegs of the insns in returnBB (vecReturnSourceReg) cannot be overwritten.
783 * the regs in insns have three forms: list, MemoryAccess, or Register.
784 */
785 CHECK_FATAL(returnBB != nullptr, "null ptr check");
786 std::set<regno_t> vecReturnSourceRegs;
787 FOR_BB_INSNS_REV(insn, returnBB) {
788 if (!insn->IsMachineInstruction()) {
789 continue;
790 }
791 if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) {
792 return nullptr;
793 }
794 InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs);
795 existingInsns.push_back(insn);
796 }
797 FOR_BB_INSNS_REV(insn, returnBB->GetSuccs().front()) {
798 if (!insn->IsMachineInstruction()) {
799 continue;
800 }
801 if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) {
802 return nullptr;
803 }
804 InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs);
805 existingInsns.push_back(insn);
806 }
807 /*
808 * The mv is the 1st move using the parameter register leading to the branch
809 * The ld is the load using the parameter register indirectly for the branch
810 * The depMv is the move which preserves the result of the load but might
811 * destroy a parameter register which will be moved below the branch.
812 */
813 bool fast = BackwardFindDependency(*ifBB, vecReturnSourceRegs, existingInsns, moveInsns);
814 /* move extra instructions to the slow path */
815 if (!fast) {
816 return nullptr;
817 }
818 for (auto in : moveInsns) {
819 in->GetBB()->RemoveInsn(*in);
820 CHECK_FATAL(coldBB != nullptr, "null ptr check");
821 static_cast<void>(coldBB->InsertInsnBegin(*in));
822 }
823 /* All instructions are in the right place; replace the branch to the ret bb with just a ret. */
824 /* Remove the lastInsn of gotoBB */
825 if (returnBB->GetKind() == BB::kBBGoto) {
826 returnBB->RemoveInsn(*returnBB->GetLastInsn());
827 }
828 BB *tgtBB = returnBB->GetSuccs().front();
829 CHECK_FATAL(tgtBB != nullptr, "null ptr check");
830 FOR_BB_INSNS(insn, tgtBB) {
831 returnBB->AppendInsn(*insn); /* add the insns such as MOP_xret */
832 }
833 returnBB->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
834 /* bb is now a retbb and has no succ. */
835 returnBB->SetKind(BB::kBBReturn);
836 auto predIt = std::find(tgtBB->GetPredsBegin(), tgtBB->GetPredsEnd(), returnBB);
837 tgtBB->ErasePreds(predIt);
838 tgtBB->ClearInsns();
839 returnBB->ClearSuccs();
840 if (tgtBB->GetPrev() != nullptr && tgtBB->GetNext() != nullptr) {
841 tgtBB->GetPrev()->SetNext(tgtBB->GetNext());
842 tgtBB->GetNext()->SetPrev(tgtBB->GetPrev());
843 }
844 SetFastPathReturnBB(tgtBB);
845 return coldBB;
846 }
847
848 MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, const MemOperand &mo,
849 uint32 bitLen, AArch64reg baseRegNum)
850 {
851 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
852 CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi");
853 OfstOperand *ofstOp = mo.GetOffsetImmediate();
854 int32 offsetVal = static_cast<int32>(ofstOp->GetOffsetValue());
855 CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0");
856 CHECK_FATAL((static_cast<uint32>(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0");
857 /*
858 * Offset adjustment due to FP/SP has already been done
859 * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs()
860 */
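/*
 * Illustrative example (not actual emitted output): for a callee-save pair whose
 * offset is beyond the stp/ldp immediate range, e.g.
 *   stp x19, x20, [sp, #520]       // offset not encodable in a single stp
 * the split produces
 *   add x16, sp, #520              // emitted once; 520 is remembered in splitBaseOffset
 *   stp x19, x20, [x16]            // later pairs use [x16, #delta] with a small delta
 */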
861 RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt);
862 if (aarchCGFunc.GetSplitBaseOffset() == 0) {
863 aarchCGFunc.SetSplitBaseOffset(offsetVal); /* remember the offset; don't forget to clear it */
864 ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true);
865 RegOperand *origBaseReg = mo.GetBaseRegister();
866 aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64);
867 }
868 offsetVal = offsetVal - aarchCGFunc.GetSplitBaseOffset();
869 return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal);
870 }
871
872 void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
873 int32 offset)
874 {
875 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
876 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
877 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
878 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
879 Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
880
881 uint32 dataSize = GetPointerSize() * kBitsPerByte;
882 CHECK_FATAL(offset >= 0, "offset must >= 0");
883 if (offset > kStpLdpImm64UpperBound) {
884 o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast<MemOperand *>(o2), dataSize, R16);
885 }
886 Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
887 std::string comment = "SAVE CALLEE REGISTER PAIR";
888 pushInsn.SetComment(comment);
889 AppendInstructionTo(pushInsn, cgFunc);
890
891 /* Append CFI code */
892 if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
893 int32 stackFrameSize =
894 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
895 stackFrameSize -= (static_cast<int32>(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()) +
896 cgFunc.GetFunction().GetFrameReseverdSlot());
897 int32 cfiOffset = stackFrameSize - offset;
898 BB *curBB = cgFunc.GetCurBB();
899 Insn *newInsn = curBB->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg0, -cfiOffset, k64BitSize));
900 curBB->InsertInsnAfter(*newInsn,
901 aarchCGFunc.CreateCfiOffsetInsn(reg1, -cfiOffset + kOffset8MemPos, k64BitSize));
902 }
903 }
904
905 void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
906 {
907 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
908 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle];
909 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerSize() * kBitsPerByte, rty);
910 Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
911
912 MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
913 uint32 dataSize = GetPointerSize() * kBitsPerByte;
914 if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
915 o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9);
916 }
917
918 Insn &pushInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
919 std::string comment = "SAVE CALLEE REGISTER";
920 pushInsn.SetComment(comment);
921 AppendInstructionTo(pushInsn, cgFunc);
922
923 /* Append CFI code */
924 if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
925 int32 stackFrameSize =
926 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
927 stackFrameSize -= (static_cast<int32>(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()) +
928 cgFunc.GetFunction().GetFrameReseverdSlot());
929 int32 cfiOffset = stackFrameSize - offset;
930 cgFunc.GetCurBB()->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg, -cfiOffset, k64BitSize));
931 }
932 }
933
934 Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
935 AArch64reg reg1, RegType rty,
936 bool isAllocate)
937 {
938 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
939 MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair];
940 uint8 size;
941 if (CGOptions::IsArm64ilp32()) {
942 size = k8ByteSize;
943 } else {
944 size = GetPointerSize();
945 }
946 if (fpToSpDistance <= kStrLdrImm64UpperBound - kOffset8MemPos) {
947 mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle];
948 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
949 MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), size * kBitsPerByte);
950 Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o2);
951 AppendInstructionTo(insn1, cgFunc);
952 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
953 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance + size), size * kBitsPerByte);
954 Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o1, *o2);
955 AppendInstructionTo(insn2, cgFunc);
956 return insn2;
957 } else {
958 RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt);
959 ImmOperand &io1 = aarchCGFunc.CreateImmOperand(fpToSpDistance, k64BitSize, true);
960 aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64);
961 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty);
962 RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt);
963 MemOperand *mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
964 Insn &insn1 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo);
965 AppendInstructionTo(insn1, cgFunc);
966 ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true);
967 aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64);
968 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty);
969 mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0);
970 Insn &insn2 = cgFunc.GetInsnBuilder()->BuildInsn(isAllocate ? MOP_xstr : MOP_xldr, o1, *mo);
971 AppendInstructionTo(insn2, cgFunc);
972 return insn2;
973 }
974 }
975
976 Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 fpToSpDistance, AArch64reg reg0,
977 AArch64reg reg1, RegType rty)
978 {
979 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
980 CG *currCG = cgFunc.GetCG();
981 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
982 Insn *allocInsn = nullptr;
983 if (fpToSpDistance > kStpLdpImm64UpperBound) {
984 allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, true);
985 } else {
986 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
987 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
988 Operand *o2 =
989 aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
990 allocInsn = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
991 AppendInstructionTo(*allocInsn, cgFunc);
992 }
993 if (currCG->NeedInsertInstrumentationFunction()) {
994 aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
995 } else if (currCG->InstrumentWithDebugTraceCall()) {
996 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
997 } else if (currCG->InstrumentWithProfile()) {
998 aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
999 }
1000 return *allocInsn;
1001 }
1002
1003 void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
1004 {
1005 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1006 CG *currCG = cgFunc.GetCG();
1007 if (currCG->GenerateVerboseCG()) {
1008 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
1009 }
1010
1011 Insn *ipoint = nullptr;
1012 /*
1013 * stackFrameSize includes the size of args to stack-pass
1014 * if a function has neither VLA nor alloca.
1015 */
1016 int32 stackFrameSize =
1017 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1018 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1019 /*
1020 * ldp/stp's imm must be within [-512, 504];
1021 * if stp's imm is out of that range, we fall back to the sub-then-stp version
1022 */
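/*
 * Rough shapes of the two cases (operand values are placeholders, for illustration only):
 *   stp x29, x30, [sp, #-frameSize]!    // frame small enough for the pre-indexed stp
 * versus, when the frame is too large for the stp immediate,
 *   stp x29, x30, [sp, #-16]!           // push FP/LR first
 *   sub sp, sp, #restOfFrame            // then allocate the remainder of the frame
 */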
1023 bool useStpSub = false;
1024 int64 offset = 0;
1025 int32 cfiOffset = 0;
1026 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1027 /*
1028 * stack_frame_size == size of formal parameters + callee-saved (including FP/LR)
1029 * + size of local vars
1030 * + size of actuals
1031 * (when passing more than 8 args, it is the caller's responsibility to
1032 * allocate space for them; size of actuals is the largest such size in the function).
1033 */
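/* e.g. (made-up numbers, for illustration): 16 bytes FP/LR + 32 bytes other callee-saves
 * + 48 bytes locals + 16 bytes outgoing args => stack_frame_size == 112, kept 16-byte aligned. */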
1034 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1035 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1036 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1037 ipoint = cgFunc.GetCurBB()->GetLastInsn();
1038 cfiOffset = stackFrameSize;
1039 } else {
1040 if (stackFrameSize > kStpLdpImm64UpperBound) {
1041 useStpSub = true;
1042 offset = kOffset16MemPos;
1043 stackFrameSize -= offset;
1044 } else {
1045 offset = stackFrameSize;
1046 }
1047 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1048 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1049 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1050 MemOperand &o2 =
1051 aarchCGFunc.CreateCallFrameOperand(static_cast<int32>(-offset), GetPointerSize() * kBitsPerByte);
1052 ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1053 AppendInstructionTo(*ipoint, cgFunc);
1054 cfiOffset = offset;
1055 if (currCG->NeedInsertInstrumentationFunction()) {
1056 aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
1057 } else if (currCG->InstrumentWithDebugTraceCall()) {
1058 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
1059 } else if (currCG->InstrumentWithProfile()) {
1060 aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
1061 }
1062 }
1063
1064 ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1065
1066 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1067 CHECK_FATAL(!useStpSub, "Invalid assumption");
1068 ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
1069 }
1070
1071 if (useStpSub) {
1072 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1073 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1074 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1075 ipoint = cgFunc.GetCurBB()->GetLastInsn();
1076 aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true);
1077 }
1078
1079 CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1080 int32 cfiOffsetSecond = 0;
1081 if (useStpSub) {
1082 cfiOffsetSecond = stackFrameSize;
1083 ipoint = InsertCFIDefCfaOffset(cfiOffsetSecond, *ipoint);
1084 }
1085 cfiOffsetSecond = GetOffsetFromCFA();
1086 if (!cgFunc.HasVLAOrAlloca()) {
1087 cfiOffsetSecond -= fpToSpDistance;
1088 }
1089 if (cgFunc.GenCfi()) {
1090 BB *curBB = cgFunc.GetCurBB();
1091 if (useFP) {
1092 ipoint = curBB->InsertInsnAfter(
1093 *ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffsetSecond, k64BitSize));
1094 }
1095 curBB->InsertInsnAfter(*ipoint,
1096 aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffsetSecond + kOffset8MemPos, k64BitSize));
1097 }
1098 }
1099
1100 void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1101 {
1102 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1103 CG *currCG = cgFunc.GetCG();
1104 if (currCG->GenerateVerboseCG()) {
1105 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging"));
1106 }
1107
1108 int32 stackFrameSize =
1109 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1110 int64 fpToSpDistance =
1111 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1112
1113 Insn *ipoint = nullptr;
1114 int32 cfiOffset = 0;
1115
1116 if (fpToSpDistance > 0) {
1117 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1118 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1119 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1120 ipoint = cgFunc.GetCurBB()->GetLastInsn();
1121 cfiOffset = stackFrameSize;
1122 (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1123 if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1124 fpToSpDistance -= (kDivide2 * k8ByteSize);
1125 }
1126 ipoint = &CreateAndAppendInstructionForAllocateCallFrame(fpToSpDistance, reg0, reg1, rty);
1127 CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1128 cfiOffset = GetOffsetFromCFA();
1129 cfiOffset -= fpToSpDistance;
1130 } else {
1131 bool useStpSub = false;
1132
1133 if (stackFrameSize > kStpLdpImm64UpperBound) {
1134 useStpSub = true;
1135 RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1136 ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1137 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1138 ipoint = cgFunc.GetCurBB()->GetLastInsn();
1139 cfiOffset = stackFrameSize;
1140 ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1141 } else {
1142 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1143 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1144 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1145 MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, GetPointerSize() * kBitsPerByte);
1146 ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1147 AppendInstructionTo(*ipoint, cgFunc);
1148 cfiOffset = stackFrameSize;
1149 ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint);
1150 }
1151
1152 if (useStpSub) {
1153 MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair];
1154 RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1155 RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1156 MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, GetPointerSize() * kBitsPerByte);
1157 ipoint = &cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1158 AppendInstructionTo(*ipoint, cgFunc);
1159 }
1160
1161 if (currCG->NeedInsertInstrumentationFunction()) {
1162 aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction());
1163 } else if (currCG->InstrumentWithDebugTraceCall()) {
1164 aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction());
1165 } else if (currCG->InstrumentWithProfile()) {
1166 aarchCGFunc.AppendCall(*currCG->GetProfileFunction());
1167 }
1168
1169 CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point");
1170 cfiOffset = GetOffsetFromCFA();
1171 }
1172 if (cgFunc.GenCfi()) {
1173 BB *curBB = cgFunc.GetCurBB();
1174 if (useFP) {
1175 ipoint =
1176 curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffset, k64BitSize));
1177 }
1178 curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffset + kOffset8MemPos, k64BitSize));
1179 }
1180 }
1181
1182 /*
1183 * From AArch64 Reference Manual
1184 * C1.3.3 Load/Store Addressing Mode
1185 * ...
1186 * When stack alignment checking is enabled by system software and
1187 * the base register is the SP, the current stack pointer must be
1188 * initially quadword aligned, that is aligned to 16 bytes. Misalignment
1189 * generates a Stack Alignment fault. The offset does not have to
1190 * be a multiple of 16 bytes unless the specific Load/Store instruction
1191 * requires this. SP cannot be used as a register offset.
1192 */
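/*
 * A typical prologue emitted by GeneratePushRegs() looks roughly like this
 * (illustrative only; exact offsets and the register set depend on the frame layout):
 *   stp x29, x30, [sp, #-frameSize]!   // allocate the frame and save FP/LR
 *   add x29, sp, #argsToStackPassSize  // establish the frame pointer (when useFP)
 *   stp x19, x20, [sp, #offset]        // save remaining callee-saved pairs
 *   str d8, [sp, #offset2]             // and any leftover single register
 */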
1193 void AArch64GenProEpilog::GeneratePushRegs()
1194 {
1195 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1196 CG *currCG = cgFunc.GetCG();
1197 const MapleVector<AArch64reg> &regsToSave =
1198 (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1199
1200 CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?");
1201
1202 AArch64reg intRegFirstHalf = kRinvalid;
1203 AArch64reg fpRegFirstHalf = kRinvalid;
1204
1205 if (currCG->GenerateVerboseCG()) {
1206 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers"));
1207 }
1208
1209 /*
1210 * Even if we don't use RFP, since we push a pair of registers in one instruction
1211 * and the stack needs to be aligned on a 16-byte boundary, push RFP as well if the function has a call.
1212 * Make sure this is reflected when computing callee_saved_regs.size().
1213 */
1214 if (!currCG->GenerateDebugFriendlyCode()) {
1215 AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt);
1216 } else {
1217 AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt);
1218 }
1219
1220 if (useFP) {
1221 if (currCG->GenerateVerboseCG()) {
1222 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
1223 }
1224 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1225 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1226 int64 fpToSpDistance =
1227 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1228 bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
1229 if ((fpToSpDistance > 0) || isLmbc) {
1230 Operand *immOpnd;
1231 if (isLmbc) {
1232 int32 size =
1233 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1234 immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
1235 } else {
1236 immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
1237 }
1238 if (!isLmbc || cgFunc.SeenFP() || cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1239 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
1240 }
1241 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1242 if (cgFunc.GenCfi()) {
1243 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(
1244 stackBaseReg,
1245 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToSpDistance,
1246 k64BitSize));
1247 }
1248 } else {
1249 aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
1250 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1251 if (cgFunc.GenCfi()) {
1252 cgFunc.GetCurBB()->AppendInsn(
1253 cgFunc.GetInsnBuilder()
1254 ->BuildCfiInsn(cfi::OP_CFI_def_cfa_register)
1255 .AddOpndChain(aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize)));
1256 }
1257 }
1258 }
1259
1260 MapleVector<AArch64reg>::const_iterator it = regsToSave.begin();
1261 /* skip the first two registers */
1262 CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP");
1263 ++it;
1264 CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1265 ++it;
1266
1267 AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1268 int32 offset;
1269 if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1270 offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1271 memLayout->GetSizeOfLocals());
1272 } else {
1273 offset = (static_cast<int32>(memLayout->RealStackFrameSize() -
1274 (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) - /* for FP/LR */
1275 memLayout->SizeOfArgsToStackPass() -
1276 cgFunc.GetFunction().GetFrameReseverdSlot());
1277 }
1278
1279 if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1280 offset -= kAarch64StackPtrAlignment;
1281 }
1282
1283 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1284 /* GR/VR save areas are above the callee save area */
1285 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1286 auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1287 RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1288 offset -= saveareasize;
1289 }
1290
1291 std::vector<std::pair<uint16, int32>> calleeRegAndOffsetVec;
1292 for (; it != regsToSave.end(); ++it) {
1293 AArch64reg reg = *it;
1294 CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?");
1295 CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1296 RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1297 AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1298 if (firstHalf == kRinvalid) {
1299 /* remember it */
1300 firstHalf = reg;
1301 /* for int callee-saved register: x19->19,x20->20 ...
1302 for float callee-saved register: d8->72, d9->73 ..., d15->79
1303 */
1304 uint16 regNO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
1305 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset));
1306 } else {
1307 uint16 regNO = (regType == kRegTyInt) ? static_cast<uint16>(reg - 1) : static_cast<uint16>(reg - V8 + 72);
1308 calleeRegAndOffsetVec.push_back(std::pair<uint16, int32>(regNO, offset + k8ByteSize));
1309 AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset);
1310 GetNextOffsetCalleeSaved(offset);
1311 firstHalf = kRinvalid;
1312 }
1313 }
1314
1315 if (intRegFirstHalf != kRinvalid) {
1316 AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1317 GetNextOffsetCalleeSaved(offset);
1318 }
1319
1320 if (fpRegFirstHalf != kRinvalid) {
1321 AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1322 GetNextOffsetCalleeSaved(offset);
1323 }
1324
1325 /*
1326 * Reset the split base offset in case we split stp/ldp instructions,
1327 * so that we also generate a load-into-base-register instruction
1328 * for pop pairs.
1329 */
1330 aarchCGFunc.SetSplitBaseOffset(0);
1331
1332 const auto &emitMemoryManager = CGOptions::GetInstance().GetEmitMemoryManager();
1333 if (emitMemoryManager.codeSpace != nullptr) {
1334 emitMemoryManager.funcCalleeOffsetSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), calleeRegAndOffsetVec);
1335 int64 fpToCurSpDistance =
1336 (cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot());
1337 int32 fp2PrevFrameSPDelta =
1338 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToCurSpDistance;
1339 emitMemoryManager.funcFpSPDeltaSaver(emitMemoryManager.codeSpace, cgFunc.GetName(), fp2PrevFrameSPDelta);
1340 }
1341 }
1342
1343 void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs()
1344 {
1345 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1346 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1347 AArch64MemLayout *memlayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1348 uint8 size;
1349 if (CGOptions::IsArm64ilp32()) {
1350 size = k8ByteSize;
1351 } else {
1352 size = GetPointerSize();
1353 }
1354 uint32 dataSizeBits = size * kBitsPerByte;
1355 uint32 offset;
1356 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1357 offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */
1358 if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) {
1359 offset += size; /* End of area should be aligned. Hole between VR and GR area */
1360 }
1361 } else {
1362 offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */
1363 if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) {
1364 offset -= size;
1365 }
1366 }
1367 uint32 grSize = (UINT32_MAX - offset) + 1;
1368 uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
1369 DEBUG_ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
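/*
 * Illustration (hypothetical values): if the named parameters consume x0..x2, the GR save
 * area holds x3..x7 (5 * 8 = 40 bytes), so start_regno = 8 - 40 / 8 = 3 and the loop
 * below stores x3..x7 into consecutive slots of the GR save area.
 */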
1370 for (uint32 i = start_regno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
1371 uint32 tmpOffset = 0;
1372 if (CGOptions::IsBigEndian()) {
1373 if ((dataSizeBits >> k8BitShift) < k8BitSize) {
1374 tmpOffset += k8BitSize - (dataSizeBits >> k8BitShift);
1375 }
1376 }
1377 Operand *stackLoc;
1378 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1379 stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
1380 } else {
1381 stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size);
1382 }
1383 RegOperand &reg =
1384 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyInt);
1385 Insn &inst =
1386 cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, *stackLoc);
1387 cgFunc.GetCurBB()->AppendInsn(inst);
1388 offset += size;
1389 }
1390 if (!CGOptions::UseGeneralRegOnly()) {
1391 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1392 offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());
1393 } else {
1394 offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1;
1395 }
1396 start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
1397 DEBUG_ASSERT(start_regno <= k8BitSize, "Incorrect starting VR regno for VR Save Area");
1398 for (uint32 i = start_regno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
1399 uint32 tmpOffset = 0;
1400 if (CGOptions::IsBigEndian()) {
1401 if ((dataSizeBits >> k8BitShift) < k16BitSize) {
1402 tmpOffset += k16BitSize - (dataSizeBits >> k8BitShift);
1403 }
1404 }
1405 Operand *stackLoc;
1406 if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1407 stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
1408 } else {
1409 stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size);
1410 }
1411 RegOperand &reg =
1412 aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyFloat);
1413 Insn &inst =
1414 cgFunc.GetInsnBuilder()->BuildInsn(aarchCGFunc.PickStInsn(dataSizeBits, PTY_f64), reg, *stackLoc);
1415 cgFunc.GetCurBB()->AppendInsn(inst);
1416 offset += (size * k2BitSize);
1417 }
1418 }
1419 }
1420 }
1421
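/*
 * Emit a stack-overflow probe: subtract `offset` from SP into the scratch register
 * and load from that address into wzr. If the probed address lies in the guard
 * region, the load faults before the new frame is actually used.
 */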
1422 void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset)
1423 {
1424 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1425 CG *currCG = cgFunc.GetCG();
1426 /* sub x16, sp, #0x2000 */
1427 auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty);
1428 auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty);
1429 auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true);
1430 aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64);
1431
1432 /* ldr wzr, [x16] */
1433 auto &wzr = cgFunc.GetZeroOpnd(k32BitSize);
1434 auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize);
1435 auto &soeInstr = cgFunc.GetInsnBuilder()->BuildInsn(MOP_wldr, wzr, refX16);
1436 if (currCG->GenerateVerboseCG()) {
1437 soeInstr.SetComment("soerror");
1438 }
1439 soeInstr.SetDoNotRemove(true);
1440 AppendInstructionTo(soeInstr, cgFunc);
1441 }
1442
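/*
 * Build the prologue into the dummy BB and splice it into the beginning of `bb`:
 * optionally emit a .loc debug directive, push the callee-saved registers
 * (including the FP/LR pair, which also allocates the activation frame) or just
 * allocate the frame, set up FP, spill unnamed vararg registers, and optionally
 * emit the stack-overflow check.
 */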
1443 void AArch64GenProEpilog::GenerateProlog(BB &bb)
1444 {
1445 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1446 CG *currCG = cgFunc.GetCG();
1447 BB *formerCurBB = cgFunc.GetCurBB();
1448 aarchCGFunc.GetDummyBB()->ClearInsns();
1449 aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
1450 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1451 if (!cgFunc.GetHasProEpilogue()) {
1452 return;
1453 }
1454
1455 // insert .loc for function
1456 if (currCG->GetCGOptions().WithLoc() &&
1457 (!currCG->GetMIRModule()->IsCModule() || currCG->GetMIRModule()->IsWithDbgInfo())) {
1458 MIRFunction *func = &cgFunc.GetFunction();
1459 MIRSymbol *fSym = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx());
1460 if (currCG->GetCGOptions().WithSrc()) {
1461 uint32 tempmaxsize = static_cast<uint32>(currCG->GetMIRModule()->GetSrcFileInfo().size());
1462 uint32 endfilenum = currCG->GetMIRModule()->GetSrcFileInfo()[tempmaxsize - 1].second;
1463 if (fSym->GetSrcPosition().FileNum() != 0 && fSym->GetSrcPosition().FileNum() <= endfilenum) {
1464 Operand *o0 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().FileNum());
1465 int64_t lineNum = fSym->GetSrcPosition().LineNum();
1466 if (lineNum == 0) {
1467 if (cgFunc.GetFunction().GetAttr(FUNCATTR_native)) {
1468 lineNum = 0xffffe;
1469 } else {
1470 lineNum = 0xffffd;
1471 }
1472 }
1473 Operand *o1 = cgFunc.CreateDbgImmOperand(lineNum);
1474 Insn &loc =
1475 cgFunc.GetInsnBuilder()->BuildDbgInsn(mpldbg::OP_DBG_loc).AddOpndChain(*o0).AddOpndChain(*o1);
1476 cgFunc.GetCurBB()->AppendInsn(loc);
1477 }
1478 } else {
1479 Operand *o0 = cgFunc.CreateDbgImmOperand(1);
1480 Operand *o1 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().MplLineNum());
1481 Insn &loc = cgFunc.GetInsnBuilder()->BuildDbgInsn(mpldbg::OP_DBG_loc).AddOpndChain(*o0).AddOpndChain(*o1);
1482 cgFunc.GetCurBB()->AppendInsn(loc);
1483 }
1484 }
1485
1486 const MapleVector<AArch64reg> &regsToSave =
1487 (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1488 if (!regsToSave.empty()) {
1489 /*
1490 * Among other things, push the FP & LR pair.
1491 * FP/LR are added to the callee-saved list in AllocateRegisters();
1492 * we add them to that list regardless of whether UseFP() is true or false.
1493 * The activation frame is allocated as part of pushing the FP/LR pair.
1494 */
1495 GeneratePushRegs();
1496 } else {
1497 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1498 int32 stackFrameSize =
1499 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1500 if (stackFrameSize > 0) {
1501 if (currCG->GenerateVerboseCG()) {
1502 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame"));
1503 }
1504 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1505 aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64);
1506
1507 int32 offset = stackFrameSize;
1508 (void)InsertCFIDefCfaOffset(offset, *(cgFunc.GetCurBB()->GetLastInsn()));
1509 }
1510 if (currCG->GenerateVerboseCG()) {
1511 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP"));
1512 }
1513 if (useFP) {
1514 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1515 bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
1516 int64 fpToSpDistance =
1517 cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1518 if ((fpToSpDistance > 0) || isLmbc) {
1519 Operand *immOpnd;
1520 if (isLmbc) {
1521 int32 size = static_cast<int32>(
1522 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1523 immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true);
1524 } else {
1525 immOpnd = &aarchCGFunc.CreateImmOperand(fpToSpDistance, k32BitSize, true);
1526 }
1527 aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64);
1528 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1529 if (cgFunc.GenCfi()) {
1530 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(
1531 stackBaseReg,
1532 static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() - fpToSpDistance,
1533 k64BitSize));
1534 }
1535 } else {
1536 aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64);
1537 cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true);
1538 if (cgFunc.GenCfi()) {
1539 cgFunc.GetCurBB()->AppendInsn(
1540 cgFunc.GetInsnBuilder()
1541 ->BuildCfiInsn(cfi::OP_CFI_def_cfa_register)
1542 .AddOpndChain(aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize)));
1543 }
1544 }
1545 }
1546 }
1547 GeneratePushUnnamedVarargRegs();
1548 if (currCG->DoCheckSOE()) {
1549 AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset);
1550 }
1551 bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB());
1552 cgFunc.SetCurBB(*formerCurBB);
1553 aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
1554 }
1555
1556 void AArch64GenProEpilog::GenerateRet(BB &bb)
1557 {
1558 bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_xret));
1559 }
1560
1561 /*
1562 * If every pred of exitBB was tail-call optimized (bl/blr replaced with b/br), return true and no ret insn is created.
1563 * Otherwise, return false so the caller creates the ret insn.
1564 */
1565 bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB)
1566 {
1567 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1568 if (cgFunc.GetMirModule().IsCModule() &&
1569 (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) {
1570 return false;
1571 }
1572 for (auto tmpBB : exitBB.GetPreds()) {
1573 Insn *firstInsn = tmpBB->GetFirstInsn();
1574 if ((firstInsn == nullptr || tmpBB->IsCommentBB()) && (!tmpBB->GetPreds().empty())) {
1575 if (!TestPredsOfRetBB(*tmpBB)) {
1576 return false;
1577 }
1578 } else {
1579 Insn *lastInsn = tmpBB->GetLastInsn();
1580 if (lastInsn == nullptr) {
1581 return false;
1582 }
1583 MOperator insnMop = lastInsn->GetMachineOpcode();
1584 if (insnMop != MOP_tail_call_opt_xbl && insnMop != MOP_tail_call_opt_xblr) {
1585 return false;
1586 }
1587 }
1588 }
1589 return true;
1590 }
1591
1592 void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset)
1593 {
1594 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1595 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle];
1596 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, GetPointerSize() * kBitsPerByte, rty);
1597 Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
1598 MemOperand *aarchMemO1 = static_cast<MemOperand *>(o1);
1599 uint32 dataSize = GetPointerSize() * kBitsPerByte;
1600 if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) {
1601 o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9);
1602 }
1603
1604 Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, *o1);
1605 popInsn.SetComment("RESTORE");
1606 cgFunc.GetCurBB()->AppendInsn(popInsn);
1607
1608 /* Append CFI code. */
1609 if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
1610 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg, k64BitSize));
1611 }
1612 }
1613
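/*
 * Restore a pair of callee-saved registers with ldp. If the stack offset exceeds
 * the ldp immediate range, the offset is first materialized into R16 with an add
 * and the load is performed relative to that base.
 */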
1614 void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty,
1615 int32 offset)
1616 {
1617 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1618 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1619 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1620 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1621 Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast<uint32>(offset), GetPointerSize() * kBitsPerByte);
1622
1623 uint32 dataSize = GetPointerSize() * kBitsPerByte;
1624 CHECK_FATAL(offset >= 0, "offset must >= 0");
1625 if (offset > kStpLdpImm64UpperBound) {
1626 o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, static_cast<MemOperand &>(*o2), dataSize, R16);
1627 }
1628 Insn &popInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1629 popInsn.SetComment("RESTORE RESTORE");
1630 cgFunc.GetCurBB()->AppendInsn(popInsn);
1631
1632 /* Append CFI code */
1633 if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) {
1634 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg0, k64BitSize));
1635 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg1, k64BitSize));
1636 }
1637 }
1638
1639 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty)
1640 {
1641 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1642 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1643 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1644 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1645 int32 stackFrameSize =
1646 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1647 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1648 /*
1649 * ldp/stp's immediate must lie within [-512, 504];
1650 * if the ldp immediate would exceed 504, we fall back to the ldp-add version.
1651 */
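/*
 * Illustrative sketch (assumed values, not emitted verbatim): for a 1024-byte frame
 * with no outgoing stack arguments, this fallback produces roughly
 *     add  sp, sp, #1008
 *     ldp  x29, x30, [sp], #16
 * i.e. SP is first advanced to just below the FP/LR slots, then the pair is popped
 * with a post-indexed ldp that releases the remaining 16 bytes.
 */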
1652 bool useLdpAdd = false;
1653 int32 offset = 0;
1654
1655 Operand *o2 = nullptr;
1656 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1657 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
1658 } else {
1659 if (stackFrameSize > kStpLdpImm64UpperBound) {
1660 useLdpAdd = true;
1661 offset = kOffset16MemPos;
1662 stackFrameSize -= offset;
1663 } else {
1664 offset = stackFrameSize;
1665 }
1666 o2 = &aarchCGFunc.CreateCallFrameOperand(offset, GetPointerSize() * kBitsPerByte);
1667 }
1668
1669 if (useLdpAdd) {
1670 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1671 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1672 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1673 if (cgFunc.GenCfi()) {
1674 int64 cfiOffset = GetOffsetFromCFA();
1675 BB *curBB = cgFunc.GetCurBB();
1676 curBB->InsertInsnAfter(*(curBB->GetLastInsn()),
1677 aarchCGFunc.CreateCfiDefCfaInsn(RSP, cfiOffset - stackFrameSize, k64BitSize));
1678 }
1679 }
1680
1681 if (!cgFunc.HasVLAOrAlloca() && fpToSpDistance > 0) {
1682 CHECK_FATAL(!useLdpAdd, "Invalid assumption");
1683 if (fpToSpDistance > kStpLdpImm64UpperBound) {
1684 (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1685 } else {
1686 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1687 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1688 }
1689 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1690 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1691 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1692 } else {
1693 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1694 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1695 }
1696
1697 if (cgFunc.GenCfi()) {
1698 /* Append CFI restore */
1699 if (useFP) {
1700 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1701 }
1702 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1703 }
1704 }
1705
1706 void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty)
1707 {
1708 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1709 MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair];
1710 Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, GetPointerSize() * kBitsPerByte, rty);
1711 Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, GetPointerSize() * kBitsPerByte, rty);
1712 int32 stackFrameSize =
1713 static_cast<int32>(static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize());
1714 int64 fpToSpDistance = cgFunc.GetMemlayout()->SizeOfArgsToStackPass() + cgFunc.GetFunction().GetFrameReseverdSlot();
1715 /*
1716 * ldp/stp's immediate must lie within [-512, 504];
1717 * if the ldp immediate would exceed 504, we fall back to the ldp-add version.
1718 */
1719 bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
1720 if (cgFunc.HasVLAOrAlloca() || fpToSpDistance == 0 || isLmbc) {
1721 int lmbcOffset = 0;
1722 if (!isLmbc) {
1723 stackFrameSize -= fpToSpDistance;
1724 } else {
1725 lmbcOffset = fpToSpDistance - (kDivide2 * k8ByteSize);
1726 }
1727 if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
1728 Operand *o2;
1729 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? lmbcOffset : 0), GetPointerSize() * kBitsPerByte);
1730 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1731 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1732 if (cgFunc.GenCfi()) {
1733 /* Append CFI restore */
1734 if (useFP) {
1735 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1736 }
1737 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1738 }
1739 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1740 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1741 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1742 } else {
1743 MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, GetPointerSize() * kBitsPerByte);
1744 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, o2);
1745 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1746 if (cgFunc.GenCfi()) {
1747 if (useFP) {
1748 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1749 }
1750 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1751 }
1752 }
1753 } else {
1754 Operand *o2;
1755 o2 = aarchCGFunc.CreateStackMemOpnd(RSP, static_cast<int32>(fpToSpDistance), GetPointerSize() * kBitsPerByte);
1756 if (fpToSpDistance > kStpLdpImm64UpperBound) {
1757 (void)AppendInstructionForAllocateOrDeallocateCallFrame(fpToSpDistance, reg0, reg1, rty, false);
1758 } else {
1759 Insn &deallocInsn = cgFunc.GetInsnBuilder()->BuildInsn(mOp, o0, o1, *o2);
1760 cgFunc.GetCurBB()->AppendInsn(deallocInsn);
1761 }
1762
1763 if (cgFunc.GenCfi()) {
1764 if (useFP) {
1765 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize));
1766 }
1767 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize));
1768 }
1769 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1770 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1771 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1772 }
1773 }
1774
1775 void AArch64GenProEpilog::GeneratePopRegs()
1776 {
1777 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1778 CG *currCG = cgFunc.GetCG();
1779
1780 const MapleVector<AArch64reg> &regsToRestore =
1781 (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1782
1783 CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?");
1784
1785 AArch64reg intRegFirstHalf = kRinvalid;
1786 AArch64reg fpRegFirstHalf = kRinvalid;
1787
1788 if (currCG->GenerateVerboseCG()) {
1789 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers"));
1790 }
1791
1792 MapleVector<AArch64reg>::const_iterator it = regsToRestore.begin();
1793 /*
1794 * Even if we don't use FP, since we push a pair of registers
1795 * in a single instruction (i.e., stp) and the stack needs to be aligned
1796 * on a 16-byte boundary, push FP as well if the function has a call.
1797 * Make sure this is reflected when computing calleeSavedRegs.size().
1798 * Skip the first two entries (FP/LR); they are restored during frame deallocation.
1799 */
1800 CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP");
1801 ++it;
1802 CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR");
1803 ++it;
1804
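/*
 * Compute the SP-relative offset of the first callee-saved slot to restore.
 * The FP/LR pair is excluded here; it is reloaded by the frame-deallocation
 * code invoked below.
 */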
1805 AArch64MemLayout *memLayout = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1806 int32 offset;
1807 if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
1808 offset = static_cast<int32>((memLayout->RealStackFrameSize() - aarchCGFunc.SizeOfCalleeSaved()) -
1809 memLayout->GetSizeOfLocals());
1810 } else {
1811 offset = (static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize() -
1812 (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) - /* for FP/LR */
1813 memLayout->SizeOfArgsToStackPass() -
1814 cgFunc.GetFunction().GetFrameReseverdSlot();
1815 }
1816
1817 if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) {
1818 offset -= kAarch64StackPtrAlignment;
1819 }
1820
1821 if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
1822 /* GR/VR save areas are above the callee save area */
1823 AArch64MemLayout *ml = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout());
1824 auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), GetPointerSize() * k2BitSize) +
1825 RoundUp(ml->GetSizeOfVRSaveArea(), GetPointerSize() * k2BitSize));
1826 offset -= saveareasize;
1827 }
1828
1829 /*
1830 * We are using a cleared dummy block, so insertPoint cannot be ret;
1831 * see GenerateEpilog().
1832 */
1833 for (; it != regsToRestore.end(); ++it) {
1834 AArch64reg reg = *it;
1835 CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?");
1836 CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?");
1837
1838 RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat;
1839 AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf;
1840 if (firstHalf == kRinvalid) {
1841 /* remember it */
1842 firstHalf = reg;
1843 } else {
1844 /* flush the pair */
1845 AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset);
1846 GetNextOffsetCalleeSaved(offset);
1847 firstHalf = kRinvalid;
1848 }
1849 }
1850
1851 if (intRegFirstHalf != kRinvalid) {
1852 AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset);
1853 GetNextOffsetCalleeSaved(offset);
1854 }
1855
1856 if (fpRegFirstHalf != kRinvalid) {
1857 AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset);
1858 GetNextOffsetCalleeSaved(offset);
1859 }
1860
1861 if (!currCG->GenerateDebugFriendlyCode()) {
1862 AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt);
1863 } else {
1864 AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt);
1865 }
1866
1867 if (cgFunc.GenCfi()) {
1868 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize));
1869 }
1870 /*
1871 * Reset the split base offset in case we split stp/ldp instructions,
1872 * so that no stale add-to-base-register offset is carried over
1873 * to the next function (defensive; likely not strictly necessary).
1874 */
1875 aarchCGFunc.SetSplitBaseOffset(0);
1876 }
1877
1878 void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol)
1879 {
1880 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1881 Operand &targetOpnd = aarchCGFunc.GetOrCreateFuncNameOpnd(funcSymbol);
1882 cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1883 }
1884
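/*
 * Build the epilogue into the dummy BB and append it to the epilog BB: run the
 * stack-guard check, restore SP from FP for VLA/alloca frames, pop the
 * callee-saved registers (or just pop the activation frame), optionally call the
 * debug trace exit function, and emit ret.
 */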
1885 void AArch64GenProEpilog::GenerateEpilog(BB &bb)
1886 {
1887 if (!cgFunc.GetHasProEpilogue()) {
1888 if (bb.GetPreds().empty() || !TestPredsOfRetBB(bb)) {
1889 GenerateRet(bb);
1890 }
1891 return;
1892 }
1893
1894 /* generate stack protected instruction */
1895 BB &epilogBB = GenStackGuardCheckInsn(bb);
1896
1897 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1898 CG *currCG = cgFunc.GetCG();
1899 BB *formerCurBB = cgFunc.GetCurBB();
1900 aarchCGFunc.GetDummyBB()->ClearInsns();
1901 aarchCGFunc.GetDummyBB()->SetIsProEpilog(true);
1902 cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB());
1903
1904 Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
1905 Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt);
1906
1907 if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
1908 aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64);
1909 }
1910
1911 /* Hack: exit bb should always be reachable, since we need its existence for ".cfi_remember_state" */
1912 if (&epilogBB != cgFunc.GetLastBB() && epilogBB.GetNext() != nullptr) {
1913 BB *nextBB = epilogBB.GetNext();
1914 do {
1915 if (nextBB == cgFunc.GetLastBB() || !nextBB->IsEmpty()) {
1916 break;
1917 }
1918 nextBB = nextBB->GetNext();
1919 } while (nextBB != nullptr);
1920 if (nextBB != nullptr && !nextBB->IsEmpty() && cgFunc.GenCfi()) {
1921 cgFunc.GetCurBB()->AppendInsn(cgFunc.GetInsnBuilder()->BuildCfiInsn(cfi::OP_CFI_remember_state));
1922 cgFunc.GetCurBB()->SetHasCfi();
1923 nextBB->InsertInsnBefore(*nextBB->GetFirstInsn(),
1924 cgFunc.GetInsnBuilder()->BuildCfiInsn(cfi::OP_CFI_restore_state));
1925 nextBB->SetHasCfi();
1926 }
1927 }
1928
1929 const MapleVector<AArch64reg> &regsToSave =
1930 (!CGOptions::DoRegSavesOpt()) ? aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs();
1931 if (!regsToSave.empty()) {
1932 GeneratePopRegs();
1933 } else {
1934 auto stackFrameSize = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->RealStackFrameSize();
1935 if (stackFrameSize > 0) {
1936 if (currCG->GenerateVerboseCG()) {
1937 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame"));
1938 }
1939
1940 if (cgFunc.HasVLAOrAlloca()) {
1941 auto size = static_cast<AArch64MemLayout *>(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize();
1942 stackFrameSize = stackFrameSize < size ? 0 : stackFrameSize - size;
1943 }
1944
1945 if (stackFrameSize > 0) {
1946 Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true);
1947 aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64);
1948 if (cgFunc.GenCfi()) {
1949 cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize));
1950 }
1951 }
1952 }
1953 }
1954
1955 if (currCG->InstrumentWithDebugTraceCall()) {
1956 AppendJump(*(currCG->GetDebugTraceExitFunction()));
1957 }
1958
1959 GenerateRet(*(cgFunc.GetCurBB()));
1960 epilogBB.AppendBBInsns(*cgFunc.GetCurBB());
1961 if (cgFunc.GetCurBB()->GetHasCfi()) {
1962 epilogBB.SetHasCfi();
1963 }
1964
1965 cgFunc.SetCurBB(*formerCurBB);
1966 aarchCGFunc.GetDummyBB()->SetIsProEpilog(false);
1967 }
1968
1969 void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb)
1970 {
1971 auto &aarchCGFunc = static_cast<AArch64CGFunc &>(cgFunc);
1972 CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!");
1973 if (cgFunc.GetExitBB(0)->IsUnreachable()) {
1974 /* if exitbb is unreachable then exitbb can not be generated */
1975 GenerateEpilog(bb);
1976 } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */
1977 LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx());
1978 bb.AppendInsn(cgFunc.GetInsnBuilder()->BuildInsn(MOP_xuncond, targetOpnd));
1979 }
1980 }
1981
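/*
 * Rewrite the recorded bl/blr call sites as tail calls: each call is switched to
 * its tail-call opcode, the epilogue instructions of the exit BB are copied in
 * front of it, and the exit BB is emptied once no other predecessor still needs it.
 */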
1982 void AArch64GenProEpilog::ConvertToTailCalls(MapleSet<Insn *> &callInsnsMap)
1983 {
1984 BB *exitBB = GetCurTailcallExitBB();
1985
1986 /* The exit BB is fully populated only at this point. If it restores SP, extra stack space was
1987 allocated (e.g., a call with more than 8 args or a large aggregate argument), so do not convert. */
1988 FOR_BB_INSNS(insn, exitBB) {
1989 if (insn->GetMachineOpcode() == MOP_xaddrri12 || insn->GetMachineOpcode() == MOP_xaddrri24) {
1990 RegOperand &reg = static_cast<RegOperand &>(insn->GetOperand(0));
1991 if (reg.GetRegisterNumber() == RSP) {
1992 return;
1993 }
1994 }
1995 }
1996
1997 /* Replace all of the call insns. */
1998 for (Insn *callInsn : callInsnsMap) {
1999 MOperator insnMop = callInsn->GetMachineOpcode();
2000 switch (insnMop) {
2001 case MOP_xbl: {
2002 callInsn->SetMOP(AArch64CG::kMd[MOP_tail_call_opt_xbl]);
2003 break;
2004 }
2005 case MOP_xblr: {
2006 callInsn->SetMOP(AArch64CG::kMd[MOP_tail_call_opt_xblr]);
2007 break;
2008 }
2009 default:
2010 CHECK_FATAL(false, "Internal error.");
2011 break;
2012 }
2013 BB *bb = callInsn->GetBB();
2014 if (bb->GetKind() == BB::kBBGoto) {
2015 bb->SetKind(BB::kBBFallthru);
2016 if (bb->GetLastInsn()->GetMachineOpcode() == MOP_xuncond) {
2017 bb->RemoveInsn(*bb->GetLastInsn());
2018 }
2019 }
2020 for (auto sBB : bb->GetSuccs()) {
2021 bb->RemoveSuccs(*sBB);
2022 sBB->RemovePreds(*bb);
2023 break;
2024 }
2025 }
2026
2027 /* copy instrs from exit block */
2028 for (Insn *callInsn : callInsnsMap) {
2029 BB *toBB = callInsn->GetBB();
2030 BB *fromBB = exitBB;
2031 if (toBB == fromBB) {
2032 /* callsite also in the return exit block, just change the return to branch */
2033 Insn *lastInsn = toBB->GetLastInsn();
2034 if (lastInsn->GetMachineOpcode() == MOP_xret) {
2035 Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*callInsn);
2036 toBB->ReplaceInsn(*lastInsn, *newInsn);
2037 for (Insn *insn = callInsn->GetNextMachineInsn(); insn != newInsn; insn = insn->GetNextMachineInsn()) {
2038 insn->SetDoNotRemove(true);
2039 }
2040 toBB->RemoveInsn(*callInsn);
2041 return;
2042 }
2043 CHECK_FATAL(0, "Tailcall in incorrect block");
2044 }
2045 FOR_BB_INSNS_SAFE(insn, fromBB, next) {
2046 if (insn->IsCfiInsn() || (insn->IsMachineInstruction() && insn->GetMachineOpcode() != MOP_xret)) {
2047 Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*insn);
2048 newInsn->SetDoNotRemove(true);
2049 toBB->InsertInsnBefore(*callInsn, *newInsn);
2050 }
2051 }
2052 }
2053
2054 /* remove instrs in exit block */
2055 BB *bb = exitBB;
2056 if (bb->GetPreds().size() > 0) {
2057 return; /* exit block still needed by other non-tailcall blocks */
2058 }
2059 Insn &junk = cgFunc.GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_pseudo_none);
2060 bb->AppendInsn(junk);
2061 FOR_BB_INSNS_SAFE(insn, bb, next) {
2062 if (insn->GetMachineOpcode() != MOP_pseudo_none) {
2063 bb->RemoveInsn(*insn);
2064 }
2065 }
2066 }
2067
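/*
 * Driver: decide whether a prologue/epilogue is needed, emit the stack guard,
 * optionally isolate the fast path at -O2, generate the prologue in the first (or
 * fast-path) BB and an epilogue in every exit BB, and for C modules convert the
 * call sites recorded per exit BB into tail calls.
 */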
2068 void AArch64GenProEpilog::Run()
2069 {
2070 CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label,
2071 "The first statement should be a label");
2072 NeedStackProtect();
2073 cgFunc.SetHasProEpilogue(NeedProEpilog());
2074 if (cgFunc.GetHasProEpilogue()) {
2075 GenStackGuard(*(cgFunc.GetFirstBB()));
2076 }
2077 BB *proLog = nullptr;
2078 if (cgFunc.GetCG()->DoPrologueEpilogue() && Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel2) {
2079 /* There are some O2 dependent assumptions made */
2080 proLog = IsolateFastPath(*(cgFunc.GetFirstBB()));
2081 }
2082
2083 if (cgFunc.IsExitBBsVecEmpty()) {
2084 if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() &&
2085 cgFunc.GetLastBB()->GetPrev()->GetPrev()) {
2086 cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()->GetPrev());
2087 } else {
2088 cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev());
2089 }
2090 }
2091
2092 if (proLog != nullptr) {
2093 GenerateProlog(*proLog);
2094 proLog->SetFastPath(true);
2095 cgFunc.GetFirstBB()->SetFastPath(true);
2096 } else {
2097 GenerateProlog(*(cgFunc.GetFirstBB()));
2098 }
2099
2100 for (auto *exitBB : cgFunc.GetExitBBsVec()) {
2101 if (GetFastPathReturnBB() != exitBB) {
2102 GenerateEpilog(*exitBB);
2103 }
2104 }
2105
2106 if (cgFunc.GetFunction().IsJava()) {
2107 GenerateEpilogForCleanup(*(cgFunc.GetCleanupBB()));
2108 }
2109
2110 if (cgFunc.GetMirModule().IsCModule() && !exitBB2CallSitesMap.empty()) {
2111 cgFunc.GetTheCFG()->InitInsnVisitor(cgFunc);
2112 for (auto pair : exitBB2CallSitesMap) {
2113 BB *curExitBB = pair.first;
2114 MapleSet<Insn *> &callInsnsMap = pair.second;
2115 SetCurTailcallExitBB(curExitBB);
2116 ConvertToTailCalls(callInsnsMap);
2117 }
2118 }
2119 }
2120 } /* namespace maplebe */
2121