1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "aarch64_fixshortbranch.h"
17 #include "cg.h"
18 #include "mpl_logging.h"
19 #include "common_utils.h"
20
21 namespace maplebe {
// Returns the number of 4-byte instruction slots to reserve for the alignment
// pseudo-instruction of @bb, given the running instruction counter @addr
// (in insn units, not bytes). The result is never smaller than
// kAlignPseudoSize so that distance estimates stay conservative in both the
// assembly stage and the link stage (see the comment inside).
uint32 AArch64FixShortBranch::CalculateAlignRange(const BB &bb, uint32 addr) const
{
    // addr == 0 means no instruction has been counted yet; nothing to pad.
    if (addr == 0) {
        return addr;
    }
    uint32 alignPower = bb.GetAlignPower();
    /*
     * The algorithm can avoid the problem that alignment causes conditional branch out of range in two stages.
     * 1. asm: .mpl -> .s
     *    The pseudo-instruction [.p2align 5] is 12B.
     *    kAlignPseudoSize = 12 / 4 = 3
     * 2. link: .s -> .o
     *    The pseudo-instruction will be expanded to nop.
     *    eg. .p2align 5
     *        alignPower = 5, alignValue = 2^5 = 32
     *        range = (32 - ((addr - 1) * 4) % 32) / 4 - 1
     *
     * =======> max[range, kAlignPseudoSize]
     */
    // (1U << alignPower) is the alignment in bytes; the masked subtraction is the
    // byte distance from the current address up to the next aligned boundary,
    // converted back to insn slots (worst-case number of inserted nops).
    uint32 range = ((1U << alignPower) - (((addr - 1) * kInsnSize) & ((1U << alignPower) - 1))) / kInsnSize - 1;
    return range > kAlignPseudoSize ? range : kAlignPseudoSize;
}
44
// Assigns a monotonically increasing id to every machine instruction in the
// function, approximating its address in 4-byte insn units. The ids feed the
// later DistanceCheck calls, so expansion effects that add instructions at
// emit time (BB alignment nops, the lazy-binding routine) are folded into the
// counter as well.
void AArch64FixShortBranch::SetInsnId() const
{
    uint32 i = 0;
    AArch64CGFunc *aarch64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
    FOR_ALL_BB(bb, aarch64CGFunc)
    {
        // Reserve slots for alignment nops emitted in front of aligned BBs
        // (only done for C modules).
        if (aarch64CGFunc->GetMirModule().IsCModule() && bb->IsBBNeedAlign() && bb->GetAlignNopNum() != 0) {
            i = i + CalculateAlignRange(*bb, i);
        }
        FOR_BB_INSNS(insn, bb)
        {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            // One insn may expand to several atomic machine instructions.
            i += insn->GetAtomicNum();
            insn->SetId(i);
            if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() &&
                !cgFunc->GetCG()->IsLibcore()) {
                /* For 1 additional EmitLazyBindingRoutine in lazybinding
                 * see function AArch64Insn::Emit in file aarch64_insn.cpp
                 */
                ++i;
            }
        }
    }
}
71
CalculateIfBBNum() const72 uint32 AArch64FixShortBranch::CalculateIfBBNum() const
73 {
74 uint32 ifBBCount = 0;
75 FOR_ALL_BB(bb, cgFunc)
76 {
77 if (bb->GetKind() != BB::kBBIf) {
78 ifBBCount++;
79 }
80 }
81 return ifBBCount;
82 }
83
// Finds conditional branches whose target may be beyond the reach encoded by
// AArch64Abi::kMaxInstrForCondBr and routes them through a jump pad
// (InsertJumpPad) so the conditional branch itself stays short.
void AArch64FixShortBranch::PatchLongBranch()
{
    AArch64CGFunc *aarch64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
    // Refresh insn ids so the distance estimates below are current.
    SetInsnId();
    uint32 ifBBCount = CalculateIfBBNum();
    for (BB *bb = aarch64CGFunc->GetFirstBB(); bb != nullptr; bb = bb->GetNext()) {
        if (bb->GetKind() != BB::kBBIf) {
            continue;
        }
        // Step over trailing immaterial insns to reach the real terminator.
        Insn *insn = bb->GetLastMachineInsn();
        while (insn != nullptr && insn->IsImmaterialInsn()) {
            insn = insn->GetPrev();
        }
        if (insn == nullptr || !insn->IsCondBranch()) {
            continue;
        }
        LabelIdx tbbLabelIdx = aarch64CGFunc->GetLabelInInsn(*insn);
        // when we change condbr to condbr and b, we will have more insns
        // in case the insn num will cause distance check not calculating right
        // we assume that each if bb will be changed(which is the worst case).
        if (ifBBCount <= AArch64Abi::kMaxInstrForCondBr &&
            aarch64CGFunc->DistanceCheck(*bb, tbbLabelIdx, insn->GetId(), AArch64Abi::kMaxInstrForCondBr - ifBBCount)) {
            continue;
        }
        aarch64CGFunc->InsertJumpPad(insn);
    }
}
111 /*
112 * TBZ/TBNZ instruction is generated under -O2, these branch instructions only have a range of +/-32KB.
113 * If the branch target is not reachable, we split tbz/tbnz into combination of ubfx and cbz/cbnz, which
114 * will clobber one extra register. With LSRA under -O2, we can use one of the reserved registers R16 for
115 * that purpose. To save compile time, we do this change when there are more than 32KB / 4 instructions
116 * in the function.
117 */
// Splits each out-of-range tbz/tbnz into "ubfx R16, reg, #bitPos, #1" followed
// by cbz/cbnz on R16 (see the range discussion in the comment above). After
// every rewrite the scan restarts, because inserting instructions can push
// other tbz/tbnz branches out of range; the loop runs until a full pass makes
// no change.
void AArch64FixShortBranch::FixShortBranches() const
{
    AArch64CGFunc *aarch64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
    bool change = false;
    do {
        change = false;
        // Recompute insn ids after each structural change for DistanceCheck.
        SetInsnId();
        for (auto *bb = aarch64CGFunc->GetFirstBB(); bb != nullptr && !change; bb = bb->GetNext()) {
            /* Do a backward scan searching for short branches */
            for (auto *insn = bb->GetLastInsn(); insn != nullptr && !change; insn = insn->GetPrev()) {
                if (!insn->IsMachineInstruction()) {
                    continue;
                }
                MOperator thisMop = insn->GetMachineOpcode();
                if (thisMop != MOP_wtbz && thisMop != MOP_wtbnz && thisMop != MOP_xtbz && thisMop != MOP_xtbnz) {
                    continue;
                }
                LabelOperand &label = static_cast<LabelOperand &>(insn->GetOperand(kInsnThirdOpnd));
                // Branch target still reachable within the tbz/tbnz range: leave it alone.
                if (aarch64CGFunc->DistanceCheck(*bb, label.GetLabelIndex(), insn->GetId(),
                                                 AArch64Abi::kMaxInstrForTbnz)) {
                    continue;
                }
                auto &reg = static_cast<RegOperand &>(insn->GetOperand(kInsnFirstOpnd));
                ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false);
                auto &bitPos = static_cast<ImmOperand &>(insn->GetOperand(kInsnSecondOpnd));
                // Pick the ubfx/cb pair matching the 32- or 64-bit variant and polarity.
                MOperator ubfxOp = MOP_undef;
                MOperator cbOp = MOP_undef;
                switch (thisMop) {
                    case MOP_wtbz:
                        ubfxOp = MOP_wubfxrri5i5;
                        cbOp = MOP_wcbz;
                        break;
                    case MOP_wtbnz:
                        ubfxOp = MOP_wubfxrri5i5;
                        cbOp = MOP_wcbnz;
                        break;
                    case MOP_xtbz:
                        ubfxOp = MOP_xubfxrri6i6;
                        cbOp = MOP_xcbz;
                        break;
                    case MOP_xtbnz:
                        ubfxOp = MOP_xubfxrri6i6;
                        cbOp = MOP_xcbnz;
                        break;
                    default:
                        CHECK_FATAL_FALSE("must be");
                        break;
                }
                // R16 is one of the reserved registers under -O2 LSRA (see the
                // comment above the function), so it is safe to clobber here.
                RegOperand &tmp = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(
                    R16, (ubfxOp == MOP_wubfxrri5i5) ? k32BitSize : k64BitSize, kRegTyInt);
                // Insert "ubfx tmp, reg, bitPos, 1" then "cb(n)z tmp, label" and
                // drop the original tbz/tbnz.
                (void)bb->InsertInsnAfter(*insn, cgFunc->GetInsnBuilder()->BuildInsn(cbOp, tmp, label));
                (void)bb->InsertInsnAfter(*insn,
                                          cgFunc->GetInsnBuilder()->BuildInsn(ubfxOp, tmp, reg, bitPos, bitSize));
                bb->RemoveInsn(*insn);
                change = true;
            }
        }
    } while (change);
}
178
GetLabelIdx(const Insn & insn)179 uint32 GetLabelIdx(const Insn &insn)
180 {
181 uint32 res = 0;
182 uint32 foundCount = 0;
183 for (uint32 i = 0; i < insn.GetOperandSize(); ++i) {
184 Operand &opnd = insn.GetOperand(i);
185 if (opnd.GetKind() == Operand::kOpdBBAddress) {
186 res = i;
187 foundCount++;
188 }
189 }
190 CHECK_FATAL(foundCount == 1, "check case");
191 return res;
192 }
193
// Hot/cold section splitting support: a conditional branch whose target lives
// in the other section crosses the section boundary and may be out of short
// range. Such branches are redirected through a jump pad placed at the end of
// the branch's own section (InsertJmpPadAtSecEnd).
void AArch64FixShortBranch::FixShortBranchesForSplitting()
{
    // Locate the section anchors (first cold BB, last BB) used for pad placement.
    InitSecEnd();
    FOR_ALL_BB(bb, cgFunc)
    {
        FOR_BB_INSNS(insn, bb)
        {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            if (insn->IsCondBranch()) {
                CHECK_FATAL(bb->GetKind() == BB::kBBIf, "CHECK bb TYPE");
                uint32 targetLabelIdx = GetLabelIdx(*insn);
                // NOTE(review): index 0 doubles as the failure sentinel here;
                // a condbr's first operand is presumably never the label — confirm.
                CHECK_FATAL(targetLabelIdx != 0, "get label failed in condition branch insn");
                auto &targetLabelOpnd = dynamic_cast<LabelOperand &>(insn->GetOperand(targetLabelIdx));
                BB *targetBB = cgFunc->GetBBFromLab2BBMap(targetLabelOpnd.GetLabelIndex());
                if (!targetBB) {
                    LogInfo::MapleLogger() << "ISSUE Func : " << cgFunc->GetName()
                                           << " ISSUE label: " << targetLabelOpnd.GetLabelIndex() << "\n";
                    CHECK_FATAL_FALSE("get Target bb from lab2bb map failed");
                }
                // Only branches whose source and target sit in different
                // sections need the pad.
                bool crossBoundary = bb->IsInColdSection() != targetBB->IsInColdSection();
                if (!crossBoundary) {
                    continue;
                }
                InsertJmpPadAtSecEnd(*insn, targetLabelIdx, *targetBB);
            }
        }
    }
}
224
// Creates a pad BB containing a single unconditional jump to @targetBB and
// retargets the conditional branch @insn (whose label sits at operand index
// @targetLabelIdx) to that pad. The pad is laid out at the end of the source
// BB's own section, so the conditional branch never has to cross the
// hot/cold boundary itself.
void AArch64FixShortBranch::InsertJmpPadAtSecEnd(Insn &insn, uint32 targetLabelIdx, BB &targetBB)
{
    BB *bb = insn.GetBB();
    BB *padBB = cgFunc->CreateNewBB();
    LabelIdx padBBLabel = cgFunc->CreateLabel();
    padBB->SetLabIdx(padBBLabel);
    cgFunc->SetLab2BBMap(padBBLabel, *padBB);

    // Pad body: "b <original target>".
    auto &targetLabelOpnd = dynamic_cast<LabelOperand &>(insn.GetOperand(targetLabelIdx));
    padBB->AppendInsn(cgFunc->GetInsnBuilder()->BuildInsn(MOP_xuncond, targetLabelOpnd));

    // Redirect the conditional branch to the pad.
    LabelOperand &padBBLabelOpnd = cgFunc->GetOrCreateLabelOperand(padBBLabel);
    insn.SetOperand(targetLabelIdx, padBBLabelOpnd);

    /* adjust CFG */
    bb->ReplaceSucc(targetBB, *padBB);
    targetBB.RemovePreds(*bb);
    targetBB.PushBackPreds(*padBB);
    padBB->PushBackPreds(*bb);
    padBB->PushBackSuccs(targetBB);
    /* adjust layout
     * hot section end -- boundary bb
     * cold section end -- last bb */
    if (!bb->IsInColdSection()) {
        // Hot-section branch: splice the pad just before the first cold BB and
        // make the pad the new boundary, keeping later pads inside the hot section.
        padBB->SetNext(boundaryBB);
        padBB->SetPrev(boundaryBB->GetPrev());
        boundaryBB->GetPrev()->SetNext(padBB);
        boundaryBB->SetPrev(padBB);
        boundaryBB = padBB;
    } else {
        // Cold-section branch: append the pad after the current layout tail.
        CHECK_FATAL(lastBB->GetNext() == nullptr, "must be");
        lastBB->SetNext(padBB);
        padBB->SetNext(nullptr);
        padBB->SetPrev(lastBB);
        lastBB = padBB;
        padBB->SetColdSection();
    }
}
263
InitSecEnd()264 void AArch64FixShortBranch::InitSecEnd()
265 {
266 FOR_ALL_BB(bb, cgFunc)
267 {
268 if (bb->IsInColdSection() && boundaryBB == nullptr) {
269 boundaryBB = bb;
270 }
271 if (bb->GetNext() == nullptr) {
272 CHECK_FATAL(lastBB == nullptr, " last bb exist");
273 lastBB = bb;
274 }
275 }
276 }
277
CheckFunctionSize(uint32 maxSize) const278 bool AArch64FixShortBranch::CheckFunctionSize(uint32 maxSize) const
279 {
280 uint32 firstInsnId = 0;
281 uint32 lastInsnId = UINT32_MAX;
282 bool findLast = false;
283 bool findFirst = false;
284
285 for (auto *bb = cgFunc->GetLastBB(); bb != nullptr && !findLast; bb = bb->GetPrev()) {
286 for (auto *insn = bb->GetLastInsn(); insn != nullptr && !findLast; insn = insn->GetPrev()) {
287 if (!insn->IsMachineInstruction() || insn->IsImmaterialInsn()) {
288 continue;
289 }
290 findLast = true;
291 lastInsnId = insn->GetId();
292 break;
293 }
294 }
295
296 for (auto *bb = cgFunc->GetFirstBB(); bb != nullptr && !findFirst; bb = bb->GetNext()) {
297 for (auto *insn = bb->GetFirstInsn(); insn != nullptr && !findFirst; insn = insn->GetNext()) {
298 if (!insn->IsMachineInstruction() || insn->IsImmaterialInsn()) {
299 continue;
300 }
301 findFirst = true;
302 firstInsnId = insn->GetId();
303 break;
304 }
305 }
306 return (lastInsnId - firstInsnId + 1) <= maxSize;
307 }
308
// when func size >= kMaxInstrForLdr
// ldr R1, .L.4__5
// .L_x: ...
// =>
// adrp x1, .L.4__5
// add x1, x1, :lo12:.L.4__5
// ldr x1, [x1]
void AArch64FixShortBranch::FixLdr()
{
    AArch64CGFunc *aarch64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
    // Assign ids so CheckFunctionSize can measure the function's insn span.
    SetInsnId();
    // Small functions keep the literal ldr: its range is sufficient.
    if (CheckFunctionSize(AArch64Abi::kMaxInstrForLdr)) {
        return;
    }
    FOR_ALL_BB(bb, cgFunc)
    {
        FOR_BB_INSNS(insn, bb)
        {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            if (insn->GetMachineOpcode() == MOP_xldli && insn->GetOperand(kInsnSecondOpnd).IsLabelOpnd()) {
                // ldr -> adrp + add
                // The existing insn is mutated in place into MOP_adrp_label with
                // the label index as an immediate (presumably expanded to the
                // adrp/add pair by the emitter — see MOP_adrp_label handling).
                auto &regOpnd = static_cast<RegOperand &>(insn->GetOperand(kInsnFirstOpnd));
                auto &labelOpnd = static_cast<LabelOperand &>(insn->GetOperand(kInsnSecondOpnd));
                Operand &immOpnd = aarch64CGFunc->CreateImmOperand(labelOpnd.GetLabelIndex(), k64BitSize, false);
                insn->SetOperand(kInsnSecondOpnd, immOpnd);
                insn->SetMOP(AArch64CG::kMd[MOP_adrp_label]);
                // ldr x1, [x1]
                MemOperand *newDest = aarch64CGFunc->CreateMemOperand(
                    k64BitSize, regOpnd, aarch64CGFunc->CreateImmOperand(0, k32BitSize, false), false);
                auto *newRegOpnd = static_cast<RegOperand *>(regOpnd.Clone(*aarch64CGFunc->GetMemoryPool()));
                Insn &ldrInsn = aarch64CGFunc->GetInsnBuilder()->BuildInsn(MOP_xldr, *newRegOpnd, *newDest);
                (void)bb->InsertInsnAfter(*insn, ldrInsn);
            }
        }
    }
}
347
PhaseRun(maplebe::CGFunc & f)348 bool CgFixShortBranch::PhaseRun(maplebe::CGFunc &f)
349 {
350 auto *fixShortBranch = GetPhaseAllocator()->New<AArch64FixShortBranch>(&f);
351 CHECK_FATAL(fixShortBranch != nullptr, "AArch64FixShortBranch instance create failure");
352 fixShortBranch->FixShortBranches();
353
354 // fix ldr would cause insn num increasing, do ldr fix first.
355 fixShortBranch->FixLdr();
356 fixShortBranch->PatchLongBranch();
357 return false;
358 }
// Register this transform with the phase manager under the name "fixshortbranch".
MAPLE_TRANSFORM_PHASE_REGISTER(CgFixShortBranch, fixshortbranch)
360 } /* namespace maplebe */
361