/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_alignment.h"
#include "insn.h"
#include "loop.h"
#include "aarch64_cg.h"
#include "cg_option.h"
#include <unordered_map>

namespace maplebe {
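/* Collect the header BB of each loop in the function as a candidate for loop alignment. */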
void AArch64AlignAnalysis::FindLoopHeader()
{
    MapleVector<CGFuncLoops *> loops = aarFunc->GetLoops();
    if (loops.empty()) {
        return;
    }
    for (const auto *loop : loops) {
        const BB *header = loop->GetHeader();
        if (header != nullptr) {
            InsertLoopHeaderBBs(const_cast<BB &>(*header));
        }
    }
}

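/* Collect every BB that carries a label, i.e. every potential jump target, as a candidate for jump alignment. */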
void AArch64AlignAnalysis::FindJumpTarget()
{
    MapleUnorderedMap<LabelIdx, BB *> label2BBMap = aarFunc->GetLab2BBMap();
    if (label2BBMap.empty()) {
        return;
    }
    for (auto &iter : label2BBMap) {
        BB *jumpBB = iter.second;
        if (jumpBB != nullptr) {
            InsertJumpTargetBBs(*jumpBB);
        }
    }
}

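/* A BB that contains a call is excluded from loop alignment (see ComputeLoopAlign). */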
bool AArch64AlignAnalysis::IsIncludeCall(BB &bb)
{
    return bb.HasCall();
}

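/**
 * Estimate the size of the BB, together with its label-free fallthrough successors, in units of
 * kAlignInsnLength, and accept the BB for alignment only if that size lies strictly between the
 * configured minimum and maximum thresholds.
 */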
bool AArch64AlignAnalysis::IsInSizeRange(BB &bb)
{
    uint64 size = 0;
    FOR_BB_INSNS_CONST(insn, &bb) {
        if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
            insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
            continue;
        }
        size += kAlignInsnLength;
    }
    BB *curBB = &bb;
    while (curBB->GetNext() != nullptr && curBB->GetNext()->GetLabIdx() == 0) {
        FOR_BB_INSNS_CONST(insn, curBB->GetNext()) {
            if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
                insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
                continue;
            }
            size += kAlignInsnLength;
        }
        curBB = curBB->GetNext();
    }
    AArch64AlignInfo targetInfo;
    if (CGOptions::GetAlignMinBBSize() == 0 || CGOptions::GetAlignMaxBBSize() == 0) {
        return false;
    }
    constexpr uint32 defaultMinBBSize = 16;
    constexpr uint32 defaultMaxBBSize = 44;
    targetInfo.alignMinBBSize = (CGOptions::OptimizeForSize()) ? defaultMinBBSize : CGOptions::GetAlignMinBBSize();
    targetInfo.alignMaxBBSize = (CGOptions::OptimizeForSize()) ? defaultMaxBBSize : CGOptions::GetAlignMaxBBSize();
    if (size <= targetInfo.alignMinBBSize || size >= targetInfo.alignMaxBBSize) {
        return false;
    }
    return true;
}

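/* Return true if the BB can be entered by falling through from its layout predecessor. */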
bool AArch64AlignAnalysis::HasFallthruEdge(BB &bb)
{
    for (auto *iter : bb.GetPreds()) {
        if (iter == bb.GetPrev()) {
            return true;
        }
    }
    return false;
}

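/**
 * Mark loop-header BBs for alignment. A header is skipped if it is the entry BB, contains a call, or is
 * outside the size range; otherwise it is tagged with the smallest alignment power recorded for it so far.
 */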
void AArch64AlignAnalysis::ComputeLoopAlign()
{
    if (loopHeaderBBs.empty()) {
        return;
    }
    for (BB *bb : loopHeaderBBs) {
        if (bb == cgFunc->GetFirstBB() || IsIncludeCall(*bb) || !IsInSizeRange(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetLoopAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.loopAlign = CGOptions::GetLoopAlignPow();
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.loopAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.loopAlign < curPower) ? targetInfo.loopAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

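/**
 * Mark jump-target BBs for alignment. A target is skipped if it is the entry BB, is outside the size
 * range, or can be reached by falling through (padding in front of it would be executed).
 */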
void AArch64AlignAnalysis::ComputeJumpAlign()
{
    if (jumpTargetBBs.empty()) {
        return;
    }
    for (BB *bb : jumpTargetBBs) {
        if (bb == cgFunc->GetFirstBB() || !IsInSizeRange(*bb) || HasFallthruEdge(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetJumpAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.jumpAlign = (CGOptions::OptimizeForSize()) ? kOffsetAlignmentOf64Bit : CGOptions::GetJumpAlignPow();
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.jumpAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.jumpAlign < curPower) ? targetInfo.jumpAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

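/**
 * Compute the number of nops needed after the addr-th instruction so that the following instruction
 * starts on an alignedVal-byte boundary; addr == 0 needs no padding.
 */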
uint32 AArch64AlignAnalysis::GetAlignRange(uint32 alignedVal, uint32 addr) const
{
    if (addr == 0) {
        return addr;
    }
    uint32 range = (alignedVal - (((addr - 1) * kInsnSize) & (alignedVal - 1))) / kInsnSize - 1;
    return range;
}

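/* Check whether the instructions at indexes addr1 and addr2 fall into the same alignedRegionSize-byte region. */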
bool AArch64AlignAnalysis::IsInSameAlignedRegion(uint32 addr1, uint32 addr2, uint32 alignedRegionSize) const
{
    return (((addr1 - 1) * kInsnSize) / alignedRegionSize) == (((addr2 - 1) * kInsnSize) / alignedRegionSize);
}

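/**
 * One marking pass over all BBs: recompute every instruction address, accounting for alignment nops and
 * split tbz/tbnz instructions, and for conditional branches that share a target record the number of nops
 * needed to keep the two branches out of the same aligned region. Returns true if any new nops were marked.
 */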
bool AArch64AlignAnalysis::MarkCondBranchAlign()
{
    sameTargetBranches.clear();
    uint32 addr = 0;
    bool change = false;
    FOR_ALL_BB(bb, aarFunc) {
        if (bb != nullptr && bb->IsBBNeedAlign()) {
            uint32 alignedVal = (1U << bb->GetAlignPower());
            uint32 alignNopNum = GetAlignRange(alignedVal, addr);
            addr += alignNopNum;
            bb->SetAlignNopNum(alignNopNum);
        }
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            addr += insn->GetAtomicNum();
            MOperator mOp = insn->GetMachineOpcode();
            if ((mOp == MOP_wtbz || mOp == MOP_wtbnz || mOp == MOP_xtbz || mOp == MOP_xtbnz) && insn->IsNeedSplit()) {
                ++addr;
            }
            if (!insn->IsCondBranch() || insn->GetOperandSize() == 0) {
                insn->SetAddress(addr);
                continue;
            }
            Operand &opnd = insn->GetOperand(insn->GetOperandSize() - 1);
            if (!opnd.IsLabelOpnd()) {
                insn->SetAddress(addr);
                continue;
            }
            LabelIdx targetIdx = static_cast<LabelOperand &>(opnd).GetLabelIndex();
            if (sameTargetBranches.find(targetIdx) == sameTargetBranches.end()) {
                sameTargetBranches[targetIdx] = addr;
                insn->SetAddress(addr);
                continue;
            }
            uint32 sameTargetAddr = sameTargetBranches[targetIdx];
            uint32 alignedRegionSize = 1 << kAlignRegionPower;
            /**
             * If two branches jump to the same target and their addresses are within the same 16-byte
             * aligned region, add a certain number of [nop] to move the later branch out of the region.
             */
            if (IsInSameAlignedRegion(sameTargetAddr, addr, alignedRegionSize)) {
                uint32 nopNum = GetAlignRange(alignedRegionSize, addr) + 1;
                nopNum = nopNum > kAlignMaxNopNum ? 0 : nopNum;
                if (nopNum == 0) {
                    break;
                }
                change = true;
                insn->SetNopNum(nopNum);
                for (uint32 i = 0; i < nopNum; i++) {
                    addr += insn->GetAtomicNum();
                }
            } else {
                insn->SetNopNum(0);
            }
            sameTargetBranches[targetIdx] = addr;
            insn->SetAddress(addr);
        }
    }
    return change;
}

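/**
 * Renumber all instructions, accounting for alignment padding, marked nops, split tbz/tbnz instructions,
 * and the extra instruction that adrp_ldr expands to under lazy binding.
 */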
void AArch64AlignAnalysis::UpdateInsnId()
{
    uint32 id = 0;
    FOR_ALL_BB(bb, aarFunc) {
        if (bb != nullptr && bb->IsBBNeedAlign()) {
            uint32 alignedVal = 1U << (bb->GetAlignPower());
            uint32 range = GetAlignRange(alignedVal, id);
            id = id + (range > kAlignPseudoSize ? range : kAlignPseudoSize);
        }
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            id += insn->GetAtomicNum();
            if (insn->IsCondBranch() && insn->GetNopNum() != 0) {
                id += insn->GetNopNum();
            }
            MOperator mOp = insn->GetMachineOpcode();
            if ((mOp == MOP_wtbz || mOp == MOP_wtbnz || mOp == MOP_xtbz || mOp == MOP_xtbnz) && insn->IsNeedSplit()) {
                ++id;
            }
            insn->SetId(id);
            if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() &&
                !aarFunc->GetCG()->IsLibcore()) {
                ++id;
            }
        }
    }
}

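/**
 * Iterate until no further tbz/tbnz branch is out of range: renumber the instructions, then mark the
 * first short branch whose target fails the distance check as needing a split, and start over.
 * Returns true if any branch was marked.
 */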
bool AArch64AlignAnalysis::MarkShortBranchSplit()
{
    bool change = false;
    bool split;
    do {
        split = false;
        UpdateInsnId();
        for (auto *bb = aarFunc->GetFirstBB(); bb != nullptr && !split; bb = bb->GetNext()) {
            for (auto *insn = bb->GetLastInsn(); insn != nullptr && !split; insn = insn->GetPrev()) {
                if (!insn->IsMachineInstruction()) {
                    continue;
                }
                MOperator mOp = insn->GetMachineOpcode();
                if (mOp != MOP_wtbz && mOp != MOP_wtbnz && mOp != MOP_xtbz && mOp != MOP_xtbnz) {
                    continue;
                }
                if (insn->IsNeedSplit()) {
                    continue;
                }
                auto &labelOpnd = static_cast<LabelOperand &>(insn->GetOperand(kInsnThirdOpnd));
                if (aarFunc->DistanceCheck(*bb, labelOpnd.GetLabelIndex(), insn->GetId())) {
                    continue;
                }
                split = true;
                change = true;
                insn->SetNeedSplit(split);
            }
        }
    } while (split);
    return change;
}

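/* Materialize the marked nops, preferring an unreachable island over the branch's original position. */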
void AArch64AlignAnalysis::AddNopAfterMark()
{
    FOR_ALL_BB(bb, aarFunc) {
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction() || !insn->IsCondBranch() || insn->GetNopNum() == 0) {
                continue;
            }
            /**
             * To minimize the performance cost of the nops, we prefer to place them on an island before
             * the current address, i.e. right after an unconditional transfer [b, ret, br], where they
             * are never executed. To keep the insertion correct, the nops stay at the original position
             * in the following cases:
             * 1. a branch with the same target exists before the current branch;
             * 2. a branch whose nopNum value is not 0 exists before the current branch;
             * 3. a BB that needs to be aligned lies between the original location and the island.
             */
            std::unordered_map<LabelIdx, Insn *> targetCondBrs;
            bool findIsland = false;
            Insn *detect = insn->GetPrev();
            BB *region = bb;
            while (detect != nullptr || region != aarFunc->GetFirstBB()) {
                while (detect == nullptr) {
                    DEBUG_ASSERT(region->GetPrev() != nullptr, "get region prev failed");
                    region = region->GetPrev();
                    detect = region->GetLastInsn();
                }
                if (detect->GetMachineOpcode() == MOP_xuncond || detect->GetMachineOpcode() == MOP_xret ||
                    detect->GetMachineOpcode() == MOP_xbr) {
                    findIsland = true;
                    break;
                }
                if (region->IsBBNeedAlign()) {
                    break;
                }
                if (!detect->IsMachineInstruction() || !detect->IsCondBranch() || detect->GetOperandSize() == 0) {
                    detect = detect->GetPrev();
                    continue;
                }
                if (detect->GetNopNum() != 0) {
                    break;
                }
                Operand &opnd = detect->GetOperand(detect->GetOperandSize() - 1);
                if (!opnd.IsLabelOpnd()) {
                    detect = detect->GetPrev();
                    continue;
                }
                LabelIdx targetIdx = static_cast<LabelOperand &>(opnd).GetLabelIndex();
                if (targetCondBrs.find(targetIdx) != targetCondBrs.end()) {
                    break;
                }
                targetCondBrs[targetIdx] = detect;
                detect = detect->GetPrev();
            }
            uint32 nopNum = insn->GetNopNum();
            if (findIsland) {
                for (uint32 i = 0; i < nopNum; i++) {
                    (void)bb->InsertInsnAfter(*detect, aarFunc->GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_nop));
                }
            } else {
                for (uint32 i = 0; i < nopNum; i++) {
                    (void)bb->InsertInsnBefore(*insn, aarFunc->GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_nop));
                }
            }
        }
    }
}

/**
 * The insertion of nops affects the addressing-range check of short branches, and the splitting of short
 * branches in turn affects the position and number of nops to insert. The two passes therefore only set
 * marks and iterate until a fixed point is reached; the nops are filled in afterwards.
 */
void AArch64AlignAnalysis::ComputeCondBranchAlign()
{
    bool condBrChange = false;
    bool shortBrChange = false;
    while (true) {
        condBrChange = MarkCondBranchAlign();
        if (!condBrChange) {
            break;
        }
        shortBrChange = MarkShortBranchSplit();
        if (!shortBrChange) {
            break;
        }
    }
    AddNopAfterMark();
}
} /* namespace maplebe */