/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_alignment.h"
#include "insn.h"
#include "loop.h"
#include "aarch64_cg.h"
#include "cg_option.h"
#include <unordered_map>

namespace maplebe {
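/* Collect the header BB of each loop in the function as a candidate for loop alignment. */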
void AArch64AlignAnalysis::FindLoopHeader()
{
    MapleVector<CGFuncLoops *> loops = aarFunc->GetLoops();
    if (loops.empty()) {
        return;
    }
    for (const auto *loop : loops) {
        const BB *header = loop->GetHeader();
        if (header != nullptr) {
            InsertLoopHeaderBBs(const_cast<BB &>(*header));
        }
    }
}

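/* Collect every BB that is the target of a label (i.e. a potential jump target) as a candidate for jump alignment. */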
void AArch64AlignAnalysis::FindJumpTarget()
{
    MapleUnorderedMap<LabelIdx, BB *> label2BBMap = aarFunc->GetLab2BBMap();
    if (label2BBMap.empty()) {
        return;
    }
    for (auto &iter : label2BBMap) {
        BB *jumpBB = iter.second;
        if (jumpBB != nullptr) {
            InsertJumpTargetBBs(*jumpBB);
        }
    }
}

bool AArch64AlignAnalysis::IsIncludeCall(BB &bb)
{
    return bb.HasCall();
}

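/*
 * Estimate the size of bb together with its label-free fallthrough successors, counting
 * kAlignInsnLength per machine insn, and require it to lie strictly between the configured
 * minimum and maximum BB sizes: aligning very small or very large blocks is not profitable.
 */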
bool AArch64AlignAnalysis::IsInSizeRange(BB &bb)
{
    uint64 size = 0;
    FOR_BB_INSNS_CONST(insn, &bb) {
        if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
            insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
            continue;
        }
        size += kAlignInsnLength;
    }
    BB *curBB = &bb;
    while (curBB->GetNext() != nullptr && curBB->GetNext()->GetLabIdx() == 0) {
        FOR_BB_INSNS_CONST(insn, curBB->GetNext()) {
            if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
                insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
                continue;
            }
            size += kAlignInsnLength;
        }
        curBB = curBB->GetNext();
    }
    AArch64AlignInfo targetInfo;
    if (CGOptions::GetAlignMinBBSize() == 0 || CGOptions::GetAlignMaxBBSize() == 0) {
        return false;
    }
    constexpr uint32 defaultMinBBSize = 16;
    constexpr uint32 defaultMaxBBSize = 44;
    targetInfo.alignMinBBSize = (CGOptions::OptimizeForSize()) ? defaultMinBBSize : CGOptions::GetAlignMinBBSize();
    targetInfo.alignMaxBBSize = (CGOptions::OptimizeForSize()) ? defaultMaxBBSize : CGOptions::GetAlignMaxBBSize();
    if (size <= targetInfo.alignMinBBSize || size >= targetInfo.alignMaxBBSize) {
        return false;
    }
    return true;
}

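/* A BB has a fallthrough edge if its lexical predecessor is also one of its CFG predecessors. */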
bool AArch64AlignAnalysis::HasFallthruEdge(BB &bb)
{
    for (auto *iter : bb.GetPreds()) {
        if (iter == bb.GetPrev()) {
            return true;
        }
    }
    return false;
}

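/*
 * Mark loop-header BBs for alignment, skipping the function entry, BBs that contain calls, and
 * BBs outside the configured size range. If a BB already has an alignment power recorded, the
 * smaller power is kept.
 */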
void AArch64AlignAnalysis::ComputeLoopAlign()
{
    if (loopHeaderBBs.empty()) {
        return;
    }
    for (BB *bb : loopHeaderBBs) {
        if (bb == cgFunc->GetFirstBB() || IsIncludeCall(*bb) || !IsInSizeRange(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetLoopAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.loopAlign = CGOptions::GetLoopAlignPow();
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.loopAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.loopAlign < curPower) ? targetInfo.loopAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

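/*
 * Mark jump-target BBs for alignment, skipping the function entry, BBs outside the configured
 * size range, and BBs that are also reached by fallthrough (padding in front of a fallthrough
 * target would be executed on the straight-line path). The smaller alignment power wins here too.
 */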
void AArch64AlignAnalysis::ComputeJumpAlign()
{
    if (jumpTargetBBs.empty()) {
        return;
    }
    for (BB *bb : jumpTargetBBs) {
        if (bb == cgFunc->GetFirstBB() || !IsInSizeRange(*bb) || HasFallthruEdge(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetJumpAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.jumpAlign = (CGOptions::OptimizeForSize()) ? kOffsetAlignmentOf64Bit : CGOptions::GetJumpAlignPow();
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.jumpAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.jumpAlign < curPower) ? targetInfo.jumpAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

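/*
 * Return the number of nop slots needed after the insn with index addr so that the next insn
 * starts on an alignedVal-byte boundary. A worked example, assuming kInsnSize is 4 (fixed-width
 * AArch64 encoding) and alignedVal is 16: for addr = 2, the insn starts at byte (2 - 1) * 4 = 4,
 * so range = (16 - (4 & 15)) / 4 - 1 = 2; the insn ends at byte 8, and two nops (bytes 8..15)
 * place the following insn exactly on the 16-byte boundary.
 */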
uint32 AArch64AlignAnalysis::GetAlignRange(uint32 alignedVal, uint32 addr) const
{
    if (addr == 0) {
        return addr;
    }
    uint32 range = (alignedVal - (((addr - 1) * kInsnSize) & (alignedVal - 1))) / kInsnSize - 1;
    return range;
}

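/*
 * Insn indices addr1 and addr2 are in the same aligned region if their byte addresses,
 * (index - 1) * kInsnSize, fall into the same alignedRegionSize-byte window.
 */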
bool AArch64AlignAnalysis::IsInSameAlignedRegion(uint32 addr1, uint32 addr2, uint32 alignedRegionSize) const
{
    return (((addr1 - 1) * kInsnSize) / alignedRegionSize) == (((addr2 - 1) * kInsnSize) / alignedRegionSize);
}

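/*
 * Assign an address (in insn slots) to every insn, accounting for alignment nops and split
 * tbz/tbnz insns, and mark conditional branches that need nop padding to separate two branches
 * with the same target. Returns true if any branch was marked for nop padding in this pass.
 */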
bool AArch64AlignAnalysis::MarkCondBranchAlign()
{
    sameTargetBranches.clear();
    uint32 addr = 0;
    bool change = false;
    FOR_ALL_BB(bb, aarFunc) {
        if (bb != nullptr && bb->IsBBNeedAlign()) {
            uint32 alignedVal = (1U << bb->GetAlignPower());
            uint32 alignNopNum = GetAlignRange(alignedVal, addr);
            addr += alignNopNum;
            bb->SetAlignNopNum(alignNopNum);
        }
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            addr += insn->GetAtomicNum();
            MOperator mOp = insn->GetMachineOpcode();
            if ((mOp == MOP_wtbz || mOp == MOP_wtbnz || mOp == MOP_xtbz || mOp == MOP_xtbnz) && insn->IsNeedSplit()) {
                ++addr;
            }
            if (!insn->IsCondBranch() || insn->GetOperandSize() == 0) {
                insn->SetAddress(addr);
                continue;
            }
            Operand &opnd = insn->GetOperand(insn->GetOperandSize() - 1);
            if (!opnd.IsLabelOpnd()) {
                insn->SetAddress(addr);
                continue;
            }
            LabelIdx targetIdx = static_cast<LabelOperand &>(opnd).GetLabelIndex();
            if (sameTargetBranches.find(targetIdx) == sameTargetBranches.end()) {
                sameTargetBranches[targetIdx] = addr;
                insn->SetAddress(addr);
                continue;
            }
            uint32 sameTargetAddr = sameTargetBranches[targetIdx];
            uint32 alignedRegionSize = 1 << kAlignRegionPower;
            /**
             * If two branches jump to the same target and their addresses fall within the same
             * 16-byte aligned region, insert enough [nop] insns to move the later branch out of
             * that region.
             */
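            /*
             * A sketch of the effect, assuming kAlignRegionPower is 4 (16-byte regions) and 4-byte
             * insns: two branches to the same label at byte offsets 4 and 12 share the region
             * [0, 16); one nop pushes the second branch to offset 16, into the next region.
             */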
            if (IsInSameAlignedRegion(sameTargetAddr, addr, alignedRegionSize)) {
                uint32 nopNum = GetAlignRange(alignedRegionSize, addr) + 1;
                nopNum = nopNum > kAlignMaxNopNum ? 0 : nopNum;
                if (nopNum == 0) {
                    break;
                }
                change = true;
                insn->SetNopNum(nopNum);
                for (uint32 i = 0; i < nopNum; i++) {
                    addr += insn->GetAtomicNum();
                }
            } else {
                insn->SetNopNum(0);
            }
            sameTargetBranches[targetIdx] = addr;
            insn->SetAddress(addr);
        }
    }
    return change;
}

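/*
 * Renumber all insns, reserving extra ids for alignment nops, pending tbz/tbnz splits, and the
 * extra insn that adrp_ldr expands to under lazy binding, so that an insn's id tracks its
 * eventual position for the distance check in MarkShortBranchSplit.
 */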
void AArch64AlignAnalysis::UpdateInsnId()
{
    uint32 id = 0;
    FOR_ALL_BB(bb, aarFunc) {
        if (bb != nullptr && bb->IsBBNeedAlign()) {
            uint32 alignedVal = 1U << (bb->GetAlignPower());
            uint32 range = GetAlignRange(alignedVal, id);
            id = id + (range > kAlignPseudoSize ? range : kAlignPseudoSize);
        }
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            id += insn->GetAtomicNum();
            if (insn->IsCondBranch() && insn->GetNopNum() != 0) {
                id += insn->GetNopNum();
            }
            MOperator mOp = insn->GetMachineOpcode();
            if ((mOp == MOP_wtbz || mOp == MOP_wtbnz || mOp == MOP_xtbz || mOp == MOP_xtbnz) && insn->IsNeedSplit()) {
                ++id;
            }
            insn->SetId(id);
            if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() &&
                !aarFunc->GetCG()->IsLibcore()) {
                ++id;
            }
        }
    }
}

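/*
 * Iterate to a fixed point: renumber insns, then mark the first tbz/tbnz whose target fails
 * DistanceCheck as needing a split; every split lengthens the code, so renumber and re-scan
 * until no new insn gets marked. Returns true if any insn was marked.
 */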
bool AArch64AlignAnalysis::MarkShortBranchSplit()
{
    bool change = false;
    bool split;
    do {
        split = false;
        UpdateInsnId();
        for (auto *bb = aarFunc->GetFirstBB(); bb != nullptr && !split; bb = bb->GetNext()) {
            for (auto *insn = bb->GetLastInsn(); insn != nullptr && !split; insn = insn->GetPrev()) {
                if (!insn->IsMachineInstruction()) {
                    continue;
                }
                MOperator mOp = insn->GetMachineOpcode();
                if (mOp != MOP_wtbz && mOp != MOP_wtbnz && mOp != MOP_xtbz && mOp != MOP_xtbnz) {
                    continue;
                }
                if (insn->IsNeedSplit()) {
                    continue;
                }
                auto &labelOpnd = static_cast<LabelOperand &>(insn->GetOperand(kInsnThirdOpnd));
                if (aarFunc->DistanceCheck(*bb, labelOpnd.GetLabelIndex(), insn->GetId())) {
                    continue;
                }
                split = true;
                change = true;
                insn->SetNeedSplit(split);
            }
        }
    } while (split);
    return change;
}

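/*
 * Materialize the nops recorded by MarkCondBranchAlign. For each marked conditional branch, scan
 * backwards for an island (see the comment below) where the nops are never executed; if none is
 * usable, insert them directly before the branch.
 */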
void AArch64AlignAnalysis::AddNopAfterMark()
{
    FOR_ALL_BB(bb, aarFunc) {
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction() || !insn->IsCondBranch() || insn->GetNopNum() == 0) {
                continue;
            }
            /**
             * To minimize the performance cost of the nops, we place them on an island before the
             * current addr: the unreachable spot just after an unconditional transfer [b, ret, br].
             * To keep the address bookkeeping correct, the nops stay at their original position in
             * the following cases:
             * 1. A branch with the same target exists before the marked branch.
             * 2. A branch whose nopNum is not 0 exists before the marked branch.
             * 3. A BB that needs alignment lies between the original location and the island.
             */
            std::unordered_map<LabelIdx, Insn *> targetCondBrs;
            bool findIsland = false;
            Insn *detect = insn->GetPrev();
            BB *region = bb;
            while (detect != nullptr || region != aarFunc->GetFirstBB()) {
                while (detect == nullptr) {
                    DEBUG_ASSERT(region->GetPrev() != nullptr, "get region prev failed");
                    region = region->GetPrev();
                    detect = region->GetLastInsn();
                }
                if (detect->GetMachineOpcode() == MOP_xuncond || detect->GetMachineOpcode() == MOP_xret ||
                    detect->GetMachineOpcode() == MOP_xbr) {
                    findIsland = true;
                    break;
                }
                if (region->IsBBNeedAlign()) {
                    break;
                }
                if (!detect->IsMachineInstruction() || !detect->IsCondBranch() || detect->GetOperandSize() == 0) {
                    detect = detect->GetPrev();
                    continue;
                }
                if (detect->GetNopNum() != 0) {
                    break;
                }
                Operand &opnd = detect->GetOperand(detect->GetOperandSize() - 1);
                if (!opnd.IsLabelOpnd()) {
                    detect = detect->GetPrev();
                    continue;
                }
                LabelIdx targetIdx = static_cast<LabelOperand &>(opnd).GetLabelIndex();
                if (targetCondBrs.find(targetIdx) != targetCondBrs.end()) {
                    break;
                }
                targetCondBrs[targetIdx] = detect;
                detect = detect->GetPrev();
            }
            uint32 nopNum = insn->GetNopNum();
            if (findIsland) {
                for (uint32 i = 0; i < nopNum; i++) {
                    (void)bb->InsertInsnAfter(*detect, aarFunc->GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_nop));
                }
            } else {
                for (uint32 i = 0; i < nopNum; i++) {
                    (void)bb->InsertInsnBefore(*insn, aarFunc->GetInsnBuilder()->BuildInsn<AArch64CG>(MOP_nop));
                }
            }
        }
    }
}

/**
 * Inserting nops changes whether a short branch still reaches its target, and splitting a short
 * branch in turn changes where and how many nops must be inserted. The two passes therefore only
 * set marks and iterate until a fixed point is reached; the nops are filled in at the end.
 */
void AArch64AlignAnalysis::ComputeCondBranchAlign()
{
    bool condBrChange = false;
    bool shortBrChange = false;
    while (true) {
        condBrChange = MarkCondBranchAlign();
        if (!condBrChange) {
            break;
        }
        shortBrChange = MarkShortBranchSplit();
        if (!shortBrChange) {
            break;
        }
    }
    AddNopAfterMark();
}
} /* namespace maplebe */