/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_alignment.h"
#include "insn.h"
#include "loop.h"
#include "aarch64_cg.h"
#include "cg_option.h"
#include <unordered_map>

namespace maplebe {
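/* Collect loop header BBs that are alignment candidates: the function's entry
 * BB, headers containing calls, and headers outside the configured size range
 * are skipped. */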
void AArch64AlignAnalysis::FindLoopHeader()
{
    if (loopInfo.GetLoops().empty()) {
        return;
    }
    for (auto *loop : loopInfo.GetLoops()) {
        BB &header = loop->GetHeader();
        if (&header == aarFunc->GetFirstBB() || IsIncludeCall(header) || !IsInSizeRange(header)) {
            continue;
        }
        InsertLoopHeaderBBs(header);
    }
}

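/* Collect every BB that carries a label, i.e. every potential jump target. */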
void AArch64AlignAnalysis::FindJumpTarget()
{
    const MapleUnorderedMap<LabelIdx, BB *> &label2BBMap = aarFunc->GetLab2BBMap();
    if (label2BBMap.empty()) {
        return;
    }
    for (const auto &iter : label2BBMap) {
        BB *jumpBB = iter.second;
        if (jumpBB != nullptr) {
            InsertJumpTargetBBs(*jumpBB);
        }
    }
}

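/* Returns true if the BB contains a call instruction. */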
bool AArch64AlignAnalysis::IsIncludeCall(BB &bb)
{
    return bb.HasCall();
}

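/* Estimate the code size of bb plus the label-less BBs laid out directly
 * behind it (counting kAlignInsnLength per machine instruction, pseudo return
 * ops excluded) and test it against the configured (min, max) window. */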
bool AArch64AlignAnalysis::IsInSizeRange(BB &bb)
{
    uint64 size = 0;
    FOR_BB_INSNS_CONST(insn, &bb) {
        if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
            insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
            continue;
        }
        size += kAlignInsnLength;
    }
    /* Successors without a label can only be entered by falling through, so
     * they form one straight-line region with bb and are counted as well. */
    BB *curBB = &bb;
    while (curBB->GetNext() != nullptr && curBB->GetNext()->GetLabIdx() == 0) {
        FOR_BB_INSNS_CONST(insn, curBB->GetNext()) {
            if (!insn->IsMachineInstruction() || insn->GetMachineOpcode() == MOP_pseudo_ret_int ||
                insn->GetMachineOpcode() == MOP_pseudo_ret_float) {
                continue;
            }
            size += kAlignInsnLength;
        }
        curBB = curBB->GetNext();
    }
    AArch64AlignInfo targetInfo;
    if (CGOptions::GetAlignMinBBSize() == 0 || CGOptions::GetAlignMaxBBSize() == 0) {
        return false;
    }
    constexpr uint32 defaultMinBBSize = 16;
    constexpr uint32 defaultMaxBBSize = 44;
    targetInfo.alignMinBBSize = CGOptions::OptimizeForSize() ? defaultMinBBSize : CGOptions::GetAlignMinBBSize();
    targetInfo.alignMaxBBSize = CGOptions::OptimizeForSize() ? defaultMaxBBSize : CGOptions::GetAlignMaxBBSize();
    return size > targetInfo.alignMinBBSize && size < targetInfo.alignMaxBBSize;
}

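/* Returns true if control can fall through into bb from its layout
 * predecessor; alignment padding in front of such a BB would be executed on
 * the fall-through path. */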
bool AArch64AlignAnalysis::HasFallthruEdge(BB &bb)
{
    for (auto *iter : bb.GetPreds()) {
        if (iter == bb.GetPrev()) {
            return true;
        }
    }
    return false;
}

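/* Mark the collected loop headers for alignment and record the loop alignment
 * power for each of them. */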
void AArch64AlignAnalysis::ComputeLoopAlign()
{
    if (loopHeaderBBs.empty()) {
        return;
    }
    for (BB *bb : loopHeaderBBs) {
        if (bb == cgFunc->GetFirstBB() || IsIncludeCall(*bb) || !IsInSizeRange(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetLoopAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.loopAlign = CGOptions::GetLoopAlignPow();
        /* Keep the smaller alignment power if the BB already has one. */
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.loopAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.loopAlign < curPower) ? targetInfo.loopAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

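/* Mark the collected jump targets for alignment, skipping BBs that can be
 * entered by fall-through; as with loops, the smaller power wins. */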
void AArch64AlignAnalysis::ComputeJumpAlign()
{
    if (jumpTargetBBs.empty()) {
        return;
    }
    for (BB *bb : jumpTargetBBs) {
        if (bb == cgFunc->GetFirstBB() || !IsInSizeRange(*bb) || HasFallthruEdge(*bb)) {
            continue;
        }
        bb->SetNeedAlign(true);
        if (CGOptions::GetJumpAlignPow() == 0) {
            return;
        }
        AArch64AlignInfo targetInfo;
        targetInfo.jumpAlign = CGOptions::OptimizeForSize() ? kOffsetAlignmentOf64Bit : CGOptions::GetJumpAlignPow();
        /* Keep the smaller alignment power if the BB already has one. */
        if (alignInfos.find(bb) == alignInfos.end()) {
            alignInfos[bb] = targetInfo.jumpAlign;
        } else {
            uint32 curPower = alignInfos[bb];
            alignInfos[bb] = (targetInfo.jumpAlign < curPower) ? targetInfo.jumpAlign : curPower;
        }
        bb->SetAlignPower(alignInfos[bb]);
    }
}

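/* Conservatively estimate how many instruction slots an alignment of
 * alignedVal bytes may consume in front of the instruction with id addr
 * (ids are 1-based; each instruction occupies kInsnSize bytes). Assuming
 * kInsnSize is 4: with alignedVal = 16 and addr = 3, the instruction starts
 * at byte offset 8, so (16 - 8) / 4 - 1 = 1 slot remains to the boundary. */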
uint32 AArch64AlignAnalysis::GetAlignRange(uint32 alignedVal, uint32 addr) const
{
    if (addr == 0) {
        return addr;
    }
    CHECK_FATAL(alignedVal > 0, "must not be zero");
    uint32 range = (alignedVal - (((addr - 1) * kInsnSize) & (alignedVal - 1))) / kInsnSize - 1;
    return range;
}

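/* Two instruction ids lie in the same aligned region if their byte offsets
 * fall into the same alignedRegionSize-byte window. */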
bool AArch64AlignAnalysis::IsInSameAlignedRegion(uint32 addr1, uint32 addr2, uint32 alignedRegionSize) const
{
    CHECK_FATAL(addr2 > 0, "must not be zero");
    CHECK_FATAL(addr1 > 0, "must not be zero");
    return (((addr1 - 1) * kInsnSize) / alignedRegionSize) == (((addr2 - 1) * kInsnSize) / alignedRegionSize);
}

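/* Re-number all machine instructions. Extra id slots are reserved for the
 * padding an alignment request may emit, for tbz/tbnz branches marked to be
 * split, and for adrp_ldr instructions that expand under lazy binding. */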
void AArch64AlignAnalysis::UpdateInsnId()
{
    uint32 id = 0;
    FOR_ALL_BB(bb, aarFunc) {
        if (bb != nullptr && bb->IsBBNeedAlign()) {
            /* Reserve id slots for the padding an alignment directive may
             * emit (at least kAlignPseudoSize). */
            uint32 alignedVal = 1U << bb->GetAlignPower();
            uint32 range = GetAlignRange(alignedVal, id);
            id += (range > kAlignPseudoSize) ? range : kAlignPseudoSize;
        }
        FOR_BB_INSNS(insn, bb) {
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            id += insn->GetAtomicNum();
            if (insn->IsCondBranch() && insn->GetNopNum() != 0) {
                id += insn->GetNopNum();
            }
            /* A tbz/tbnz marked for splitting expands into two instructions. */
            MOperator mOp = insn->GetMachineOpcode();
            if ((mOp == MOP_wtbz || mOp == MOP_wtbnz || mOp == MOP_xtbz || mOp == MOP_xtbnz) && insn->IsNeedSplit()) {
                ++id;
            }
            insn->SetId(id);
            /* adrp_ldr expands to an extra instruction under lazy binding. */
            if (insn->GetMachineOpcode() == MOP_adrp_ldr && CGOptions::IsLazyBinding() &&
                !aarFunc->GetCG()->IsLibcore()) {
                ++id;
            }
        }
    }
}

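/* Mark tbz/tbnz instructions whose target fails DistanceCheck (i.e. lies
 * beyond the reach of kMaxInstrForTbnz instructions) as needing a split.
 * Every new mark shifts instruction ids, so the scan restarts and repeats
 * until a fixed point is reached. Returns true if any branch was newly
 * marked. */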
bool AArch64AlignAnalysis::MarkShortBranchSplit()
{
    bool change = false;
    bool split;
    do {
        split = false;
        UpdateInsnId();
        for (auto *bb = aarFunc->GetFirstBB(); bb != nullptr && !split; bb = bb->GetNext()) {
            for (auto *insn = bb->GetLastInsn(); insn != nullptr && !split; insn = insn->GetPrev()) {
                if (!insn->IsMachineInstruction()) {
                    continue;
                }
                MOperator mOp = insn->GetMachineOpcode();
                if (mOp != MOP_wtbz && mOp != MOP_wtbnz && mOp != MOP_xtbz && mOp != MOP_xtbnz) {
                    continue;
                }
                if (insn->IsNeedSplit()) {
                    continue;
                }
                auto &labelOpnd = static_cast<LabelOperand &>(insn->GetOperand(kInsnThirdOpnd));
                if (aarFunc->DistanceCheck(*bb, labelOpnd.GetLabelIndex(), insn->GetId(),
                                           AArch64Abi::kMaxInstrForTbnz)) {
                    continue;
                }
                /* Target out of range: mark the branch for splitting and
                 * restart, since instruction ids must be recomputed. */
                split = true;
                change = true;
                insn->SetNeedSplit(split);
            }
        }
    } while (split);
    return change;
}

/**
 * Inserting nops changes the distances seen by the short-branch reachability
 * check, and splitting short branches in turn changes where and how many nops
 * must be inserted. The two analyses are therefore iterated, only marking
 * candidates, until a fixed point is reached; the nops are filled in at the
 * end.
 */
void AArch64AlignAnalysis::ComputeCondBranchAlign()
{
    bool shortBrChange = false;
    while (true) {
        shortBrChange = MarkShortBranchSplit();
        if (!shortBrChange) {
            break;
        }
    }
}
} /* namespace maplebe */