1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "optimizer/analysis/bounds_analysis.h"
17 #include "optimizer/analysis/dominators_tree.h"
18 #include "compiler_logger.h"
19 #include "optimizer/optimizations/balance_expressions.h"
20
21 namespace panda::compiler {
RunImpl()22 bool BalanceExpressions::RunImpl()
23 {
24 processed_inst_mrk_ = GetGraph()->NewMarker();
25 for (auto bb : GetGraph()->GetBlocksRPO()) {
26 ProcessBB(bb);
27 }
28 GetGraph()->EraseMarker(processed_inst_mrk_);
29 return is_applied_;
30 }
31
InvalidateAnalyses()32 void BalanceExpressions::InvalidateAnalyses()
33 {
34 GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
35 GetGraph()->InvalidateAnalysis<DominatorsTree>();
36 }
37
38 /**
39 * Iterate over instructions in reverse order, find every expression-chain by detecting
40 * the final operator of each chain, analyze the chain and optimize it if necessary.
41 */
ProcessBB(BasicBlock * bb)42 void BalanceExpressions::ProcessBB(BasicBlock *bb)
43 {
44 ASSERT(bb != nullptr);
45 SetBB(bb);
46
47 auto it = bb->InstsReverse().begin();
48 for (; it != it.end(); ++it) {
49 ASSERT(*it != nullptr);
50 if ((*it)->SetMarker(processed_inst_mrk_)) {
51 // The instruction is already processed;
52 continue;
53 }
54 if (SuitableInst(*it)) {
55 // The final operator of the chain is found, start analyzing:
56 auto inst_to_continue_cycle = ProccesExpressionChain(*it);
57 it.SetCurrent(inst_to_continue_cycle);
58 }
59 }
60 }
61
SuitableInst(Inst * inst)62 bool BalanceExpressions::SuitableInst(Inst *inst)
63 {
64 // Floating point operations are not associative:
65 if (inst->IsCommutative() && !IsFloatType(inst->GetType())) {
66 SetOpcode(inst->GetOpcode());
67 return true;
68 }
69 return false;
70 }
71
ProccesExpressionChain(Inst * last_operator)72 Inst *BalanceExpressions::ProccesExpressionChain(Inst *last_operator)
73 {
74 ASSERT(last_operator != nullptr);
75 AnalyzeInputsRec(last_operator);
76
77 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "\nConsidering expression:";
78 COMPILER_LOG(DEBUG, BALANCE_EXPR) << *this;
79
80 auto inst_to_continue = NeedsOptimization() ? OptimizeExpression(last_operator->GetNext()) : last_operator;
81
82 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Expression considered.";
83
84 Reset();
85 return inst_to_continue;
86 }
87
88 /**
89 * Optimizes expression.
90 *
91 * By the end of the algorithm, `operators_.front()` points to the first instruction in expression and
92 * `operators_.back()` points to the last (`operators_.front()` dominates `operators_.back()`).
93 */
OptimizeExpression(Inst * inst_after_expr)94 Inst *BalanceExpressions::OptimizeExpression(Inst *inst_after_expr)
95 {
96 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Optimizing expression:";
97 AllocateSourcesRec<true>(0, sources_.size() - 1);
98
99 size_t size = operators_.size();
100 operators_.front()->SetNext(operators_[1]);
101 constexpr auto IMM_2 = 2;
102 operators_.back()->SetPrev(operators_[size - IMM_2]);
103 for (size_t i = 1; i < size - 1; i++) {
104 operators_[i]->SetNext(operators_[i + 1]);
105 operators_[i]->SetPrev(operators_[i - 1]);
106 }
107 if (inst_after_expr == nullptr) {
108 GetBB()->AppendRangeInst(operators_.front(), operators_.back());
109 } else {
110 GetBB()->InsertRangeBefore(operators_.front(), operators_.back(), inst_after_expr);
111 }
112
113 SetIsApplied(true);
114 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "\nOptimized expression:";
115 COMPILER_LOG(DEBUG, BALANCE_EXPR) << *this;
116
117 // Need to return pointer to the next instruction in order to correctly continue the cycle:
118 Inst *inst_to_continue_cycle = operators_.front();
119 return inst_to_continue_cycle;
120 }
121
122 /**
123 * Generates expression for sources_ in range from @param first_idx to @param last_idx by splitting them on
124 * two parts, calling itself for each part and binding them to an instruction from operators_.
125 *
126 * By the end of the algorithm, `operators_` are sorted in execution order
127 * (`operators_.front()` is the first, `operators_.back()` is the last).
128 */
129 template <bool is_first_call>
AllocateSourcesRec(size_t first_idx,size_t last_idx)130 Inst *BalanceExpressions::AllocateSourcesRec(size_t first_idx, size_t last_idx)
131 {
132 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Allocating operators for sources_[" << first_idx << " to " << last_idx << "]";
133 size_t split_idx = first_idx + GetBitFloor(last_idx - first_idx + 1) - 1;
134
135 Inst *lhs = GetOperand(first_idx, split_idx);
136 Inst *rhs = LIKELY((split_idx + 1) != last_idx) ? GetOperand(split_idx + 1, last_idx) : sources_[split_idx + 1];
137 // `(split_idx + 1) == last_idx` means an odd number of `sources_` and we are considering
138 // the last (unpaired) source. This situation may occur only with `rhs`.
139 ASSERT(first_idx != split_idx);
140
141 // Operator allocation:
142 ASSERT(operators_alloc_idx_ < operators_.size());
143 Inst *allocated_operator = operators_[operators_alloc_idx_];
144 operators_alloc_idx_++;
145
146 // Operator initialization:
147 // NOLINTNEXTLINE(readability-braces-around-statements)
148 if constexpr (is_first_call) {
149 // The first call allocates and generates at the same time the last operator of expression and
150 // its users should be saved.
151 allocated_operator->RemoveInputs();
152 allocated_operator->GetBasicBlock()->EraseInst(allocated_operator);
153 } else { // NOLINT(readability-misleading-indentation)
154 allocated_operator->GetBasicBlock()->RemoveInst(allocated_operator);
155 }
156 allocated_operator->SetBasicBlock(GetBB());
157 allocated_operator->SetInput(0, lhs);
158 allocated_operator->SetInput(1, rhs);
159
160 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "sources_[" << first_idx << " to " << last_idx << "] allocated";
161 return allocated_operator;
162 }
163
GetOperand(size_t first_idx,size_t last_idx)164 Inst *BalanceExpressions::GetOperand(size_t first_idx, size_t last_idx)
165 {
166 ASSERT(last_idx > first_idx);
167 return (last_idx - first_idx == 1) ? GenerateElementalOperator(sources_[first_idx], sources_[last_idx])
168 : AllocateSourcesRec<false>(first_idx, last_idx);
169 }
170
171 /**
172 * Create an operator with direct sources
173 * (i.e. `lhs` and `rhs` are from sources_)
174 */
GenerateElementalOperator(Inst * lhs,Inst * rhs)175 Inst *BalanceExpressions::GenerateElementalOperator(Inst *lhs, Inst *rhs)
176 {
177 ASSERT(lhs && rhs);
178 ASSERT(operators_alloc_idx_ < operators_.size());
179 Inst *allocated_operator = operators_[operators_alloc_idx_];
180 operators_alloc_idx_++;
181 allocated_operator->GetBasicBlock()->RemoveInst(allocated_operator);
182
183 allocated_operator->SetBasicBlock(GetBB());
184
185 // There is no need to clean users of lhs and rhs because it is cleaned during RemoveInst()
186 // (as soon as every of operator_insts_ is removed before further usage)
187
188 allocated_operator->SetInput(0, lhs);
189 allocated_operator->SetInput(1, rhs);
190 COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Created an elemental operator:\n" << *allocated_operator;
191 return allocated_operator;
192 }
193
194 /**
195 * Recursively checks inputs.
196 * Fills arrays of source-insts and opreation-insts.
197 */
AnalyzeInputsRec(Inst * inst)198 void BalanceExpressions::AnalyzeInputsRec(Inst *inst)
199 {
200 expr_cur_depth_++;
201 ASSERT(inst != nullptr);
202
203 auto lhs_input = inst->GetInput(0).GetInst();
204 auto rhs_input = inst->GetInput(1).GetInst();
205
206 TryExtendChainRec(lhs_input);
207 TryExtendChainRec(rhs_input);
208 operators_.push_back(inst);
209
210 if (expr_max_depth_ < expr_cur_depth_) {
211 expr_max_depth_ = expr_cur_depth_;
212 }
213 expr_cur_depth_--;
214 }
215
216 /**
217 * Recursively checks if the instruction should be added in the current expression chain.
218 * If not, the considered instruction is the expression's term (source) and we save it for a later step.
219 */
TryExtendChainRec(Inst * inst)220 void BalanceExpressions::TryExtendChainRec(Inst *inst)
221 {
222 ASSERT(inst);
223 if (inst->GetOpcode() == GetOpcode()) {
224 if (inst->HasSingleUser()) {
225 inst->SetMarker(processed_inst_mrk_);
226
227 AnalyzeInputsRec(inst);
228
229 return;
230 }
231 }
232 sources_.push_back(inst);
233 }
234
235 /**
236 * Finds optimal depth and compares to the current.
237 * Both of the numbers are represented as pow(x, 2).
238 */
NeedsOptimization()239 bool BalanceExpressions::NeedsOptimization()
240 {
241 if (sources_.size() <= 3U) {
242 return false;
243 }
244 // Avoid large shift exponent for size_t
245 if (expr_max_depth_ >= std::numeric_limits<size_t>::digits) {
246 return false;
247 }
248 size_t current = 1UL << (expr_max_depth_);
249 size_t optimal = GetBitCeil(sources_.size());
250 return current > optimal;
251 }
252
Reset()253 void BalanceExpressions::Reset()
254 {
255 sources_.clear();
256 operators_.clear();
257 expr_cur_depth_ = 0;
258 expr_max_depth_ = 0;
259 operators_alloc_idx_ = 0;
260 SetOpcode(Opcode::INVALID);
261 }
262
Dump(std::ostream * out) const263 void BalanceExpressions::Dump(std::ostream *out) const
264 {
265 (*out) << "Sources:\n";
266 for (auto i : sources_) {
267 (*out) << *i << '\n';
268 }
269
270 (*out) << "Operators:\n";
271 for (auto i : operators_) {
272 (*out) << *i << '\n';
273 }
274 }
275
276 template size_t BalanceExpressions::GetBitFloor<size_t>(size_t val);
277 template size_t BalanceExpressions::GetBitCeil<size_t>(size_t val);
278 } // namespace panda::compiler
279