• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "optimizer/analysis/bounds_analysis.h"
17 #include "optimizer/analysis/dominators_tree.h"
18 #include "compiler_logger.h"
19 #include "optimizer/optimizations/balance_expressions.h"
20 
21 namespace panda::compiler {
RunImpl()22 bool BalanceExpressions::RunImpl()
23 {
24     processed_inst_mrk_ = GetGraph()->NewMarker();
25     for (auto bb : GetGraph()->GetBlocksRPO()) {
26         ProcessBB(bb);
27     }
28     GetGraph()->EraseMarker(processed_inst_mrk_);
29     return is_applied_;
30 }
31 
InvalidateAnalyses()32 void BalanceExpressions::InvalidateAnalyses()
33 {
34     GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
35     GetGraph()->InvalidateAnalysis<DominatorsTree>();
36 }
37 
38 /**
39  *  Iterate over instructions in reverse order, find every expression-chain by detecting
40  *  the final operator of each chain, analyze the chain and optimize it if necessary.
41  */
ProcessBB(BasicBlock * bb)42 void BalanceExpressions::ProcessBB(BasicBlock *bb)
43 {
44     ASSERT(bb != nullptr);
45     SetBB(bb);
46 
47     auto it = bb->InstsReverse().begin();
48     for (; it != it.end(); ++it) {
49         ASSERT(*it != nullptr);
50         if ((*it)->SetMarker(processed_inst_mrk_)) {
51             // The instruction is already processed;
52             continue;
53         }
54         if (SuitableInst(*it)) {
55             // The final operator of the chain is found, start analyzing:
56             auto inst_to_continue_cycle = ProccesExpressionChain(*it);
57             it.SetCurrent(inst_to_continue_cycle);
58         }
59     }
60 }
61 
SuitableInst(Inst * inst)62 bool BalanceExpressions::SuitableInst(Inst *inst)
63 {
64     // Floating point operations are not associative:
65     if (inst->IsCommutative() && !IsFloatType(inst->GetType())) {
66         SetOpcode(inst->GetOpcode());
67         return true;
68     }
69     return false;
70 }
71 
ProccesExpressionChain(Inst * last_operator)72 Inst *BalanceExpressions::ProccesExpressionChain(Inst *last_operator)
73 {
74     ASSERT(last_operator != nullptr);
75     AnalyzeInputsRec(last_operator);
76 
77     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "\nConsidering expression:";
78     COMPILER_LOG(DEBUG, BALANCE_EXPR) << *this;
79 
80     auto inst_to_continue = NeedsOptimization() ? OptimizeExpression(last_operator->GetNext()) : last_operator;
81 
82     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Expression considered.";
83 
84     Reset();
85     return inst_to_continue;
86 }
87 
88 /**
89  * Optimizes expression.
90  *
91  * By the end of the algorithm, `operators_.front()` points to the first instruction in expression and
92  * `operators_.back()` points to the last (`operators_.front()` dominates `operators_.back()`).
93  */
OptimizeExpression(Inst * inst_after_expr)94 Inst *BalanceExpressions::OptimizeExpression(Inst *inst_after_expr)
95 {
96     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Optimizing expression:";
97     AllocateSourcesRec<true>(0, sources_.size() - 1);
98 
99     size_t size = operators_.size();
100     operators_.front()->SetNext(operators_[1]);
101     constexpr auto IMM_2 = 2;
102     operators_.back()->SetPrev(operators_[size - IMM_2]);
103     for (size_t i = 1; i < size - 1; i++) {
104         operators_[i]->SetNext(operators_[i + 1]);
105         operators_[i]->SetPrev(operators_[i - 1]);
106     }
107     if (inst_after_expr == nullptr) {
108         GetBB()->AppendRangeInst(operators_.front(), operators_.back());
109     } else {
110         GetBB()->InsertRangeBefore(operators_.front(), operators_.back(), inst_after_expr);
111     }
112 
113     SetIsApplied(true);
114     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "\nOptimized expression:";
115     COMPILER_LOG(DEBUG, BALANCE_EXPR) << *this;
116 
117     // Need to return pointer to the next instruction in order to correctly continue the cycle:
118     Inst *inst_to_continue_cycle = operators_.front();
119     return inst_to_continue_cycle;
120 }
121 
122 /**
123  * Generates expression for sources_ in range from @param first_idx to @param last_idx by splitting them on
124  * two parts, calling itself for each part and binding them to an instruction from operators_.
125  *
126  * By the end of the algorithm, `operators_` are sorted in execution order
127  * (`operators_.front()` is the first, `operators_.back()` is the last).
128  */
129 template <bool is_first_call>
AllocateSourcesRec(size_t first_idx,size_t last_idx)130 Inst *BalanceExpressions::AllocateSourcesRec(size_t first_idx, size_t last_idx)
131 {
132     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Allocating operators for sources_[" << first_idx << " to " << last_idx << "]";
133     size_t split_idx = first_idx + GetBitFloor(last_idx - first_idx + 1) - 1;
134 
135     Inst *lhs = GetOperand(first_idx, split_idx);
136     Inst *rhs = LIKELY((split_idx + 1) != last_idx) ? GetOperand(split_idx + 1, last_idx) : sources_[split_idx + 1];
137     // `(split_idx + 1) == last_idx` means an odd number of `sources_` and we are considering
138     // the last (unpaired) source. This situation may occur only with `rhs`.
139     ASSERT(first_idx != split_idx);
140 
141     // Operator allocation:
142     ASSERT(operators_alloc_idx_ < operators_.size());
143     Inst *allocated_operator = operators_[operators_alloc_idx_];
144     operators_alloc_idx_++;
145 
146     // Operator initialization:
147     // NOLINTNEXTLINE(readability-braces-around-statements)
148     if constexpr (is_first_call) {
149         // The first call allocates and generates at the same time the last operator of expression and
150         // its users should be saved.
151         allocated_operator->RemoveInputs();
152         allocated_operator->GetBasicBlock()->EraseInst(allocated_operator);
153     } else {  // NOLINT(readability-misleading-indentation)
154         allocated_operator->GetBasicBlock()->RemoveInst(allocated_operator);
155     }
156     allocated_operator->SetBasicBlock(GetBB());
157     allocated_operator->SetInput(0, lhs);
158     allocated_operator->SetInput(1, rhs);
159 
160     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "sources_[" << first_idx << " to " << last_idx << "] allocated";
161     return allocated_operator;
162 }
163 
GetOperand(size_t first_idx,size_t last_idx)164 Inst *BalanceExpressions::GetOperand(size_t first_idx, size_t last_idx)
165 {
166     ASSERT(last_idx > first_idx);
167     return (last_idx - first_idx == 1) ? GenerateElementalOperator(sources_[first_idx], sources_[last_idx])
168                                        : AllocateSourcesRec<false>(first_idx, last_idx);
169 }
170 
171 /**
172  * Create an operator with direct sources
173  * (i.e. `lhs` and `rhs` are from sources_)
174  */
GenerateElementalOperator(Inst * lhs,Inst * rhs)175 Inst *BalanceExpressions::GenerateElementalOperator(Inst *lhs, Inst *rhs)
176 {
177     ASSERT(lhs && rhs);
178     ASSERT(operators_alloc_idx_ < operators_.size());
179     Inst *allocated_operator = operators_[operators_alloc_idx_];
180     operators_alloc_idx_++;
181     allocated_operator->GetBasicBlock()->RemoveInst(allocated_operator);
182 
183     allocated_operator->SetBasicBlock(GetBB());
184 
185     // There is no need to clean users of lhs and rhs because it is cleaned during RemoveInst()
186     // (as soon as every of operator_insts_ is removed before further usage)
187 
188     allocated_operator->SetInput(0, lhs);
189     allocated_operator->SetInput(1, rhs);
190     COMPILER_LOG(DEBUG, BALANCE_EXPR) << "Created an elemental operator:\n" << *allocated_operator;
191     return allocated_operator;
192 }
193 
194 /**
195  * Recursively checks inputs.
196  * Fills arrays of source-insts and opreation-insts.
197  */
AnalyzeInputsRec(Inst * inst)198 void BalanceExpressions::AnalyzeInputsRec(Inst *inst)
199 {
200     expr_cur_depth_++;
201     ASSERT(inst != nullptr);
202 
203     auto lhs_input = inst->GetInput(0).GetInst();
204     auto rhs_input = inst->GetInput(1).GetInst();
205 
206     TryExtendChainRec(lhs_input);
207     TryExtendChainRec(rhs_input);
208     operators_.push_back(inst);
209 
210     if (expr_max_depth_ < expr_cur_depth_) {
211         expr_max_depth_ = expr_cur_depth_;
212     }
213     expr_cur_depth_--;
214 }
215 
216 /**
217  * Recursively checks if the instruction should be added in the current expression chain.
218  * If not, the considered instruction is the expression's term (source) and we save it for a later step.
219  */
TryExtendChainRec(Inst * inst)220 void BalanceExpressions::TryExtendChainRec(Inst *inst)
221 {
222     ASSERT(inst);
223     if (inst->GetOpcode() == GetOpcode()) {
224         if (inst->HasSingleUser()) {
225             inst->SetMarker(processed_inst_mrk_);
226 
227             AnalyzeInputsRec(inst);
228 
229             return;
230         }
231     }
232     sources_.push_back(inst);
233 }
234 
235 /**
236  * Finds optimal depth and compares to the current.
237  * Both of the numbers are represented as pow(x, 2).
238  */
NeedsOptimization()239 bool BalanceExpressions::NeedsOptimization()
240 {
241     if (sources_.size() <= 3U) {
242         return false;
243     }
244     // Avoid large shift exponent for size_t
245     if (expr_max_depth_ >= std::numeric_limits<size_t>::digits) {
246         return false;
247     }
248     size_t current = 1UL << (expr_max_depth_);
249     size_t optimal = GetBitCeil(sources_.size());
250     return current > optimal;
251 }
252 
Reset()253 void BalanceExpressions::Reset()
254 {
255     sources_.clear();
256     operators_.clear();
257     expr_cur_depth_ = 0;
258     expr_max_depth_ = 0;
259     operators_alloc_idx_ = 0;
260     SetOpcode(Opcode::INVALID);
261 }
262 
Dump(std::ostream * out) const263 void BalanceExpressions::Dump(std::ostream *out) const
264 {
265     (*out) << "Sources:\n";
266     for (auto i : sources_) {
267         (*out) << *i << '\n';
268     }
269 
270     (*out) << "Operators:\n";
271     for (auto i : operators_) {
272         (*out) << *i << '\n';
273     }
274 }
275 
276 template size_t BalanceExpressions::GetBitFloor<size_t>(size_t val);
277 template size_t BalanceExpressions::GetBitCeil<size_t>(size_t val);
278 }  // namespace panda::compiler
279