• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
18 
19 #include "optimizer/ir/analysis.h"
20 #include "optimizer/ir/graph.h"
21 #include "optimizer/pass.h"
22 #include "compiler_options.h"
23 
24 namespace ark::compiler {
25 
26 class LseVisitor;
27 
28 /**
29  * Load Store Elimination (Lse) optimization is aimed to eliminate redundant
30  * loads and store.  It uses Alias Analysis to determine which memory
31  * instructions are redundant.  Lse has the heap representation in form of
32  *
33  * "Memory Instruction" -> "Value stored at location pointed by instruction"
34  *
35  * Generally, this optimization tries to:
36  * 1) delete stores that attempt to store the same values that already have
37  * been been stored by a previous store
38  * 2) delete loads that attempt to load values that were previously loaded or
39  * stored (optimization keep track what was stored previously)
40  *
41  * Traversing basic blocks in RPO optimization does the following with
42  * instructions:
43  * - if the instruction is a store and a stored value is equal to value from
44  * heap for this store then this store can be eliminated.
45  * - if the instruction is a store and a value from heap for this store is
46  * absent or differs from new stored value then the new stored value is written
47  * into heap.  The values of memory instructions that MUST_ALIAS this store are
48  * updated as well.  All values in the heap that MAY_ALIAS this store
49  * instruction are invalidated.
50  * - if the instruction is a load and there is a value from the heap for this
51  * load then this load can be eliminated.
52  * - if the instruction is a load and there is no value from the heap for this
53  * load then we update heap value for this load with the result of this load.
54  * All instructions that MUST_ALIAS this load updated as well.
55  * - if the instruction is a volatile load then the whole heap is invalidated.
56  * - if the instruction is a call then the whole heap is invalidated.
57  *
58  * Instructions that invalidate heap are enumerated in IsHeapInvalidatingInst.
59  * Instructions that cannot be eliminated are presented in
60  * CanEliminateInstruction.
61  *
62  * Another noticeable points:
63  * - Heap is invalided at loop headers basic blocks because values can be
64  * overwritten by using back edges.
65  * - Heap for basic block is obtained by merging heaps from predecessors.  If
66  * heap value conflicts among predecessors it is not added.
67  * - Instructions are deleted at the end of pass.  If a deleted instruction is
68  * the cause that another instruction now without users, this instruction is
69  * deleted as well.  This process continues recursively.
70  *
71  * Loops are handled in the following way:
72  * - on the loop header we record all loads from preheader's heap.  They are
73  * potential memory accesses that can be used to eliminated accesses inside the
74  * loop. We call them phi-candidates (in future they can be used to reuse
75  * stores inside the loop).
76  * - visiting any memory access inside a loop we check the aliasing with
77  * phi-candidates and record aliased ones to a corresponding candidate.
78  * - all phi-candidates of a loop (and of all outer loops of this loop) are
79  * invalidated if any of instructions that invalidate heap have been met in
80  * this loop.
81  * - after actual deletion of collected accesses for elimination we iterate
82  * over candidates with aliased accesses.  If any of aliased accesses for a
83  * candidate is a store, we do nothing.  If among aliased accesses only loads,
84  * we simply replace MUST_ALIASed loads with the corresponding candidate.
85  */
86 class PANDA_PUBLIC_API Lse : public Optimization {
87     using Optimization::Optimization;
88 
89 public:
90     enum EquivClass { EQ_ARRAY = 0, EQ_STATIC, EQ_POOL, EQ_OBJECT, EQ_LAST };
91 
92     struct HeapValue {
93         Inst *origin;  // The instruction the value comes from
94         Inst *val;     // The value itself
95         bool read;     // Whether the value could be read by the code we don't know anything about
96         bool local;    // Whether this value should be only used in the BasicBlock it originated from
97     };
98 
99     using BasicBlockHeap = ArenaMap<Inst *, struct HeapValue>;
100     using BasicBlockHeapIter = BasicBlockHeap::iterator;
101     using Heap = ArenaMap<BasicBlock *, BasicBlockHeap>;
102     using LoopPhiCands = ArenaMap<Inst *, InstVector>;
103     using PhiCands = ArenaUnorderedMap<Loop *, LoopPhiCands>;
104     using HeapEqClasses = ArenaVector<std::pair<Heap, PhiCands>>;
105     using PredBlocksIter = ArenaVector<BasicBlock *>::iterator;
106     using PredBlocksItersPair = std::pair<PredBlocksIter, PredBlocksIter>;
107 
108     explicit Lse(Graph *graph, bool hoistLoads = true)
Optimization(graph)109         : Optimization(graph),
110           hoistLoads_(hoistLoads),
111           beAlive_(GetGraph()->GetLocalAllocator()->Adapter()),
112           rpoLoops_(GetGraph()->GetLocalAllocator()->Adapter())
113     {
114         rpoLoops_.reserve(graph->GetRootLoop()->GetInnerLoops().size());
115     };
116 
117     NO_MOVE_SEMANTIC(Lse);
118     NO_COPY_SEMANTIC(Lse);
119     ~Lse() override = default;
120 
121     bool RunImpl() override;
122 
IsEnable()123     bool IsEnable() const override
124     {
125         return g_options.IsCompilerLse();
126     }
127 
GetPassName()128     const char *GetPassName() const override
129     {
130         return "LSE";
131     }
132 
133     static bool CanEliminateInstruction(Inst *inst);
134     static bool IsHeapInvalidatingInst(Inst *inst);
135 
136     static constexpr size_t AA_CALLS_LIMIT = 20000;
137     static constexpr size_t LS_ACCESS_LIMIT = 32;
138 
139 private:
140     void InitializeHeap(BasicBlock *block, HeapEqClasses *heaps);
141     void MergeHeapValuesForLoop(BasicBlock *block, HeapEqClasses *heaps);
142     void MergeHeapValuesForBlock(BasicBlock *block, HeapEqClasses *heaps, Marker phiFixupMrk);
143     void ProcessHeapValuesForBlock(Heap *heap, BasicBlock *block, Marker phiFixupMrk);
144     BasicBlockHeapIter ProcessPredecessorHeap(BasicBlockHeap &predHeap, HeapValue &heapValue, BasicBlock *block,
145                                               Inst *curInst);
146     bool ProcessHeapValues(HeapValue &heapValue, BasicBlock *block, BasicBlockHeapIter predInstIt,
147                            PredBlocksItersPair iters, Marker phiFixupMrk);
148     void FixupPhisInBlock(BasicBlock *block, Marker phiFixupMrk);
149     const char *GetEliminationCode(Inst *inst, Inst *origin);
150     void ApplyHoistToCandidate(Loop *loop, Inst *alive);
151     void TryToHoistLoadFromLoop(Loop *loop, HeapEqClasses *heaps, const BasicBlockHeap *eliminated);
152     void ProcessAllBBs(HeapEqClasses *heaps, Marker phiFixupMrk);
153     void DeleteInstruction(Inst *inst, Inst *value);
154     void DeleteInstructions(const BasicBlockHeap &eliminated);
155 
156 private:
157     bool applied_ {false};
158     bool hoistLoads_;
159     SaveStateBridgesBuilder ssb_;
160     ArenaUnorderedSet<Inst *> beAlive_;
161     ArenaVector<Loop *> rpoLoops_;
162     LseVisitor *visitor_ {nullptr};
163 };
164 
165 }  // namespace ark::compiler
166 
167 #endif  //  COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
168