• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
18 
19 #include "optimizer/ir/analysis.h"
20 #include "optimizer/ir/graph.h"
21 #include "optimizer/pass.h"
22 #include "compiler_options.h"
23 
24 namespace ark::compiler {
25 
26 class LseVisitor;
27 
28 /**
29  * Load Store Elimination (Lse) optimization is aimed to eliminate redundant
30  * loads and store.  It uses Alias Analysis to determine which memory
31  * instructions are redundant.  Lse has the heap representation in form of
32  *
33  * "Memory Instruction" -> "Value stored at location pointed by instruction"
34  *
35  * Generally, this optimization tries to:
36  * 1) delete stores that attempt to store the same values that already have
37  * been been stored by a previous store
38  * 2) delete loads that attempt to load values that were previously loaded or
39  * stored (optimization keep track what was stored previously)
40  *
41  * Traversing basic blocks in RPO optimization does the following with
42  * instructions:
43  * - if the instruction is a store and a stored value is equal to value from
44  * heap for this store then this store can be eliminated.
45  * - if the instruction is a store and a value from heap for this store is
46  * absent or differs from new stored value then the new stored value is written
47  * into heap.  The values of memory instructions that MUST_ALIAS this store are
48  * updated as well.  All values in the heap that MAY_ALIAS this store
49  * instruction are invalidated.
50  * - if the instruction is a load and there is a value from the heap for this
51  * load then this load can be eliminated.
52  * - if the instruction is a load and there is no value from the heap for this
53  * load then we update heap value for this load with the result of this load.
54  * All instructions that MUST_ALIAS this load updated as well.
55  * - if the instruction is a volatile load then the whole heap is invalidated.
56  * - if the instruction is a call then the whole heap is invalidated.
57  *
58  * Instructions that invalidate heap are enumerated in IsHeapInvalidatingInst.
59  * Instructions that cannot be eliminated are presented in
60  * CanEliminateInstruction.
61  *
62  * Another noticeable points:
63  * - Heap is invalided at loop headers basic blocks because values can be
64  * overwritten by using back edges.
65  * - Heap for basic block is obtained by merging heaps from predecessors.  If
66  * heap value conflicts among predecessors it is not added.
67  * - Instructions are deleted at the end of pass.  If a deleted instruction is
68  * the cause that another instruction now without users, this instruction is
69  * deleted as well.  This process continues recursively.
70  *
71  * Loops are handled in the following way:
72  * - on the loop header we record all loads from preheader's heap.  They are
73  * potential memory accesses that can be used to eliminated accesses inside the
74  * loop. We call them phi-candidates (in future they can be used to reuse
75  * stores inside the loop).
76  * - visiting any memory access inside a loop we check the aliasing with
77  * phi-candidates and record aliased ones to a corresponding candidate.
78  * - all phi-candidates of a loop (and of all outer loops of this loop) are
79  * invalidated if any of instructions that invalidate heap have been met in
80  * this loop.
81  * - after actual deletion of collected accesses for elimination we iterate
82  * over candidates with aliased accesses.  If any of aliased accesses for a
83  * candidate is a store, we do nothing.  If among aliased accesses only loads,
84  * we simply replace MUST_ALIASed loads with the corresponding candidate.
85  */
86 class Lse : public Optimization {
87     using Optimization::Optimization;
88 
89 public:
90     enum EquivClass { EQ_ARRAY = 0, EQ_STATIC, EQ_POOL, EQ_OBJECT, EQ_LAST };
91 
92     struct HeapValue {
93         Inst *origin;  // The instruction the value comes from
94         Inst *val;     // The value itself
95         bool read;     // Whether the value could be read by the code we don't know anything about
96         bool local;    // Whether this value should be only used in the BasicBlock it originated from
97     };
98 
99     using BasicBlockHeap = ArenaMap<Inst *, struct HeapValue>;
100     using Heap = ArenaMap<BasicBlock *, BasicBlockHeap>;
101     using LoopPhiCands = ArenaMap<Inst *, InstVector>;
102     using PhiCands = ArenaUnorderedMap<Loop *, LoopPhiCands>;
103     using HeapEqClasses = ArenaVector<std::pair<Heap, PhiCands>>;
104 
105     explicit Lse(Graph *graph, bool hoistLoads = true)
Optimization(graph)106         : Optimization(graph),
107           hoistLoads_(hoistLoads),
108           beAlive_(GetGraph()->GetLocalAllocator()->Adapter()),
109           rpoLoops_(GetGraph()->GetLocalAllocator()->Adapter())
110     {
111         rpoLoops_.reserve(graph->GetRootLoop()->GetInnerLoops().size());
112     };
113 
114     NO_MOVE_SEMANTIC(Lse);
115     NO_COPY_SEMANTIC(Lse);
116     ~Lse() override = default;
117 
118     bool RunImpl() override;
119 
IsEnable()120     bool IsEnable() const override
121     {
122         return g_options.IsCompilerLse();
123     }
124 
GetPassName()125     const char *GetPassName() const override
126     {
127         return "LSE";
128     }
129 
130     static bool CanEliminateInstruction(Inst *inst);
131 
132     static constexpr size_t AA_CALLS_LIMIT = 20000;
133     static constexpr size_t LS_ACCESS_LIMIT = 32;
134 
135 private:
136     void InitializeHeap(BasicBlock *block, HeapEqClasses *heaps);
137     void MergeHeapValuesForLoop(BasicBlock *block, HeapEqClasses *heaps);
138     size_t MergeHeapValuesForBlock(BasicBlock *block, HeapEqClasses *heaps, Marker phiFixupMrk);
139     void FixupPhisInBlock(BasicBlock *block, Marker phiFixupMrk);
140     const char *GetEliminationCode(Inst *inst, Inst *origin);
141     void ApplyHoistToCandidate(Loop *loop, Inst *alive);
142     void TryToHoistLoadFromLoop(Loop *loop, HeapEqClasses *heaps, const BasicBlockHeap *eliminated);
143     void ProcessAllBBs(LseVisitor &visitor, HeapEqClasses *heaps, Marker phiFixupMrk);
144     void DeleteInstruction(Inst *inst, Inst *value);
145     void DeleteInstructions(const BasicBlockHeap &eliminated);
146 
147 private:
148     bool applied_ {false};
149     bool hoistLoads_;
150     SaveStateBridgesBuilder ssb_;
151     ArenaUnorderedSet<Inst *> beAlive_;
152     ArenaVector<Loop *> rpoLoops_;
153 };
154 
155 }  // namespace ark::compiler
156 
157 #endif  //  COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H
158