1 /* 2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 18 19 #include "optimizer/ir/analysis.h" 20 #include "optimizer/ir/graph.h" 21 #include "optimizer/pass.h" 22 #include "compiler_options.h" 23 24 namespace ark::compiler { 25 26 class LseVisitor; 27 28 /** 29 * Load Store Elimination (Lse) optimization is aimed to eliminate redundant 30 * loads and store. It uses Alias Analysis to determine which memory 31 * instructions are redundant. Lse has the heap representation in form of 32 * 33 * "Memory Instruction" -> "Value stored at location pointed by instruction" 34 * 35 * Generally, this optimization tries to: 36 * 1) delete stores that attempt to store the same values that already have 37 * been been stored by a previous store 38 * 2) delete loads that attempt to load values that were previously loaded or 39 * stored (optimization keep track what was stored previously) 40 * 41 * Traversing basic blocks in RPO optimization does the following with 42 * instructions: 43 * - if the instruction is a store and a stored value is equal to value from 44 * heap for this store then this store can be eliminated. 45 * - if the instruction is a store and a value from heap for this store is 46 * absent or differs from new stored value then the new stored value is written 47 * into heap. The values of memory instructions that MUST_ALIAS this store are 48 * updated as well. All values in the heap that MAY_ALIAS this store 49 * instruction are invalidated. 50 * - if the instruction is a load and there is a value from the heap for this 51 * load then this load can be eliminated. 52 * - if the instruction is a load and there is no value from the heap for this 53 * load then we update heap value for this load with the result of this load. 54 * All instructions that MUST_ALIAS this load updated as well. 55 * - if the instruction is a volatile load then the whole heap is invalidated. 56 * - if the instruction is a call then the whole heap is invalidated. 57 * 58 * Instructions that invalidate heap are enumerated in IsHeapInvalidatingInst. 59 * Instructions that cannot be eliminated are presented in 60 * CanEliminateInstruction. 61 * 62 * Another noticeable points: 63 * - Heap is invalided at loop headers basic blocks because values can be 64 * overwritten by using back edges. 65 * - Heap for basic block is obtained by merging heaps from predecessors. If 66 * heap value conflicts among predecessors it is not added. 67 * - Instructions are deleted at the end of pass. If a deleted instruction is 68 * the cause that another instruction now without users, this instruction is 69 * deleted as well. This process continues recursively. 70 * 71 * Loops are handled in the following way: 72 * - on the loop header we record all loads from preheader's heap. They are 73 * potential memory accesses that can be used to eliminated accesses inside the 74 * loop. We call them phi-candidates (in future they can be used to reuse 75 * stores inside the loop). 76 * - visiting any memory access inside a loop we check the aliasing with 77 * phi-candidates and record aliased ones to a corresponding candidate. 78 * - all phi-candidates of a loop (and of all outer loops of this loop) are 79 * invalidated if any of instructions that invalidate heap have been met in 80 * this loop. 81 * - after actual deletion of collected accesses for elimination we iterate 82 * over candidates with aliased accesses. If any of aliased accesses for a 83 * candidate is a store, we do nothing. If among aliased accesses only loads, 84 * we simply replace MUST_ALIASed loads with the corresponding candidate. 85 */ 86 class PANDA_PUBLIC_API Lse : public Optimization { 87 using Optimization::Optimization; 88 89 public: 90 enum EquivClass { EQ_ARRAY = 0, EQ_STATIC, EQ_POOL, EQ_OBJECT, EQ_LAST }; 91 92 struct HeapValue { 93 Inst *origin; // The instruction the value comes from 94 Inst *val; // The value itself 95 bool read; // Whether the value could be read by the code we don't know anything about 96 bool local; // Whether this value should be only used in the BasicBlock it originated from 97 }; 98 99 using BasicBlockHeap = ArenaMap<Inst *, struct HeapValue>; 100 using BasicBlockHeapIter = BasicBlockHeap::iterator; 101 using Heap = ArenaMap<BasicBlock *, BasicBlockHeap>; 102 using LoopPhiCands = ArenaMap<Inst *, InstVector>; 103 using PhiCands = ArenaUnorderedMap<Loop *, LoopPhiCands>; 104 using HeapEqClasses = ArenaVector<std::pair<Heap, PhiCands>>; 105 using PredBlocksIter = ArenaVector<BasicBlock *>::iterator; 106 using PredBlocksItersPair = std::pair<PredBlocksIter, PredBlocksIter>; 107 108 explicit Lse(Graph *graph, bool hoistLoads = true) Optimization(graph)109 : Optimization(graph), 110 hoistLoads_(hoistLoads), 111 beAlive_(GetGraph()->GetLocalAllocator()->Adapter()), 112 rpoLoops_(GetGraph()->GetLocalAllocator()->Adapter()) 113 { 114 rpoLoops_.reserve(graph->GetRootLoop()->GetInnerLoops().size()); 115 }; 116 117 NO_MOVE_SEMANTIC(Lse); 118 NO_COPY_SEMANTIC(Lse); 119 ~Lse() override = default; 120 121 bool RunImpl() override; 122 IsEnable()123 bool IsEnable() const override 124 { 125 return g_options.IsCompilerLse(); 126 } 127 GetPassName()128 const char *GetPassName() const override 129 { 130 return "LSE"; 131 } 132 133 static bool CanEliminateInstruction(Inst *inst); 134 static bool IsHeapInvalidatingInst(Inst *inst); 135 136 static constexpr size_t AA_CALLS_LIMIT = 20000; 137 static constexpr size_t LS_ACCESS_LIMIT = 32; 138 139 private: 140 void InitializeHeap(BasicBlock *block, HeapEqClasses *heaps); 141 void MergeHeapValuesForLoop(BasicBlock *block, HeapEqClasses *heaps); 142 void MergeHeapValuesForBlock(BasicBlock *block, HeapEqClasses *heaps, Marker phiFixupMrk); 143 void ProcessHeapValuesForBlock(Heap *heap, BasicBlock *block, Marker phiFixupMrk); 144 BasicBlockHeapIter ProcessPredecessorHeap(BasicBlockHeap &predHeap, HeapValue &heapValue, BasicBlock *block, 145 Inst *curInst); 146 bool ProcessHeapValues(HeapValue &heapValue, BasicBlock *block, BasicBlockHeapIter predInstIt, 147 PredBlocksItersPair iters, Marker phiFixupMrk); 148 void FixupPhisInBlock(BasicBlock *block, Marker phiFixupMrk); 149 const char *GetEliminationCode(Inst *inst, Inst *origin); 150 void ApplyHoistToCandidate(Loop *loop, Inst *alive); 151 void TryToHoistLoadFromLoop(Loop *loop, HeapEqClasses *heaps, const BasicBlockHeap *eliminated); 152 void ProcessAllBBs(HeapEqClasses *heaps, Marker phiFixupMrk); 153 void DeleteInstruction(Inst *inst, Inst *value); 154 void DeleteInstructions(const BasicBlockHeap &eliminated); 155 156 private: 157 bool applied_ {false}; 158 bool hoistLoads_; 159 SaveStateBridgesBuilder ssb_; 160 ArenaUnorderedSet<Inst *> beAlive_; 161 ArenaVector<Loop *> rpoLoops_; 162 LseVisitor *visitor_ {nullptr}; 163 }; 164 165 } // namespace ark::compiler 166 167 #endif // COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 168