1 /* 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 17 #define COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 18 19 #include "optimizer/ir/analysis.h" 20 #include "optimizer/ir/graph.h" 21 #include "optimizer/pass.h" 22 #include "compiler_options.h" 23 24 namespace ark::compiler { 25 26 class LseVisitor; 27 28 /** 29 * Load Store Elimination (Lse) optimization is aimed to eliminate redundant 30 * loads and store. It uses Alias Analysis to determine which memory 31 * instructions are redundant. Lse has the heap representation in form of 32 * 33 * "Memory Instruction" -> "Value stored at location pointed by instruction" 34 * 35 * Generally, this optimization tries to: 36 * 1) delete stores that attempt to store the same values that already have 37 * been been stored by a previous store 38 * 2) delete loads that attempt to load values that were previously loaded or 39 * stored (optimization keep track what was stored previously) 40 * 41 * Traversing basic blocks in RPO optimization does the following with 42 * instructions: 43 * - if the instruction is a store and a stored value is equal to value from 44 * heap for this store then this store can be eliminated. 45 * - if the instruction is a store and a value from heap for this store is 46 * absent or differs from new stored value then the new stored value is written 47 * into heap. The values of memory instructions that MUST_ALIAS this store are 48 * updated as well. All values in the heap that MAY_ALIAS this store 49 * instruction are invalidated. 50 * - if the instruction is a load and there is a value from the heap for this 51 * load then this load can be eliminated. 52 * - if the instruction is a load and there is no value from the heap for this 53 * load then we update heap value for this load with the result of this load. 54 * All instructions that MUST_ALIAS this load updated as well. 55 * - if the instruction is a volatile load then the whole heap is invalidated. 56 * - if the instruction is a call then the whole heap is invalidated. 57 * 58 * Instructions that invalidate heap are enumerated in IsHeapInvalidatingInst. 59 * Instructions that cannot be eliminated are presented in 60 * CanEliminateInstruction. 61 * 62 * Another noticeable points: 63 * - Heap is invalided at loop headers basic blocks because values can be 64 * overwritten by using back edges. 65 * - Heap for basic block is obtained by merging heaps from predecessors. If 66 * heap value conflicts among predecessors it is not added. 67 * - Instructions are deleted at the end of pass. If a deleted instruction is 68 * the cause that another instruction now without users, this instruction is 69 * deleted as well. This process continues recursively. 70 * 71 * Loops are handled in the following way: 72 * - on the loop header we record all loads from preheader's heap. They are 73 * potential memory accesses that can be used to eliminated accesses inside the 74 * loop. We call them phi-candidates (in future they can be used to reuse 75 * stores inside the loop). 76 * - visiting any memory access inside a loop we check the aliasing with 77 * phi-candidates and record aliased ones to a corresponding candidate. 78 * - all phi-candidates of a loop (and of all outer loops of this loop) are 79 * invalidated if any of instructions that invalidate heap have been met in 80 * this loop. 81 * - after actual deletion of collected accesses for elimination we iterate 82 * over candidates with aliased accesses. If any of aliased accesses for a 83 * candidate is a store, we do nothing. If among aliased accesses only loads, 84 * we simply replace MUST_ALIASed loads with the corresponding candidate. 85 */ 86 class Lse : public Optimization { 87 using Optimization::Optimization; 88 89 public: 90 enum EquivClass { EQ_ARRAY = 0, EQ_STATIC, EQ_POOL, EQ_OBJECT, EQ_LAST }; 91 92 struct HeapValue { 93 Inst *origin; // The instruction the value comes from 94 Inst *val; // The value itself 95 bool read; // Whether the value could be read by the code we don't know anything about 96 bool local; // Whether this value should be only used in the BasicBlock it originated from 97 }; 98 99 using BasicBlockHeap = ArenaMap<Inst *, struct HeapValue>; 100 using Heap = ArenaMap<BasicBlock *, BasicBlockHeap>; 101 using LoopPhiCands = ArenaMap<Inst *, InstVector>; 102 using PhiCands = ArenaUnorderedMap<Loop *, LoopPhiCands>; 103 using HeapEqClasses = ArenaVector<std::pair<Heap, PhiCands>>; 104 105 explicit Lse(Graph *graph, bool hoistLoads = true) Optimization(graph)106 : Optimization(graph), 107 hoistLoads_(hoistLoads), 108 beAlive_(GetGraph()->GetLocalAllocator()->Adapter()), 109 rpoLoops_(GetGraph()->GetLocalAllocator()->Adapter()) 110 { 111 rpoLoops_.reserve(graph->GetRootLoop()->GetInnerLoops().size()); 112 }; 113 114 NO_MOVE_SEMANTIC(Lse); 115 NO_COPY_SEMANTIC(Lse); 116 ~Lse() override = default; 117 118 bool RunImpl() override; 119 IsEnable()120 bool IsEnable() const override 121 { 122 return g_options.IsCompilerLse(); 123 } 124 GetPassName()125 const char *GetPassName() const override 126 { 127 return "LSE"; 128 } 129 130 static bool CanEliminateInstruction(Inst *inst); 131 132 static constexpr size_t AA_CALLS_LIMIT = 20000; 133 static constexpr size_t LS_ACCESS_LIMIT = 32; 134 135 private: 136 void InitializeHeap(BasicBlock *block, HeapEqClasses *heaps); 137 void MergeHeapValuesForLoop(BasicBlock *block, HeapEqClasses *heaps); 138 size_t MergeHeapValuesForBlock(BasicBlock *block, HeapEqClasses *heaps, Marker phiFixupMrk); 139 void FixupPhisInBlock(BasicBlock *block, Marker phiFixupMrk); 140 const char *GetEliminationCode(Inst *inst, Inst *origin); 141 void ApplyHoistToCandidate(Loop *loop, Inst *alive); 142 void TryToHoistLoadFromLoop(Loop *loop, HeapEqClasses *heaps, const BasicBlockHeap *eliminated); 143 void ProcessAllBBs(LseVisitor &visitor, HeapEqClasses *heaps, Marker phiFixupMrk); 144 void DeleteInstruction(Inst *inst, Inst *value); 145 void DeleteInstructions(const BasicBlockHeap &eliminated); 146 147 private: 148 bool applied_ {false}; 149 bool hoistLoads_; 150 SaveStateBridgesBuilder ssb_; 151 ArenaUnorderedSet<Inst *> beAlive_; 152 ArenaVector<Loop *> rpoLoops_; 153 }; 154 155 } // namespace ark::compiler 156 157 #endif // COMPILER_OPTIMIZER_OPTIMIZATIONS_LSE_H 158