1 /* 2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef PANDA_REG_ENCODER_H 17 #define PANDA_REG_ENCODER_H 18 19 #include "compiler/optimizer/ir/graph.h" 20 #include "compiler/optimizer/pass.h" 21 #include "compiler/optimizer/ir/inst.h" 22 #include "compiler/optimizer/ir/graph_visitor.h" 23 24 /* 25 * Register Encoder. 26 * 27 * After compiler's register allocation layout of the virtual frame looks like: 28 * 29 * |<-locals->|<-free registers->|<-RegAlloc temps->|<-arguments->| 30 * 31 * where: 32 * 33 * * locals (>= 0) are registers allocated for function's local variables; 34 * * temps (>= 0) are temporary registers that can be allocated by spill-fill 35 * resolver during breaking mov chain cycles; 36 * * arguments (>= 0) are registers allocated for function's arguemnts. 37 * 38 * The size of the frame is fixed (see register allocator for details). 39 * 40 * Locals and temps are allocated densely: if there are more than 0 locals 41 * (or temps) then all registers go strictly in order without "holes". 42 * Space for arguments, however, can contain "holes" (each "hole" corresponds 43 * to an unused argument). 44 * 45 * For locals and temps, it is not guaranteed that their number equals to the 46 * number of registers used in the unoptimized bytecode. Total number of arguments 47 * is of course constant throughout optimization pipeline. 48 * 49 * This pass solves following problems: 50 * 51 * 1. Shift temps and arguments to the left adjacently to locals, and reserve the 52 * range temps from free registers if necessary, consistently changing all register 53 * numbers for affected instructions. After this is done, the virtual frame looks like: 54 * 55 * |<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->| 56 * 57 * 2. Check if IR contains any instructions that can encode only [r0, r15] with 58 * actual inputs r16+. If such instructions are found, some lower registers 59 * (starting with r0, but no more than MAX_NUM_INPUTS registers) are reserved as 60 * temps and corresponding spills are emitted where needed. After this is done, 61 * the virtual frame looks like: 62 * 63 * |<-Renumber temps->|<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->| 64 * 65 * After the pass: 66 * 67 * * Usage mask is updated accordingly. 68 * * num_locals + num_temps + num_max_range_input is stored to graph with SetStackSlotsCount 69 */ 70 71 namespace ark::bytecodeopt { 72 struct RegContent { 73 // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes) 74 compiler::Register reg; 75 // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes) 76 compiler::DataType::Type type; 77 RegContentRegContent78 RegContent() : reg(compiler::GetInvalidReg()), type(compiler::DataType::NO_TYPE) {} RegContentRegContent79 RegContent(compiler::Register r, compiler::DataType::Type t) : reg(r), type(t) {} 80 81 bool operator==(const RegContent &other) const 82 { 83 return reg == other.reg && type == other.type; 84 } 85 86 bool operator!=(const RegContent &other) const 87 { 88 return !(*this == other); 89 } 90 }; 91 using RegContentMap = ArenaUnorderedMap<compiler::Register, RegContent>; 92 using RegContentVec = ArenaVector<std::pair<compiler::Register, RegContent>>; 93 94 enum class RegEncoderState { IDLE, RENUMBER_ARGS, RESERVE_TEMPS, INSERT_SPILLS }; 95 96 using ark::compiler::BasicBlock; 97 using ark::compiler::Inst; 98 using ark::compiler::Opcode; 99 100 // NOLINTNEXTLINE(fuchsia-multiple-inheritance) 101 class PANDA_PUBLIC_API RegEncoder : public compiler::Optimization, public compiler::GraphVisitor { 102 public: RegEncoder(compiler::Graph * graph)103 explicit RegEncoder(compiler::Graph *graph) : compiler::Optimization(graph) {} 104 105 ~RegEncoder() override = default; 106 NO_COPY_SEMANTIC(RegEncoder); 107 NO_MOVE_SEMANTIC(RegEncoder); 108 109 // true: Pass applied successfully 110 // false: Unable to apply pass because of insufficient number of registers 111 bool RunImpl() override; 112 GetPassName()113 const char *GetPassName() const override 114 { 115 return "RegEncoder"; 116 } 117 118 void Check4Width(compiler::Inst *inst); 119 void Check8Width(compiler::Inst *inst); 120 GetBlocksToVisit()121 const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override 122 { 123 return GetGraph()->GetBlocksRPO(); 124 } 125 126 static void VisitSpillFill([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst); 127 static void VisitConstant([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst); 128 static void VisitCatchPhi([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst); 129 static void VisitCallStatic(GraphVisitor *v, Inst *inst); 130 static void VisitCallVirtual(GraphVisitor *v, Inst *inst); 131 static void VisitInitObject(GraphVisitor *v, Inst *inst); 132 static void VisitIntrinsic(GraphVisitor *v, Inst *inst); 133 static void VisitLoadObject(GraphVisitor *v, Inst *instBase); 134 static void VisitLoadStatic(GraphVisitor *v, Inst *instBase); 135 static void VisitStoreObject(GraphVisitor *v, Inst *instBase); 136 static void VisitStoreStatic(GraphVisitor *v, Inst *instBase); 137 static void VisitLoadString([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst); 138 static void VisitReturn([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst); 139 140 static void VisitCastValueToAnyType(GraphVisitor *v, Inst *inst); 141 142 #include "generated/reg_encoder_visitors.inc" 143 144 #include "generated/check_width.h" 145 VisitDefault(Inst * inst)146 void VisitDefault(Inst *inst) override 147 { 148 LOG(ERROR, BYTECODE_OPTIMIZER) << "Opcode " << compiler::GetOpcodeString(inst->GetOpcode()) 149 << " not yet implemented in RegEncoder"; 150 success_ = false; 151 } 152 153 #include "compiler/optimizer/ir/visitor.inc" 154 155 private: 156 void CalculateNumNeededTemps(); 157 void CalculateNumNeededTempsForInst(compiler::Inst *inst); 158 void RenumberRegs(compiler::Register minReg, compiler::Register delta); 159 bool RenumberArgRegs(); 160 void InsertSpills(); 161 void InsertSpillsForInst(compiler::Inst *inst); 162 void InsertSpillsForDynInputsInst(compiler::Inst *inst); 163 bool InsertSpillsForDynRangeInst(compiler::Inst *inst, size_t nargs, size_t start); 164 size_t GetStartInputIndex(compiler::Inst *inst); 165 void RenumberDstReg(compiler::Inst *inst, size_t temp, size_t rangeTemp = 0, bool largeTemp = false); 166 GetNumArgsFromGraph()167 compiler::Register GetNumArgsFromGraph() const 168 { 169 auto adapter = GetGraph()->GetRuntime(); 170 auto method = GetGraph()->GetMethod(); 171 auto numArgs = adapter->GetMethodTotalArgumentsCount(method); 172 ASSERT(numArgs <= compiler::GetFrameSize()); 173 return numArgs; 174 } 175 GetNumLocalsFromGraph()176 compiler::Register GetNumLocalsFromGraph() const 177 { 178 auto numLocals = GetGraph()->GetStackSlotsCount(); 179 ASSERT(numLocals <= compiler::GetFrameSize()); 180 return numLocals; 181 } 182 GetNumRegs()183 compiler::Register GetNumRegs() const 184 { 185 auto numRegs = GetNumLocalsFromGraph() + GetNumArgsFromGraph(); 186 ASSERT(numRegs <= compiler::GetFrameSize()); 187 return numRegs; 188 } 189 SaveNumLocalsToGraph(uint32_t numLocals)190 void SaveNumLocalsToGraph(uint32_t numLocals) const 191 { 192 ASSERT(numLocals <= compiler::GetFrameSize()); 193 GetGraph()->SetStackSlotsCount(numLocals); 194 } 195 GetStatus()196 bool GetStatus() const 197 { 198 return success_; 199 } 200 CheckStatus()201 bool CheckStatus() const 202 { 203 return !GetGraph()->IsAbcKit() || success_; 204 } 205 CallHelper(compiler::GraphVisitor * visitor,Inst * inst)206 static void CallHelper(compiler::GraphVisitor *visitor, Inst *inst) 207 { 208 auto *re = static_cast<RegEncoder *>(visitor); 209 re->Check4Width(inst); 210 } 211 212 private: 213 compiler::Register numTemps_ {0}; 214 RegEncoderState state_ {RegEncoderState::IDLE}; 215 compiler::Register numMaxRangeInput_ {0}; 216 compiler::Register rangeTempsStart_ {0}; 217 compiler::Register numChangedWidth_ {0}; 218 219 bool success_ {true}; 220 }; 221 } // namespace ark::bytecodeopt 222 223 #endif // PANDA_REG_ENCODER_H 224