1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef BYTECODE_OPTIMIZER_REG_ENCODER_H 17 #define BYTECODE_OPTIMIZER_REG_ENCODER_H 18 19 #include "compiler/optimizer/ir/graph.h" 20 #include "compiler/optimizer/pass.h" 21 #include "compiler/optimizer/ir/inst.h" 22 #include "compiler/optimizer/ir/graph_visitor.h" 23 24 /* 25 * Register Encoder. 26 * 27 * After compiler's register allocation layout of the virtual frame looks like: 28 * 29 * |<-locals->|<-free registers->|<-RegAlloc temps->|<-arguments->| 30 * 31 * where: 32 * 33 * * locals (>= 0) are registers allocated for function's local variables; 34 * * temps (>= 0) are temporary registers that can be allocated by spill-fill 35 * resolver during breaking mov chain cycles; 36 * * arguments (>= 0) are registers allocated for function's arguemnts. 37 * 38 * The size of the frame is fixed (see register allocator for details). 39 * 40 * Locals and temps are allocated densely: if there are more than 0 locals 41 * (or temps) then all registers go strictly in order without "holes". 42 * Space for arguments, however, can contain "holes" (each "hole" corresponds 43 * to an unused argument). 44 * 45 * For locals and temps, it is not guaranteed that their number equals to the 46 * number of registers used in the unoptimized bytecode. Total number of arguments 47 * is of course constant throughout optimization pipeline. 48 * 49 * This pass solves following problems: 50 * 51 * 1. Shift temps and arguments to the left adjacently to locals, and reserve the 52 * range temps from free registers if necessary, consistently changing all register 53 * numbers for affected instructions. After this is done, the virtual frame looks like: 54 * 55 * |<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->| 56 * 57 * 2. Check if IR contains any instructions that can encode only [r0, r15] with 58 * actual inputs r16+. If such instructions are found, some lower registers 59 * (starting with r0, but no more than MAX_NUM_INPUTS registers) are reserved as 60 * temps and corresponding spills are emitted where needed. After this is done, 61 * the virtual frame looks like: 62 * 63 * |<-Renumber temps->|<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->| 64 * 65 * After the pass: 66 * 67 * * Usage mask is updated accordingly. 68 * * num_locals + num_temps + num_max_range_input is stored to graph with SetStackSlotsCount 69 */ 70 71 namespace panda::bytecodeopt { 72 struct RegContent { 73 compiler::Register reg; 74 compiler::DataType::Type type; 75 RegContentRegContent76 RegContent() : reg(compiler::INVALID_REG), type(compiler::DataType::NO_TYPE) {} RegContentRegContent77 RegContent(compiler::Register r, compiler::DataType::Type t) : reg(r), type(t) {} 78 79 bool operator==(const RegContent &other) const 80 { 81 return reg == other.reg && type == other.type; 82 } 83 84 bool operator!=(const RegContent &other) const 85 { 86 return !(*this == other); 87 } 88 }; 89 using RegContentMap = ArenaUnorderedMap<compiler::Register, RegContent>; 90 using RegContentVec = ArenaVector<std::pair<compiler::Register, RegContent>>; 91 92 enum class RegEncoderState { IDLE, RENUMBER_ARGS, RESERVE_TEMPS, INSERT_SPILLS }; 93 94 using panda::compiler::BasicBlock; 95 using panda::compiler::Inst; 96 using panda::compiler::Opcode; 97 98 class RegEncoder : public compiler::Optimization, public compiler::GraphVisitor { 99 public: RegEncoder(compiler::Graph * graph)100 explicit RegEncoder(compiler::Graph *graph) 101 : compiler::Optimization(graph), num_temps_(0), state_(RegEncoderState::IDLE) 102 { 103 } 104 105 ~RegEncoder() override = default; 106 107 // true: Pass applied successfully 108 // false: Unable to apply pass because of insufficient number of registers 109 bool RunImpl() override; 110 GetPassName()111 const char *GetPassName() const override 112 { 113 return "RegEncoder"; 114 } 115 116 void Check4Width(compiler::Inst *inst); 117 void Check8Width(compiler::Inst *inst); 118 GetBlocksToVisit()119 const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override 120 { 121 return GetGraph()->GetBlocksRPO(); 122 } 123 124 static void VisitIntrinsic(GraphVisitor *visitor, Inst *inst); 125 126 #include "generated/reg_encoder_visitors.inc" 127 128 #include "generated/check_width.h" 129 VisitDefault(Inst * inst)130 void VisitDefault(Inst *inst) override {} 131 132 #include "compiler/optimizer/ir/visitor.inc" 133 134 private: 135 void CalculateNumNeededTemps(); 136 void CalculateNumNeededTempsForInst(compiler::Inst *inst); 137 void RenumberRegs(const compiler::Register min_reg, const compiler::Register delta); 138 bool RenumberArgRegs(); 139 void InsertSpills(); 140 void InsertSpillsForInst(compiler::Inst *inst); 141 void InsertSpillsForDynInputsInst(compiler::Inst *inst); 142 GetNumArgsFromGraph()143 compiler::Register GetNumArgsFromGraph() const 144 { 145 auto adapter = GetGraph()->GetRuntime(); 146 auto method = GetGraph()->GetMethod(); 147 auto num_args = adapter->GetMethodTotalArgumentsCount(method); 148 ASSERT(num_args <= compiler::VIRTUAL_FRAME_SIZE); 149 return num_args; 150 } 151 GetNumLocalsFromGraph()152 compiler::Register GetNumLocalsFromGraph() const 153 { 154 auto num_locals = GetGraph()->GetStackSlotsCount(); 155 ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE); 156 return num_locals; 157 } 158 GetNumRegs()159 compiler::Register GetNumRegs() const 160 { 161 auto num_regs = GetNumLocalsFromGraph() + GetNumArgsFromGraph(); 162 ASSERT(num_regs <= compiler::VIRTUAL_FRAME_SIZE); 163 return num_regs; 164 } 165 SaveNumLocalsToGraph(uint32_t num_locals)166 void SaveNumLocalsToGraph(uint32_t num_locals) const 167 { 168 ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE); 169 GetGraph()->SetStackSlotsCount(num_locals); 170 } 171 GetStatus()172 bool GetStatus() const 173 { 174 return success_; 175 } 176 CallHelper(compiler::GraphVisitor * visitor,Inst * inst)177 static void CallHelper(compiler::GraphVisitor *visitor, Inst *inst) 178 { 179 auto *re = static_cast<RegEncoder *>(visitor); 180 re->Check4Width(inst); 181 } 182 183 private: 184 compiler::Register num_temps_; 185 RegEncoderState state_; 186 compiler::Register num_max_range_input_ {0}; 187 compiler::Register range_temps_start_ {0}; 188 189 bool success_ {true}; 190 }; 191 } // namespace panda::bytecodeopt 192 193 #endif // BYTECODE_OPTIMIZER_REG_ENCODER_H 194