• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef BYTECODE_OPTIMIZER_REG_ENCODER_H
17 #define BYTECODE_OPTIMIZER_REG_ENCODER_H
18 
19 #include "compiler/optimizer/ir/graph.h"
20 #include "compiler/optimizer/pass.h"
21 #include "compiler/optimizer/ir/inst.h"
22 #include "compiler/optimizer/ir/graph_visitor.h"
23 
24 /*
25  * Register Encoder.
26  *
27  * After compiler's register allocation layout of the virtual frame looks like:
28  *
29  * |<-locals->|<-free registers->|<-RegAlloc temps->|<-arguments->|
30  *
31  * where:
32  *
33  * * locals (>= 0) are registers allocated for function's local variables;
34  * * temps (>= 0) are temporary registers that can be allocated by spill-fill
35  *   resolver during breaking mov chain cycles;
36  * * arguments (>= 0) are registers allocated for function's arguemnts.
37  *
38  * The size of the frame is fixed (see register allocator for details).
39  *
40  * Locals and temps are allocated densely: if there are more than 0 locals
41  * (or temps) then all registers go strictly in order without "holes".
42  * Space for arguments, however, can contain "holes" (each "hole" corresponds
43  * to an unused argument).
44  *
45  * For locals and temps, it is not guaranteed that their number equals to the
46  * number of registers used in the unoptimized bytecode. Total number of arguments
47  * is of course constant throughout optimization pipeline.
48  *
49  * This pass solves following problems:
50  *
51  * 1. Shift temps and arguments to the left adjacently to locals, and reserve the
52  *    range temps from free registers if necessary, consistently changing all register
53  *    numbers for affected instructions. After this is done, the virtual frame looks like:
54  *
55  * |<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
56  *
57  * 2. Check if IR contains any instructions that can encode only [r0, r15] with
58  *    actual inputs r16+. If such instructions are found, some lower registers
59  *    (starting with r0, but no more than MAX_NUM_INPUTS registers) are reserved as
60  *    temps and corresponding spills are emitted where needed. After this is done,
61  *    the virtual frame looks like:
62  *
63  * |<-Renumber temps->|<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
64  *
65  * After the pass:
66  *
67  *  * Usage mask is updated accordingly.
68  *  * num_locals + num_temps + num_max_range_input is stored to graph with SetStackSlotsCount
69  */
70 
71 namespace panda::bytecodeopt {
72 struct RegContent {
73     compiler::Register reg;
74     compiler::DataType::Type type;
75 
RegContentRegContent76     RegContent() : reg(compiler::INVALID_REG), type(compiler::DataType::NO_TYPE) {}
RegContentRegContent77     RegContent(compiler::Register r, compiler::DataType::Type t) : reg(r), type(t) {}
78 
79     bool operator==(const RegContent &other) const
80     {
81         return reg == other.reg && type == other.type;
82     }
83 
84     bool operator!=(const RegContent &other) const
85     {
86         return !(*this == other);
87     }
88 };
89 using RegContentMap = ArenaUnorderedMap<compiler::Register, RegContent>;
90 using RegContentVec = ArenaVector<std::pair<compiler::Register, RegContent>>;
91 
92 enum class RegEncoderState { IDLE, RENUMBER_ARGS, RESERVE_TEMPS, INSERT_SPILLS };
93 
94 using panda::compiler::BasicBlock;
95 using panda::compiler::Inst;
96 using panda::compiler::Opcode;
97 
98 class RegEncoder : public compiler::Optimization, public compiler::GraphVisitor {
99 public:
RegEncoder(compiler::Graph * graph)100     explicit RegEncoder(compiler::Graph *graph)
101         : compiler::Optimization(graph), num_temps_(0), state_(RegEncoderState::IDLE)
102     {
103     }
104 
105     ~RegEncoder() override = default;
106 
107     // true: Pass applied successfully
108     // false: Unable to apply pass because of insufficient number of registers
109     bool RunImpl() override;
110 
GetPassName()111     const char *GetPassName() const override
112     {
113         return "RegEncoder";
114     }
115 
116     void Check4Width(compiler::Inst *inst);
117     void Check8Width(compiler::Inst *inst);
118 
GetBlocksToVisit()119     const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override
120     {
121         return GetGraph()->GetBlocksRPO();
122     }
123 
124     static void VisitIntrinsic(GraphVisitor *visitor, Inst *inst);
125 
126 #include "generated/reg_encoder_visitors.inc"
127 
128 #include "generated/check_width.h"
129 
VisitDefault(Inst * inst)130     void VisitDefault(Inst *inst) override {}
131 
132 #include "compiler/optimizer/ir/visitor.inc"
133 
134 private:
135     void CalculateNumNeededTemps();
136     void CalculateNumNeededTempsForInst(compiler::Inst *inst);
137     void RenumberRegs(const compiler::Register min_reg, const compiler::Register delta);
138     bool RenumberArgRegs();
139     void InsertSpills();
140     void InsertSpillsForInst(compiler::Inst *inst);
141     void InsertSpillsForDynInputsInst(compiler::Inst *inst);
142 
GetNumArgsFromGraph()143     compiler::Register GetNumArgsFromGraph() const
144     {
145         auto adapter = GetGraph()->GetRuntime();
146         auto method = GetGraph()->GetMethod();
147         auto num_args = adapter->GetMethodTotalArgumentsCount(method);
148         ASSERT(num_args <= compiler::VIRTUAL_FRAME_SIZE);
149         return num_args;
150     }
151 
GetNumLocalsFromGraph()152     compiler::Register GetNumLocalsFromGraph() const
153     {
154         auto num_locals = GetGraph()->GetStackSlotsCount();
155         ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE);
156         return num_locals;
157     }
158 
GetNumRegs()159     compiler::Register GetNumRegs() const
160     {
161         auto num_regs = GetNumLocalsFromGraph() + GetNumArgsFromGraph();
162         ASSERT(num_regs <= compiler::VIRTUAL_FRAME_SIZE);
163         return num_regs;
164     }
165 
SaveNumLocalsToGraph(uint32_t num_locals)166     void SaveNumLocalsToGraph(uint32_t num_locals) const
167     {
168         ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE);
169         GetGraph()->SetStackSlotsCount(num_locals);
170     }
171 
GetStatus()172     bool GetStatus() const
173     {
174         return success_;
175     }
176 
CallHelper(compiler::GraphVisitor * visitor,Inst * inst)177     static void CallHelper(compiler::GraphVisitor *visitor, Inst *inst)
178     {
179         auto *re = static_cast<RegEncoder *>(visitor);
180         re->Check4Width(inst);
181     }
182 
183 private:
184     compiler::Register num_temps_;
185     RegEncoderState state_;
186     compiler::Register num_max_range_input_ {0};
187     compiler::Register range_temps_start_ {0};
188 
189     bool success_ {true};
190 };
191 }  // namespace panda::bytecodeopt
192 
193 #endif  // BYTECODE_OPTIMIZER_REG_ENCODER_H
194