• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef PANDA_REG_ENCODER_H
17 #define PANDA_REG_ENCODER_H
18 
19 #include "compiler/optimizer/ir/graph.h"
20 #include "compiler/optimizer/pass.h"
21 #include "compiler/optimizer/ir/inst.h"
22 #include "compiler/optimizer/ir/graph_visitor.h"
23 
24 /*
25  * Register Encoder.
26  *
27  * After compiler's register allocation layout of the virtual frame looks like:
28  *
29  * |<-locals->|<-free registers->|<-RegAlloc temps->|<-arguments->|
30  *
31  * where:
32  *
33  * * locals (>= 0) are registers allocated for function's local variables;
34  * * temps (>= 0) are temporary registers that can be allocated by spill-fill
35  *   resolver during breaking mov chain cycles;
36  * * arguments (>= 0) are registers allocated for function's arguemnts.
37  *
38  * The size of the frame is fixed (see register allocator for details).
39  *
40  * Locals and temps are allocated densely: if there are more than 0 locals
41  * (or temps) then all registers go strictly in order without "holes".
42  * Space for arguments, however, can contain "holes" (each "hole" corresponds
43  * to an unused argument).
44  *
45  * For locals and temps, it is not guaranteed that their number equals to the
46  * number of registers used in the unoptimized bytecode. Total number of arguments
47  * is of course constant throughout optimization pipeline.
48  *
49  * This pass solves following problems:
50  *
51  * 1. Shift temps and arguments to the left adjacently to locals, and reserve the
52  *    range temps from free registers if necessary, consistently changing all register
53  *    numbers for affected instructions. After this is done, the virtual frame looks like:
54  *
55  * |<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
56  *
57  * 2. Check if IR contains any instructions that can encode only [r0, r15] with
58  *    actual inputs r16+. If such instructions are found, some lower registers
59  *    (starting with r0, but no more than MAX_NUM_INPUTS registers) are reserved as
60  *    temps and corresponding spills are emitted where needed. After this is done,
61  *    the virtual frame looks like:
62  *
63  * |<-Renumber temps->|<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
64  *
65  * After the pass:
66  *
67  *  * Usage mask is updated accordingly.
68  *  * num_locals + num_temps + num_max_range_input is stored to graph with SetStackSlotsCount
69  */
70 
71 namespace ark::bytecodeopt {
72 struct RegContent {
73     // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes)
74     compiler::Register reg;
75     // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes)
76     compiler::DataType::Type type;
77 
RegContentRegContent78     RegContent() : reg(compiler::GetInvalidReg()), type(compiler::DataType::NO_TYPE) {}
RegContentRegContent79     RegContent(compiler::Register r, compiler::DataType::Type t) : reg(r), type(t) {}
80 
81     bool operator==(const RegContent &other) const
82     {
83         return reg == other.reg && type == other.type;
84     }
85 
86     bool operator!=(const RegContent &other) const
87     {
88         return !(*this == other);
89     }
90 };
91 using RegContentMap = ArenaUnorderedMap<compiler::Register, RegContent>;
92 using RegContentVec = ArenaVector<std::pair<compiler::Register, RegContent>>;
93 
94 enum class RegEncoderState { IDLE, RENUMBER_ARGS, RESERVE_TEMPS, INSERT_SPILLS };
95 
96 using ark::compiler::BasicBlock;
97 using ark::compiler::Inst;
98 using ark::compiler::Opcode;
99 
100 // NOLINTNEXTLINE(fuchsia-multiple-inheritance)
101 class PANDA_PUBLIC_API RegEncoder : public compiler::Optimization, public compiler::GraphVisitor {
102 public:
RegEncoder(compiler::Graph * graph)103     explicit RegEncoder(compiler::Graph *graph) : compiler::Optimization(graph) {}
104 
105     ~RegEncoder() override = default;
106     NO_COPY_SEMANTIC(RegEncoder);
107     NO_MOVE_SEMANTIC(RegEncoder);
108 
109     // true: Pass applied successfully
110     // false: Unable to apply pass because of insufficient number of registers
111     bool RunImpl() override;
112 
GetPassName()113     const char *GetPassName() const override
114     {
115         return "RegEncoder";
116     }
117 
118     void Check4Width(compiler::Inst *inst);
119     void Check8Width(compiler::Inst *inst);
120 
GetBlocksToVisit()121     const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override
122     {
123         return GetGraph()->GetBlocksRPO();
124     }
125 
126     static void VisitSpillFill([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst);
127     static void VisitConstant([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst);
128     static void VisitCatchPhi([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst);
129     static void VisitCallStatic(GraphVisitor *v, Inst *inst);
130     static void VisitCallVirtual(GraphVisitor *v, Inst *inst);
131     static void VisitInitObject(GraphVisitor *v, Inst *inst);
132     static void VisitIntrinsic(GraphVisitor *v, Inst *inst);
133     static void VisitLoadObject(GraphVisitor *v, Inst *instBase);
134     static void VisitLoadStatic(GraphVisitor *v, Inst *instBase);
135     static void VisitStoreObject(GraphVisitor *v, Inst *instBase);
136     static void VisitStoreStatic(GraphVisitor *v, Inst *instBase);
137     static void VisitLoadString([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst);
138     static void VisitReturn([[maybe_unused]] GraphVisitor *v, [[maybe_unused]] Inst *inst);
139 
140     static void VisitCastValueToAnyType(GraphVisitor *v, Inst *inst);
141 
142 #include "generated/reg_encoder_visitors.inc"
143 
144 #include "generated/check_width.h"
145 
VisitDefault(Inst * inst)146     void VisitDefault(Inst *inst) override
147     {
148         LOG(ERROR, BYTECODE_OPTIMIZER) << "Opcode " << compiler::GetOpcodeString(inst->GetOpcode())
149                                        << " not yet implemented in RegEncoder";
150         success_ = false;
151     }
152 
153 #include "compiler/optimizer/ir/visitor.inc"
154 
155 private:
156     void CalculateNumNeededTemps();
157     void CalculateNumNeededTempsForInst(compiler::Inst *inst);
158     void RenumberRegs(compiler::Register minReg, compiler::Register delta);
159     bool RenumberArgRegs();
160     void InsertSpills();
161     void InsertSpillsForInst(compiler::Inst *inst);
162     void InsertSpillsForDynInputsInst(compiler::Inst *inst);
163     bool InsertSpillsForDynRangeInst(compiler::Inst *inst, size_t nargs, size_t start);
164     size_t GetStartInputIndex(compiler::Inst *inst);
165     void RenumberDstReg(compiler::Inst *inst, size_t temp, size_t rangeTemp = 0, bool largeTemp = false);
166 
GetNumArgsFromGraph()167     compiler::Register GetNumArgsFromGraph() const
168     {
169         auto adapter = GetGraph()->GetRuntime();
170         auto method = GetGraph()->GetMethod();
171         auto numArgs = adapter->GetMethodTotalArgumentsCount(method);
172         ASSERT(numArgs <= compiler::GetFrameSize());
173         return numArgs;
174     }
175 
GetNumLocalsFromGraph()176     compiler::Register GetNumLocalsFromGraph() const
177     {
178         auto numLocals = GetGraph()->GetStackSlotsCount();
179         ASSERT(numLocals <= compiler::GetFrameSize());
180         return numLocals;
181     }
182 
GetNumRegs()183     compiler::Register GetNumRegs() const
184     {
185         auto numRegs = GetNumLocalsFromGraph() + GetNumArgsFromGraph();
186         ASSERT(numRegs <= compiler::GetFrameSize());
187         return numRegs;
188     }
189 
SaveNumLocalsToGraph(uint32_t numLocals)190     void SaveNumLocalsToGraph(uint32_t numLocals) const
191     {
192         ASSERT(numLocals <= compiler::GetFrameSize());
193         GetGraph()->SetStackSlotsCount(numLocals);
194     }
195 
GetStatus()196     bool GetStatus() const
197     {
198         return success_;
199     }
200 
CheckStatus()201     bool CheckStatus() const
202     {
203         return !GetGraph()->IsAbcKit() || success_;
204     }
205 
CallHelper(compiler::GraphVisitor * visitor,Inst * inst)206     static void CallHelper(compiler::GraphVisitor *visitor, Inst *inst)
207     {
208         auto *re = static_cast<RegEncoder *>(visitor);
209         re->Check4Width(inst);
210     }
211 
212 private:
213     compiler::Register numTemps_ {0};
214     RegEncoderState state_ {RegEncoderState::IDLE};
215     compiler::Register numMaxRangeInput_ {0};
216     compiler::Register rangeTempsStart_ {0};
217     compiler::Register numChangedWidth_ {0};
218 
219     bool success_ {true};
220 };
221 }  // namespace ark::bytecodeopt
222 
223 #endif  // PANDA_REG_ENCODER_H
224