• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "bytecode_emitter.h"
17 #include <bytecode_instruction-inl.h>
18 #include <macros.h>
19 #include <utils/bit_utils.h>
20 #include <utils/span.h>
21 
22 namespace ark {
23 
24 using Opcode = BytecodeInstruction::Opcode;
25 using Format = BytecodeInstruction::Format;
26 using BitImmSize = BytecodeEmitter::BitImmSize;
27 
GetBitLengthUnsigned(uint32_t val)28 static inline constexpr BitImmSize GetBitLengthUnsigned(uint32_t val)
29 {
30     constexpr size_t BIT_4 = 4;
31     constexpr size_t BIT_8 = 8;
32 
33     auto bitlen = MinimumBitsToStore(val);
34     if (bitlen <= BIT_4) {
35         return BitImmSize::BITSIZE_4;
36     }
37     if (bitlen <= BIT_8) {
38         return BitImmSize::BITSIZE_8;
39     }
40     return BitImmSize::BITSIZE_16;
41 }
42 
GetBitLengthSigned(int32_t val)43 static inline constexpr BitImmSize GetBitLengthSigned(int32_t val)
44 {
45     constexpr int32_t INT4T_MIN = -8;
46     constexpr int32_t INT4T_MAX = 7;
47     constexpr int32_t INT8T_MIN = std::numeric_limits<int8_t>::min();
48     constexpr int32_t INT8T_MAX = std::numeric_limits<int8_t>::max();
49     constexpr int32_t INT16T_MIN = std::numeric_limits<int16_t>::min();
50     constexpr int32_t INT16T_MAX = std::numeric_limits<int16_t>::max();
51     if (INT4T_MIN <= val && val <= INT4T_MAX) {
52         return BitImmSize::BITSIZE_4;
53     }
54     if (INT8T_MIN <= val && val <= INT8T_MAX) {
55         return BitImmSize::BITSIZE_8;
56     }
57     if (INT16T_MIN <= val && val <= INT16T_MAX) {
58         return BitImmSize::BITSIZE_16;
59     }
60     return BitImmSize::BITSIZE_32;
61 }
62 
EmitImpl(Span<uint8_t> buf,Span<const uint8_t> offsets)63 static inline void EmitImpl([[maybe_unused]] Span<uint8_t> buf, [[maybe_unused]] Span<const uint8_t> offsets) {}
64 
/*
 * Encodes `arg` into `buf` (little-endian) at the bit position given by
 * offsets[0]; the operand's bit width is offsets[1] - offsets[0].  Then
 * recurses with offsets shifted by one to encode the remaining operands.
 * The offsets table therefore must contain one more entry than there are
 * operands to emit.
 */
template <typename Type, typename... Types>
static void EmitImpl(Span<uint8_t> buf, Span<const uint8_t> offsets, Type arg, Types... args)
{
    static constexpr uint8_t BYTEMASK = 0xFF;
    static constexpr uint8_t BITMASK_4 = 0xF;
    static constexpr size_t BIT_4 = 4;
    static constexpr size_t BIT_8 = 8;
    static constexpr size_t BIT_16 = 16;
    static constexpr size_t BIT_32 = 32;
    static constexpr size_t BIT_64 = 64;

    uint8_t offset = offsets[0];
    size_t bitlen = offsets[1] - offsets[0];
    size_t byteOffset = offset / BIT_8;
    size_t bitOffset = offset % BIT_8;
    switch (bitlen) {
        case BIT_4: {
            // 4-bit fields are OR-ed into place so two nibbles can share one
            // byte (bitOffset selects low or high nibble); the byte is assumed
            // to be zero-initialized beforehand.
            auto val = static_cast<uint8_t>(arg);
            buf[byteOffset] |= static_cast<uint8_t>(static_cast<uint8_t>(val & BITMASK_4) << bitOffset);
            break;
        }
        case BIT_8: {
            auto val = static_cast<uint8_t>(arg);
            buf[byteOffset] = val;
            break;
        }
        case BIT_16: {
            // Little-endian: low byte first.
            auto val = static_cast<uint16_t>(arg);
            buf[byteOffset] = val & BYTEMASK;
            buf[byteOffset + 1] = val >> BIT_8;
            break;
        }
        case BIT_32: {
            auto val = static_cast<uint32_t>(arg);
            for (size_t i = 0; i < sizeof(uint32_t); i++) {
                buf[byteOffset + i] = (val >> (i * BIT_8)) & BYTEMASK;
            }
            break;
        }
        case BIT_64: {
            auto val = static_cast<uint64_t>(arg);
            for (size_t i = 0; i < sizeof(uint64_t); i++) {
                buf[byteOffset + i] = (val >> (i * BIT_8)) & BYTEMASK;
            }
            break;
        }
        default: {
            // Operand widths other than 4/8/16/32/64 bits are never produced
            // by the format tables.
            UNREACHABLE();
            break;
        }
    }
    EmitImpl(buf, offsets.SubSpan(1), args...);
}
118 
// Emits a single instruction with the given FORMAT through output iterator
// `out`; the definition is presumably provided by the generated
// bytecode_emitter_gen.h included at the end of this file.
template <Format FORMAT, typename It, typename... Types>
static size_t Emit(It out, Types... args);
121 
// Binds `label` to the current bytecode position: stores pc_ as the label's
// target and registers the label in targets_ so it can be shifted when extra
// bytes are later inserted (see UpdateLabelTargets).
void BytecodeEmitter::Bind(const Label &label)
{
    *label.pc_ = pc_;
    targets_.insert(label);
}
127 
Build(std::vector<uint8_t> * output)128 BytecodeEmitter::ErrorCode BytecodeEmitter::Build(std::vector<uint8_t> *output)
129 {
130     ErrorCode res = CheckLabels();
131     if (res != ErrorCode::SUCCESS) {
132         return res;
133     }
134     res = ReserveSpaceForOffsets();
135     if (res != ErrorCode::SUCCESS) {
136         return res;
137     }
138     res = UpdateBranches();
139     if (res != ErrorCode::SUCCESS) {
140         return res;
141     }
142     *output = bytecode_;
143     return ErrorCode::SUCCESS;
144 }
145 
146 /*
147  * NB! All conditional jumps with displacements not fitting into imm16
148  * are transformed into two instructions:
149  * jcc far   # cc is any condiitonal code
150  *      =>
151  * jCC next  # CC is inverted cc
152  * jmp far
153  * next:     # This label is inserted just after previous instruction.
154  */
ReserveSpaceForOffsets()155 BytecodeEmitter::ErrorCode BytecodeEmitter::ReserveSpaceForOffsets()
156 {
157     uint32_t bias = 0;
158     std::map<uint32_t, Label> newBranches;
159     auto it = branches_.begin();
160     while (it != branches_.end()) {
161         uint32_t insnPc = it->first + bias;
162         auto label = it->second;
163 
164         BytecodeInstruction insn(&bytecode_[insnPc]);
165         auto opcode = insn.GetOpcode();
166         const auto encodedImmSize = GetBitImmSizeByOpcode(opcode);
167         const auto realImmSize = GetBitLengthSigned(EstimateMaxDistance(insnPc, label.GetPc(), bias));
168 
169         auto newTarget = insnPc;
170         size_t extraBytes = 0;
171 
172         if (realImmSize > encodedImmSize) {
173             auto res = DoReserveSpaceForOffset(insn, insnPc, realImmSize, &extraBytes, &newTarget);
174             if (res != ErrorCode::SUCCESS) {
175                 return res;
176             }
177         }
178 
179         newBranches.insert(std::make_pair(newTarget, label));
180         if (extraBytes > 0) {
181             bias += extraBytes;
182             UpdateLabelTargets(insnPc, extraBytes);
183         }
184         it = branches_.erase(it);
185     }
186     branches_ = std::move(newBranches);
187     return ErrorCode::SUCCESS;
188 }
189 
// Widens the branch at `insnPc` so its immediate can hold `expectedImmSize`
// bits.  If a wider variant of the same jump exists, the instruction is
// rewritten in place with padding bytes inserted after it.  Otherwise the
// condition is inverted and a JMP_IMM32 to the far target is inserted right
// after it (the jcc-far => jCC-next + jmp-far transformation described above
// ReserveSpaceForOffsets); in that case *targetPtr is moved to the inserted
// JMP so the branch map tracks the far jump.  *extraBytesPtr receives the
// number of bytes inserted.
BytecodeEmitter::ErrorCode BytecodeEmitter::DoReserveSpaceForOffset(const BytecodeInstruction &insn, uint32_t insnPc,
                                                                    BitImmSize expectedImmSize, size_t *extraBytesPtr,
                                                                    uint32_t *targetPtr)
{
    auto opcode = insn.GetOpcode();
    const auto insnSize = GetSizeByOpcode(opcode);

    auto updOp = GetSuitableJump(opcode, expectedImmSize);
    using DifferenceTypeT = decltype(bytecode_)::iterator::difference_type;
    if (updOp != Opcode::LAST) {
        // A same-condition jump with a wide-enough immediate exists: pad the
        // instruction out to the wider encoding's size.
        *extraBytesPtr = GetSizeByOpcode(updOp) - insnSize;
        bytecode_.insert(bytecode_.begin() + static_cast<DifferenceTypeT>(insnPc + insnSize), *extraBytesPtr, 0);
    } else {
        // No wide variant: make room for a full JMP_IMM32 after the original.
        *extraBytesPtr = GetSizeByOpcode(Opcode::JMP_IMM32);
        bytecode_.insert(bytecode_.begin() + static_cast<DifferenceTypeT>(insnPc + insnSize), *extraBytesPtr, 0);

        updOp = RevertConditionCode(opcode);
        if (updOp == Opcode::LAST) {
            UNREACHABLE();  // no revcc and no far opcode
            return ErrorCode::INTERNAL_ERROR;
        }
        ASSERT(insnPc < bytecode_.size());
        ASSERT(insnPc + insnSize <= bytecode_.size());
        // Point the (inverted) conditional jump just past the inserted JMP.
        UpdateBranchOffs(&bytecode_[insnPc], static_cast<int32_t>(insnSize + GetSizeByOpcode(Opcode::JMP_IMM32)));
        *targetPtr = insnPc + insnSize;
        // Placeholder offset 0; patched later by UpdateBranches.
        Emit<Format::IMM32>(bytecode_.begin() + *targetPtr, Opcode::JMP_IMM32, 0);
    }
    // Rewrite the opcode itself; prefixed opcodes need the two-byte form.
    if (BytecodeInstruction(reinterpret_cast<uint8_t *>(&updOp)).IsPrefixed()) {
        Emit<BytecodeInstruction::Format::PREF_NONE>(bytecode_.begin() + insnPc, updOp);
    } else {
        Emit<BytecodeInstruction::Format::NONE>(bytecode_.begin() + insnPc, updOp);
    }
    return ErrorCode::SUCCESS;
}
224 
UpdateBranches()225 BytecodeEmitter::ErrorCode BytecodeEmitter::UpdateBranches()
226 {
227     for (std::pair<const uint32_t, Label> &branch : branches_) {
228         uint32_t insnPc = branch.first;
229         Label label = branch.second;
230         auto offset = static_cast<int32_t>(label.GetPc()) - static_cast<int32_t>(insnPc);
231         UpdateBranchOffs(&bytecode_[insnPc], offset);
232     }
233     return ErrorCode::SUCCESS;
234 }
235 
UpdateLabelTargets(uint32_t pc,size_t bias)236 void BytecodeEmitter::UpdateLabelTargets(uint32_t pc, size_t bias)
237 {
238     pcList_.push_front(pc);
239     Label fake(pcList_.begin());
240     std::list<Label> updatedLabels;
241     auto it = targets_.upper_bound(fake);
242     while (it != targets_.end()) {
243         Label label = *it;
244         it = targets_.erase(it);
245         *label.pc_ += bias;
246         updatedLabels.push_back(label);
247     }
248     targets_.insert(updatedLabels.begin(), updatedLabels.end());
249     pcList_.pop_front();
250 }
251 
// Conservatively estimates the branch displacement from `insnPc` to
// `targetPc` (both already biased by earlier insertions), assuming every
// not-yet-processed branch lying between them grows to its longest possible
// encoding.  `bias` converts between current pcs and the original-pc keys
// still stored in branches_.  Returns a signed distance (negative for
// backward jumps), or 0 for a branch to itself.
int32_t BytecodeEmitter::EstimateMaxDistance(uint32_t insnPc, uint32_t targetPc, uint32_t bias) const
{
    int32_t distance = 0;
    uint32_t endPc = 0;
    std::map<uint32_t, Label>::const_iterator it;
    if (targetPc > insnPc) {
        // Forward jump: scan branches in [insnPc, targetPc).
        it = branches_.lower_bound(insnPc - bias);
        distance = static_cast<int32_t>(targetPc - insnPc);
        endPc = targetPc - bias;
    } else if (targetPc < insnPc) {
        // Backward jump: scan branches in [targetPc, insnPc); distance is
        // negative (wrap-around of the unsigned subtraction is intended).
        it = branches_.lower_bound(targetPc - bias);
        distance = static_cast<int32_t>(targetPc - insnPc);
        endPc = insnPc - bias;
    } else {
        // Do we support branch to itself?
        return 0;
    }

    // Pessimistically add the growth each intermediate jump could incur.
    while (it != branches_.end() && it->first < endPc) {
        auto insn = BytecodeInstruction(&bytecode_[it->first + bias]);
        auto longest = GetSizeByOpcode(GetLongestJump(insn.GetOpcode()));
        distance += static_cast<int32_t>(longest - insn.GetSize());
        ++it;
    }
    return distance;
}
278 
CheckLabels()279 BytecodeEmitter::ErrorCode BytecodeEmitter::CheckLabels()
280 {
281     for (const std::pair<const uint32_t, Label> &branch : branches_) {
282         const Label &label = branch.second;
283         if (targets_.find(label) == targets_.end()) {
284             return ErrorCode::UNBOUND_LABELS;
285         }
286     }
287     return ErrorCode::SUCCESS;
288 }
289 
290 #include <bytecode_emitter_gen.h>
291 
292 }  // namespace ark
293