/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_

#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "arch/riscv64/instruction_set_features_riscv64.h"
#include "base/arena_containers.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "managed_register_riscv64.h"
#include "utils/assembler.h"
#include "utils/label.h"

namespace art HIDDEN {
namespace riscv64 {

class ScratchRegisterScope;

static constexpr size_t kRiscv64HalfwordSize = 2;
static constexpr size_t kRiscv64WordSize = 4;
static constexpr size_t kRiscv64DoublewordSize = 8;
static constexpr size_t kRiscv64FloatRegSizeInBytes = 8;

// The `Riscv64Extension` enumeration is used for restricting the instructions that the assembler
// can use. Some restrictions are checked only in debug mode (for example, load and store
// instructions check `kLoadStore`), other restrictions are checked at run time and affect the
// emitted code (for example, the `SextW()` pseudo-instruction selects between an implementation
// from "Zcb", "Zbb" and a two-instruction sequence from the basic instruction set).
enum class Riscv64Extension : uint32_t {
  kLoadStore,  // Pseudo-extension encompassing all loads and stores. Used to check that
               // we do not have loads and stores in the middle of an LR/SC sequence.
  kZifencei,
  kM,
  kA,
  kZicsr,
  kF,
  kD,
  kZba,
  kZbb,
  kZbs,  // TODO(riscv64): Implement "Zbs" instructions.
  kV,
  kZca,  // "C" extension instructions except floating point loads/stores.
  kZcd,  // "C" extension double loads/stores.
         // Note: RV64 cannot implement Zcf ("C" extension float loads/stores).
  kZcb,  // Simple 16-bit operations not present in the original "C" extension.

  kLast = kZcb
};

using Riscv64ExtensionMask = uint32_t;

constexpr Riscv64ExtensionMask Riscv64ExtensionBit(Riscv64Extension ext) {
  return 1u << enum_cast<>(ext);
}

constexpr Riscv64ExtensionMask kRiscv64AllExtensionsMask =
    MaxInt<Riscv64ExtensionMask>(enum_cast<>(Riscv64Extension::kLast) + 1);

// Extensions allowed in an LR/SC sequence (between the LR and SC).
constexpr Riscv64ExtensionMask kRiscv64LrScSequenceExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca);
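
// For illustration: an extension mask is composed by OR-ing individual extension bits.
// A hedged sketch (`kZbaZbbMask` is a hypothetical name, not part of the API) of a mask
// allowing only loads/stores plus the scalar bit-manipulation extensions:
//
//   constexpr Riscv64ExtensionMask kZbaZbbMask =
//       Riscv64ExtensionBit(Riscv64Extension::kLoadStore) |
//       Riscv64ExtensionBit(Riscv64Extension::kZba) |
//       Riscv64ExtensionBit(Riscv64Extension::kZbb);
//
// Passing such a mask to the `Riscv64Assembler` constructor (declared below) restricts
// code emission to those extensions.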

enum class FPRoundingMode : uint32_t {
  kRNE = 0x0,  // Round to Nearest, ties to Even
  kRTZ = 0x1,  // Round towards Zero
  kRDN = 0x2,  // Round Down (towards −Infinity)
  kRUP = 0x3,  // Round Up (towards +Infinity)
  kRMM = 0x4,  // Round to Nearest, ties to Max Magnitude
  kDYN = 0x7,  // Dynamic rounding mode
  kDefault = kDYN,
  // Some instructions never need to round even though the spec includes the RM field.
  // To simplify testing, emit the RM as 0 by default for these instructions because that's what
  // `clang` does and because `llvm-objdump` fails to disassemble the other rounding modes.
  kIgnored = 0
};
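
// For illustration: explicit rounding modes matter mostly for FP-to-integer conversions
// emitted by the `Riscv64Assembler` instructions declared below. A hedged sketch, where
// round-towards-zero implements a C/Java-style truncating conversion (`__` stands for an
// assembler reference, a convention assumed here):
//
//   __ FCvtWS(T0, FA0, FPRoundingMode::kRTZ);  // t0 = (int32_t)trunc(fa0)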

enum class AqRl : uint32_t {
  kNone = 0x0,
  kRelease = 0x1,
  kAcquire = 0x2,
  kAqRl = kRelease | kAcquire
};

// The type of a fence operation: bit flags combined to form the predecessor and
// successor sets of the FENCE instruction.
enum FenceType {
  kFenceNone = 0,
  kFenceWrite = 1,
  kFenceRead = 2,
  kFenceOutput = 4,
  kFenceInput = 8,
  kFenceDefault = 0xf,
};

// Used to test the values returned by FClassS/FClassD.
enum FPClassMaskType {
  kNegativeInfinity = 0x001,
  kNegativeNormal = 0x002,
  kNegativeSubnormal = 0x004,
  kNegativeZero = 0x008,
  kPositiveZero = 0x010,
  kPositiveSubnormal = 0x020,
  kPositiveNormal = 0x040,
  kPositiveInfinity = 0x080,
  kSignalingNaN = 0x100,
  kQuietNaN = 0x200,
};
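
// For illustration: the mask bits above can be OR-ed to test a whole class of values at
// once. A hedged sketch of a NaN check using `FClassD()` (declared below; register choice
// is arbitrary, `__` stands for an assembler reference):
//
//   __ FClassD(T0, FA0);                         // t0 = classification bit for fa0
//   __ Andi(T0, T0, kSignalingNaN | kQuietNaN);  // t0 != 0 iff fa0 is any NaN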

enum class CSRAddress : uint32_t {
  kVstart = 0x008,     // Vector start position, URW
  kVxsat = 0x009,      // Fixed-Point Saturate Flag, URW
  kVxrm = 0x00A,       // Fixed-Point Rounding Mode, URW
  kReserved1 = 0x00B,  // Reserved for future vector CSRs
  kReserved2 = 0x00C,
  kReserved3 = 0x00D,
  kReserved4 = 0x00E,
  kVcsr = 0x00F,       // Vector control and status register, URW
  kVl = 0xC20,         // Vector length, URO
  kVtype = 0xC21,      // Vector data type register, URO
  kVlenb = 0xC22,      // VLEN/8 (vector register length in bytes), URO
};
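
// For illustration: a read-only CSR such as `kVlenb` can be read with `Csrrs()` (declared
// below) and the zero register as the source, which makes the read side-effect free.
// A hedged sketch:
//
//   __ Csrrs(T0, enum_cast<uint32_t>(CSRAddress::kVlenb), Zero);  // t0 = VLEN / 8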

class Riscv64Label : public Label {
 public:
  Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}

  Riscv64Label(Riscv64Label&& src) noexcept
      // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
      : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}

 private:
  static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();

  uint32_t prev_branch_id_;  // To get distance from preceding branch, if any.

  friend class Riscv64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
};

// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
class Literal {
 public:
  static constexpr size_t kMaxSize = 8;

  Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
    DCHECK_LE(size, Literal::kMaxSize);
    memcpy(data_, data, size);
  }

  template <typename T>
  T GetValue() const {
    DCHECK_EQ(size_, sizeof(T));
    T value;
    memcpy(&value, data_, sizeof(T));
    return value;
  }

  uint32_t GetSize() const { return size_; }

  const uint8_t* GetData() const { return data_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  const uint32_t size_;
  uint8_t data_[kMaxSize];

  DISALLOW_COPY_AND_ASSIGN(Literal);
};
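
// For illustration: a `Literal` wraps the raw bytes of a value no larger than `kMaxSize`.
// A hedged sketch of embedding a 64-bit constant:
//
//   uint64_t value = 0x1234567890abcdefu;
//   Literal literal(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
//   DCHECK_EQ(literal.GetValue<uint64_t>(), value);
//
// The assembler later binds `literal.GetLabel()` to the emitted data so the code can
// reach it with a PC-relative load.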

// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
class JumpTable {
 public:
  explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {}

  size_t GetSize() const { return labels_.size() * sizeof(int32_t); }

  const ArenaVector<Riscv64Label*>& GetData() const { return labels_; }

  Riscv64Label* GetLabel() { return &label_; }

  const Riscv64Label* GetLabel() const { return &label_; }

 private:
  Riscv64Label label_;
  ArenaVector<Riscv64Label*> labels_;

  DISALLOW_COPY_AND_ASSIGN(JumpTable);
};
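
// For illustration: each table entry is a 32-bit offset, hence `GetSize()`. A hedged
// sketch of building a table over case labels `case0`/`case1` (hypothetical names)
// owned by the surrounding arena:
//
//   ArenaVector<Riscv64Label*> targets(allocator->Adapter(kArenaAllocAssembler));
//   targets.push_back(&case0);
//   targets.push_back(&case1);
//   JumpTable table(std::move(targets));  // 2 entries * sizeof(int32_t) bytes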

class Riscv64Assembler final : public Assembler {
 public:
  explicit Riscv64Assembler(ArenaAllocator* allocator,
                            const Riscv64InstructionSetFeatures* instruction_set_features = nullptr)
      : Riscv64Assembler(allocator,
                         instruction_set_features != nullptr
                             ? ConvertExtensions(instruction_set_features)
                             : kRiscv64AllExtensionsMask) {}

  Riscv64Assembler(ArenaAllocator* allocator, Riscv64ExtensionMask enabled_extensions)
      : Assembler(allocator),
        branches_(allocator->Adapter(kArenaAllocAssembler)),
        finalized_(false),
        overwriting_(false),
        overwrite_location_(0),
        literals_(allocator->Adapter(kArenaAllocAssembler)),
        long_literals_(allocator->Adapter(kArenaAllocAssembler)),
        jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0),
        last_old_position_(0),
        last_branch_id_(0),
        enabled_extensions_(enabled_extensions),
        available_scratch_core_registers_((1u << TMP) | (1u << TMP2)),
        available_scratch_fp_registers_(1u << FTMP) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Riscv64Assembler() {
    for (auto& branch : branches_) {
      CHECK(branch.IsResolved());
    }
  }

  size_t CodeSize() const override { return Assembler::CodeSize(); }
  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }

  bool IsExtensionEnabled(Riscv64Extension ext) const {
    return (enabled_extensions_ & Riscv64ExtensionBit(ext)) != 0u;
  }
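
  // For illustration: a hedged sketch of constructing the assembler with an explicit
  // extension mask and querying it (`allocator` is assumed to be a live `ArenaAllocator*`;
  // `mask` is a hypothetical name):
  //
  //   Riscv64ExtensionMask mask =
  //       kRiscv64AllExtensionsMask & ~Riscv64ExtensionBit(Riscv64Extension::kV);
  //   Riscv64Assembler assembler(allocator, mask);
  //   DCHECK(!assembler.IsExtensionEnabled(Riscv64Extension::kV));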

  // According to "The RISC-V Instruction Set Manual"

  // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
  // Note: These take a 20-bit unsigned value to align with the clang assembler for testing,
  // but the value stored in the register shall actually be sign-extended to 64 bits.
  void Lui(XRegister rd, uint32_t imm20);
  void Auipc(XRegister rd, uint32_t imm20);
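
  // For illustration: a hedged sketch of materializing the 32-bit constant 0x12345678
  // with the standard lui+addi idiom (the addi immediate is sign-extended, so the upper
  // part must be pre-adjusted when bit 11 of the constant is set; here it is not):
  //
  //   __ Lui(T0, 0x12345);     // t0 = 0x12345000 (sign-extended to 64 bits)
  //   __ Addi(T0, T0, 0x678);  // t0 = 0x12345678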

  // Jump instructions (RV32I), opcode = 0x67, 0x6f
  void Jal(XRegister rd, int32_t offset);
  void Jalr(XRegister rd, XRegister rs1, int32_t offset);

  // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
  void Beq(XRegister rs1, XRegister rs2, int32_t offset);
  void Bne(XRegister rs1, XRegister rs2, int32_t offset);
  void Blt(XRegister rs1, XRegister rs2, int32_t offset);
  void Bge(XRegister rs1, XRegister rs2, int32_t offset);
  void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
  void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);

  // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
  void Lb(XRegister rd, XRegister rs1, int32_t offset);
  void Lh(XRegister rd, XRegister rs1, int32_t offset);
  void Lw(XRegister rd, XRegister rs1, int32_t offset);
  void Ld(XRegister rd, XRegister rs1, int32_t offset);
  void Lbu(XRegister rd, XRegister rs1, int32_t offset);
  void Lhu(XRegister rd, XRegister rs1, int32_t offset);
  void Lwu(XRegister rd, XRegister rs1, int32_t offset);

  // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
  void Sb(XRegister rs2, XRegister rs1, int32_t offset);
  void Sh(XRegister rs2, XRegister rs1, int32_t offset);
  void Sw(XRegister rs2, XRegister rs1, int32_t offset);
  void Sd(XRegister rs2, XRegister rs1, int32_t offset);

  // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
  void Addi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slti(XRegister rd, XRegister rs1, int32_t imm12);
  void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
  void Xori(XRegister rd, XRegister rs1, int32_t imm12);
  void Ori(XRegister rd, XRegister rs1, int32_t imm12);
  void Andi(XRegister rd, XRegister rs1, int32_t imm12);
  void Slli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srli(XRegister rd, XRegister rs1, int32_t shamt);
  void Srai(XRegister rd, XRegister rs1, int32_t shamt);

  // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sub(XRegister rd, XRegister rs1, XRegister rs2);
  void Slt(XRegister rd, XRegister rs1, XRegister rs2);
  void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
  void Xor(XRegister rd, XRegister rs1, XRegister rs2);
  void Or(XRegister rd, XRegister rs1, XRegister rs2);
  void And(XRegister rd, XRegister rs1, XRegister rs2);
  void Sll(XRegister rd, XRegister rs1, XRegister rs2);
  void Srl(XRegister rd, XRegister rs1, XRegister rs2);
  void Sra(XRegister rd, XRegister rs1, XRegister rs2);

  // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
  void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
  void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
  void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);

  // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
  void Addw(XRegister rd, XRegister rs1, XRegister rs2);
  void Subw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
  void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sraw(XRegister rd, XRegister rs1, XRegister rs2);

  // Environment call and breakpoint (RV32I), opcode = 0x73
  void Ecall();
  void Ebreak();

  // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
  void Fence(uint32_t pred = kFenceDefault, uint32_t succ = kFenceDefault);
  void FenceTso();
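
  // For illustration: the `pred`/`succ` arguments are `FenceType` bit sets. A hedged
  // sketch of release/acquire fences built from them:
  //
  //   __ Fence(kFenceRead | kFenceWrite, kFenceWrite);  // fence rw,w (release)
  //   __ Fence(kFenceRead, kFenceRead | kFenceWrite);   // fence r,rw (acquire)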

  // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
  void FenceI();

  // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
  void Mul(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulh(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2);
  void Mulhu(XRegister rd, XRegister rs1, XRegister rs2);
  void Div(XRegister rd, XRegister rs1, XRegister rs2);
  void Divu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rem(XRegister rd, XRegister rs1, XRegister rs2);
  void Remu(XRegister rd, XRegister rs1, XRegister rs2);

  // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
  void Mulw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divw(XRegister rd, XRegister rs1, XRegister rs2);
  void Divuw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remw(XRegister rd, XRegister rs1, XRegister rs2);
  void Remuw(XRegister rd, XRegister rs1, XRegister rs2);

  // RV32A/RV64A Standard Extension
  void LrW(XRegister rd, XRegister rs1, AqRl aqrl);
  void LrD(XRegister rd, XRegister rs1, AqRl aqrl);
  void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
  void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
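
  // For illustration: a hedged sketch of a 32-bit compare-and-swap loop built from
  // `LrW()`/`ScW()`, assuming the label-based branch helpers declared further below in
  // this class. Registers and labels are arbitrary; note the `kLoadStore` restriction
  // above: no other loads/stores may appear between the LR and the SC.
  //
  //   Riscv64Label retry, done;
  //   __ Bind(&retry);
  //   __ LrW(T0, A0, AqRl::kAcquire);      // t0 = *a0
  //   __ Bne(T0, A1, &done);               // fail if *a0 != expected (a1)
  //   __ ScW(T1, A2, A0, AqRl::kRelease);  // try *a0 = desired (a2)
  //   __ Bnez(T1, &retry);                 // t1 != 0 => SC failed, retry
  //   __ Bind(&done);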

  // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
  void Csrrw(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrs(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrc(XRegister rd, uint32_t csr, XRegister rs1);
  void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5);
  void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5);

  // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
  void FLw(FRegister rd, XRegister rs1, int32_t offset);
  void FLd(FRegister rd, XRegister rs1, int32_t offset);
  void FSw(FRegister rs2, XRegister rs1, int32_t offset);
  void FSd(FRegister rs2, XRegister rs1, int32_t offset);

  // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);

  // FP FMA instruction helpers passing the default rounding mode.
  void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }
  void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
    FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
  }

  // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
  void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMinD(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxS(FRegister rd, FRegister rs1, FRegister rs2);
  void FMaxD(FRegister rd, FRegister rs1, FRegister rs2);
  void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm);

  // Simple FP instruction helpers passing the default rounding mode.
  void FAddS(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FAddD(FRegister rd, FRegister rs1, FRegister rs2) {
    FAddD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubS(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSubD(FRegister rd, FRegister rs1, FRegister rs2) {
    FSubD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulS(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FMulD(FRegister rd, FRegister rs1, FRegister rs2) {
    FMulD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivS(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivS(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FDivD(FRegister rd, FRegister rs1, FRegister rs2) {
    FDivD(rd, rs1, rs2, FPRoundingMode::kDefault);
  }
  void FSqrtS(FRegister rd, FRegister rs1) {
    FSqrtS(rd, rs1, FPRoundingMode::kDefault);
  }
  void FSqrtD(FRegister rd, FRegister rs1) {
    FSqrtD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtSD(FRegister rd, FRegister rs1) {
    FCvtSD(rd, rs1, FPRoundingMode::kDefault);
  }
  void FCvtDS(FRegister rd, FRegister rs1) {
    FCvtDS(rd, rs1, FPRoundingMode::kIgnored);
  }

  // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
  void FEqS(XRegister rd, FRegister rs1, FRegister rs2);
  void FEqD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLtD(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeS(XRegister rd, FRegister rs1, FRegister rs2);
  void FLeD(XRegister rd, FRegister rs1, FRegister rs2);

  // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
  void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
  void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
  void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm);

  // FP conversion instruction helpers passing the default rounding mode.
  void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
  void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
  void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }

  // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
  void FMvXW(XRegister rd, FRegister rs1);
  void FMvXD(XRegister rd, FRegister rs1);
  void FMvWX(FRegister rd, XRegister rs1);
  void FMvDX(FRegister rd, XRegister rs1);

  // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
  void FClassS(XRegister rd, FRegister rs1);
  void FClassD(XRegister rd, FRegister rs1);

  // "C" Standard Extension, Compressed Instructions
  void CLwsp(XRegister rd, int32_t offset);
  void CLdsp(XRegister rd, int32_t offset);
  void CFLdsp(FRegister rd, int32_t offset);
  void CSwsp(XRegister rs2, int32_t offset);
  void CSdsp(XRegister rs2, int32_t offset);
  void CFSdsp(FRegister rs2, int32_t offset);

  void CLw(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLd(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CFLd(FRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSw(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CSd(XRegister rs2_s, XRegister rs1_s, int32_t offset);
  void CFSd(FRegister rs2_s, XRegister rs1_s, int32_t offset);

  void CLi(XRegister rd, int32_t imm);
  void CLui(XRegister rd, uint32_t nzimm6);
  void CAddi(XRegister rd, int32_t nzimm);
  void CAddiw(XRegister rd, int32_t imm);
  void CAddi16Sp(int32_t nzimm);
  void CAddi4Spn(XRegister rd_s, uint32_t nzuimm);
  void CSlli(XRegister rd, int32_t shamt);
  void CSrli(XRegister rd_s, int32_t shamt);
  void CSrai(XRegister rd_s, int32_t shamt);
  void CAndi(XRegister rd_s, int32_t imm);
  void CMv(XRegister rd, XRegister rs2);
  void CAdd(XRegister rd, XRegister rs2);
  void CAnd(XRegister rd_s, XRegister rs2_s);
  void COr(XRegister rd_s, XRegister rs2_s);
  void CXor(XRegister rd_s, XRegister rs2_s);
  void CSub(XRegister rd_s, XRegister rs2_s);
  void CAddw(XRegister rd_s, XRegister rs2_s);
  void CSubw(XRegister rd_s, XRegister rs2_s);

  // "Zcb" Standard Extension, part of "C", opcode = 0b00, 0b01, funct3 = 0b100.
  void CLbu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLhu(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CLh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSb(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CSh(XRegister rd_s, XRegister rs1_s, int32_t offset);
  void CZextB(XRegister rd_rs1_s);
  void CSextB(XRegister rd_rs1_s);
  void CZextH(XRegister rd_rs1_s);
  void CSextH(XRegister rd_rs1_s);
  void CZextW(XRegister rd_rs1_s);
  void CNot(XRegister rd_rs1_s);
  void CMul(XRegister rd_s, XRegister rs2_s);
  // "Zcb" Standard Extension End; resume "C" Standard Extension.
  // TODO(riscv64): Reorder "Zcb" after remaining "C" instructions.

  void CJ(int32_t offset);
  void CJr(XRegister rs1);
  void CJalr(XRegister rs1);
  void CBeqz(XRegister rs1_s, int32_t offset);
  void CBnez(XRegister rs1_s, int32_t offset);

  void CEbreak();
  void CNop();
  void CUnimp();

  // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
  void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
  void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
  void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);

  // "Zbb" Standard Extension, opcode = 0x13, 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
  // Note: 32-bit sext.b, sext.h and zext.h from the Zbb extension are explicitly
  // prefixed with "Zbb" to differentiate them from the utility macros.
  void Andn(XRegister rd, XRegister rs1, XRegister rs2);
  void Orn(XRegister rd, XRegister rs1, XRegister rs2);
  void Xnor(XRegister rd, XRegister rs1, XRegister rs2);
  void Clz(XRegister rd, XRegister rs1);
  void Clzw(XRegister rd, XRegister rs1);
  void Ctz(XRegister rd, XRegister rs1);
  void Ctzw(XRegister rd, XRegister rs1);
  void Cpop(XRegister rd, XRegister rs1);
  void Cpopw(XRegister rd, XRegister rs1);
  void Min(XRegister rd, XRegister rs1, XRegister rs2);
  void Minu(XRegister rd, XRegister rs1, XRegister rs2);
  void Max(XRegister rd, XRegister rs1, XRegister rs2);
  void Maxu(XRegister rd, XRegister rs1, XRegister rs2);
  void Rol(XRegister rd, XRegister rs1, XRegister rs2);
  void Rolw(XRegister rd, XRegister rs1, XRegister rs2);
  void Ror(XRegister rd, XRegister rs1, XRegister rs2);
  void Rorw(XRegister rd, XRegister rs1, XRegister rs2);
  void Rori(XRegister rd, XRegister rs1, int32_t shamt);
  void Roriw(XRegister rd, XRegister rs1, int32_t shamt);
  void OrcB(XRegister rd, XRegister rs1);
  void Rev8(XRegister rd, XRegister rs1);
  void ZbbSextB(XRegister rd, XRegister rs1);
  void ZbbSextH(XRegister rd, XRegister rs1);
  void ZbbZextH(XRegister rd, XRegister rs1);

  ////////////////////////////// RISC-V Vector Instructions START ///////////////////////////////
  enum class LengthMultiplier : uint32_t {
    kM1Over8 = 0b101,
    kM1Over4 = 0b110,
    kM1Over2 = 0b111,
    kM1 = 0b000,
    kM2 = 0b001,
    kM4 = 0b010,
    kM8 = 0b011,

    kReserved1 = 0b100,
  };

  enum class SelectedElementWidth : uint32_t {
    kE8 = 0b000,
    kE16 = 0b001,
    kE32 = 0b010,
    kE64 = 0b011,

    kReserved1 = 0b100,
    kReserved2 = 0b101,
    kReserved3 = 0b110,
    kReserved4 = 0b111,
  };

  enum class VectorMaskAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VectorTailAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };

  enum class VM : uint32_t {  // Vector mask
    kV0_t = 0b0,
    kUnmasked = 0b1
  };

  // Vector Configuration-Setting Instructions, opcode = 0x57, funct3 = 0x3
  void VSetvli(XRegister rd, XRegister rs1, uint32_t vtypei);
  void VSetivli(XRegister rd, uint32_t uimm, uint32_t vtypei);
  void VSetvl(XRegister rd, XRegister rs1, XRegister rs2);

  static uint32_t VTypeiValue(VectorMaskAgnostic vma,
                              VectorTailAgnostic vta,
                              SelectedElementWidth sew,
                              LengthMultiplier lmul) {
    return static_cast<uint32_t>(vma) << 7 | static_cast<uint32_t>(vta) << 6 |
           static_cast<uint32_t>(sew) << 3 | static_cast<uint32_t>(lmul);
  }

  // Vector Unit-Stride Load/Store Instructions
  void VLe8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLe64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
  void VLm(VRegister vd, XRegister rs1);

  void VSe8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSe64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
  void VSm(VRegister vs3, XRegister rs1);
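
  // For illustration: a hedged sketch of configuring the vector unit with
  // `VTypeiValue()` and doing a unit-stride copy of 32-bit elements (registers are
  // arbitrary; a0 holds the requested element count, a1/a2 the source/destination):
  //
  //   __ VSetvli(T0, A0, VTypeiValue(VectorMaskAgnostic::kAgnostic,
  //                                  VectorTailAgnostic::kAgnostic,
  //                                  SelectedElementWidth::kE32,
  //                                  LengthMultiplier::kM1));
  //   __ VLe32(V1, A1);  // load t0 elements from a1
  //   __ VSe32(V1, A2);  // store them to a2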
705
706 // Vector unit-stride fault-only-first Instructions
707 void VLe8ff(VRegister vd, XRegister rs1);
708 void VLe16ff(VRegister vd, XRegister rs1);
709 void VLe32ff(VRegister vd, XRegister rs1);
710 void VLe64ff(VRegister vd, XRegister rs1);
711
712 // Vector Strided Load/Store Instructions
713 void VLse8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
714 void VLse16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
715 void VLse32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
716 void VLse64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
717
718 void VSse8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
719 void VSse16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
720 void VSse32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
721 void VSse64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
722
723 // Vector Indexed Load/Store Instructions
724 void VLoxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
725 void VLoxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
726 void VLoxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
727 void VLoxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
728
729 void VLuxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
730 void VLuxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
731 void VLuxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
732 void VLuxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
733
734 void VSoxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
735 void VSoxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
736 void VSoxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
737 void VSoxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
738
739 void VSuxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
740 void VSuxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
741 void VSuxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
742 void VSuxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
743
744 // Vector Segment Load/Store
745
746 // Vector Unit-Stride Segment Loads/Stores
747
748 void VLseg2e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
749 void VLseg2e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
750 void VLseg2e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
751 void VLseg2e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
752 void VLseg3e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
753 void VLseg3e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
754 void VLseg3e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
755 void VLseg3e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
756 void VLseg4e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
757 void VLseg4e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
758 void VLseg4e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
759 void VLseg4e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
760 void VLseg5e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
761 void VLseg5e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
762 void VLseg5e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
763 void VLseg5e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
764 void VLseg6e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
765 void VLseg6e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
766 void VLseg6e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
767 void VLseg6e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
768 void VLseg7e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
769 void VLseg7e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
770 void VLseg7e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
771 void VLseg7e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
772 void VLseg8e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
773 void VLseg8e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
774 void VLseg8e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
775 void VLseg8e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
776
777 void VSseg2e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
778 void VSseg2e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
779 void VSseg2e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
780 void VSseg2e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
781 void VSseg3e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
782 void VSseg3e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
783 void VSseg3e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
784 void VSseg3e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
785 void VSseg4e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
786 void VSseg4e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
787 void VSseg4e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
788 void VSseg4e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
789 void VSseg5e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
790 void VSseg5e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
791 void VSseg5e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
792 void VSseg5e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
793 void VSseg6e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
794 void VSseg6e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
795 void VSseg6e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
796 void VSseg6e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
797 void VSseg7e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
798 void VSseg7e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
799 void VSseg7e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
800 void VSseg7e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
801 void VSseg8e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
802 void VSseg8e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
803 void VSseg8e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
804 void VSseg8e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
805
806 // Vector Unit-Stride Fault-only-First Segment Loads
807
808 void VLseg2e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
809 void VLseg2e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
810 void VLseg2e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
811 void VLseg2e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
812 void VLseg3e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
813 void VLseg3e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
814 void VLseg3e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
815 void VLseg3e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
816 void VLseg4e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
817 void VLseg4e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
818 void VLseg4e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
819 void VLseg4e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
820 void VLseg5e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
821 void VLseg5e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
822 void VLseg5e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
823 void VLseg5e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
824 void VLseg6e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
825 void VLseg6e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
826 void VLseg6e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
827 void VLseg6e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
828 void VLseg7e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
829 void VLseg7e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
830 void VLseg7e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
831 void VLseg7e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
832 void VLseg8e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
833 void VLseg8e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
834 void VLseg8e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
835 void VLseg8e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
836
837 // Vector Strided Segment Loads/Stores
838
839 void VLsseg2e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
840 void VLsseg2e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
841 void VLsseg2e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
842 void VLsseg2e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
843 void VLsseg3e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
844 void VLsseg3e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
845 void VLsseg3e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
846 void VLsseg3e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
847 void VLsseg4e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
848 void VLsseg4e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
849 void VLsseg4e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
850 void VLsseg4e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
851 void VLsseg5e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
852 void VLsseg5e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
853 void VLsseg5e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
854 void VLsseg5e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
855 void VLsseg6e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
856 void VLsseg6e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
857 void VLsseg6e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
858 void VLsseg6e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
859 void VLsseg7e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
860 void VLsseg7e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
861 void VLsseg7e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
862 void VLsseg7e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
863 void VLsseg8e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
864 void VLsseg8e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
865 void VLsseg8e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
866 void VLsseg8e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
867
868 void VSsseg2e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
869 void VSsseg2e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
870 void VSsseg2e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
871 void VSsseg2e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
872 void VSsseg3e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
873 void VSsseg3e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
874 void VSsseg3e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
875 void VSsseg3e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
876 void VSsseg4e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
877 void VSsseg4e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
878 void VSsseg4e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
879 void VSsseg4e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
880 void VSsseg5e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
881 void VSsseg5e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
882 void VSsseg5e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
883 void VSsseg5e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
884 void VSsseg6e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
885 void VSsseg6e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
886 void VSsseg6e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
887 void VSsseg6e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
888 void VSsseg7e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
889 void VSsseg7e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
890 void VSsseg7e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
891 void VSsseg7e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
892 void VSsseg8e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
893 void VSsseg8e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
894 void VSsseg8e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
895 void VSsseg8e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
896
897 // Vector Indexed-unordered Segment Loads/Stores
898
899 void VLuxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
900 void VLuxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
901 void VLuxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
902 void VLuxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
903 void VLuxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
904 void VLuxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
905 void VLuxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
906 void VLuxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
907 void VLuxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
908 void VLuxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
909 void VLuxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
910 void VLuxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
911 void VLuxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
912 void VLuxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
913 void VLuxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
914 void VLuxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
915 void VLuxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
916 void VLuxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
917 void VLuxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
918 void VLuxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
919 void VLuxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
920 void VLuxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
921 void VLuxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
922 void VLuxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
923 void VLuxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
924 void VLuxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
925 void VLuxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
926 void VLuxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
927
928 void VSuxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
929 void VSuxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
930 void VSuxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
931 void VSuxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
932 void VSuxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
933 void VSuxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
934 void VSuxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
935 void VSuxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
936 void VSuxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
937 void VSuxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
938 void VSuxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
939 void VSuxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
940 void VSuxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
941 void VSuxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
942 void VSuxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
943 void VSuxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
944 void VSuxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
945 void VSuxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
946 void VSuxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
947 void VSuxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
948 void VSuxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
949 void VSuxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
950 void VSuxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
951 void VSuxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
952 void VSuxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
953 void VSuxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
954 void VSuxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
955 void VSuxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
956
957 // Vector Indexed-ordered Segment Loads/Stores
958
959 void VLoxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
960 void VLoxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
961 void VLoxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
962 void VLoxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
963 void VLoxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
964 void VLoxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
965 void VLoxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
966 void VLoxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
967 void VLoxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
968 void VLoxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
969 void VLoxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
970 void VLoxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
971 void VLoxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
972 void VLoxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
973 void VLoxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
974 void VLoxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
975 void VLoxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
976 void VLoxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
977 void VLoxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
978 void VLoxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
979 void VLoxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
980 void VLoxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
981 void VLoxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
982 void VLoxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
983 void VLoxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
984 void VLoxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
985 void VLoxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
986 void VLoxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
987
988 void VSoxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
989 void VSoxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
990 void VSoxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
991 void VSoxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
992 void VSoxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
993 void VSoxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
994 void VSoxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
995 void VSoxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
996 void VSoxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
997 void VSoxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
998 void VSoxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
999 void VSoxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1000 void VSoxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1001 void VSoxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1002 void VSoxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1003 void VSoxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1004 void VSoxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1005 void VSoxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1006 void VSoxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1007 void VSoxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1008 void VSoxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1009 void VSoxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1010 void VSoxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1011 void VSoxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1012 void VSoxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1013 void VSoxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1014 void VSoxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1015 void VSoxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1016
1017 // Vector Whole Register Load/Store Instructions
1018
1019 void VL1re8(VRegister vd, XRegister rs1);
1020 void VL1re16(VRegister vd, XRegister rs1);
1021 void VL1re32(VRegister vd, XRegister rs1);
1022 void VL1re64(VRegister vd, XRegister rs1);
1023
1024 void VL2re8(VRegister vd, XRegister rs1);
1025 void VL2re16(VRegister vd, XRegister rs1);
1026 void VL2re32(VRegister vd, XRegister rs1);
1027 void VL2re64(VRegister vd, XRegister rs1);
1028
1029 void VL4re8(VRegister vd, XRegister rs1);
1030 void VL4re16(VRegister vd, XRegister rs1);
1031 void VL4re32(VRegister vd, XRegister rs1);
1032 void VL4re64(VRegister vd, XRegister rs1);
1033
1034 void VL8re8(VRegister vd, XRegister rs1);
1035 void VL8re16(VRegister vd, XRegister rs1);
1036 void VL8re32(VRegister vd, XRegister rs1);
1037 void VL8re64(VRegister vd, XRegister rs1);
1038
1039 void VL1r(VRegister vd, XRegister rs1); // Pseudo-instruction equal to VL1re8
1040 void VL2r(VRegister vd, XRegister rs1); // Pseudo-instruction equal to VL2re8
1041 void VL4r(VRegister vd, XRegister rs1); // Pseudo-instruction equal to VL4re8
1042 void VL8r(VRegister vd, XRegister rs1); // Pseudo-instruction equal to VL8re8
1043
1044 void VS1r(VRegister vs3, XRegister rs1); // Store {vs3} to address in rs1
1045 void VS2r(VRegister vs3, XRegister rs1); // Store {vs3}-{vs3 + 1} to address in rs1
1046 void VS4r(VRegister vs3, XRegister rs1); // Store {vs3}-{vs3 + 3} to address in rs1
1047 void VS8r(VRegister vs3, XRegister rs1); // Store {vs3}-{vs3 + 7} to address in rs1
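
// A minimal spill/reload sketch using the whole-register instructions above
// (hypothetical register choices; assumes an assembler instance `asm_` with the
// base address in T0):
//   asm_.VS2r(V4, T0);  // Spill the two-register group {v4, v5}.
//   ...
//   asm_.VL2r(V4, T0);  // Reload it; VL2r(vd, rs1) is equivalent to VL2re8(vd, rs1).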
1048
1049 // Vector Arithmetic Instructions
1050
1051 // Vector vadd instructions, funct6 = 0b000000
1052 void VAdd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1053 void VAdd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1054 void VAdd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1055
1056 // Vector vsub instructions, funct6 = 0b000010
1057 void VSub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1058 void VSub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1059
1060 // Vector vrsub instructions, funct6 = 0b000011
1061 void VRsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1062 void VRsub_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1063
1064 // Pseudo-instruction over VRsub_vi
1065 void VNeg_v(VRegister vd, VRegister vs2);
1066
1067 // Vector vminu instructions, funct6 = 0b000100
1068 void VMinu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1069 void VMinu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1070
1071 // Vector vmin instructions, funct6 = 0b000101
1072 void VMin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1073 void VMin_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1074
1075 // Vector vmaxu instructions, funct6 = 0b000110
1076 void VMaxu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1077 void VMaxu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1078
1079 // Vector vmax instructions, funct6 = 0b000111
1080 void VMax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1081 void VMax_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1082
1083 // Vector vand instructions, funct6 = 0b001001
1084 void VAnd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1085 void VAnd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1086 void VAnd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1087
1088 // Vector vor instructions, funct6 = 0b001010
1089 void VOr_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1090 void VOr_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1091 void VOr_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1092
1093 // Vector vxor instructions, funct6 = 0b001011
1094 void VXor_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1095 void VXor_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1096 void VXor_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1097
1098 // Pseudo-instruction over VXor_vi
1099 void VNot_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1100
1101 // Vector vrgather instructions, funct6 = 0b001100
1102 void VRgather_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1103 void VRgather_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1104 void VRgather_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1105
1106 // Vector vslideup instructions, funct6 = 0b001110
1107 void VSlideup_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1108 void VSlideup_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1109
1110 // Vector vrgatherei16 instructions, funct6 = 0b001110
1111 void VRgatherei16_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1112
1113 // Vector vslidedown instructions, funct6 = 0b001111
1114 void VSlidedown_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1115 void VSlidedown_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1116
1117 // Vector vadc instructions, funct6 = 0b010000
1118 void VAdc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1119 void VAdc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1120 void VAdc_vim(VRegister vd, VRegister vs2, int32_t imm5);
1121
1122 // Vector vmadc instructions, funct6 = 0b010001
1123 void VMadc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1124 void VMadc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1125 void VMadc_vim(VRegister vd, VRegister vs2, int32_t imm5);
1126
1127 // Vector vmadc instructions, no carry-in, funct6 = 0b010001, vm = 1
1128 void VMadc_vv(VRegister vd, VRegister vs2, VRegister vs1);
1129 void VMadc_vx(VRegister vd, VRegister vs2, XRegister rs1);
1130 void VMadc_vi(VRegister vd, VRegister vs2, int32_t imm5);
1131
1132 // Vector vsbc instructions, funct6 = 0b010010
1133 void VSbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1134 void VSbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1135
1136 // Vector vmsbc instructions, funct6 = 0b010011
1137 void VMsbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1138 void VMsbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1139 void VMsbc_vv(VRegister vd, VRegister vs2, VRegister vs1);
1140 void VMsbc_vx(VRegister vd, VRegister vs2, XRegister rs1);
1141
1142 // Vector vmerge instructions, funct6 = 0b010111, vm = 0
1143 void VMerge_vvm(VRegister vd, VRegister vs2, VRegister vs1);
1144 void VMerge_vxm(VRegister vd, VRegister vs2, XRegister rs1);
1145 void VMerge_vim(VRegister vd, VRegister vs2, int32_t imm5);
1146
1147 // Vector vmv instructions, funct6 = 0b010111, vm = 1, vs2 = v0
1148 void VMv_vv(VRegister vd, VRegister vs1);
1149 void VMv_vx(VRegister vd, XRegister rs1);
1150 void VMv_vi(VRegister vd, int32_t imm5);
1151
1152 // Vector vmseq instructions, funct6 = 0b011000
1153 void VMseq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1154 void VMseq_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1155 void VMseq_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1156
1157 // Vector vmsne instructions, funct6 = 0b011001
1158 void VMsne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1159 void VMsne_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1160 void VMsne_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1161
1162 // Vector vmsltu instructions, funct6 = 0b011010
1163 void VMsltu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1164 void VMsltu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1165
1166 // Pseudo-instruction over VMsltu_vv
1167 void VMsgtu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1168
1169 // Vector vmslt instructions, funct6 = 0b011011
1170 void VMslt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1171 void VMslt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1172
1173 // Pseudo-instruction over VMslt_vv
1174 void VMsgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1175
1176 // Vector vmsleu instructions, funct6 = 0b011100
1177 void VMsleu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1178 void VMsleu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1179 void VMsleu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1180
1181 // Pseudo-instructions over VMsleu_*
1182 void VMsgeu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1183 void VMsltu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1184
1185 // Vector vmsle instructions, funct6 = 0b011101
1186 void VMsle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1187 void VMsle_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1188 void VMsle_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1189
1190 // Pseudo-instructions over VMsle_*
1191 void VMsge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1192 void VMslt_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1193
1194 // Vector vmsgtu instructions, funct6 = 0b011110
1195 void VMsgtu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1196 void VMsgtu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1197
1198 // Pseudo-instruction over VMsgtu_vi
1199 void VMsgeu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1200
1201 // Vector vmsgt instructions, funct6 = 0b011111
1202 void VMsgt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1203 void VMsgt_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1204
1205 // Pseudo-instruction over VMsgt_vi
1206 void VMsge_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
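
// Note on the `aimm5` parameters above: they are "adjusted" immediates. Following the
// standard RVV expansions, e.g. vmsge.vi vd, vs2, i becomes vmsgt.vi vd, vs2, i - 1 and
// vmsltu.vi vd, vs2, i becomes vmsleu.vi vd, vs2, i - 1, so the usable range is shifted
// by one relative to the underlying instruction's imm5.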
1207
1208 // Vector vsaddu instructions, funct6 = 0b100000
1209 void VSaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1210 void VSaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1211 void VSaddu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1212
1213 // Vector vsadd instructions, funct6 = 0b100001
1214 void VSadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1215 void VSadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1216 void VSadd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1217
1218 // Vector vssubu instructions, funct6 = 0b100010
1219 void VSsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1220 void VSsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1221
1222 // Vector vssub instructions, funct6 = 0b100011
1223 void VSsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1224 void VSsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1225
1226 // Vector vsll instructions, funct6 = 0b100101
1227 void VSll_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1228 void VSll_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1229 void VSll_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1230
1231 // Vector vsmul instructions, funct6 = 0b100111
1232 void VSmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1233 void VSmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1234
1235 // Vector vmv<nr>r.v instructions, funct6 = 0b100111
1236 void Vmv1r_v(VRegister vd, VRegister vs2);
1237 void Vmv2r_v(VRegister vd, VRegister vs2);
1238 void Vmv4r_v(VRegister vd, VRegister vs2);
1239 void Vmv8r_v(VRegister vd, VRegister vs2);
1240
1241 // Vector vsrl instructions, funct6 = 0b101000
1242 void VSrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1243 void VSrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1244 void VSrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1245
1246 // Vector vsra instructions, funct6 = 0b101001
1247 void VSra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1248 void VSra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1249 void VSra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1250
1251 // Vector vssrl instructions, funct6 = 0b101010
1252 void VSsrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1253 void VSsrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1254 void VSsrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1255
1256 // Vector vssra instructions, funct6 = 0b101011
1257 void VSsra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1258 void VSsra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1259 void VSsra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1260
1261 // Vector vnsrl instructions, funct6 = 0b101100
1262 void VNsrl_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1263 void VNsrl_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1264 void VNsrl_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1265
1266 // Pseudo-instruction over VNsrl_wx
1267 void VNcvt_x_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1268
1269 // Vector vnsra instructions, funct6 = 0b101101
1270 void VNsra_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1271 void VNsra_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1272 void VNsra_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1273
1274 // Vector vnclipu instructions, funct6 = 0b101110
1275 void VNclipu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1276 void VNclipu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1277 void VNclipu_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1278
1279 // Vector vnclip instructions, funct6 = 0b101111
1280 void VNclip_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1281 void VNclip_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1282 void VNclip_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1283
1284 // Vector vwredsumu instructions, funct6 = 0b110000
1285 void VWredsumu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1286
1287 // Vector vwredsum instructions, funct6 = 0b110001
1288 void VWredsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1289
1290 // Vector vredsum instructions, funct6 = 0b000000
1291 void VRedsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1292
1293 // Vector vredand instructions, funct6 = 0b000001
1294 void VRedand_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1295
1296 // Vector vredor instructions, funct6 = 0b000010
1297 void VRedor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1298
1299 // Vector vredxor instructions, funct6 = 0b000011
1300 void VRedxor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1301
1302 // Vector vredminu instructions, funct6 = 0b000100
1303 void VRedminu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1304
1305 // Vector vredmin instructions, funct6 = 0b000101
1306 void VRedmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1307
1308 // Vector vredmaxu instructions, funct6 = 0b000110
1309 void VRedmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1310
1311 // Vector vredmax instructions, funct6 = 0b000111
1312 void VRedmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1313
1314 // Vector vaaddu instructions, funct6 = 0b001000
1315 void VAaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1316 void VAaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1317
1318 // Vector vaadd instructions, funct6 = 0b001001
1319 void VAadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1320 void VAadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1321
1322 // Vector vasubu instructions, funct6 = 0b001010
1323 void VAsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1324 void VAsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1325
1326 // Vector vasub instructions, funct6 = 0b001011
1327 void VAsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1328 void VAsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1329
1330 // Vector vslide1up instructions, funct6 = 0b001110
1331 void VSlide1up_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1332
1333 // Vector vslide1down instructions, funct6 = 0b001111
1334 void VSlide1down_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1335
1336 // Vector vcompress instructions, funct6 = 0b010111
1337 void VCompress_vm(VRegister vd, VRegister vs2, VRegister vs1);
1338
1339 // Vector vmandn instructions, funct6 = 0b011000
1340 void VMandn_mm(VRegister vd, VRegister vs2, VRegister vs1);
1341
1342 // Vector vmand instructions, funct6 = 0b011001
1343 void VMand_mm(VRegister vd, VRegister vs2, VRegister vs1);
1344
1345 // Pseudo-instruction over VMand_mm
1346 void VMmv_m(VRegister vd, VRegister vs2);
1347
1348 // Vector vmor instructions, funct6 = 0b011010
1349 void VMor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1350
1351 // Vector vmxor instructions, funct6 = 0b011011
1352 void VMxor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1353
1354 // Pseudo-instruction over VMxor_mm
1355 void VMclr_m(VRegister vd);
1356
1357 // Vector vmorn instructions, funct6 = 0b011100
1358 void VMorn_mm(VRegister vd, VRegister vs2, VRegister vs1);
1359
1360 // Vector vmnand instructions, funct6 = 0b011101
1361 void VMnand_mm(VRegister vd, VRegister vs2, VRegister vs1);
1362
1363 // Pseudo-instruction over VMnand_mm
1364 void VMnot_m(VRegister vd, VRegister vs2);
1365
1366 // Vector vmnor instructions, funct6 = 0b011110
1367 void VMnor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1368
1369 // Vector vmxnor instructions, funct6 = 0b011111
1370 void VMxnor_mm(VRegister vd, VRegister vs2, VRegister vs1);
1371
1372 // Pseudo-instruction over VMxnor_mm
1373 void VMset_m(VRegister vd);
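
// The mask pseudo-instructions above use the standard RVV idioms: vmmv.m vd, vs is
// vmand.mm vd, vs, vs; vmclr.m vd is vmxor.mm vd, vd, vd (all zeros); vmset.m vd is
// vmxnor.mm vd, vd, vd (all ones); vmnot.m vd, vs is vmnand.mm vd, vs, vs.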
1374
1375 // Vector vdivu instructions, funct6 = 0b100000
1376 void VDivu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1377 void VDivu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1378
1379 // Vector vdiv instructions, funct6 = 0b100001
1380 void VDiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1381 void VDiv_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1382
1383 // Vector vremu instructions, funct6 = 0b100010
1384 void VRemu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1385 void VRemu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1386
1387 // Vector vrem instructions, funct6 = 0b100011
1388 void VRem_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1389 void VRem_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1390
1391 // Vector vmulhu instructions, funct6 = 0b100100
1392 void VMulhu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1393 void VMulhu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1394
1395 // Vector vmul instructions, funct6 = 0b100101
1396 void VMul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1397 void VMul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1398
1399 // Vector vmulhsu instructions, funct6 = 0b100110
1400 void VMulhsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1401 void VMulhsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1402
1403 // Vector vmulh instructions, funct6 = 0b100111
1404 void VMulh_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1405 void VMulh_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1406
1407 // Vector vmadd instructions, funct6 = 0b101001
1408 void VMadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1409 void VMadd_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1410
1411 // Vector vnmsub instructions, funct6 = 0b101011
1412 void VNmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1413 void VNmsub_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1414
1415 // Vector vmacc instructions, funct6 = 0b101101
1416 void VMacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1417 void VMacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1418
1419 // Vector vnmsac instructions, funct6 = 0b101111
1420 void VNmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1421 void VNmsac_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1422
1423 // Vector vwaddu instructions, funct6 = 0b110000
1424 void VWaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1425 void VWaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1426
1427 // Pseudo-instruction over VWaddu_vx
1428 void VWcvtu_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);
1429
1430 // Vector vwadd instructions, funct6 = 0b110001
1431 void VWadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1432 void VWadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1433
1434 // Pseudo-instruction over VWadd_vx
1435 void VWcvt_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);
1436
1437 // Vector vwsubu instructions, funct6 = 0b110010
1438 void VWsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1439 void VWsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1440
1441 // Vector vwsub instructions, funct6 = 0b110011
1442 void VWsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1443 void VWsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1444
1445 // Vector vwaddu.w instructions, funct6 = 0b110100
1446 void VWaddu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1447 void VWaddu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1448
1449 // Vector vwadd.w instructions, funct6 = 0b110101
1450 void VWadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1451 void VWadd_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1452
1453 // Vector vwsubu.w instructions, funct6 = 0b110110
1454 void VWsubu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1455 void VWsubu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1456
1457 // Vector vwsub.w instructions, funct6 = 0b110111
1458 void VWsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1459 void VWsub_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1460
1461 // Vector vwmulu instructions, funct6 = 0b111000
1462 void VWmulu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1463 void VWmulu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1464
1465 // Vector vwmulsu instructions, funct6 = 0b111010
1466 void VWmulsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1467 void VWmulsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1468
1469 // Vector vwmul instructions, funct6 = 0b111011
1470 void VWmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1471 void VWmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1472
1473 // Vector vwmaccu instructions, funct6 = 0b111100
1474 void VWmaccu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1475 void VWmaccu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1476
1477 // Vector vwmacc instructions, funct6 = 0b111101
1478 void VWmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1479 void VWmacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1480
1481 // Vector vwmaccus instructions, funct6 = 0b111110
1482 void VWmaccus_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1483
1484 // Vector vwmaccsu instructions, funct6 = 0b111111
1485 void VWmaccsu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1486 void VWmaccsu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1487
1488 // Vector vfadd instructions, funct6 = 0b000000
1489 void VFadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1490 void VFadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1491
1492 // Vector vfredusum instructions, funct6 = 0b000001
1493 void VFredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1494
1495 // Vector vfsub instructions, funct6 = 0b000010
1496 void VFsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1497 void VFsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1498
1499 // Vector vfredosum instructions, funct6 = 0b000011
1500 void VFredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1501
1502 // Vector vfmin instructions, funct6 = 0b000100
1503 void VFmin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1504 void VFmin_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1505
1506 // Vector vfredmin instructions, funct6 = 0b000101
1507 void VFredmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1508
1509 // Vector vfmax instructions, funct6 = 0b000110
1510 void VFmax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1511 void VFmax_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1512
1513 // Vector vfredmax instructions, funct6 = 0b000111
1514 void VFredmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1515
1516 // Vector vfsgnj instructions, funct6 = 0b001000
1517 void VFsgnj_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1518 void VFsgnj_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1519
1520 // Vector vfsgnjn instructions, funct6 = 0b001001
1521 void VFsgnjn_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1522 void VFsgnjn_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1523
1524 // Pseudo-instruction over VFsgnjn_vv
1525 void VFneg_v(VRegister vd, VRegister vs);
1526
1527 // Vector vfsgnjx instructions, funct6 = 0b001010
1528 void VFsgnjx_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1529 void VFsgnjx_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1530
1531 // Pseudo-instruction over VFsgnjx_vv
1532 void VFabs_v(VRegister vd, VRegister vs);
1533
1534 // Vector vfslide1up instructions, funct6 = 0b001110
1535 void VFslide1up_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1536
1537 // Vector vfslide1down instructions, funct6 = 0b001111
1538 void VFslide1down_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1539
1540 // Vector vfmerge/vfmv instructions, funct6 = 0b010111
1541 void VFmerge_vfm(VRegister vd, VRegister vs2, FRegister fs1);
1542 void VFmv_v_f(VRegister vd, FRegister fs1);
1543
1544 // Vector vmfeq instructions, funct6 = 0b011000
1545 void VMfeq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1546 void VMfeq_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1547
1548 // Vector vmfle instructions, funct6 = 0b011001
1549 void VMfle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1550 void VMfle_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1551
1552 // Pseudo-instruction over VMfle_vv
1553 void VMfge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1554
1555 // Vector vmflt instructions, funct6 = 0b011011
1556 void VMflt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1557 void VMflt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1558
1559 // Pseudo-instruction over VMflt_vv
1560 void VMfgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1561
1562 // Vector vmfne instructions, funct6 = 0b011100
1563 void VMfne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1564 void VMfne_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1565
1566 // Vector vmfgt instructions, funct6 = 0b011101
1567 void VMfgt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1568
1569 // Vector vmfge instructions, funct6 = 0b011111
1570 void VMfge_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1571
1572 // Vector vfdiv instructions, funct6 = 0b100000
1573 void VFdiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1574 void VFdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1575
1576 // Vector vfrdiv instructions, funct6 = 0b100001
1577 void VFrdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1578
1579 // Vector vfmul instructions, funct6 = 0b100100
1580 void VFmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1581 void VFmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1582
1583 // Vector vfrsub instructions, funct6 = 0b100111
1584 void VFrsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1585
1586 // Vector vfmadd instructions, funct6 = 0b101000
1587 void VFmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1588 void VFmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1589
1590 // Vector vfnmadd instructions, funct6 = 0b101001
1591 void VFnmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1592 void VFnmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1593
1594 // Vector vfmsub instructions, funct6 = 0b101010
1595 void VFmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1596 void VFmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1597
1598 // Vector vfnmsub instructions, funct6 = 0b101011
1599 void VFnmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1600 void VFnmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1601
1602 // Vector vfmacc instructions, funct6 = 0b101100
1603 void VFmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1604 void VFmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1605
1606 // Vector vfnmacc instructions, funct6 = 0b101101
1607 void VFnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1608 void VFnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1609
1610 // Vector vfmsac instructions, funct6 = 0b101110
1611 void VFmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1612 void VFmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1613
1614 // Vector vfnmsac instructions, funct6 = 0b101111
1615 void VFnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1616 void VFnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1617
1618 // Vector vfwadd instructions, funct6 = 0b110000
1619 void VFwadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1620 void VFwadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1621
1622 // Vector vfwredusum instructions, funct6 = 0b110001
1623 void VFwredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1624
1625 // Vector vfwsub instructions, funct6 = 0b110010
1626 void VFwsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1627 void VFwsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1628
1629 // Vector vfwredosum instructions, funct6 = 0b110011
1630 void VFwredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1631
1632 // Vector vfwadd.w instructions, funct6 = 0b110100
1633 void VFwadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1634 void VFwadd_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1635
1636 // Vector vfwsub.w instructions, funct6 = 0b110110
1637 void VFwsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1638 void VFwsub_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1639
1640 // Vector vfwmul instructions, funct6 = 0b111000
1641 void VFwmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1642 void VFwmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1643
1644 // Vector vfwmacc instructions, funct6 = 0b111100
1645 void VFwmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1646 void VFwmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1647
1648 // Vector vfwnmacc instructions, funct6 = 0b111101
1649 void VFwnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1650 void VFwnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1651
1652 // Vector vfwmsac instructions, funct6 = 0b111110
1653 void VFwmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1654 void VFwmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1655
1656 // Vector vfwnmsac instructions, funct6 = 0b111111
1657 void VFwnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
1658 void VFwnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1659
1660 // Vector VRXUNARY0 kind instructions, funct6 = 0b010000
1661 void VMv_s_x(VRegister vd, XRegister rs1);
1662
1663 // Vector VWXUNARY0 kind instructions, funct6 = 0b010000
1664 void VMv_x_s(XRegister rd, VRegister vs2);
1665 void VCpop_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
1666 void VFirst_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
1667
1668 // Vector VXUNARY0 kind instructions, funct6 = 0b010010
1669 void VZext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1670 void VSext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1671 void VZext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1672 void VSext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1673 void VZext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1674 void VSext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1675
1676 // Vector VRFUNARY0 kind instructions, funct6 = 0b010000
1677 void VFmv_s_f(VRegister vd, FRegister fs1);
1678
1679 // Vector VWFUNARY0 kind instructions, funct6 = 0b010000
1680 void VFmv_f_s(FRegister fd, VRegister vs2);
1681
1682 // Vector VFUNARY0 kind instructions, funct6 = 0b010010
1683 void VFcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1684 void VFcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1685 void VFcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1686 void VFcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1687 void VFcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1688 void VFcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1689 void VFwcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1690 void VFwcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1691 void VFwcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1692 void VFwcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1693 void VFwcvt_f_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1694 void VFwcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1695 void VFwcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1696 void VFncvt_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1697 void VFncvt_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1698 void VFncvt_f_xu_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1699 void VFncvt_f_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1700 void VFncvt_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1701 void VFncvt_rod_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1702 void VFncvt_rtz_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1703 void VFncvt_rtz_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1704
1705 // Vector VFUNARY1 kind instructions, funct6 = 0b010011
1706 void VFsqrt_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1707 void VFrsqrt7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1708 void VFrec7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1709 void VFclass_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1710
1711 // Vector VMUNARY0 kind instructions, funct6 = 0b010100
1712 void VMsbf_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1713 void VMsof_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1714 void VMsif_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1715 void VIota_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1716 void VId_v(VRegister vd, VM vm = VM::kUnmasked);
1717
1718 ////////////////////////////// RISC-V Vector Instructions END //////////////////////////////
1719
1720 ////////////////////////////// RV64 MACRO Instructions START ///////////////////////////////
1721 // These pseudo-instructions are from the "RISC-V Assembly Programmer's Manual".
1722
1723 void Nop();
1724 void Li(XRegister rd, int64_t imm);
1725 void Mv(XRegister rd, XRegister rs);
1726 void Not(XRegister rd, XRegister rs);
1727 void Neg(XRegister rd, XRegister rs);
1728 void NegW(XRegister rd, XRegister rs);
1729 void SextB(XRegister rd, XRegister rs);
1730 void SextH(XRegister rd, XRegister rs);
1731 void SextW(XRegister rd, XRegister rs);
1732 void ZextB(XRegister rd, XRegister rs);
1733 void ZextH(XRegister rd, XRegister rs);
1734 void ZextW(XRegister rd, XRegister rs);
1735 void Seqz(XRegister rd, XRegister rs);
1736 void Snez(XRegister rd, XRegister rs);
1737 void Sltz(XRegister rd, XRegister rs);
1738 void Sgtz(XRegister rd, XRegister rs);
1739 void FMvS(FRegister rd, FRegister rs);
1740 void FAbsS(FRegister rd, FRegister rs);
1741 void FNegS(FRegister rd, FRegister rs);
1742 void FMvD(FRegister rd, FRegister rs);
1743 void FAbsD(FRegister rd, FRegister rs);
1744 void FNegD(FRegister rd, FRegister rs);
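
// A short usage sketch (hypothetical registers and values; assumes an assembler
// instance `asm_`):
//   asm_.Li(A0, 0x123456789abcdef0);  // Materialized as a multi-instruction sequence.
//   asm_.Mv(A1, A0);                  // Standard expansion: addi a1, a0, 0.
//   asm_.Not(A2, A1);                 // Standard expansion: xori a2, a1, -1.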
1745
1746 // Branch pseudo-instructions
1747 void Beqz(XRegister rs, int32_t offset);
1748 void Bnez(XRegister rs, int32_t offset);
1749 void Blez(XRegister rs, int32_t offset);
1750 void Bgez(XRegister rs, int32_t offset);
1751 void Bltz(XRegister rs, int32_t offset);
1752 void Bgtz(XRegister rs, int32_t offset);
1753 void Bgt(XRegister rs, XRegister rt, int32_t offset);
1754 void Ble(XRegister rs, XRegister rt, int32_t offset);
1755 void Bgtu(XRegister rs, XRegister rt, int32_t offset);
1756 void Bleu(XRegister rs, XRegister rt, int32_t offset);
1757
1758 // Jump pseudo-instructions
1759 void J(int32_t offset);
1760 void Jal(int32_t offset);
1761 void Jr(XRegister rs);
1762 void Jalr(XRegister rs);
1763 void Jalr(XRegister rd, XRegister rs);
1764 void Ret();
1765
1766 // Pseudo-instructions for accessing control and status registers
1767 void RdCycle(XRegister rd);
1768 void RdTime(XRegister rd);
1769 void RdInstret(XRegister rd);
1770 void Csrr(XRegister rd, uint32_t csr);
1771 void Csrw(uint32_t csr, XRegister rs);
1772 void Csrs(uint32_t csr, XRegister rs);
1773 void Csrc(uint32_t csr, XRegister rs);
1774 void Csrwi(uint32_t csr, uint32_t uimm5);
1775 void Csrsi(uint32_t csr, uint32_t uimm5);
1776 void Csrci(uint32_t csr, uint32_t uimm5);
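
// These follow the standard Zicsr pseudo-instruction expansions: Csrr(rd, csr) emits
// csrrs rd, csr, zero; Csrw(csr, rs) emits csrrw zero, csr, rs; RdCycle(rd) reads the
// "cycle" CSR (0xC00) as csrrs rd, cycle, zero.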
1777
1778 // Load/store macros for arbitrary 32-bit offsets.
1779 void Loadb(XRegister rd, XRegister rs1, int32_t offset);
1780 void Loadh(XRegister rd, XRegister rs1, int32_t offset);
1781 void Loadw(XRegister rd, XRegister rs1, int32_t offset);
1782 void Loadd(XRegister rd, XRegister rs1, int32_t offset);
1783 void Loadbu(XRegister rd, XRegister rs1, int32_t offset);
1784 void Loadhu(XRegister rd, XRegister rs1, int32_t offset);
1785 void Loadwu(XRegister rd, XRegister rs1, int32_t offset);
1786 void Storeb(XRegister rs2, XRegister rs1, int32_t offset);
1787 void Storeh(XRegister rs2, XRegister rs1, int32_t offset);
1788 void Storew(XRegister rs2, XRegister rs1, int32_t offset);
1789 void Stored(XRegister rs2, XRegister rs1, int32_t offset);
1790 void FLoadw(FRegister rd, XRegister rs1, int32_t offset);
1791 void FLoadd(FRegister rd, XRegister rs1, int32_t offset);
1792 void FStorew(FRegister rs2, XRegister rs1, int32_t offset);
1793 void FStored(FRegister rs2, XRegister rs1, int32_t offset);
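
// Unlike the base load/store instructions, whose immediate offset is limited to 12 bits,
// these macros accept an arbitrary int32_t offset; if it does not fit, the base and
// offset are first rewritten through a scratch register (see AdjustBaseAndOffset() below).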
1794
1795 // Macros for loading constants.
1796 void LoadConst32(XRegister rd, int32_t value);
1797 void LoadConst64(XRegister rd, int64_t value);
1798
1799 // Macros for adding constants.
1800 void AddConst32(XRegister rd, XRegister rs1, int32_t value);
1801 void AddConst64(XRegister rd, XRegister rs1, int64_t value);
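
// Sketch (hypothetical values/registers; assumes an assembler instance `asm_`):
//   asm_.LoadConst32(A0, 0x12345678);      // Typically a lui + addiw pair.
//   asm_.AddConst64(A1, A0, 0x12345678);   // May use a scratch register for the constant.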
1802
1803 // Jumps and branches to a label.
1804 void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1805 void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1806 void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1807 void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1808 void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1809 void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1810 void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1811 void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1812 void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1813 void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1814 void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1815 void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1816 void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1817 void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1818 void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1819 void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1820 void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false);
1821 void J(Riscv64Label* label, bool is_bare = false);
1822 void Jal(Riscv64Label* label, bool is_bare = false);
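
// Typical label workflow (a sketch, assuming an assembler instance `asm_`):
//   Riscv64Label done;
//   asm_.Beqz(A0, &done);  // Forward branch; the offset is fixed up at Bind()/finalization.
//   asm_.Li(A0, 1);
//   asm_.Bind(&done);
// Branches with is_bare = true must fit their short encoding; they are never promoted.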
1823
1824 // Literal load.
1825 void Loadw(XRegister rd, Literal* literal);
1826 void Loadwu(XRegister rd, Literal* literal);
1827 void Loadd(XRegister rd, Literal* literal);
1828 void FLoadw(FRegister rd, Literal* literal);
1829 void FLoadd(FRegister rd, Literal* literal);
1830
1831 // Illegal instruction that triggers SIGILL.
1832 void Unimp();
1833
1834 /////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////
1835
1836 void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); }
1837
1838 void Jump([[maybe_unused]] Label* label) override {
1839 UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
1840 }
1841
1842 void Jump(Riscv64Label* label) {
1843 J(label);
1844 }
1845
1846 void Bind(Riscv64Label* label);
1847
1848 // Load label address using PC-relative loads.
1849 void LoadLabelAddress(XRegister rd, Riscv64Label* label);
1850
1851 // Create a new literal with a given value.
1852 // NOTE: Use `Identity<>` to force the template parameter to be explicitly specified.
1853 template <typename T>
1854 Literal* NewLiteral(typename Identity<T>::type value) {
1855 static_assert(std::is_integral<T>::value, "T must be an integral type.");
1856 return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
1857 }
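
// Example combining a literal with the literal loads above (a sketch; assumes an
// assembler instance `asm_`):
//   Literal* lit = asm_.NewLiteral<int64_t>(INT64_C(0x123456789abcdef0));
//   asm_.Loadd(A0, lit);  // PC-relative load, emitted when literals are placed.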
1858
1859 // Create a new literal with the given data.
1860 Literal* NewLiteral(size_t size, const uint8_t* data);
1861
1862 // Create a jump table for the given labels that will be emitted when finalizing.
1863 // When the table is emitted, offsets will be relative to the location of the table.
1864 // The table location is determined by the location of its label (the label precedes
1865 // the table data) and should be loaded using LoadLabelAddress().
1866 JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
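
// A sketch of the intended use (hypothetical; assumes the jump table exposes its label,
// e.g. via a GetLabel() accessor, and an assembler instance `asm_`):
//   JumpTable* table = asm_.CreateJumpTable(std::move(case_labels));
//   asm_.LoadLabelAddress(T0, table->GetLabel());
//   // ...load the 32-bit offset for the case index, add it to T0, then Jr() to dispatch.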
1867
1868 public:
1869 // Emit slow paths queued during assembly, promote short branches to long if needed,
1870 // and emit branches.
1871 void FinalizeCode() override;
1872
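// Returns true for registers x8-x15 (f8-f15 for FP), the only registers encodable in the
// 3-bit register fields of compressed ("C" extension) instructions.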
1873 template <typename Reg>
1874 static inline bool IsShortReg(Reg reg) {
1875 static_assert(std::is_same_v<Reg, XRegister> || std::is_same_v<Reg, FRegister>);
1876 uint32_t uv = enum_cast<uint32_t>(reg) - 8u;
1877 return IsUint<3>(uv);
1878 }
1879
1880 // Returns the current location of a label.
1881 //
1882 // This function must be used instead of `Riscv64Label::GetPosition()`
1883 // which returns assembler's internal data instead of an actual location.
1884 //
1885 // The location can change during branch fixup in `FinalizeCode()`. Before that,
1886 // the location is not final and therefore not very useful to external users,
1887 // so they should preferably retrieve the location only after `FinalizeCode()`.
1888 uint32_t GetLabelLocation(const Riscv64Label* label) const;
1889
1890 // Get the final position of a label after local fixup based on the old position
1891 // recorded before FinalizeCode().
1892 uint32_t GetAdjustedPosition(uint32_t old_position);
1893
1894 private:
1895 static uint32_t ConvertExtensions(
1896 const Riscv64InstructionSetFeatures* instruction_set_features) {
1897 // The `Riscv64InstructionSetFeatures` currently does not support "Zcb",
1898 // only the original "C" extension. For riscv64 that means "Zca" and "Zcd".
1899 constexpr Riscv64ExtensionMask kCompressedExtensionsMask =
1900 Riscv64ExtensionBit(Riscv64Extension::kZca) | Riscv64ExtensionBit(Riscv64Extension::kZcd);
1901 return
1902 (Riscv64ExtensionBit(Riscv64Extension::kLoadStore)) |
1903 (Riscv64ExtensionBit(Riscv64Extension::kZifencei)) |
1904 (Riscv64ExtensionBit(Riscv64Extension::kM)) |
1905 (Riscv64ExtensionBit(Riscv64Extension::kA)) |
1906 (Riscv64ExtensionBit(Riscv64Extension::kZicsr)) |
1907 (Riscv64ExtensionBit(Riscv64Extension::kF)) |
1908 (Riscv64ExtensionBit(Riscv64Extension::kD)) |
1909 (instruction_set_features->HasZba() ? Riscv64ExtensionBit(Riscv64Extension::kZba) : 0u) |
1910 (instruction_set_features->HasZbb() ? Riscv64ExtensionBit(Riscv64Extension::kZbb) : 0u) |
1911 (instruction_set_features->HasZbs() ? Riscv64ExtensionBit(Riscv64Extension::kZbs) : 0u) |
1912 (instruction_set_features->HasVector() ? Riscv64ExtensionBit(Riscv64Extension::kV) : 0u) |
1913 (instruction_set_features->HasCompressed() ? kCompressedExtensionsMask : 0u);
1914 }
1915
1916 void AssertExtensionsEnabled(Riscv64Extension ext) {
1917 DCHECK(IsExtensionEnabled(ext))
1918 << "ext=" << enum_cast<>(ext) << " enabled=0x" << std::hex << enabled_extensions_;
1919 }
1920
1921 template <typename... OtherExt>
1922 void AssertExtensionsEnabled(Riscv64Extension ext, OtherExt... other_ext) {
1923 AssertExtensionsEnabled(ext);
1924 AssertExtensionsEnabled(other_ext...);
1925 }
1926
1927 enum BranchCondition : uint8_t {
1928 kCondEQ,
1929 kCondNE,
1930 kCondLT,
1931 kCondGE,
1932 kCondLE,
1933 kCondGT,
1934 kCondLTU,
1935 kCondGEU,
1936 kCondLEU,
1937 kCondGTU,
1938 kUncond,
1939 };
1940
1941 // Note that PC-relative literal loads are handled as pseudo branches because they need
1942 // to be emitted after branch relocation to use correct offsets.
1943 class Branch {
1944 public:
1945 enum Type : uint8_t {
1946 // Compressed branches (can be promoted to longer)
1947 kCondCBranch,
1948 kUncondCBranch,
1949 // Compressed branches (can't be promoted to longer)
1950 kBareCondCBranch,
1951 kBareUncondCBranch,
1952
1953 // Short branches (can be promoted to longer).
1954 kCondBranch,
1955 kUncondBranch,
1956 kCall,
1957 // Short branches (can't be promoted to longer).
1958 kBareCondBranch,
1959 kBareUncondBranch,
1960 kBareCall,
1961
1962 // Medium branches (can be promoted to long).
1963 // Compressed version
1964 kCondCBranch21,
1965 kCondBranch21,
1966
1967 // Long branches.
1968 kLongCondBranch,
1969 kLongUncondBranch,
1970 kLongCall,
1971
1972 // Label.
1973 kLabel,
1974
1975 // Literals.
1976 kLiteral,
1977 kLiteralUnsigned,
1978 kLiteralLong,
1979 kLiteralFloat,
1980 kLiteralDouble,
1981 };
1982
1983 // Bit sizes of branch offsets, defined as an enum to minimize the chance of typos.
1984 enum OffsetBits {
1985 kOffset9 = 9,
1986 kOffset12 = 12,
1987 kOffset13 = 13,
1988 kOffset21 = 21,
1989 kOffset32 = 32,
1990 };
1991
1992 static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_
1993 static constexpr uint32_t kMaxBranchLength = 12; // In bytes.
1994
1995 struct BranchInfo {
1996 // Branch length in bytes.
1997 uint32_t length;
1998 // The offset in bytes of the PC used in the (only) PC-relative instruction from
1999 // the start of the branch sequence. RISC-V always uses the address of the PC-relative
2000 // instruction as the PC, so this is essentially the offset of that instruction.
2001 uint32_t pc_offset;
2002 // How large (in bits) a PC-relative offset can be for a given type of branch.
2003 OffsetBits offset_size;
2004 };
2005 static const BranchInfo branch_info_[/* Type */];
2006
2007 // Unconditional branch or call.
2008 Branch(
2009 uint32_t location, uint32_t target, XRegister rd, bool is_bare, bool compression_allowed);
2010 // Conditional branch.
2011 Branch(uint32_t location,
2012 uint32_t target,
2013 BranchCondition condition,
2014 XRegister lhs_reg,
2015 XRegister rhs_reg,
2016 bool is_bare,
2017 bool compression_allowed);
2018 // Label address or literal.
2019 Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type);
2020 Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type);
2021
2022 // Some conditional branches with lhs = rhs are effectively NOPs, while some
2023 // others are effectively unconditional.
2024 static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs);
2025 static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs);
2026 static bool IsCompressed(Type type);
2027
2028 static BranchCondition OppositeCondition(BranchCondition cond);
2029
2030 Type GetType() const;
2031 Type GetOldType() const;
2032 BranchCondition GetCondition() const;
2033 XRegister GetLeftRegister() const;
2034 XRegister GetRightRegister() const;
2035 XRegister GetNonZeroRegister() const;
2036 FRegister GetFRegister() const;
2037 uint32_t GetTarget() const;
2038 uint32_t GetLocation() const;
2039 uint32_t GetOldLocation() const;
2040 uint32_t GetLength() const;
2041 uint32_t GetOldLength() const;
2042 uint32_t GetEndLocation() const;
2043 uint32_t GetOldEndLocation() const;
2044 bool IsBare() const;
2045 bool IsResolved() const;
2046
2047 uint32_t NextBranchId() const;
2048
2049 // Checks if the condition meets compression requirements.
2050 bool IsCompressableCondition() const;
2051
2052 // Returns the bit size of the signed offset that the branch instruction can handle.
2053 OffsetBits GetOffsetSize() const;
2054
2055 // Calculates the distance between two byte locations in the assembler buffer and
2056 // returns the number of bits needed to represent the distance as a signed integer.
2057 static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
2058
2059 // Resolve a branch when the target is known.
2060 void Resolve(uint32_t target);
2061
2062 // Relocate a branch by a given delta if needed due to expansion of this or another
2063 // branch at a given location by this delta (just changes location_ and target_).
2064 void Relocate(uint32_t expand_location, uint32_t delta);
2065
2066 // If necessary, updates the type by promoting a short branch to a longer branch
2067 // based on the branch location and target. Returns the amount (in bytes) by
2068 // which the branch size has increased.
2069 uint32_t PromoteIfNeeded();
2070
2071 // Returns the offset into assembler buffer that shall be used as the base PC for
2072 // offset calculation. RISC-V always uses the address of the PC-relative instruction
2073 // as the PC, so this is essentially the location of that instruction.
2074 uint32_t GetOffsetLocation() const;
2075
2076 // Calculates and returns the offset ready for encoding in the branch instruction(s).
2077 int32_t GetOffset() const;
2078
2079 // Link this branch into the singly-linked list of branches bound to the same label.
2080 void LinkToList(uint32_t next_branch_id);
2081
2082 private:
2083 // Completes branch construction by determining and recording its type.
2084 void InitializeType(Type initial_type);
2085 // Helper for the above.
2086 void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type);
2087 void InitShortOrLong(OffsetBits ofs_size,
2088 Type compressed_type,
2089 Type short_type,
2090 Type long_type,
2091 Type longest_type);
2092
2093 uint32_t old_location_; // Offset into assembler buffer in bytes.
2094 uint32_t location_; // Offset into assembler buffer in bytes.
2095 uint32_t target_; // Offset into assembler buffer in bytes.
2096
2097 XRegister lhs_reg_; // Left-hand side register in conditional branches or
2098 // destination register in calls or literals.
2099 XRegister rhs_reg_; // Right-hand side register in conditional branches.
2100 FRegister freg_; // Destination register in FP literals.
2101 BranchCondition condition_; // Condition for conditional branches.
2102
2103 Type type_; // Current type of the branch.
2104 Type old_type_; // Initial type of the branch.
2105
2106 // Id of the next branch bound to the same label, forming a singly-linked, zero-terminated list.
2107 // NOTE: Encoded the same way as a position in a linked `Label` (id + sizeof(void*)).
2108 // The label itself is used to hold the head of this list.
2109 uint32_t next_branch_id_;

    bool compression_allowed_;
  };

  // Branch and literal fixup.

  void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset);
  void EmitBranch(Branch* branch);
  void EmitBranches();
  void EmitJumpTables();
  void EmitLiterals();

  void FinalizeLabeledBranch(Riscv64Label* label);
  void Bcond(Riscv64Label* label,
             bool is_bare,
             BranchCondition condition,
             XRegister lhs,
             XRegister rhs);
  void Buncond(Riscv64Label* label, XRegister rd, bool is_bare);
  template <typename XRegisterOrFRegister>
  void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type);

  Branch* GetBranch(uint32_t branch_id);
  const Branch* GetBranch(uint32_t branch_id) const;

  void ReserveJumpTableSpace();
  void PromoteBranches();
  void PatchCFI();

  // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
  template <typename T>
  void Emit(T value) {
    static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint16_t>,
                  "Only uint32_t and uint16_t are allowed");
    if (overwriting_) {
      // Branches to labels are emitted into their placeholders here.
      buffer_.Store<T>(overwrite_location_, value);
      overwrite_location_ += sizeof(T);
    } else {
      // Other instructions are simply appended at the end here.
      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
      buffer_.Emit<T>(value);
    }
  }

  void Emit16(uint32_t value) { Emit(dchecked_integral_cast<uint16_t>(value)); }
  void Emit32(uint32_t value) { Emit(value); }

  // Adjust base register and offset if needed for load/store with a large offset.
  void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs);

  // Helper templates for loads/stores with 32-bit offsets.
  template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
  void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
  void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
  void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset);
  template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
  void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset);

  // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`.
  void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp);

  // RVV constants and helpers.

  enum class Nf : uint32_t {
    k1 = 0b000,
    k2 = 0b001,
    k3 = 0b010,
    k4 = 0b011,
    k5 = 0b100,
    k6 = 0b101,
    k7 = 0b110,
    k8 = 0b111,
  };

  enum class VAIEncoding : uint32_t {
    //          ----Operands----  | Type of Scalar               | Instruction type
    kOPIVV = 0b000,  // vector-vector     | --                           | R-type
    kOPFVV = 0b001,  // vector-vector     | --                           | R-type
    kOPMVV = 0b010,  // vector-vector     | --                           | R-type
    kOPIVI = 0b011,  // vector-immediate  | imm[4:0]                     | R-type
    kOPIVX = 0b100,  // vector-scalar     | GPR x register rs1           | R-type
    kOPFVF = 0b101,  // vector-scalar     | FP f register rs1            | R-type
    kOPMVX = 0b110,  // vector-scalar     | GPR x register rs1           | R-type
    kOPCFG = 0b111,  // scalars-imms      | GPR x register rs1 & rs2/imm | R/I-type
  };

  enum class MemAddressMode : uint32_t {
    kUnitStride = 0b00,
    kIndexedUnordered = 0b01,
    kStrided = 0b10,
    kIndexedOrdered = 0b11,
  };

  enum class VectorWidth : uint32_t {
    k8 = 0b000,
    k16 = 0b101,
    k32 = 0b110,
    k64 = 0b111,

    kMask = 0b000,
    kWholeR = 0b000,
  };

  static constexpr uint32_t EncodeRVVMemF7(const Nf nf,
                                           const uint32_t mew,
                                           const MemAddressMode mop,
                                           const VM vm) {
    DCHECK(IsUint<3>(enum_cast<uint32_t>(nf)));
    DCHECK(IsUint<1>(mew));
    DCHECK(IsUint<2>(enum_cast<uint32_t>(mop)));
    DCHECK(IsUint<1>(enum_cast<uint32_t>(vm)));

    return enum_cast<uint32_t>(nf) << 4 | mew << 3 | enum_cast<uint32_t>(mop) << 1 |
           enum_cast<uint32_t>(vm);
  }
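  // For example (illustrative, assuming the `VM` enum defined earlier in this file,
  // where vm = 1 means "unmasked"): a single-register (nf = 1), unit-stride, unmasked
  // access packs as 0b000 << 4 | 0 << 3 | 0b00 << 1 | 1 = 0b0000001.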

  static constexpr uint32_t EncodeRVVF7(const uint32_t funct6, const VM vm) {
    DCHECK(IsUint<6>(funct6));
    return funct6 << 1 | enum_cast<uint32_t>(vm);
  }

  template <unsigned kWidth>
  static constexpr uint32_t EncodeIntWidth(const int32_t imm) {
    DCHECK(IsInt<kWidth>(imm));
    return static_cast<uint32_t>(imm) & MaskLeastSignificant<uint32_t>(kWidth);
  }

  static constexpr uint32_t EncodeInt5(const int32_t imm) { return EncodeIntWidth<5>(imm); }
  static constexpr uint32_t EncodeInt6(const int32_t imm) { return EncodeIntWidth<6>(imm); }

  template <typename Reg>
  static constexpr uint32_t EncodeShortReg(const Reg reg) {
    DCHECK(IsShortReg(reg));
    return enum_cast<uint32_t>(reg) - 8u;
  }

  // Rearrange given offset in the way {offset[0] | offset[1]}.
  static constexpr uint32_t EncodeOffset0_1(int32_t offset) {
    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<2>(u_offset));

    return u_offset >> 1 | (u_offset & 1u) << 1;
  }
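  // For example (illustrative): offset = 0b10 yields 0b01 (the two low bits swap places).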

  // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset52_76(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_52, 2, 4);
  }
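  // For example (illustrative): offset = 0b1101100 (108) has offset[5:2] = 0b1011 and
  // offset[7:6] = 0b01, so the packed result is {1011 | 01} = 0b101101.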

  // Rearrange given offset, scaled by 8, in the way {offset[5:3] | offset[8:6]}.
  static constexpr uint32_t ExtractOffset53_86(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<6 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_86 = BitFieldExtract(u_offset, 6, 3);

    return BitFieldInsert(imm_86, imm_53, 3, 3);
  }

  // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[6]}.
  static constexpr uint32_t ExtractOffset52_6(int32_t offset) {
    DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 2>(u_offset));

    uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
    uint32_t imm_6 = BitFieldExtract(u_offset, 6, 1);

    return BitFieldInsert(imm_6, imm_52, 1, 4);
  }

  // Rearrange given offset, scaled by 8, in the way {offset[5:3] | offset[7:6]}.
  static constexpr uint32_t ExtractOffset53_76(int32_t offset) {
    DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";

    uint32_t u_offset = static_cast<uint32_t>(offset);
    DCHECK(IsUint<5 + 3>(u_offset));

    uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
    uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);

    return BitFieldInsert(imm_76, imm_53, 2, 3);
  }

  static constexpr bool IsImmCLuiEncodable(uint32_t uimm) {
    // The c.lui instruction is a bit odd and its immediate value is tricky: it is not
    // a full 32-bit value but bits [31:12] of one (with bit 17 acting as the sign bit),
    // shifted towards the bottom, i.e. bits [19:0] of `uimm` are the meaningful ones.
    // Since we want a signed, non-zero, 6-bit immediate, the encodable values are the
    // range [1, 0x1f] and, for negative values, the range [0xfffe0, 0xfffff], because
    // the original sign bit 17 is now bit 5 and is replicated in the higher bits.
    // The encoding with immediate = 0 is reserved.
    // For more details, please see chapter 16.5 of the specification.

    return uimm != 0u && (IsUint<5>(uimm) || IsUint<5>(uimm - 0xfffe0u));
  }
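  // For example (illustrative): 0x1f and 0xfffe0 are encodable, while 0x20 (needs more
  // than 5 bits) and 0 (reserved encoding) are not.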

  // Emit helpers.

  // I-type instruction:
  //
  //    31                   20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [        imm11:0            rs1   funct3     rd       opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
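  // For example (illustrative, using the `XRegister` names defined with this assembler,
  // where A0 = x10 and A1 = x11): `addi a0, a1, 1` is
  // EmitI(/*imm12=*/ 1, /*rs1=*/ A1, /*funct3=*/ 0x0, /*rd=*/ A0, /*opcode=*/ 0x13),
  // which yields the encoding 0x00158513.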

  // R-type instruction:
  //
  //    31         25 24     20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [    funct7       rs2       rs1   funct3     rd       opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3>
  void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) {
    DCHECK(IsUint<7>(funct7));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD / F[N]MSUB):
  //
  //    31     27 26 25 24     20 19     15 14 12 11      7 6           0
  //   -------------------------------------------------------------------
  //   [ . . . . | . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   rs3     fmt     rs2       rs1   funct3     rd       opcode    ]
  //   -------------------------------------------------------------------
  template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
  void EmitR4(
      Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) {
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs3)));
    DCHECK(IsUint<2>(fmt));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 |
                        opcode;
    Emit32(encoding);
  }

  // S-type instruction:
  //
  //    31         25 24     20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [   imm11:5       rs2       rs1   funct3   imm4:0     opcode    ]
  //   -----------------------------------------------------------------
  template <typename Reg1, typename Reg2>
  void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK(IsInt<12>(imm12)) << imm12;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode;
    Emit32(encoding);
  }

  // I-type instruction variant for shifts (SLLI / SRLI / SRAI):
  //
  //    31       26 25       20 19     15 14 12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . ]
  //   [  imm11:6  imm5:0(shamt)    rs1   funct3     rd      opcode    ]
  //   -----------------------------------------------------------------
  void EmitI6(uint32_t funct6,
              uint32_t imm6,
              XRegister rs1,
              uint32_t funct3,
              XRegister rd,
              uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsUint<6>(imm6)) << imm6;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 |
                        static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // B-type instruction:
  //
  //   31 30       25 24     20 19     15 14 12 11    8 7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ]
  //   [imm12 imm11:5    rs2       rs1   funct3 imm4:1 imm11  opcode   ]
  //   -----------------------------------------------------------------
  void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<13>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu;
    uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) |
                        static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
                        static_cast<uint32_t>(funct3) << 12 |
                        (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode;
    Emit32(encoding);
  }
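  // For example (illustrative, with A0 = x10 and A1 = x11): `beq a0, a1, +16` is
  // EmitB(/*offset=*/ 16, /*rs2=*/ A1, /*rs1=*/ A0, /*funct3=*/ 0x0, /*opcode=*/ 0x63),
  // which yields the encoding 0x00b50863.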

  // U-type instruction:
  //
  //    31                                   12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ]
  //   [                imm31:12                    rd       opcode    ]
  //   -----------------------------------------------------------------
  void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) {
    CHECK(IsUint<20>(imm20)) << imm20;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }

  // J-type instruction:
  //
  //   31 30               21 20 19        12 11      7 6           0
  //   -----------------------------------------------------------------
  //   [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ]
  //   [imm20    imm10:1    imm11   imm19:12        rd       opcode    ]
  //   -----------------------------------------------------------------
  void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    CHECK(IsInt<21>(offset)) << offset;
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
    DCHECK(IsUint<7>(opcode));
    uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu;
    uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 |
                        (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) |
                        static_cast<uint32_t>(rd) << 7 | opcode;
    Emit32(encoding);
  }
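  // For example (illustrative, with RA = x1): `jal ra, +2048` is
  // EmitJ(/*offset=*/ 2048, /*rd=*/ RA, /*opcode=*/ 0x6f), which yields 0x001000ef.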

  // Compressed Instruction Encodings

  // CR-type instruction:
  //
  //   15    12 11      7 6       2 1 0
  //   ---------------------------------
  //   [ . . . | . . . . | . . . . | . ]
  //   [ func4   rd/rs1      rs2    op ]
  //   ---------------------------------
  //
  void EmitCR(uint32_t funct4, XRegister rd_rs1, XRegister rs2, uint32_t opcode) {
    DCHECK(IsUint<4>(funct4));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct4 << 12 | static_cast<uint32_t>(rd_rs1) << 7 |
                        static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CI-type instruction:
  //
  //   15  13 12 11      7 6       2 1 0
  //   ----------------------------------
  //   [ . . | | . . . . | . . . . | . ]
  //   [func3 imm rd/rs1      imm   op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCI(uint32_t funct3, Reg rd_rs1, uint32_t imm6, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
    DCHECK(IsUint<6>(imm6));
    DCHECK(IsUint<2>(opcode));

    uint32_t immH1 = BitFieldExtract(imm6, 5, 1);
    uint32_t immL5 = BitFieldExtract(imm6, 0, 5);

    uint32_t encoding =
        funct3 << 13 | immH1 << 12 | static_cast<uint32_t>(rd_rs1) << 7 | immL5 << 2 | opcode;
    Emit16(encoding);
  }
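  // For example (illustrative, with A0 = x10): `c.addi a0, 1` is
  // EmitCI(/*funct3=*/ 0b000, /*rd_rs1=*/ A0, /*imm6=*/ 1, /*opcode=*/ 0b01),
  // which yields the 16-bit encoding 0x0505.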

  // CSS-type instruction:
  //
  //   15  13 12        7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . | . . . . | . ]
  //   [func3    imm6        rs2    op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCSS(uint32_t funct3, uint32_t offset6, Reg rs2, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<6>(offset6));
    DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | offset6 << 7 | static_cast<uint32_t>(rs2) << 2 | opcode;
    Emit16(encoding);
  }

  // CIW-type instruction:
  //
  //   15  13 12          5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . | . . | . ]
  //   [func3      imm8         rd'  op ]
  //   ---------------------------------
  //
  void EmitCIW(uint32_t funct3, uint32_t imm8, XRegister rd_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(imm8));
    DCHECK(IsShortReg(rd_s)) << rd_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding = funct3 << 13 | imm8 << 5 | EncodeShortReg(rd_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CL/S-type instruction:
  //
  //   15  13 12 10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . | . . | . ]
  //   [func3  imm   rs1' imm rds2'  op ]
  //   ---------------------------------
  //
  template <typename Reg>
  void EmitCM(uint32_t funct3, uint32_t imm5, XRegister rs1_s, Reg rd_rs2_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<5>(imm5));
    DCHECK(IsShortReg(rs1_s)) << rs1_s;
    DCHECK(IsShortReg(rd_rs2_s)) << rd_rs2_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t immH3 = BitFieldExtract(imm5, 2, 3);
    uint32_t immL2 = BitFieldExtract(imm5, 0, 2);

    uint32_t encoding = funct3 << 13 | immH3 << 10 | EncodeShortReg(rs1_s) << 7 | immL2 << 5 |
                        EncodeShortReg(rd_rs2_s) << 2 | opcode;
    Emit16(encoding);
  }

  // CA-type instruction:
  //
  //   15         10 9  7 6 5 4   2 1 0
  //   ---------------------------------
  //   [ . . . . . | . . | . | . . | . ]
  //   [   funct6   rds1' funct2 rs2' op]
  //   ---------------------------------
  //
  void EmitCA(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t rs2_v, uint32_t opcode) {
    DCHECK(IsUint<6>(funct6));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(funct2));
    DCHECK(IsUint<3>(rs2_v));
    DCHECK(IsUint<2>(opcode));

    uint32_t encoding =
        funct6 << 10 | EncodeShortReg(rd_rs1_s) << 7 | funct2 << 5 | rs2_v << 2 | opcode;
    Emit16(encoding);
  }

  void EmitCAReg(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, XRegister rs2_s, uint32_t opcode) {
    DCHECK(IsShortReg(rs2_s)) << rs2_s;
    EmitCA(funct6, rd_rs1_s, funct2, EncodeShortReg(rs2_s), opcode);
  }

  void EmitCAImm(
      uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t funct3, uint32_t opcode) {
    EmitCA(funct6, rd_rs1_s, funct2, funct3, opcode);
  }

  // CB-type instruction:
  //
  //   15  13 12  10 9  7 6       2 1 0
  //   ---------------------------------
  //   [ . . | . . | . . | . . . . | . ]
  //   [func3 offset rs1'   offset  op ]
  //   ---------------------------------
  //
  void EmitCB(uint32_t funct3, int32_t offset8, XRegister rd_rs1_s, uint32_t opcode) {
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<8>(offset8));
    DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
    DCHECK(IsUint<2>(opcode));

    uint32_t offsetH3 = BitFieldExtract<uint32_t>(offset8, 5, 3);
    uint32_t offsetL5 = BitFieldExtract<uint32_t>(offset8, 0, 5);

    uint32_t encoding =
        funct3 << 13 | offsetH3 << 10 | EncodeShortReg(rd_rs1_s) << 7 | offsetL5 << 2 | opcode;
    Emit16(encoding);
  }

  // Wrappers for EmitCB with different immediate bit permutations.
  void EmitCBBranch(uint32_t funct3, int32_t offset, XRegister rs1_s, uint32_t opcode) {
    DCHECK(IsInt<9>(offset));
    DCHECK_ALIGNED(offset, 2);

    uint32_t u_offset = static_cast<uint32_t>(offset);

    // offset[8|4:3]
    uint32_t offsetH3 = (BitFieldExtract(u_offset, 8, 1) << 2) |
                        BitFieldExtract(u_offset, 3, 2);
    // offset[7:6|2:1|5]
    uint32_t offsetL5 = (BitFieldExtract(u_offset, 6, 2) << 3) |
                        (BitFieldExtract(u_offset, 1, 2) << 1) |
                        BitFieldExtract(u_offset, 5, 1);

    EmitCB(funct3, BitFieldInsert(offsetL5, offsetH3, 5, 3), rs1_s, opcode);
  }

  void EmitCBArithmetic(
      uint32_t funct3, uint32_t funct2, uint32_t imm, XRegister rd_s, uint32_t opcode) {
    uint32_t imm_5 = BitFieldExtract(imm, 5, 1);
    uint32_t immH3 = BitFieldInsert(funct2, imm_5, 2, 1);
    uint32_t immL5 = BitFieldExtract(imm, 0, 5);

    EmitCB(funct3, BitFieldInsert(immL5, immH3, 5, 3), rd_s, opcode);
  }

  // CJ-type instruction:
  //
  //   15  13 12                  2 1 0
  //   ---------------------------------
  //   [ . . | . . . . . . . . . . | . ]
  //   [func3     jump target 11    op ]
  //   ---------------------------------
  //
  void EmitCJ(uint32_t funct3, int32_t offset, uint32_t opcode) {
    DCHECK_ALIGNED(offset, 2);
    DCHECK(IsInt<12>(offset)) << offset;
    DCHECK(IsUint<3>(funct3));
    DCHECK(IsUint<2>(opcode));

    uint32_t uoffset = static_cast<uint32_t>(offset);
    // offset[11|4|9:8|10|6|7|3:1|5]
    uint32_t jumpt = (BitFieldExtract(uoffset, 11, 1) << 10) |
                     (BitFieldExtract(uoffset, 4, 1) << 9) |
                     (BitFieldExtract(uoffset, 8, 2) << 7) |
                     (BitFieldExtract(uoffset, 10, 1) << 6) |
                     (BitFieldExtract(uoffset, 6, 1) << 5) |
                     (BitFieldExtract(uoffset, 7, 1) << 4) |
                     (BitFieldExtract(uoffset, 1, 3) << 1) |
                     BitFieldExtract(uoffset, 5, 1);

    DCHECK(IsUint<11>(jumpt));

    uint32_t encoding = funct3 << 13 | jumpt << 2 | opcode;
    Emit16(encoding);
  }

  ArenaVector<Branch> branches_;

  // For checking that we finalize the code only once.
  bool finalized_;

  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
  bool overwriting_;
  // The current overwrite location.
  uint32_t overwrite_location_;

  // Use `std::deque<>` for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;
  ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for `GetAdjustedPosition()`, see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

  Riscv64ExtensionMask enabled_extensions_;
  uint32_t available_scratch_core_registers_;
  uint32_t available_scratch_fp_registers_;

  static constexpr uint32_t kXlen = 64;

  friend class ScopedExtensionsOverride;
  friend class ScratchRegisterScope;

  DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
};

class ScopedExtensionsOverride {
 public:
  ScopedExtensionsOverride(Riscv64Assembler* assembler, Riscv64ExtensionMask enabled_extensions)
      : assembler_(assembler),
        old_enabled_extensions_(assembler->enabled_extensions_) {
    assembler->enabled_extensions_ = enabled_extensions;
  }

  ~ScopedExtensionsOverride() {
    assembler_->enabled_extensions_ = old_enabled_extensions_;
  }

 protected:
  static Riscv64ExtensionMask GetEnabledExtensions(Riscv64Assembler* assembler) {
    return assembler->enabled_extensions_;
  }

 private:
  Riscv64Assembler* const assembler_;
  const Riscv64ExtensionMask old_enabled_extensions_;
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsRestriction : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsRestriction(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) & kMask) {}
};

template <Riscv64ExtensionMask kMask>
class ScopedExtensionsInclusion : public ScopedExtensionsOverride {
 public:
  explicit ScopedExtensionsInclusion(Riscv64Assembler* assembler)
      : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) | kMask) {}
};

template <Riscv64ExtensionMask kMask>
using ScopedExtensionsExclusion = ScopedExtensionsRestriction<~kMask>;

using ScopedLrScExtensionsRestriction =
    ScopedExtensionsRestriction<kRiscv64LrScSequenceExtensionsMask>;

class ScratchRegisterScope {
 public:
  explicit ScratchRegisterScope(Riscv64Assembler* assembler)
      : assembler_(assembler),
        old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
        old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}

  ~ScratchRegisterScope() {
    assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
    assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
  }

  // Allocate a scratch `XRegister`. There must be an available register to allocate.
  XRegister AllocateXRegister() {
    CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
    // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_core_registers_);
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    return enum_cast<XRegister>(reg_num);
  }

  // Free a previously unavailable core register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
  void FreeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // The number of available scratch core registers.
  size_t AvailableXRegisters() {
    return POPCOUNT(assembler_->available_scratch_core_registers_);
  }

  // Make sure a core register is available for use as a scratch register.
  void IncludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ |= 1u << reg_num;
  }

  // Make sure a core register is not available for use as a scratch register.
  void ExcludeXRegister(XRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
    assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
  }

  // Allocate a scratch `FRegister`. There must be an available register to allocate.
  FRegister AllocateFRegister() {
    CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
    // Allocate the highest available scratch register (same as for core registers).
    uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
                       CLZ(assembler_->available_scratch_fp_registers_);
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    return enum_cast<FRegister>(reg_num);
  }

  // Free a previously unavailable FP register for use as a scratch register.
  // This can be an arbitrary register, not necessarily the usual `FTMP`.
  void FreeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // The number of available scratch FP registers.
  size_t AvailableFRegisters() {
    return POPCOUNT(assembler_->available_scratch_fp_registers_);
  }

  // Make sure an FP register is available for use as a scratch register.
  void IncludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
  }

  // Make sure an FP register is not available for use as a scratch register.
  void ExcludeFRegister(FRegister reg) {
    uint32_t reg_num = enum_cast<uint32_t>(reg);
    DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
    assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
  }

 private:
  Riscv64Assembler* const assembler_;
  const uint32_t old_available_scratch_core_registers_;
  const uint32_t old_available_scratch_fp_registers_;

  DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
};
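
// Usage sketch (illustrative, not part of the API): scratch registers are claimed for
// the duration of a scope and the available set is restored automatically when the
// scope ends.
//
//   ScratchRegisterScope srs(&assembler);
//   XRegister tmp = srs.AllocateXRegister();
//   // ... emit code using `tmp` ...
//   // The destructor restores the set of available scratch registers.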

constexpr Riscv64ExtensionMask kRiscv64CompressedExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca) |
    Riscv64ExtensionBit(Riscv64Extension::kZcd) |
    Riscv64ExtensionBit(Riscv64Extension::kZcb);

using ScopedNoCInstructions = ScopedExtensionsExclusion<kRiscv64CompressedExtensionsMask>;
using ScopedUseCInstructions = ScopedExtensionsInclusion<kRiscv64CompressedExtensionsMask>;
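
// Usage sketch (illustrative): temporarily forbid compressed ("C") instructions, e.g.
// where the size of the emitted code must be predictable:
//
//   {
//     ScopedNoCInstructions no_c(&assembler);
//     // Instructions emitted here use only 32-bit encodings.
//   }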

}  // namespace riscv64
}  // namespace art

#endif  // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_