1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/codegen/assembler-inl.h"
6 #include "src/codegen/callable.h"
7 #include "src/codegen/macro-assembler.h"
8 #include "src/codegen/optimized-compilation-info.h"
9 #include "src/codegen/riscv64/constants-riscv64.h"
10 #include "src/compiler/backend/code-generator-impl.h"
11 #include "src/compiler/backend/code-generator.h"
12 #include "src/compiler/backend/gap-resolver.h"
13 #include "src/compiler/node-matchers.h"
14 #include "src/compiler/osr.h"
15 #include "src/heap/memory-chunk.h"
16 #include "src/wasm/wasm-code-manager.h"
17
18 namespace v8 {
19 namespace internal {
20 namespace compiler {
21
22 #define __ tasm()->
23
24 // TODO(plind): consider renaming these macros.
25 #define TRACE_MSG(msg) \
26 PrintF("code_gen: \'%s\' in function %s at line %d\n", msg, __FUNCTION__, \
27 __LINE__)
28
29 #define TRACE_UNIMPL() \
30 PrintF("UNIMPLEMENTED code_generator_riscv64: %s at line %d\n", \
31 __FUNCTION__, __LINE__)
32
33 // Adds RISC-V-specific methods to convert InstructionOperands.
34 class RiscvOperandConverter final : public InstructionOperandConverter {
35 public:
36 RiscvOperandConverter(CodeGenerator* gen, Instruction* instr)
37 : InstructionOperandConverter(gen, instr) {}
38
39 FloatRegister OutputSingleRegister(size_t index = 0) {
40 return ToSingleRegister(instr_->OutputAt(index));
41 }
42
43 FloatRegister InputSingleRegister(size_t index) {
44 return ToSingleRegister(instr_->InputAt(index));
45 }
46
47 FloatRegister ToSingleRegister(InstructionOperand* op) {
48 // The Single (Float) and Double register namespaces are the same on
49 // RISC-V; both are typedefs of FPURegister.
50 return ToDoubleRegister(op);
51 }
52
53 Register InputOrZeroRegister(size_t index) {
54 if (instr_->InputAt(index)->IsImmediate()) {
55 Constant constant = ToConstant(instr_->InputAt(index));
56 switch (constant.type()) {
57 case Constant::kInt32:
58 case Constant::kInt64:
59 DCHECK_EQ(0, InputInt32(index));
60 break;
61 case Constant::kFloat32:
62 DCHECK_EQ(0, bit_cast<int32_t>(InputFloat32(index)));
63 break;
64 case Constant::kFloat64:
65 DCHECK_EQ(0, bit_cast<int64_t>(InputDouble(index)));
66 break;
67 default:
68 UNREACHABLE();
69 }
70 return zero_reg;
71 }
72 return InputRegister(index);
73 }
74
75 DoubleRegister InputOrZeroDoubleRegister(size_t index) {
76 if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
77
78 return InputDoubleRegister(index);
79 }
80
81 DoubleRegister InputOrZeroSingleRegister(size_t index) {
82 if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
83
84 return InputSingleRegister(index);
85 }
86
87 Operand InputImmediate(size_t index) {
88 Constant constant = ToConstant(instr_->InputAt(index));
89 switch (constant.type()) {
90 case Constant::kInt32:
91 return Operand(constant.ToInt32());
92 case Constant::kInt64:
93 return Operand(constant.ToInt64());
94 case Constant::kFloat32:
95 return Operand::EmbeddedNumber(constant.ToFloat32());
96 case Constant::kFloat64:
97 return Operand::EmbeddedNumber(constant.ToFloat64().value());
98 case Constant::kExternalReference:
99 case Constant::kCompressedHeapObject:
100 case Constant::kHeapObject:
101 // TODO(plind): Maybe we should handle ExtRef & HeapObj here?
102 // maybe not done on arm due to const pool ??
103 break;
104 case Constant::kDelayedStringConstant:
105 return Operand::EmbeddedStringConstant(
106 constant.ToDelayedStringConstant());
107 case Constant::kRpoNumber:
108 UNREACHABLE(); // TODO(titzer): RPO immediates
109 }
110 UNREACHABLE();
111 }
112
113 Operand InputOperand(size_t index) {
114 InstructionOperand* op = instr_->InputAt(index);
115 if (op->IsRegister()) {
116 return Operand(ToRegister(op));
117 }
118 return InputImmediate(index);
119 }
120
121 MemOperand MemoryOperand(size_t* first_index) {
122 const size_t index = *first_index;
123 switch (AddressingModeField::decode(instr_->opcode())) {
124 case kMode_None:
125 break;
126 case kMode_MRI:
127 *first_index += 2;
128 return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
129 case kMode_Root:
130 return MemOperand(kRootRegister, InputInt32(index));
131 case kMode_MRR:
132 // TODO(plind): r6 address mode, to be implemented ...
133 UNREACHABLE();
134 }
135 UNREACHABLE();
136 }
137
138 MemOperand MemoryOperand(size_t index = 0) { return MemoryOperand(&index); }
139
140 MemOperand ToMemOperand(InstructionOperand* op) const {
141 DCHECK_NOT_NULL(op);
142 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
143 return SlotToMemOperand(AllocatedOperand::cast(op)->index());
144 }
145
146 MemOperand SlotToMemOperand(int slot) const {
147 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
148 return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
149 }
150 };
151
152 static inline bool HasRegisterInput(Instruction* instr, size_t index) {
153 return instr->InputAt(index)->IsRegister();
154 }
155 namespace {
156
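// Out-of-line slow path for a write barrier: Generate() re-checks the
// "pointers to here are interesting" page flag on the stored value, computes
// the slot address as object + index, and then calls the record-write stub
// (or the ephemeron key barrier), saving and restoring ra when the frame was
// elided.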
157 class OutOfLineRecordWrite final : public OutOfLineCode {
158 public:
159 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
160 Register value, Register scratch0, Register scratch1,
161 RecordWriteMode mode, StubCallMode stub_mode)
162 : OutOfLineCode(gen),
163 object_(object),
164 index_(index),
165 value_(value),
166 scratch0_(scratch0),
167 scratch1_(scratch1),
168 mode_(mode),
169 stub_mode_(stub_mode),
170 must_save_lr_(!gen->frame_access_state()->has_frame()),
171 zone_(gen->zone()) {
172 DCHECK(!AreAliased(object, index, scratch0, scratch1));
173 DCHECK(!AreAliased(value, index, scratch0, scratch1));
174 }
175
176 void Generate() final {
177 if (COMPRESS_POINTERS_BOOL) {
178 __ DecompressTaggedPointer(value_, value_);
179 }
180 __ CheckPageFlag(value_, scratch0_,
181 MemoryChunk::kPointersToHereAreInterestingMask, eq,
182 exit());
183 __ Add64(scratch1_, object_, index_);
184 RememberedSetAction const remembered_set_action =
185 mode_ > RecordWriteMode::kValueIsMap ? RememberedSetAction::kEmit
186 : RememberedSetAction::kOmit;
187 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
188 ? SaveFPRegsMode::kSave
189 : SaveFPRegsMode::kIgnore;
190 if (must_save_lr_) {
191 // We need to save and restore ra if the frame was elided.
192 __ Push(ra);
193 }
194 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
195 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
196 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
197 // A direct call to a wasm runtime stub defined in this module.
198 // Just encode the stub index. This will be patched when the code
199 // is added to the native module and copied into wasm code space.
200 __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
201 remembered_set_action, save_fp_mode,
202 StubCallMode::kCallWasmRuntimeStub);
203 } else {
204 __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
205 remembered_set_action, save_fp_mode);
206 }
207 if (must_save_lr_) {
208 __ Pop(ra);
209 }
210 }
211
212 private:
213 Register const object_;
214 Register const index_;
215 Register const value_;
216 Register const scratch0_;
217 Register const scratch1_;
218 RecordWriteMode const mode_;
219 StubCallMode const stub_mode_;
220 bool must_save_lr_;
221 Zone* zone_;
222 };
223
224 Condition FlagsConditionToConditionCmp(FlagsCondition condition) {
225 switch (condition) {
226 case kEqual:
227 return eq;
228 case kNotEqual:
229 return ne;
230 case kSignedLessThan:
231 return lt;
232 case kSignedGreaterThanOrEqual:
233 return ge;
234 case kSignedLessThanOrEqual:
235 return le;
236 case kSignedGreaterThan:
237 return gt;
238 case kUnsignedLessThan:
239 return Uless;
240 case kUnsignedGreaterThanOrEqual:
241 return Ugreater_equal;
242 case kUnsignedLessThanOrEqual:
243 return Uless_equal;
244 case kUnsignedGreaterThan:
245 return Ugreater;
246 case kUnorderedEqual:
247 case kUnorderedNotEqual:
248 break;
249 default:
250 break;
251 }
252 UNREACHABLE();
253 }
254
255 Condition FlagsConditionToConditionTst(FlagsCondition condition) {
256 switch (condition) {
257 case kNotEqual:
258 return ne;
259 case kEqual:
260 return eq;
261 default:
262 break;
263 }
264 UNREACHABLE();
265 }
266
267 Condition FlagsConditionToConditionOvf(FlagsCondition condition) {
268 switch (condition) {
269 case kOverflow:
270 return ne;
271 case kNotOverflow:
272 return eq;
273 default:
274 break;
275 }
276 UNREACHABLE();
277 }
278
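// Maps a FlagsCondition to the FPU compare condition to emit, plus a
// |predicate| flag saying whether the raw compare result is used as-is (true)
// or negated (false); e.g. kNotEqual is expressed as an EQ compare with
// predicate == false.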
279 FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate,
280 FlagsCondition condition) {
281 switch (condition) {
282 case kEqual:
283 *predicate = true;
284 return EQ;
285 case kNotEqual:
286 *predicate = false;
287 return EQ;
288 case kUnsignedLessThan:
289 *predicate = true;
290 return LT;
291 case kUnsignedGreaterThanOrEqual:
292 *predicate = false;
293 return LT;
294 case kUnsignedLessThanOrEqual:
295 *predicate = true;
296 return LE;
297 case kUnsignedGreaterThan:
298 *predicate = false;
299 return LE;
300 case kUnorderedEqual:
301 case kUnorderedNotEqual:
302 *predicate = true;
303 break;
304 default:
305 *predicate = true;
306 break;
307 }
308 UNREACHABLE();
309 }
310
311 } // namespace
312
313 #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
314 do { \
315 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
316 __ sync(); \
317 } while (0)
318
319 #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \
320 do { \
321 __ sync(); \
322 __ asm_instr(i.InputOrZeroRegister(2), i.MemoryOperand()); \
323 __ sync(); \
324 } while (0)
325
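// The atomic read-modify-write macros below use an LL/SC retry loop: load a
// reserved copy of the current value, apply the binary operation, then
// store-conditionally write the result back and branch to retry if the
// store-conditional reports failure (a non-zero status). The sync() calls act
// as memory fences around the sequence.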
326 #define ASSEMBLE_ATOMIC_BINOP(load_linked, store_conditional, bin_instr) \
327 do { \
328 Label binop; \
329 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
330 __ sync(); \
331 __ bind(&binop); \
332 __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \
333 __ bin_instr(i.TempRegister(1), i.OutputRegister(0), \
334 Operand(i.InputRegister(2))); \
335 __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
336 __ BranchShort(&binop, ne, i.TempRegister(1), Operand(zero_reg)); \
337 __ sync(); \
338 } while (0)
339
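// Sub-word (8- and 16-bit) variant: LR/SC only operate on naturally aligned
// 32/64-bit units, so the macro aligns the address down to the containing
// word (masking off the low 2 or 3 address bits), turns the byte offset into
// a bit shift, and uses ExtractBits/InsertBits to update only the addressed
// sub-word inside the LL/SC retry loop.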
340 #define ASSEMBLE_ATOMIC_BINOP_EXT(load_linked, store_conditional, sign_extend, \
341 size, bin_instr, representation) \
342 do { \
343 Label binop; \
344 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
345 if (representation == 32) { \
346 __ And(i.TempRegister(3), i.TempRegister(0), 0x3); \
347 } else { \
348 DCHECK_EQ(representation, 64); \
349 __ And(i.TempRegister(3), i.TempRegister(0), 0x7); \
350 } \
351 __ Sub64(i.TempRegister(0), i.TempRegister(0), \
352 Operand(i.TempRegister(3))); \
353 __ Sll32(i.TempRegister(3), i.TempRegister(3), 3); \
354 __ sync(); \
355 __ bind(&binop); \
356 __ load_linked(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
357 __ ExtractBits(i.OutputRegister(0), i.TempRegister(1), i.TempRegister(3), \
358 size, sign_extend); \
359 __ bin_instr(i.TempRegister(2), i.OutputRegister(0), \
360 Operand(i.InputRegister(2))); \
361 __ InsertBits(i.TempRegister(1), i.TempRegister(2), i.TempRegister(3), \
362 size); \
363 __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
364 __ BranchShort(&binop, ne, i.TempRegister(1), Operand(zero_reg)); \
365 __ sync(); \
366 } while (0)
367
368 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_linked, store_conditional) \
369 do { \
370 Label exchange; \
371 __ sync(); \
372 __ bind(&exchange); \
373 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
374 __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \
375 __ Move(i.TempRegister(1), i.InputRegister(2)); \
376 __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
377 __ BranchShort(&exchange, ne, i.TempRegister(1), Operand(zero_reg)); \
378 __ sync(); \
379 } while (0)
380
381 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT( \
382 load_linked, store_conditional, sign_extend, size, representation) \
383 do { \
384 Label exchange; \
385 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
386 if (representation == 32) { \
387 __ And(i.TempRegister(1), i.TempRegister(0), 0x3); \
388 } else { \
389 DCHECK_EQ(representation, 64); \
390 __ And(i.TempRegister(1), i.TempRegister(0), 0x7); \
391 } \
392 __ Sub64(i.TempRegister(0), i.TempRegister(0), \
393 Operand(i.TempRegister(1))); \
394 __ Sll32(i.TempRegister(1), i.TempRegister(1), 3); \
395 __ sync(); \
396 __ bind(&exchange); \
397 __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
398 __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \
399 size, sign_extend); \
400 __ InsertBits(i.TempRegister(2), i.InputRegister(2), i.TempRegister(1), \
401 size); \
402 __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
403 __ BranchShort(&exchange, ne, i.TempRegister(2), Operand(zero_reg)); \
404 __ sync(); \
405 } while (0)
406
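// Compare-exchange via LL/SC: load the reserved old value, exit early if it
// differs from the expected value, otherwise store-conditionally write the
// new value and retry the whole sequence if the store-conditional fails.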
407 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_linked, \
408 store_conditional) \
409 do { \
410 Label compareExchange; \
411 Label exit; \
412 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
413 __ sync(); \
414 __ bind(&compareExchange); \
415 __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \
416 __ BranchShort(&exit, ne, i.InputRegister(2), \
417 Operand(i.OutputRegister(0))); \
418 __ Move(i.TempRegister(2), i.InputRegister(3)); \
419 __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
420 __ BranchShort(&compareExchange, ne, i.TempRegister(2), \
421 Operand(zero_reg)); \
422 __ bind(&exit); \
423 __ sync(); \
424 } while (0)
425
426 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT( \
427 load_linked, store_conditional, sign_extend, size, representation) \
428 do { \
429 Label compareExchange; \
430 Label exit; \
431 __ Add64(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
432 if (representation == 32) { \
433 __ And(i.TempRegister(1), i.TempRegister(0), 0x3); \
434 } else { \
435 DCHECK_EQ(representation, 64); \
436 __ And(i.TempRegister(1), i.TempRegister(0), 0x7); \
437 } \
438 __ Sub64(i.TempRegister(0), i.TempRegister(0), \
439 Operand(i.TempRegister(1))); \
440 __ Sll32(i.TempRegister(1), i.TempRegister(1), 3); \
441 __ sync(); \
442 __ bind(&compareExchange); \
443 __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
444 __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \
445 size, sign_extend); \
446 __ ExtractBits(i.InputRegister(2), i.InputRegister(2), 0, size, \
447 sign_extend); \
448 __ BranchShort(&exit, ne, i.InputRegister(2), \
449 Operand(i.OutputRegister(0))); \
450 __ InsertBits(i.TempRegister(2), i.InputRegister(3), i.TempRegister(1), \
451 size); \
452 __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
453 __ BranchShort(&compareExchange, ne, i.TempRegister(2), \
454 Operand(zero_reg)); \
455 __ bind(&exit); \
456 __ sync(); \
457 } while (0)
458
459 #define ASSEMBLE_IEEE754_BINOP(name) \
460 do { \
461 FrameScope scope(tasm(), StackFrame::MANUAL); \
462 __ PrepareCallCFunction(0, 2, kScratchReg); \
463 __ MovToFloatParameters(i.InputDoubleRegister(0), \
464 i.InputDoubleRegister(1)); \
465 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
466     /* Move the result into the double result register. */                   \
467 __ MovFromFloatResult(i.OutputDoubleRegister()); \
468 } while (0)
469
470 #define ASSEMBLE_IEEE754_UNOP(name) \
471 do { \
472 FrameScope scope(tasm(), StackFrame::MANUAL); \
473 __ PrepareCallCFunction(0, 1, kScratchReg); \
474 __ MovToFloatParameter(i.InputDoubleRegister(0)); \
475 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
476     /* Move the result into the double result register. */                   \
477 __ MovFromFloatResult(i.OutputDoubleRegister()); \
478 } while (0)
479
480 #define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \
481 do { \
482 __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \
483 i.InputSimd128Register(1)); \
484 } while (0)
485
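// The RVV helper macros below first configure the vector unit for the lane
// width of the opcode (E8/E16/E32/E64, group multiplier m1) via VU.set, and
// then emit a single vector instruction on the SIMD128 registers.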
486 #define ASSEMBLE_RVV_BINOP_INTEGER(instr, OP) \
487 case kRiscvI8x16##instr: { \
488 __ VU.set(kScratchReg, E8, m1); \
489 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0), \
490 i.InputSimd128Register(1)); \
491 break; \
492 } \
493 case kRiscvI16x8##instr: { \
494 __ VU.set(kScratchReg, E16, m1); \
495 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0), \
496 i.InputSimd128Register(1)); \
497 break; \
498 } \
499 case kRiscvI32x4##instr: { \
500 __ VU.set(kScratchReg, E32, m1); \
501 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0), \
502 i.InputSimd128Register(1)); \
503 break; \
504 }
505
506 #define ASSEMBLE_RVV_UNOP_INTEGER_VR(instr, OP) \
507 case kRiscvI8x16##instr: { \
508 __ VU.set(kScratchReg, E8, m1); \
509 __ OP(i.OutputSimd128Register(), i.InputRegister(0)); \
510 break; \
511 } \
512 case kRiscvI16x8##instr: { \
513 __ VU.set(kScratchReg, E16, m1); \
514 __ OP(i.OutputSimd128Register(), i.InputRegister(0)); \
515 break; \
516 } \
517 case kRiscvI32x4##instr: { \
518 __ VU.set(kScratchReg, E32, m1); \
519 __ OP(i.OutputSimd128Register(), i.InputRegister(0)); \
520 break; \
521 } \
522 case kRiscvI64x2##instr: { \
523 __ VU.set(kScratchReg, E64, m1); \
524 __ OP(i.OutputSimd128Register(), i.InputRegister(0)); \
525 break; \
526 }
527
528 #define ASSEMBLE_RVV_UNOP_INTEGER_VV(instr, OP) \
529 case kRiscvI8x16##instr: { \
530 __ VU.set(kScratchReg, E8, m1); \
531 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
532 break; \
533 } \
534 case kRiscvI16x8##instr: { \
535 __ VU.set(kScratchReg, E16, m1); \
536 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
537 break; \
538 } \
539 case kRiscvI32x4##instr: { \
540 __ VU.set(kScratchReg, E32, m1); \
541 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
542 break; \
543 } \
544 case kRiscvI64x2##instr: { \
545 __ VU.set(kScratchReg, E64, m1); \
546 __ OP(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
547 break; \
548 }
549
550 void CodeGenerator::AssembleDeconstructFrame() {
551 __ Move(sp, fp);
552 __ Pop(ra, fp);
553 }
554
555 void CodeGenerator::AssemblePrepareTailCall() {
556 if (frame_access_state()->has_frame()) {
557 __ Ld(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
558 __ Ld(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
559 }
560 frame_access_state()->SetFrameAccessToSP();
561 }
562
563 void CodeGenerator::AssembleArchSelect(Instruction* instr,
564 FlagsCondition condition) {
565 UNIMPLEMENTED();
566 }
567
568 namespace {
569
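// Adjusts sp so that exactly |new_slot_above_sp| slots lie above it, growing
// the stack when more slots are needed and (optionally) shrinking it when
// fewer are needed, while keeping the FrameAccessState's SP delta in sync.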
570 void AdjustStackPointerForTailCall(TurboAssembler* tasm,
571 FrameAccessState* state,
572 int new_slot_above_sp,
573 bool allow_shrinkage = true) {
574 int current_sp_offset = state->GetSPToFPSlotCount() +
575 StandardFrameConstants::kFixedSlotCountAboveFp;
576 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
577 if (stack_slot_delta > 0) {
578 tasm->Sub64(sp, sp, stack_slot_delta * kSystemPointerSize);
579 state->IncreaseSPDelta(stack_slot_delta);
580 } else if (allow_shrinkage && stack_slot_delta < 0) {
581 tasm->Add64(sp, sp, -stack_slot_delta * kSystemPointerSize);
582 state->IncreaseSPDelta(stack_slot_delta);
583 }
584 }
585
586 } // namespace
587
588 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
589 int first_unused_slot_offset) {
590 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
591 first_unused_slot_offset, false);
592 }
593
594 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
595 int first_unused_slot_offset) {
596 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
597 first_unused_slot_offset);
598 }
599
600 // Check that {kJavaScriptCallCodeStartRegister} is correct.
601 void CodeGenerator::AssembleCodeStartRegisterCheck() {
602 __ ComputeCodeStartAddress(kScratchReg);
603 __ Assert(eq, AbortReason::kWrongFunctionCodeStart,
604 kJavaScriptCallCodeStartRegister, Operand(kScratchReg));
605 }
606
607 // Check if the code object is marked for deoptimization. If it is, then it
608 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
609 // to:
610 // 1. read from memory the word that contains that bit, which can be found in
611 // the flags in the referenced {CodeDataContainer} object;
612 // 2. test kMarkedForDeoptimizationBit in those flags; and
613 // 3. if it is not zero then it jumps to the builtin.
614 void CodeGenerator::BailoutIfDeoptimized() {
615 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
616 __ LoadTaggedPointerField(
617 kScratchReg, MemOperand(kJavaScriptCallCodeStartRegister, offset));
618 __ Lw(kScratchReg,
619 FieldMemOperand(kScratchReg,
620 CodeDataContainer::kKindSpecificFlagsOffset));
621 __ And(kScratchReg, kScratchReg,
622 Operand(1 << Code::kMarkedForDeoptimizationBit));
623 __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
624 RelocInfo::CODE_TARGET, ne, kScratchReg, Operand(zero_reg));
625 }
626
627 // Assembles an instruction after register allocation, producing machine code.
628 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
629 Instruction* instr) {
630 RiscvOperandConverter i(this, instr);
631 InstructionCode opcode = instr->opcode();
632 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
633 switch (arch_opcode) {
634 case kArchCallCodeObject: {
635 if (instr->InputAt(0)->IsImmediate()) {
636 __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
637 } else {
638 Register reg = i.InputRegister(0);
639 DCHECK_IMPLIES(
640 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
641 reg == kJavaScriptCallCodeStartRegister);
642 __ CallCodeObject(reg);
643 }
644 RecordCallPosition(instr);
645 frame_access_state()->ClearSPDelta();
646 break;
647 }
648 case kArchCallBuiltinPointer: {
649 DCHECK(!instr->InputAt(0)->IsImmediate());
650 Register builtin_index = i.InputRegister(0);
651 __ CallBuiltinByIndex(builtin_index);
652 RecordCallPosition(instr);
653 frame_access_state()->ClearSPDelta();
654 break;
655 }
656 case kArchCallWasmFunction: {
657 if (instr->InputAt(0)->IsImmediate()) {
658 Constant constant = i.ToConstant(instr->InputAt(0));
659 Address wasm_code = static_cast<Address>(constant.ToInt64());
660 __ Call(wasm_code, constant.rmode());
661 } else {
662 __ Add64(t6, i.InputOrZeroRegister(0), 0);
663 __ Call(t6);
664 }
665 RecordCallPosition(instr);
666 frame_access_state()->ClearSPDelta();
667 break;
668 }
669 case kArchTailCallCodeObject: {
670 if (instr->InputAt(0)->IsImmediate()) {
671 __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
672 } else {
673 Register reg = i.InputOrZeroRegister(0);
674 DCHECK_IMPLIES(
675 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
676 reg == kJavaScriptCallCodeStartRegister);
677 __ JumpCodeObject(reg);
678 }
679 frame_access_state()->ClearSPDelta();
680 frame_access_state()->SetFrameAccessToDefault();
681 break;
682 }
683 case kArchTailCallWasm: {
684 if (instr->InputAt(0)->IsImmediate()) {
685 Constant constant = i.ToConstant(instr->InputAt(0));
686 Address wasm_code = static_cast<Address>(constant.ToInt64());
687 __ Jump(wasm_code, constant.rmode());
688 } else {
689 __ Add64(kScratchReg, i.InputOrZeroRegister(0), 0);
690 __ Jump(kScratchReg);
691 }
692 frame_access_state()->ClearSPDelta();
693 frame_access_state()->SetFrameAccessToDefault();
694 break;
695 }
696 case kArchTailCallAddress: {
697 CHECK(!instr->InputAt(0)->IsImmediate());
698 Register reg = i.InputOrZeroRegister(0);
699 DCHECK_IMPLIES(
700 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
701 reg == kJavaScriptCallCodeStartRegister);
702 __ Jump(reg);
703 frame_access_state()->ClearSPDelta();
704 frame_access_state()->SetFrameAccessToDefault();
705 break;
706 }
707 case kArchCallJSFunction: {
708 Register func = i.InputOrZeroRegister(0);
709 if (FLAG_debug_code) {
710 // Check the function's context matches the context argument.
711 __ LoadTaggedPointerField(
712 kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset));
713 __ Assert(eq, AbortReason::kWrongFunctionContext, cp,
714 Operand(kScratchReg));
715 }
716 static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch");
717 __ LoadTaggedPointerField(a2,
718 FieldMemOperand(func, JSFunction::kCodeOffset));
719 __ CallCodeObject(a2);
720 RecordCallPosition(instr);
721 frame_access_state()->ClearSPDelta();
722 break;
723 }
724 case kArchPrepareCallCFunction: {
725 int const num_parameters = MiscField::decode(instr->opcode());
726 __ PrepareCallCFunction(num_parameters, kScratchReg);
727 // Frame alignment requires using FP-relative frame addressing.
728 frame_access_state()->SetFrameAccessToFP();
729 break;
730 }
731 case kArchSaveCallerRegisters: {
732 fp_mode_ =
733 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
734 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
735 fp_mode_ == SaveFPRegsMode::kSave);
736 // kReturnRegister0 should have been saved before entering the stub.
737 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
738 DCHECK(IsAligned(bytes, kSystemPointerSize));
739 DCHECK_EQ(0, frame_access_state()->sp_delta());
740 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
741 DCHECK(!caller_registers_saved_);
742 caller_registers_saved_ = true;
743 break;
744 }
745 case kArchRestoreCallerRegisters: {
746 DCHECK(fp_mode_ ==
747 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
748 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
749 fp_mode_ == SaveFPRegsMode::kSave);
750 // Don't overwrite the returned value.
751 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
752 frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
753 DCHECK_EQ(0, frame_access_state()->sp_delta());
754 DCHECK(caller_registers_saved_);
755 caller_registers_saved_ = false;
756 break;
757 }
758 case kArchPrepareTailCall:
759 AssemblePrepareTailCall();
760 break;
761 case kArchCallCFunction: {
762 int const num_gp_parameters = ParamField::decode(instr->opcode());
763 int const num_fp_parameters = FPParamField::decode(instr->opcode());
764 Label after_call;
765 bool isWasmCapiFunction =
766 linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
767 if (isWasmCapiFunction) {
768 // Put the return address in a stack slot.
769 __ LoadAddress(kScratchReg, &after_call, RelocInfo::EXTERNAL_REFERENCE);
770 __ Sd(kScratchReg,
771 MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
772 }
773 if (instr->InputAt(0)->IsImmediate()) {
774 ExternalReference ref = i.InputExternalReference(0);
775 __ CallCFunction(ref, num_gp_parameters, num_fp_parameters);
776 } else {
777 Register func = i.InputOrZeroRegister(0);
778 __ CallCFunction(func, num_gp_parameters, num_fp_parameters);
779 }
780 __ bind(&after_call);
781 if (isWasmCapiFunction) {
782 RecordSafepoint(instr->reference_map());
783 }
784
785 frame_access_state()->SetFrameAccessToDefault();
786 // Ideally, we should decrement SP delta to match the change of stack
787 // pointer in CallCFunction. However, for certain architectures (e.g.
788 // ARM), there may be a stricter alignment requirement, causing the old SP
789 // to be saved on the stack. In those cases, we cannot calculate the SP
790 // delta statically.
791 frame_access_state()->ClearSPDelta();
792 if (caller_registers_saved_) {
793 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
794 // Here, we assume the sequence to be:
795 // kArchSaveCallerRegisters;
796 // kArchCallCFunction;
797 // kArchRestoreCallerRegisters;
798 int bytes =
799 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
800 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
801 }
802 break;
803 }
804 case kArchJmp:
805 AssembleArchJump(i.InputRpo(0));
806 break;
807 case kArchBinarySearchSwitch:
808 AssembleArchBinarySearchSwitch(instr);
809 break;
810 case kArchTableSwitch:
811 AssembleArchTableSwitch(instr);
812 break;
813 case kArchAbortCSADcheck:
814 DCHECK(i.InputRegister(0) == a0);
815 {
816 // We don't actually want to generate a pile of code for this, so just
817 // claim there is a stack frame, without generating one.
818 FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
819 __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
820 RelocInfo::CODE_TARGET);
821 }
822 __ stop();
823 break;
824 case kArchDebugBreak:
825 __ DebugBreak();
826 break;
827 case kArchComment:
828 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
829 break;
830 case kArchNop:
831 case kArchThrowTerminator:
832 // don't emit code for nops.
833 break;
834 case kArchDeoptimize: {
835 DeoptimizationExit* exit =
836 BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
837 __ Branch(exit->label());
838 break;
839 }
840 case kArchRet:
841 AssembleReturn(instr->InputAt(0));
842 break;
843 case kArchStackPointerGreaterThan:
844 // Pseudo-instruction used for cmp/branch. No opcode emitted here.
845 break;
846 case kArchStackCheckOffset:
847 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
848 break;
849 case kArchFramePointer:
850 __ Move(i.OutputRegister(), fp);
851 break;
852 case kArchParentFramePointer:
853 if (frame_access_state()->has_frame()) {
854 __ Ld(i.OutputRegister(), MemOperand(fp, 0));
855 } else {
856 __ Move(i.OutputRegister(), fp);
857 }
858 break;
859 case kArchTruncateDoubleToI:
860 __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
861 i.InputDoubleRegister(0), DetermineStubCallMode());
862 break;
863 case kArchStoreWithWriteBarrier: {
864 RecordWriteMode mode =
865 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
866 Register object = i.InputRegister(0);
867 Register index = i.InputRegister(1);
868 Register value = i.InputRegister(2);
869 Register scratch0 = i.TempRegister(0);
870 Register scratch1 = i.TempRegister(1);
871 auto ool = zone()->New<OutOfLineRecordWrite>(this, object, index, value,
872 scratch0, scratch1, mode,
873 DetermineStubCallMode());
874 __ Add64(kScratchReg, object, index);
875 __ StoreTaggedField(value, MemOperand(kScratchReg));
876 if (mode > RecordWriteMode::kValueIsPointer) {
877 __ JumpIfSmi(value, ool->exit());
878 }
879 __ CheckPageFlag(object, scratch0,
880 MemoryChunk::kPointersFromHereAreInterestingMask, ne,
881 ool->entry());
882 __ bind(ool->exit());
883 break;
884 }
885 case kArchStackSlot: {
886 FrameOffset offset =
887 frame_access_state()->GetFrameOffset(i.InputInt32(0));
888 Register base_reg = offset.from_stack_pointer() ? sp : fp;
889 __ Add64(i.OutputRegister(), base_reg, Operand(offset.offset()));
890 int alignment = i.InputInt32(1);
891 DCHECK(alignment == 0 || alignment == 4 || alignment == 8 ||
892 alignment == 16);
893 if (FLAG_debug_code && alignment > 0) {
894 // Verify that the output_register is properly aligned
895 __ And(kScratchReg, i.OutputRegister(),
896 Operand(kSystemPointerSize - 1));
897 __ Assert(eq, AbortReason::kAllocationIsNotDoubleAligned, kScratchReg,
898 Operand(zero_reg));
899 }
900 if (alignment == 2 * kSystemPointerSize) {
901 Label done;
902 __ Add64(kScratchReg, base_reg, Operand(offset.offset()));
903 __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
904 __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
905 __ Add64(i.OutputRegister(), i.OutputRegister(), kSystemPointerSize);
906 __ bind(&done);
907 } else if (alignment > 2 * kSystemPointerSize) {
908 Label done;
909 __ Add64(kScratchReg, base_reg, Operand(offset.offset()));
910 __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
911 __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
912 __ li(kScratchReg2, alignment);
913 __ Sub64(kScratchReg2, kScratchReg2, Operand(kScratchReg));
914 __ Add64(i.OutputRegister(), i.OutputRegister(), kScratchReg2);
915 __ bind(&done);
916 }
917
918 break;
919 }
920 case kIeee754Float64Acos:
921 ASSEMBLE_IEEE754_UNOP(acos);
922 break;
923 case kIeee754Float64Acosh:
924 ASSEMBLE_IEEE754_UNOP(acosh);
925 break;
926 case kIeee754Float64Asin:
927 ASSEMBLE_IEEE754_UNOP(asin);
928 break;
929 case kIeee754Float64Asinh:
930 ASSEMBLE_IEEE754_UNOP(asinh);
931 break;
932 case kIeee754Float64Atan:
933 ASSEMBLE_IEEE754_UNOP(atan);
934 break;
935 case kIeee754Float64Atanh:
936 ASSEMBLE_IEEE754_UNOP(atanh);
937 break;
938 case kIeee754Float64Atan2:
939 ASSEMBLE_IEEE754_BINOP(atan2);
940 break;
941 case kIeee754Float64Cos:
942 ASSEMBLE_IEEE754_UNOP(cos);
943 break;
944 case kIeee754Float64Cosh:
945 ASSEMBLE_IEEE754_UNOP(cosh);
946 break;
947 case kIeee754Float64Cbrt:
948 ASSEMBLE_IEEE754_UNOP(cbrt);
949 break;
950 case kIeee754Float64Exp:
951 ASSEMBLE_IEEE754_UNOP(exp);
952 break;
953 case kIeee754Float64Expm1:
954 ASSEMBLE_IEEE754_UNOP(expm1);
955 break;
956 case kIeee754Float64Log:
957 ASSEMBLE_IEEE754_UNOP(log);
958 break;
959 case kIeee754Float64Log1p:
960 ASSEMBLE_IEEE754_UNOP(log1p);
961 break;
962 case kIeee754Float64Log2:
963 ASSEMBLE_IEEE754_UNOP(log2);
964 break;
965 case kIeee754Float64Log10:
966 ASSEMBLE_IEEE754_UNOP(log10);
967 break;
968 case kIeee754Float64Pow:
969 ASSEMBLE_IEEE754_BINOP(pow);
970 break;
971 case kIeee754Float64Sin:
972 ASSEMBLE_IEEE754_UNOP(sin);
973 break;
974 case kIeee754Float64Sinh:
975 ASSEMBLE_IEEE754_UNOP(sinh);
976 break;
977 case kIeee754Float64Tan:
978 ASSEMBLE_IEEE754_UNOP(tan);
979 break;
980 case kIeee754Float64Tanh:
981 ASSEMBLE_IEEE754_UNOP(tanh);
982 break;
983 case kRiscvAdd32:
984 __ Add32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
985 break;
986 case kRiscvAdd64:
987 __ Add64(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
988 break;
989 case kRiscvAddOvf64:
990 __ AddOverflow64(i.OutputRegister(), i.InputOrZeroRegister(0),
991 i.InputOperand(1), kScratchReg);
992 break;
993 case kRiscvSub32:
994 __ Sub32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
995 break;
996 case kRiscvSub64:
997 __ Sub64(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
998 break;
999 case kRiscvSubOvf64:
1000 __ SubOverflow64(i.OutputRegister(), i.InputOrZeroRegister(0),
1001 i.InputOperand(1), kScratchReg);
1002 break;
1003 case kRiscvMul32:
1004 __ Mul32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1005 break;
1006 case kRiscvMulOvf32:
1007 __ MulOverflow32(i.OutputRegister(), i.InputOrZeroRegister(0),
1008 i.InputOperand(1), kScratchReg);
1009 break;
1010 case kRiscvMulHigh32:
1011 __ Mulh32(i.OutputRegister(), i.InputOrZeroRegister(0),
1012 i.InputOperand(1));
1013 break;
1014 case kRiscvMulHighU32:
1015 __ Mulhu32(i.OutputRegister(), i.InputOrZeroRegister(0),
1016 i.InputOperand(1), kScratchReg, kScratchReg2);
1017 break;
1018 case kRiscvMulHigh64:
1019 __ Mulh64(i.OutputRegister(), i.InputOrZeroRegister(0),
1020 i.InputOperand(1));
1021 break;
1022 case kRiscvDiv32: {
1023 __ Div32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1024 // Set output to zero if divisor == 0
1025 __ LoadZeroIfConditionZero(i.OutputRegister(), i.InputRegister(1));
1026 break;
1027 }
1028 case kRiscvDivU32: {
1029 __ Divu32(i.OutputRegister(), i.InputOrZeroRegister(0),
1030 i.InputOperand(1));
1031 // Set output to zero if divisor == 0
1032 __ LoadZeroIfConditionZero(i.OutputRegister(), i.InputRegister(1));
1033 break;
1034 }
1035 case kRiscvMod32:
1036 __ Mod32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1037 break;
1038 case kRiscvModU32:
1039 __ Modu32(i.OutputRegister(), i.InputOrZeroRegister(0),
1040 i.InputOperand(1));
1041 break;
1042 case kRiscvMul64:
1043 __ Mul64(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1044 break;
1045 case kRiscvDiv64: {
1046 __ Div64(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1047 // Set output to zero if divisor == 0
1048 __ LoadZeroIfConditionZero(i.OutputRegister(), i.InputRegister(1));
1049 break;
1050 }
1051 case kRiscvDivU64: {
1052 __ Divu64(i.OutputRegister(), i.InputOrZeroRegister(0),
1053 i.InputOperand(1));
1054 // Set output to zero if divisor == 0
1055 __ LoadZeroIfConditionZero(i.OutputRegister(), i.InputRegister(1));
1056 break;
1057 }
1058 case kRiscvMod64:
1059 __ Mod64(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1060 break;
1061 case kRiscvModU64:
1062 __ Modu64(i.OutputRegister(), i.InputOrZeroRegister(0),
1063 i.InputOperand(1));
1064 break;
1065 case kRiscvAnd:
1066 __ And(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1067 break;
1068 case kRiscvAnd32:
1069 __ And(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1070 __ Sll32(i.OutputRegister(), i.OutputRegister(), 0x0);
1071 break;
1072 case kRiscvOr:
1073 __ Or(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1074 break;
1075 case kRiscvOr32:
1076 __ Or(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1077 __ Sll32(i.OutputRegister(), i.OutputRegister(), 0x0);
1078 break;
1079 case kRiscvNor:
1080 if (instr->InputAt(1)->IsRegister()) {
1081 __ Nor(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1082 } else {
1083 DCHECK_EQ(0, i.InputOperand(1).immediate());
1084 __ Nor(i.OutputRegister(), i.InputOrZeroRegister(0), zero_reg);
1085 }
1086 break;
1087 case kRiscvNor32:
1088 if (instr->InputAt(1)->IsRegister()) {
1089 __ Nor(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1090 __ Sll32(i.OutputRegister(), i.OutputRegister(), 0x0);
1091 } else {
1092 DCHECK_EQ(0, i.InputOperand(1).immediate());
1093 __ Nor(i.OutputRegister(), i.InputOrZeroRegister(0), zero_reg);
1094 __ Sll32(i.OutputRegister(), i.OutputRegister(), 0x0);
1095 }
1096 break;
1097 case kRiscvXor:
1098 __ Xor(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1099 break;
1100 case kRiscvXor32:
1101 __ Xor(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
1102 __ Sll32(i.OutputRegister(), i.OutputRegister(), 0x0);
1103 break;
1104 case kRiscvClz32:
1105 __ Clz32(i.OutputRegister(), i.InputOrZeroRegister(0));
1106 break;
1107 case kRiscvClz64:
1108 __ Clz64(i.OutputRegister(), i.InputOrZeroRegister(0));
1109 break;
1110 case kRiscvCtz32: {
1111 Register src = i.InputRegister(0);
1112 Register dst = i.OutputRegister();
1113 __ Ctz32(dst, src);
1114 } break;
1115 case kRiscvCtz64: {
1116 Register src = i.InputRegister(0);
1117 Register dst = i.OutputRegister();
1118 __ Ctz64(dst, src);
1119 } break;
1120 case kRiscvPopcnt32: {
1121 Register src = i.InputRegister(0);
1122 Register dst = i.OutputRegister();
1123 __ Popcnt32(dst, src, kScratchReg);
1124 } break;
1125 case kRiscvPopcnt64: {
1126 Register src = i.InputRegister(0);
1127 Register dst = i.OutputRegister();
1128 __ Popcnt64(dst, src, kScratchReg);
1129 } break;
1130 case kRiscvShl32:
1131 if (instr->InputAt(1)->IsRegister()) {
1132 __ Sll32(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1133 } else {
1134 int64_t imm = i.InputOperand(1).immediate();
1135 __ Sll32(i.OutputRegister(), i.InputRegister(0),
1136 static_cast<uint16_t>(imm));
1137 }
1138 break;
1139 case kRiscvShr32:
1140 if (instr->InputAt(1)->IsRegister()) {
1141 __ Srl32(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1142 } else {
1143 int64_t imm = i.InputOperand(1).immediate();
1144 __ Srl32(i.OutputRegister(), i.InputRegister(0),
1145 static_cast<uint16_t>(imm));
1146 }
1147 break;
1148 case kRiscvSar32:
1149 if (instr->InputAt(1)->IsRegister()) {
1150 __ Sra32(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1151 } else {
1152 int64_t imm = i.InputOperand(1).immediate();
1153 __ Sra32(i.OutputRegister(), i.InputRegister(0),
1154 static_cast<uint16_t>(imm));
1155 }
1156 break;
1157 case kRiscvZeroExtendWord: {
1158 __ ZeroExtendWord(i.OutputRegister(), i.InputRegister(0));
1159 break;
1160 }
1161 case kRiscvSignExtendWord: {
1162 __ SignExtendWord(i.OutputRegister(), i.InputRegister(0));
1163 break;
1164 }
1165 case kRiscvShl64:
1166 __ Sll64(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
1167 break;
1168 case kRiscvShr64:
1169 __ Srl64(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
1170 break;
1171 case kRiscvSar64:
1172 __ Sra64(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
1173 break;
1174 case kRiscvRor32:
1175 __ Ror(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
1176 break;
1177 case kRiscvRor64:
1178 __ Dror(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
1179 break;
1180 case kRiscvTst:
1181 __ And(kScratchReg, i.InputRegister(0), i.InputOperand(1));
1182 // Pseudo-instruction used for cmp/branch. No opcode emitted here.
1183 break;
1184 case kRiscvCmp:
1185 // Pseudo-instruction used for cmp/branch. No opcode emitted here.
1186 break;
1187 case kRiscvCmpZero:
1188 // Pseudo-instruction used for cmpzero/branch. No opcode emitted here.
1189 break;
1190 case kRiscvMov:
1191 // TODO(plind): Should we combine mov/li like this, or use separate instr?
1192 // - Also see x64 ASSEMBLE_BINOP & RegisterOrOperandType
1193 if (HasRegisterInput(instr, 0)) {
1194 __ Move(i.OutputRegister(), i.InputRegister(0));
1195 } else {
1196 __ li(i.OutputRegister(), i.InputOperand(0));
1197 }
1198 break;
1199
1200 case kRiscvCmpS: {
1201 FPURegister left = i.InputOrZeroSingleRegister(0);
1202 FPURegister right = i.InputOrZeroSingleRegister(1);
1203 bool predicate;
1204 FPUCondition cc =
1205 FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
1206
1207 if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
1208 !__ IsSingleZeroRegSet()) {
1209 __ LoadFPRImmediate(kDoubleRegZero, 0.0f);
1210 }
1211 // The compare result is written to kScratchReg.
1212 __ CompareF32(kScratchReg, cc, left, right);
1213 } break;
1214 case kRiscvAddS:
1215 // TODO(plind): add special case: combine mult & add.
1216 __ fadd_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1217 i.InputDoubleRegister(1));
1218 break;
1219 case kRiscvSubS:
1220 __ fsub_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1221 i.InputDoubleRegister(1));
1222 break;
1223 case kRiscvMulS:
1224 // TODO(plind): add special case: right op is -1.0, see arm port.
1225 __ fmul_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1226 i.InputDoubleRegister(1));
1227 break;
1228 case kRiscvDivS:
1229 __ fdiv_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1230 i.InputDoubleRegister(1));
1231 break;
1232 case kRiscvModS: {
1233 // TODO(bmeurer): We should really get rid of this special instruction,
1234 // and generate a CallAddress instruction instead.
1235 FrameScope scope(tasm(), StackFrame::MANUAL);
1236 __ PrepareCallCFunction(0, 2, kScratchReg);
1237 __ MovToFloatParameters(i.InputDoubleRegister(0),
1238 i.InputDoubleRegister(1));
1239 // TODO(balazs.kilvady): implement mod_two_floats_operation(isolate())
1240 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1241 // Move the result into the double result register.
1242 __ MovFromFloatResult(i.OutputSingleRegister());
1243 break;
1244 }
1245 case kRiscvAbsS:
1246 __ fabs_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
1247 break;
1248 case kRiscvNegS:
1249 __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
1250 break;
1251 case kRiscvSqrtS: {
1252 __ fsqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1253 break;
1254 }
1255 case kRiscvMaxS:
1256 __ fmax_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1257 i.InputDoubleRegister(1));
1258 break;
1259 case kRiscvMinS:
1260 __ fmin_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1261 i.InputDoubleRegister(1));
1262 break;
1263 case kRiscvCmpD: {
1264 FPURegister left = i.InputOrZeroDoubleRegister(0);
1265 FPURegister right = i.InputOrZeroDoubleRegister(1);
1266 bool predicate;
1267 FPUCondition cc =
1268 FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
1269 if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
1270 !__ IsDoubleZeroRegSet()) {
1271 __ LoadFPRImmediate(kDoubleRegZero, 0.0);
1272 }
1273 // The compare result is written to kScratchReg.
1274 __ CompareF64(kScratchReg, cc, left, right);
1275 } break;
1276 case kRiscvAddD:
1277 // TODO(plind): add special case: combine mult & add.
1278 __ fadd_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1279 i.InputDoubleRegister(1));
1280 break;
1281 case kRiscvSubD:
1282 __ fsub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1283 i.InputDoubleRegister(1));
1284 break;
1285 case kRiscvMulD:
1286 // TODO(plind): add special case: right op is -1.0, see arm port.
1287 __ fmul_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1288 i.InputDoubleRegister(1));
1289 break;
1290 case kRiscvDivD:
1291 __ fdiv_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1292 i.InputDoubleRegister(1));
1293 break;
1294 case kRiscvModD: {
1295 // TODO(bmeurer): We should really get rid of this special instruction,
1296 // and generate a CallAddress instruction instead.
1297 FrameScope scope(tasm(), StackFrame::MANUAL);
1298 __ PrepareCallCFunction(0, 2, kScratchReg);
1299 __ MovToFloatParameters(i.InputDoubleRegister(0),
1300 i.InputDoubleRegister(1));
1301 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1302 // Move the result into the double result register.
1303 __ MovFromFloatResult(i.OutputDoubleRegister());
1304 break;
1305 }
1306 case kRiscvAbsD:
1307 __ fabs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1308 break;
1309 case kRiscvNegD:
1310 __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1311 break;
1312 case kRiscvSqrtD: {
1313 __ fsqrt_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1314 break;
1315 }
1316 case kRiscvMaxD:
1317 __ fmax_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1318 i.InputDoubleRegister(1));
1319 break;
1320 case kRiscvMinD:
1321 __ fmin_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1322 i.InputDoubleRegister(1));
1323 break;
1324 case kRiscvFloat64RoundDown: {
1325 __ Floor_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1326 kScratchDoubleReg);
1327 break;
1328 }
1329 case kRiscvFloat32RoundDown: {
1330 __ Floor_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0),
1331 kScratchDoubleReg);
1332 break;
1333 }
1334 case kRiscvFloat64RoundTruncate: {
1335 __ Trunc_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1336 kScratchDoubleReg);
1337 break;
1338 }
1339 case kRiscvFloat32RoundTruncate: {
1340 __ Trunc_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0),
1341 kScratchDoubleReg);
1342 break;
1343 }
1344 case kRiscvFloat64RoundUp: {
1345 __ Ceil_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1346 kScratchDoubleReg);
1347 break;
1348 }
1349 case kRiscvFloat32RoundUp: {
1350 __ Ceil_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0),
1351 kScratchDoubleReg);
1352 break;
1353 }
1354 case kRiscvFloat64RoundTiesEven: {
1355 __ Round_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1356 kScratchDoubleReg);
1357 break;
1358 }
1359 case kRiscvFloat32RoundTiesEven: {
1360 __ Round_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0),
1361 kScratchDoubleReg);
1362 break;
1363 }
1364 case kRiscvFloat32Max: {
1365 __ Float32Max(i.OutputSingleRegister(), i.InputSingleRegister(0),
1366 i.InputSingleRegister(1));
1367 break;
1368 }
1369 case kRiscvFloat64Max: {
1370 __ Float64Max(i.OutputSingleRegister(), i.InputSingleRegister(0),
1371 i.InputSingleRegister(1));
1372 break;
1373 }
1374 case kRiscvFloat32Min: {
1375 __ Float32Min(i.OutputSingleRegister(), i.InputSingleRegister(0),
1376 i.InputSingleRegister(1));
1377 break;
1378 }
1379 case kRiscvFloat64Min: {
1380 __ Float64Min(i.OutputSingleRegister(), i.InputSingleRegister(0),
1381 i.InputSingleRegister(1));
1382 break;
1383 }
1384 case kRiscvFloat64SilenceNaN:
1385 __ FPUCanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1386 break;
1387 case kRiscvCvtSD:
1388 __ fcvt_s_d(i.OutputSingleRegister(), i.InputDoubleRegister(0));
1389 break;
1390 case kRiscvCvtDS:
1391 __ fcvt_d_s(i.OutputDoubleRegister(), i.InputSingleRegister(0));
1392 break;
1393 case kRiscvCvtDW: {
1394 __ fcvt_d_w(i.OutputDoubleRegister(), i.InputRegister(0));
1395 break;
1396 }
1397 case kRiscvCvtSW: {
1398 __ fcvt_s_w(i.OutputDoubleRegister(), i.InputRegister(0));
1399 break;
1400 }
1401 case kRiscvCvtSUw: {
1402 __ Cvt_s_uw(i.OutputDoubleRegister(), i.InputRegister(0));
1403 break;
1404 }
1405 case kRiscvCvtSL: {
1406 __ fcvt_s_l(i.OutputDoubleRegister(), i.InputRegister(0));
1407 break;
1408 }
1409 case kRiscvCvtDL: {
1410 __ fcvt_d_l(i.OutputDoubleRegister(), i.InputRegister(0));
1411 break;
1412 }
1413 case kRiscvCvtDUw: {
1414 __ Cvt_d_uw(i.OutputDoubleRegister(), i.InputRegister(0));
1415 break;
1416 }
1417 case kRiscvCvtDUl: {
1418 __ Cvt_d_ul(i.OutputDoubleRegister(), i.InputRegister(0));
1419 break;
1420 }
1421 case kRiscvCvtSUl: {
1422 __ Cvt_s_ul(i.OutputDoubleRegister(), i.InputRegister(0));
1423 break;
1424 }
1425 case kRiscvFloorWD: {
1426 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1427 __ Floor_w_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1428 break;
1429 }
1430 case kRiscvCeilWD: {
1431 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1432 __ Ceil_w_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1433 break;
1434 }
1435 case kRiscvRoundWD: {
1436 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1437 __ Round_w_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1438 break;
1439 }
1440 case kRiscvTruncWD: {
1441 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1442 __ Trunc_w_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1443 break;
1444 }
1445 case kRiscvFloorWS: {
1446 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1447 __ Floor_w_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1448 break;
1449 }
1450 case kRiscvCeilWS: {
1451 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1452 __ Ceil_w_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1453 break;
1454 }
1455 case kRiscvRoundWS: {
1456 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1457 __ Round_w_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1458 break;
1459 }
1460 case kRiscvTruncWS: {
1461 Label done;
1462 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1463 bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
1464 __ Trunc_w_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1465
1466 // On RISC-V, if the input value exceeds INT32_MAX, the result of fcvt
1467 // is INT32_MAX. Note that, since INT32_MAX has its lower 31 bits all set,
1468 // it cannot be represented precisely as a float, so an fcvt result of
1469 // INT32_MAX always indicates overflow.
1470 //
1471 // In wasm_compiler, to detect overflow in converting a FP value, fval, to
1472 // integer, V8 checks whether I2F(F2I(fval)) equals fval. However, if fval
1473 // == INT32_MAX+1, the value of I2F(F2I(fval)) happens to be fval. So,
1474 // INT32_MAX is not a good value to indicate overflow. Instead, we will
1475 // use INT32_MIN as the converted result of an out-of-range FP value,
1476 // exploiting the fact that INT32_MAX+1 is INT32_MIN.
1477 //
1478 // If the conversion overflows, the result is set to INT32_MIN. Here we
1479 // detect overflow by testing whether output + 1 < output (i.e.,
1480 // kScratchReg < output).
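// For example, if the truncation produced INT32_MAX, then output + 1
// wraps to INT32_MIN, so kScratchReg < output holds, the branch below is
// not taken, and the output is replaced with kScratchReg (== INT32_MIN).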
1481 if (set_overflow_to_min_i32) {
1482 __ Add32(kScratchReg, i.OutputRegister(), 1);
1483 __ BranchShort(&done, lt, i.OutputRegister(), Operand(kScratchReg));
1484 __ Move(i.OutputRegister(), kScratchReg);
1485 __ bind(&done);
1486 }
1487 break;
1488 }
1489 case kRiscvTruncLS: {
1490 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1491 __ Trunc_l_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1492 break;
1493 }
1494 case kRiscvTruncLD: {
1495 Label done;
1496 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1497 bool set_overflow_to_min_i64 = MiscField::decode(instr->opcode());
1498 __ Trunc_l_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1499 if (set_overflow_to_min_i64) {
1500 __ Add64(kScratchReg, i.OutputRegister(), 1);
1501 __ BranchShort(&done, lt, i.OutputRegister(), Operand(kScratchReg));
1502 __ Move(i.OutputRegister(), kScratchReg);
1503 __ bind(&done);
1504 }
1505 break;
1506 }
1507 case kRiscvTruncUwD: {
1508 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1509 __ Trunc_uw_d(i.OutputRegister(), i.InputDoubleRegister(0), result);
1510 break;
1511 }
1512 case kRiscvTruncUwS: {
1513 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1514 bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
1515 __ Trunc_uw_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1516
1517 // On RISC-V, if the input value exceeds UINT32_MAX, the result of fcvt
1518 // is UINT32_MAX. Note that, since UINT32_MAX has all 32 bits set, it
1519 // cannot be represented precisely as a float, so an fcvt result
1520 // of UINT32_MAX always indicates overflow.
1521 //
1522 // In wasm_compiler.cc, to detect overflow in converting a FP value, fval,
1523 // to integer, V8 checks whether I2F(F2I(fval)) equals fval. However, if
1524 // fval == UINT32_MAX+1, the value of I2F(F2I(fval)) happens to be fval.
1525 // So, UINT32_MAX is not a good value to indicate overflow. Instead, we
1526 // will use 0 as the converted result of an out-of-range FP value,
1527 // exploiting the fact that UINT32_MAX+1 is 0.
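// For example, if the truncation produced UINT32_MAX, then output + 1
// (computed as a 32-bit add) is 0, and LoadZeroIfConditionZero below
// replaces the output with zero.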
1528 if (set_overflow_to_min_u32) {
1529 __ Add32(kScratchReg, i.OutputRegister(), 1);
1530 // Set output to zero if the result overflows (i.e., is UINT32_MAX)
1531 __ LoadZeroIfConditionZero(i.OutputRegister(), kScratchReg);
1532 }
1533 break;
1534 }
1535 case kRiscvTruncUlS: {
1536 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1537 __ Trunc_ul_s(i.OutputRegister(), i.InputDoubleRegister(0), result);
1538 break;
1539 }
1540 case kRiscvTruncUlD: {
1541 Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
1542 __ Trunc_ul_d(i.OutputRegister(0), i.InputDoubleRegister(0), result);
1543 break;
1544 }
1545 case kRiscvBitcastDL:
1546 __ fmv_x_d(i.OutputRegister(), i.InputDoubleRegister(0));
1547 break;
1548 case kRiscvBitcastLD:
1549 __ fmv_d_x(i.OutputDoubleRegister(), i.InputRegister(0));
1550 break;
1551 case kRiscvBitcastInt32ToFloat32:
1552 __ fmv_w_x(i.OutputDoubleRegister(), i.InputRegister(0));
1553 break;
1554 case kRiscvBitcastFloat32ToInt32:
1555 __ fmv_x_w(i.OutputRegister(), i.InputDoubleRegister(0));
1556 break;
1557 case kRiscvFloat64ExtractLowWord32:
1558 __ ExtractLowWordFromF64(i.OutputRegister(), i.InputDoubleRegister(0));
1559 break;
1560 case kRiscvFloat64ExtractHighWord32:
1561 __ ExtractHighWordFromF64(i.OutputRegister(), i.InputDoubleRegister(0));
1562 break;
1563 case kRiscvFloat64InsertLowWord32:
1564 __ InsertLowWordF64(i.OutputDoubleRegister(), i.InputRegister(1));
1565 break;
1566 case kRiscvFloat64InsertHighWord32:
1567 __ InsertHighWordF64(i.OutputDoubleRegister(), i.InputRegister(1));
1568 break;
1569 // ... more basic instructions ...
1570
1571 case kRiscvSignExtendByte:
1572 __ SignExtendByte(i.OutputRegister(), i.InputRegister(0));
1573 break;
1574 case kRiscvSignExtendShort:
1575 __ SignExtendShort(i.OutputRegister(), i.InputRegister(0));
1576 break;
1577 case kRiscvLbu:
1578 __ Lbu(i.OutputRegister(), i.MemoryOperand());
1579 break;
1580 case kRiscvLb:
1581 __ Lb(i.OutputRegister(), i.MemoryOperand());
1582 break;
1583 case kRiscvSb:
1584 __ Sb(i.InputOrZeroRegister(2), i.MemoryOperand());
1585 break;
1586 case kRiscvLhu:
1587 __ Lhu(i.OutputRegister(), i.MemoryOperand());
1588 break;
1589 case kRiscvUlhu:
1590 __ Ulhu(i.OutputRegister(), i.MemoryOperand());
1591 break;
1592 case kRiscvLh:
1593 __ Lh(i.OutputRegister(), i.MemoryOperand());
1594 break;
1595 case kRiscvUlh:
1596 __ Ulh(i.OutputRegister(), i.MemoryOperand());
1597 break;
1598 case kRiscvSh:
1599 __ Sh(i.InputOrZeroRegister(2), i.MemoryOperand());
1600 break;
1601 case kRiscvUsh:
1602 __ Ush(i.InputOrZeroRegister(2), i.MemoryOperand());
1603 break;
1604 case kRiscvLw:
1605 __ Lw(i.OutputRegister(), i.MemoryOperand());
1606 break;
1607 case kRiscvUlw:
1608 __ Ulw(i.OutputRegister(), i.MemoryOperand());
1609 break;
1610 case kRiscvLwu:
1611 __ Lwu(i.OutputRegister(), i.MemoryOperand());
1612 break;
1613 case kRiscvUlwu:
1614 __ Ulwu(i.OutputRegister(), i.MemoryOperand());
1615 break;
1616 case kRiscvLd:
1617 __ Ld(i.OutputRegister(), i.MemoryOperand());
1618 break;
1619 case kRiscvUld:
1620 __ Uld(i.OutputRegister(), i.MemoryOperand());
1621 break;
1622 case kRiscvSw:
1623 __ Sw(i.InputOrZeroRegister(2), i.MemoryOperand());
1624 break;
1625 case kRiscvUsw:
1626 __ Usw(i.InputOrZeroRegister(2), i.MemoryOperand());
1627 break;
1628 case kRiscvSd:
1629 __ Sd(i.InputOrZeroRegister(2), i.MemoryOperand());
1630 break;
1631 case kRiscvUsd:
1632 __ Usd(i.InputOrZeroRegister(2), i.MemoryOperand());
1633 break;
1634 case kRiscvLoadFloat: {
1635 __ LoadFloat(i.OutputSingleRegister(), i.MemoryOperand());
1636 break;
1637 }
1638 case kRiscvULoadFloat: {
1639 __ ULoadFloat(i.OutputSingleRegister(), i.MemoryOperand(), kScratchReg);
1640 break;
1641 }
1642 case kRiscvStoreFloat: {
1643 size_t index = 0;
1644 MemOperand operand = i.MemoryOperand(&index);
1645 FPURegister ft = i.InputOrZeroSingleRegister(index);
1646 if (ft == kDoubleRegZero && !__ IsSingleZeroRegSet()) {
1647 __ LoadFPRImmediate(kDoubleRegZero, 0.0f);
1648 }
1649 __ StoreFloat(ft, operand);
1650 break;
1651 }
1652 case kRiscvUStoreFloat: {
1653 size_t index = 0;
1654 MemOperand operand = i.MemoryOperand(&index);
1655 FPURegister ft = i.InputOrZeroSingleRegister(index);
1656 if (ft == kDoubleRegZero && !__ IsSingleZeroRegSet()) {
1657 __ LoadFPRImmediate(kDoubleRegZero, 0.0f);
1658 }
1659 __ UStoreFloat(ft, operand, kScratchReg);
1660 break;
1661 }
1662 case kRiscvLoadDouble:
1663 __ LoadDouble(i.OutputDoubleRegister(), i.MemoryOperand());
1664 break;
1665 case kRiscvULoadDouble:
1666 __ ULoadDouble(i.OutputDoubleRegister(), i.MemoryOperand(), kScratchReg);
1667 break;
1668 case kRiscvStoreDouble: {
1669 FPURegister ft = i.InputOrZeroDoubleRegister(2);
1670 if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
1671 __ LoadFPRImmediate(kDoubleRegZero, 0.0);
1672 }
1673 __ StoreDouble(ft, i.MemoryOperand());
1674 break;
1675 }
1676 case kRiscvUStoreDouble: {
1677 FPURegister ft = i.InputOrZeroDoubleRegister(2);
1678 if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
1679 __ LoadFPRImmediate(kDoubleRegZero, 0.0);
1680 }
1681 __ UStoreDouble(ft, i.MemoryOperand(), kScratchReg);
1682 break;
1683 }
1684 case kRiscvSync: {
1685 __ sync();
1686 break;
1687 }
1688 case kRiscvPush:
1689 if (instr->InputAt(0)->IsFPRegister()) {
1690 __ StoreDouble(i.InputDoubleRegister(0), MemOperand(sp, -kDoubleSize));
        __ Sub64(sp, sp, Operand(kDoubleSize));
1692 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
1693 } else {
1694 __ Push(i.InputOrZeroRegister(0));
1695 frame_access_state()->IncreaseSPDelta(1);
1696 }
1697 break;
1698 case kRiscvPeek: {
1699 int reverse_slot = i.InputInt32(0);
1700 int offset =
1701 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
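      // Translate the reverse slot index into an fp-relative offset so the
      // value (FP or integer) can be reloaded below.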
1702 if (instr->OutputAt(0)->IsFPRegister()) {
1703 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1704 if (op->representation() == MachineRepresentation::kFloat64) {
1705 __ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset));
1706 } else {
1707 DCHECK_EQ(op->representation(), MachineRepresentation::kFloat32);
1708 __ LoadFloat(
1709 i.OutputSingleRegister(0),
1710 MemOperand(fp, offset + kLessSignificantWordInDoublewordOffset));
1711 }
1712 } else {
1713 __ Ld(i.OutputRegister(0), MemOperand(fp, offset));
1714 }
1715 break;
1716 }
1717 case kRiscvStackClaim: {
1718 __ Sub64(sp, sp, Operand(i.InputInt32(0)));
1719 frame_access_state()->IncreaseSPDelta(i.InputInt32(0) /
1720 kSystemPointerSize);
1721 break;
1722 }
1723 case kRiscvStoreToStackSlot: {
1724 if (instr->InputAt(0)->IsFPRegister()) {
1725 if (instr->InputAt(0)->IsSimd128Register()) {
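          // RVV vector stores take no immediate offset, so a non-zero slot
          // offset is first added to sp in kScratchReg2.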
1726 Register dst = sp;
1727 if (i.InputInt32(1) != 0) {
1728 dst = kScratchReg2;
1729 __ Add64(kScratchReg2, sp, Operand(i.InputInt32(1)));
1730 }
1731 __ VU.set(kScratchReg, E8, m1);
1732 __ vs(i.InputSimd128Register(0), dst, 0, E8);
1733 } else {
1734 __ StoreDouble(i.InputDoubleRegister(0),
1735 MemOperand(sp, i.InputInt32(1)));
1736 }
1737 } else {
1738 __ Sd(i.InputOrZeroRegister(0), MemOperand(sp, i.InputInt32(1)));
1739 }
1740 break;
1741 }
1742 case kRiscvByteSwap64: {
1743 __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 8, kScratchReg);
1744 break;
1745 }
1746 case kRiscvByteSwap32: {
1747 __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 4, kScratchReg);
1748 break;
1749 }
1750 case kAtomicLoadInt8:
1751 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1752 ASSEMBLE_ATOMIC_LOAD_INTEGER(Lb);
1753 break;
1754 case kAtomicLoadUint8:
1755 ASSEMBLE_ATOMIC_LOAD_INTEGER(Lbu);
1756 break;
1757 case kAtomicLoadInt16:
1758 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1759 ASSEMBLE_ATOMIC_LOAD_INTEGER(Lh);
1760 break;
1761 case kAtomicLoadUint16:
1762 ASSEMBLE_ATOMIC_LOAD_INTEGER(Lhu);
1763 break;
1764 case kAtomicLoadWord32:
1765 ASSEMBLE_ATOMIC_LOAD_INTEGER(Lw);
1766 break;
1767 case kRiscvWord64AtomicLoadUint64:
1768 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld);
1769 break;
1770 case kAtomicStoreWord8:
1771 ASSEMBLE_ATOMIC_STORE_INTEGER(Sb);
1772 break;
1773 case kAtomicStoreWord16:
1774 ASSEMBLE_ATOMIC_STORE_INTEGER(Sh);
1775 break;
1776 case kAtomicStoreWord32:
1777 ASSEMBLE_ATOMIC_STORE_INTEGER(Sw);
1778 break;
1779 case kRiscvWord64AtomicStoreWord64:
1780 ASSEMBLE_ATOMIC_STORE_INTEGER(Sd);
1781 break;
1782 case kAtomicExchangeInt8:
1783 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1784 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 8, 32);
1785 break;
1786 case kAtomicExchangeUint8:
1787 switch (AtomicWidthField::decode(opcode)) {
1788 case AtomicWidth::kWord32:
1789 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 8, 32);
1790 break;
1791 case AtomicWidth::kWord64:
1792 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 8, 64);
1793 break;
1794 }
1795 break;
1796 case kAtomicExchangeInt16:
1797 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1798 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 16, 32);
1799 break;
1800 case kAtomicExchangeUint16:
1801 switch (AtomicWidthField::decode(opcode)) {
1802 case AtomicWidth::kWord32:
1803 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 16, 32);
1804 break;
1805 case AtomicWidth::kWord64:
1806 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 16, 64);
1807 break;
1808 }
1809 break;
1810 case kAtomicExchangeWord32:
1811 switch (AtomicWidthField::decode(opcode)) {
1812 case AtomicWidth::kWord32:
1813 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(Ll, Sc);
1814 break;
1815 case AtomicWidth::kWord64:
1816 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 32, 64);
1817 break;
1818 }
1819 break;
1820 case kRiscvWord64AtomicExchangeUint64:
1821 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(Lld, Scd);
1822 break;
1823 case kAtomicCompareExchangeInt8:
1824 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1825 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 8, 32);
1826 break;
1827 case kAtomicCompareExchangeUint8:
1828 switch (AtomicWidthField::decode(opcode)) {
1829 case AtomicWidth::kWord32:
1830 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 8, 32);
1831 break;
1832 case AtomicWidth::kWord64:
1833 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 8, 64);
1834 break;
1835 }
1836 break;
1837 case kAtomicCompareExchangeInt16:
1838 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
1839 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 16, 32);
1840 break;
1841 case kAtomicCompareExchangeUint16:
1842 switch (AtomicWidthField::decode(opcode)) {
1843 case AtomicWidth::kWord32:
1844 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 16, 32);
1845 break;
1846 case AtomicWidth::kWord64:
1847 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 16, 64);
1848 break;
1849 }
1850 break;
1851 case kAtomicCompareExchangeWord32:
1852 switch (AtomicWidthField::decode(opcode)) {
1853 case AtomicWidth::kWord32:
1854 __ Sll32(i.InputRegister(2), i.InputRegister(2), 0);
1855 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll, Sc);
1856 break;
1857 case AtomicWidth::kWord64:
1858 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 32, 64);
1859 break;
1860 }
1861 break;
1862 case kRiscvWord64AtomicCompareExchangeUint64:
1863 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Lld, Scd);
1864 break;
1865 #define ATOMIC_BINOP_CASE(op, inst32, inst64) \
1866 case kAtomic##op##Int8: \
1867 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
1868 ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, true, 8, inst32, 32); \
1869 break; \
1870 case kAtomic##op##Uint8: \
1871 switch (AtomicWidthField::decode(opcode)) { \
1872 case AtomicWidth::kWord32: \
1873 ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, false, 8, inst32, 32); \
1874 break; \
1875 case AtomicWidth::kWord64: \
1876 ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 8, inst64, 64); \
1877 break; \
1878 } \
1879 break; \
1880 case kAtomic##op##Int16: \
1881 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
1882 ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, true, 16, inst32, 32); \
1883 break; \
1884 case kAtomic##op##Uint16: \
1885 switch (AtomicWidthField::decode(opcode)) { \
1886 case AtomicWidth::kWord32: \
1887 ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, false, 16, inst32, 32); \
1888 break; \
1889 case AtomicWidth::kWord64: \
1890 ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 16, inst64, 64); \
1891 break; \
1892 } \
1893 break; \
1894 case kAtomic##op##Word32: \
1895 switch (AtomicWidthField::decode(opcode)) { \
1896 case AtomicWidth::kWord32: \
1897 ASSEMBLE_ATOMIC_BINOP(Ll, Sc, inst32); \
1898 break; \
1899 case AtomicWidth::kWord64: \
1900 ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 32, inst64, 64); \
1901 break; \
1902 } \
1903 break; \
1904 case kRiscvWord64Atomic##op##Uint64: \
1905 ASSEMBLE_ATOMIC_BINOP(Lld, Scd, inst64); \
1906 break;
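      // Instantiate the LL/SC-based atomic read-modify-write cases for each
      // arithmetic/logical operation, in 32- and 64-bit widths.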
1907 ATOMIC_BINOP_CASE(Add, Add32, Add64)
1908 ATOMIC_BINOP_CASE(Sub, Sub32, Sub64)
1909 ATOMIC_BINOP_CASE(And, And, And)
1910 ATOMIC_BINOP_CASE(Or, Or, Or)
1911 ATOMIC_BINOP_CASE(Xor, Xor, Xor)
1912 #undef ATOMIC_BINOP_CASE
1913 case kRiscvAssertEqual:
1914 __ Assert(eq, static_cast<AbortReason>(i.InputOperand(2).immediate()),
1915 i.InputRegister(0), Operand(i.InputRegister(1)));
1916 break;
1917 case kRiscvStoreCompressTagged: {
1918 size_t index = 0;
1919 MemOperand operand = i.MemoryOperand(&index);
1920 __ StoreTaggedField(i.InputOrZeroRegister(index), operand);
1921 break;
1922 }
1923 case kRiscvLoadDecompressTaggedSigned: {
1924 CHECK(instr->HasOutput());
1925 Register result = i.OutputRegister();
1926 MemOperand operand = i.MemoryOperand();
1927 __ DecompressTaggedSigned(result, operand);
1928 break;
1929 }
1930 case kRiscvLoadDecompressTaggedPointer: {
1931 CHECK(instr->HasOutput());
1932 Register result = i.OutputRegister();
1933 MemOperand operand = i.MemoryOperand();
1934 __ DecompressTaggedPointer(result, operand);
1935 break;
1936 }
1937 case kRiscvLoadDecompressAnyTagged: {
1938 CHECK(instr->HasOutput());
1939 Register result = i.OutputRegister();
1940 MemOperand operand = i.MemoryOperand();
1941 __ DecompressAnyTagged(result, operand);
1942 break;
1943 }
1944 case kRiscvRvvSt: {
1945 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
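      // RVV vector loads/stores have no immediate offset field, so a
      // non-zero offset is folded into kScratchReg first.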
1946 Register dst = i.MemoryOperand().offset() == 0 ? i.MemoryOperand().rm()
1947 : kScratchReg;
1948 if (i.MemoryOperand().offset() != 0) {
1949 __ Add64(dst, i.MemoryOperand().rm(), i.MemoryOperand().offset());
1950 }
1951 __ vs(i.InputSimd128Register(2), dst, 0, VSew::E8);
1952 break;
1953 }
1954 case kRiscvRvvLd: {
1955 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
1956 Register src = i.MemoryOperand().offset() == 0 ? i.MemoryOperand().rm()
1957 : kScratchReg;
1958 if (i.MemoryOperand().offset() != 0) {
1959 __ Add64(src, i.MemoryOperand().rm(), i.MemoryOperand().offset());
1960 }
1961 __ vl(i.OutputSimd128Register(), src, 0, VSew::E8);
1962 break;
1963 }
1964 case kRiscvS128Zero: {
1965 Simd128Register dst = i.OutputSimd128Register();
1966 __ VU.set(kScratchReg, E8, m1);
1967 __ vmv_vx(dst, zero_reg);
1968 break;
1969 }
1970 case kRiscvS128Load32Zero: {
1971 Simd128Register dst = i.OutputSimd128Register();
1972 __ VU.set(kScratchReg, E32, m1);
1973 __ Lwu(kScratchReg, i.MemoryOperand());
1974 __ vmv_sx(dst, kScratchReg);
1975 break;
1976 }
1977 case kRiscvS128Load64Zero: {
1978 Simd128Register dst = i.OutputSimd128Register();
1979 __ VU.set(kScratchReg, E64, m1);
1980 __ Ld(kScratchReg, i.MemoryOperand());
1981 __ vmv_sx(dst, kScratchReg);
1982 break;
1983 }
1984 case kRiscvS128LoadLane: {
1985 Simd128Register dst = i.OutputSimd128Register();
1986 DCHECK_EQ(dst, i.InputSimd128Register(0));
1987 auto sz = static_cast<int>(MiscField::decode(instr->opcode()));
1988 __ LoadLane(sz, dst, i.InputUint8(1), i.MemoryOperand(2));
1989 break;
1990 }
1991 case kRiscvS128StoreLane: {
1992 Simd128Register src = i.InputSimd128Register(0);
1993 DCHECK_EQ(src, i.InputSimd128Register(0));
1994 auto sz = static_cast<int>(MiscField::decode(instr->opcode()));
1995 __ StoreLane(sz, src, i.InputUint8(1), i.MemoryOperand(2));
1996 break;
1997 }
1998 case kRiscvS128Load64ExtendS: {
1999 __ VU.set(kScratchReg, E64, m1);
2000 __ Ld(kScratchReg, i.MemoryOperand());
2001 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
2002 __ VU.set(kScratchReg, i.InputInt8(2), m1);
2003 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
2004 break;
2005 }
2006 case kRiscvS128Load64ExtendU: {
2007 __ VU.set(kScratchReg, E64, m1);
2008 __ Ld(kScratchReg, i.MemoryOperand());
2009 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
2010 __ VU.set(kScratchReg, i.InputInt8(2), m1);
2011 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
2012 break;
2013 }
2014 case kRiscvS128LoadSplat: {
2015 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
2016 switch (i.InputInt8(2)) {
2017 case E8:
2018 __ Lb(kScratchReg, i.MemoryOperand());
2019 break;
2020 case E16:
2021 __ Lh(kScratchReg, i.MemoryOperand());
2022 break;
2023 case E32:
2024 __ Lw(kScratchReg, i.MemoryOperand());
2025 break;
2026 case E64:
2027 __ Ld(kScratchReg, i.MemoryOperand());
2028 break;
2029 default:
2030 UNREACHABLE();
2031 }
2032 __ vmv_vx(i.OutputSimd128Register(), kScratchReg);
2033 break;
2034 }
2035 case kRiscvS128AllOnes: {
2036 __ VU.set(kScratchReg, E8, m1);
2037 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
2038 __ vnot_vv(i.OutputSimd128Register(), i.OutputSimd128Register());
2039 break;
2040 }
2041 case kRiscvS128Select: {
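      // Bitwise select: dst = (mask & src1) | (~mask & src2), with input 0
      // acting as the mask.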
2042 __ VU.set(kScratchReg, E8, m1);
2043 __ vand_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
2044 i.InputSimd128Register(0));
2045 __ vnot_vv(kSimd128ScratchReg2, i.InputSimd128Register(0));
2046 __ vand_vv(kSimd128ScratchReg2, i.InputSimd128Register(2),
2047 kSimd128ScratchReg2);
2048 __ vor_vv(i.OutputSimd128Register(), kSimd128ScratchReg,
2049 kSimd128ScratchReg2);
2050 break;
2051 }
2052 case kRiscvS128And: {
2053 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2054 __ vand_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2055 i.InputSimd128Register(1));
2056 break;
2057 }
2058 case kRiscvS128Or: {
2059 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2060 __ vor_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2061 i.InputSimd128Register(1));
2062 break;
2063 }
2064 case kRiscvS128Xor: {
2065 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2066 __ vxor_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2067 i.InputSimd128Register(1));
2068 break;
2069 }
2070 case kRiscvS128Not: {
2071 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2072 __ vnot_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2073 break;
2074 }
2075 case kRiscvS128AndNot: {
2076 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2077 __ vnot_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
2078 __ vand_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2079 i.OutputSimd128Register());
2080 break;
2081 }
2082 case kRiscvS128Const: {
2083 Simd128Register dst = i.OutputSimd128Register();
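      // Reassemble the 16-byte constant from the four 32-bit immediates and
      // let the macro-assembler materialize it into dst.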
2084 uint8_t imm[16];
2085 *reinterpret_cast<uint64_t*>(imm) =
2086 make_uint64(i.InputUint32(1), i.InputUint32(0));
2087 *(reinterpret_cast<uint64_t*>(imm) + 1) =
2088 make_uint64(i.InputUint32(3), i.InputUint32(2));
2089 __ WasmRvvS128const(dst, imm);
2090 break;
2091 }
2092 case kRiscvI64x2Mul: {
2093 (__ VU).set(kScratchReg, VSew::E64, Vlmul::m1);
2094 __ vmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2095 i.InputSimd128Register(1));
2096 break;
2097 }
2098 case kRiscvI64x2Add: {
2099 (__ VU).set(kScratchReg, VSew::E64, Vlmul::m1);
2100 __ vadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2101 i.InputSimd128Register(1));
2102 break;
2103 }
2104 case kRiscvVrgather: {
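      // Gather from input 0 using either a vector index or an immediate index
      // materialized into a scratch vector. When dst aliases the source,
      // gather into a scratch register first, since vrgather does not allow
      // the destination to overlap its sources.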
2105 Simd128Register index = i.InputSimd128Register(0);
2106 if (!(instr->InputAt(1)->IsImmediate())) {
2107 index = i.InputSimd128Register(1);
2108 } else {
2109 __ VU.set(kScratchReg, E64, m1);
2110 __ li(kScratchReg, i.InputInt64(1));
2111 __ vmv_sx(kSimd128ScratchReg3, kScratchReg);
2112 index = kSimd128ScratchReg3;
2113 }
2114 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
2115 if (i.OutputSimd128Register() == i.InputSimd128Register(0)) {
2116 __ vrgather_vv(kSimd128ScratchReg, i.InputSimd128Register(0), index);
2117 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
2118 } else {
2119 __ vrgather_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2120 index);
2121 }
2122 break;
2123 }
2124 case kRiscvVslidedown: {
2125 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
2126 if (instr->InputAt(1)->IsImmediate()) {
2127 DCHECK(is_uint5(i.InputInt32(1)));
2128 __ vslidedown_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2129 i.InputInt5(1));
2130 } else {
2131 __ vslidedown_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2132 i.InputRegister(1));
2133 }
2134 break;
2135 }
2136 case kRiscvI8x16RoundingAverageU: {
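      // Unsigned rounding average: widen to 16 bits, compute a + b + 1, halve
      // with an unsigned divide, then narrow back to 8 bits.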
2137 __ VU.set(kScratchReg2, E8, m1);
2138 __ vwaddu_vv(kSimd128ScratchReg, i.InputSimd128Register(0),
2139 i.InputSimd128Register(1));
2140 __ li(kScratchReg, 1);
2141 __ vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
2142 __ li(kScratchReg, 2);
2143 __ VU.set(kScratchReg2, E16, m2);
2144 __ vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
2145 __ VU.set(kScratchReg2, E8, m1);
2146 __ vnclipu_vi(i.OutputSimd128Register(), kSimd128ScratchReg3, 0);
2147 break;
2148 }
2149 case kRiscvI16x8RoundingAverageU: {
2150 __ VU.set(kScratchReg2, E16, m1);
2151 __ vwaddu_vv(kSimd128ScratchReg, i.InputSimd128Register(0),
2152 i.InputSimd128Register(1));
2153 __ li(kScratchReg, 1);
2154 __ vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
2155 __ li(kScratchReg, 2);
2156 __ VU.set(kScratchReg2, E32, m2);
2157 __ vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
2158 __ VU.set(kScratchReg2, E16, m1);
2159 __ vnclipu_vi(i.OutputSimd128Register(), kSimd128ScratchReg3, 0);
2160 break;
2161 }
2162 case kRiscvI16x8Mul: {
2163 (__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
2164 __ vmv_vx(kSimd128ScratchReg, zero_reg);
2165 __ vmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2166 i.InputSimd128Register(1));
2167 break;
2168 }
2169 case kRiscvI16x8Q15MulRSatS: {
2170 __ VU.set(kScratchReg, E16, m1);
2171 __ vsmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2172 i.InputSimd128Register(1));
2173 break;
2174 }
2175 case kRiscvI16x8AddSatS: {
2176 (__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
2177 __ vsadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2178 i.InputSimd128Register(1));
2179 break;
2180 }
2181 case kRiscvI16x8AddSatU: {
2182 (__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
2183 __ vsaddu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2184 i.InputSimd128Register(1));
2185 break;
2186 }
2187 case kRiscvI8x16AddSatS: {
2188 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2189 __ vsadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2190 i.InputSimd128Register(1));
2191 break;
2192 }
2193 case kRiscvI8x16AddSatU: {
2194 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2195 __ vsaddu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2196 i.InputSimd128Register(1));
2197 break;
2198 }
2199 case kRiscvI64x2Sub: {
2200 (__ VU).set(kScratchReg, VSew::E64, Vlmul::m1);
2201 __ vsub_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2202 i.InputSimd128Register(1));
2203 break;
2204 }
2205 case kRiscvI16x8SubSatS: {
2206 (__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
2207 __ vssub_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2208 i.InputSimd128Register(1));
2209 break;
2210 }
2211 case kRiscvI16x8SubSatU: {
2212 (__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
2213 __ vssubu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2214 i.InputSimd128Register(1));
2215 break;
2216 }
2217 case kRiscvI8x16SubSatS: {
2218 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2219 __ vssub_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2220 i.InputSimd128Register(1));
2221 break;
2222 }
2223 case kRiscvI8x16SubSatU: {
2224 (__ VU).set(kScratchReg, VSew::E8, Vlmul::m1);
2225 __ vssubu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2226 i.InputSimd128Register(1));
2227 break;
2228 }
2229 case kRiscvI8x16ExtractLaneU: {
2230 __ VU.set(kScratchReg, E8, m1);
2231 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
2232 i.InputInt8(1));
2233 __ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
2234 __ slli(i.OutputRegister(), i.OutputRegister(), 64 - 8);
2235 __ srli(i.OutputRegister(), i.OutputRegister(), 64 - 8);
2236 break;
2237 }
2238 case kRiscvI8x16ExtractLaneS: {
2239 __ VU.set(kScratchReg, E8, m1);
2240 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
2241 i.InputInt8(1));
2242 __ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
2243 break;
2244 }
2245 case kRiscvI16x8ExtractLaneU: {
2246 __ VU.set(kScratchReg, E16, m1);
2247 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
2248 i.InputInt8(1));
2249 __ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
2250 __ slli(i.OutputRegister(), i.OutputRegister(), 64 - 16);
2251 __ srli(i.OutputRegister(), i.OutputRegister(), 64 - 16);
2252 break;
2253 }
2254 case kRiscvI16x8ExtractLaneS: {
2255 __ VU.set(kScratchReg, E16, m1);
2256 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
2257 i.InputInt8(1));
2258 __ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
2259 break;
2260 }
2261 case kRiscvI8x16ShrU: {
2262 __ VU.set(kScratchReg, E8, m1);
2263 if (instr->InputAt(1)->IsRegister()) {
2264 __ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
2265 __ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2266 i.InputRegister(1));
2267 } else {
2268 __ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2269 i.InputInt5(1) % 8);
2270 }
2271 break;
2272 }
2273 case kRiscvI16x8ShrU: {
2274 __ VU.set(kScratchReg, E16, m1);
2275 if (instr->InputAt(1)->IsRegister()) {
2276 __ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
2277 __ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2278 i.InputRegister(1));
2279 } else {
2280 __ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2281 i.InputInt5(1) % 16);
2282 }
2283 break;
2284 }
2285 case kRiscvI32x4Mul: {
2286 __ VU.set(kScratchReg, E32, m1);
2287 __ vmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2288 i.InputSimd128Register(1));
2289 break;
2290 }
2291 case kRiscvI32x4TruncSatF64x2SZero: {
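      // Saturating f64x2 -> i32x4 truncation: NaN lanes are masked off so
      // they keep the zero pre-filled into the scratch result register; the
      // two converted values land in the low lanes of the output.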
2292 __ VU.set(kScratchReg, E64, m1);
2293 __ vmv_vx(kSimd128ScratchReg, zero_reg);
2294 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
2295 __ vmv_vv(kSimd128ScratchReg3, i.InputSimd128Register(0));
2296 __ VU.set(kScratchReg, E32, m1);
2297 __ VU.set(RoundingMode::RTZ);
2298 __ vfncvt_x_f_w(kSimd128ScratchReg, kSimd128ScratchReg3, MaskType::Mask);
2299 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
2300 break;
2301 }
2302 case kRiscvI32x4TruncSatF64x2UZero: {
2303 __ VU.set(kScratchReg, E64, m1);
2304 __ vmv_vx(kSimd128ScratchReg, zero_reg);
2305 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
2306 __ vmv_vv(kSimd128ScratchReg3, i.InputSimd128Register(0));
2307 __ VU.set(kScratchReg, E32, m1);
2308 __ VU.set(RoundingMode::RTZ);
2309 __ vfncvt_xu_f_w(kSimd128ScratchReg, kSimd128ScratchReg3, MaskType::Mask);
2310 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
2311 break;
2312 }
2313 case kRiscvI32x4ShrU: {
2314 __ VU.set(kScratchReg, E32, m1);
2315 if (instr->InputAt(1)->IsRegister()) {
2316 __ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
2317 __ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2318 i.InputRegister(1));
2319 } else {
2320 __ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2321 i.InputInt5(1) % 32);
2322 }
2323 break;
2324 }
2325 case kRiscvI64x2ShrU: {
2326 __ VU.set(kScratchReg, E64, m1);
2327 if (instr->InputAt(1)->IsRegister()) {
2328 __ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
2329 __ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2330 i.InputRegister(1));
2331 } else {
2332 if (is_uint5(i.InputInt6(1) % 64)) {
2333 __ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2334 i.InputInt6(1) % 64);
2335 } else {
2336 __ li(kScratchReg, i.InputInt6(1) % 64);
2337 __ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2338 kScratchReg);
2339 }
2340 }
2341 break;
2342 }
2343 case kRiscvI8x16ShrS: {
2344 __ VU.set(kScratchReg, E8, m1);
2345 if (instr->InputAt(1)->IsRegister()) {
2346 __ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
2347 __ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2348 i.InputRegister(1));
2349 } else {
2350 __ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2351 i.InputInt5(1) % 8);
2352 }
2353 break;
2354 }
2355 case kRiscvI16x8ShrS: {
2356 __ VU.set(kScratchReg, E16, m1);
2357 if (instr->InputAt(1)->IsRegister()) {
2358 __ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
2359 __ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2360 i.InputRegister(1));
2361 } else {
2362 __ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2363 i.InputInt5(1) % 16);
2364 }
2365 break;
2366 }
2367 case kRiscvI32x4ShrS: {
2368 __ VU.set(kScratchReg, E32, m1);
2369 if (instr->InputAt(1)->IsRegister()) {
2370 __ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
2371 __ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2372 i.InputRegister(1));
2373 } else {
2374 __ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2375 i.InputInt5(1) % 32);
2376 }
2377 break;
2378 }
2379 case kRiscvI64x2ShrS: {
2380 __ VU.set(kScratchReg, E64, m1);
2381 if (instr->InputAt(1)->IsRegister()) {
2382 __ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
2383 __ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2384 i.InputRegister(1));
2385 } else {
2386 if (is_uint5(i.InputInt6(1) % 64)) {
2387 __ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2388 i.InputInt6(1) % 64);
2389 } else {
2390 __ li(kScratchReg, i.InputInt6(1) % 64);
2391 __ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2392 kScratchReg);
2393 }
2394 }
2395 break;
2396 }
2397 case kRiscvI32x4ExtractLane: {
2398 __ WasmRvvExtractLane(i.OutputRegister(), i.InputSimd128Register(0),
2399 i.InputInt8(1), E32, m1);
2400 break;
2401 }
2402 case kRiscvI32x4Abs: {
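      // Lane-wise abs: copy the input, mark negative lanes in v0 with vmslt,
      // then negate only those lanes under the mask.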
2403 __ VU.set(kScratchReg, E32, m1);
2404 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2405 __ vmv_vx(kSimd128RegZero, zero_reg);
2406 __ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
2407 __ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2408 MaskType::Mask);
2409 break;
2410 }
2411 case kRiscvI16x8Abs: {
2412 __ VU.set(kScratchReg, E16, m1);
2413 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2414 __ vmv_vx(kSimd128RegZero, zero_reg);
2415 __ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
2416 __ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2417 MaskType::Mask);
2418 break;
2419 }
2420 case kRiscvI8x16Abs: {
2421 __ VU.set(kScratchReg, E8, m1);
2422 __ vmv_vx(kSimd128RegZero, zero_reg);
2423 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2424 __ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
2425 __ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2426 MaskType::Mask);
2427 break;
2428 }
2429 case kRiscvI64x2Abs: {
2430 __ VU.set(kScratchReg, E64, m1);
2431 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2432 __ vmv_vx(kSimd128RegZero, zero_reg);
2433 __ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
2434 __ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2435 MaskType::Mask);
2436 break;
2437 }
2438 case kRiscvI64x2ExtractLane: {
2439 __ WasmRvvExtractLane(i.OutputRegister(), i.InputSimd128Register(0),
2440 i.InputInt8(1), E64, m1);
2441 break;
2442 }
2443 case kRiscvI8x16Eq: {
2444 __ WasmRvvEq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2445 i.InputSimd128Register(1), E8, m1);
2446 break;
2447 }
2448 case kRiscvI16x8Eq: {
2449 __ WasmRvvEq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2450 i.InputSimd128Register(1), E16, m1);
2451 break;
2452 }
2453 case kRiscvI32x4Eq: {
2454 __ WasmRvvEq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2455 i.InputSimd128Register(1), E32, m1);
2456 break;
2457 }
2458 case kRiscvI64x2Eq: {
2459 __ WasmRvvEq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2460 i.InputSimd128Register(1), E64, m1);
2461 break;
2462 }
2463 case kRiscvI8x16Ne: {
2464 __ WasmRvvNe(i.OutputSimd128Register(), i.InputSimd128Register(0),
2465 i.InputSimd128Register(1), E8, m1);
2466 break;
2467 }
2468 case kRiscvI16x8Ne: {
2469 __ WasmRvvNe(i.OutputSimd128Register(), i.InputSimd128Register(0),
2470 i.InputSimd128Register(1), E16, m1);
2471 break;
2472 }
2473 case kRiscvI32x4Ne: {
2474 __ WasmRvvNe(i.OutputSimd128Register(), i.InputSimd128Register(0),
2475 i.InputSimd128Register(1), E32, m1);
2476 break;
2477 }
2478 case kRiscvI64x2Ne: {
2479 __ WasmRvvNe(i.OutputSimd128Register(), i.InputSimd128Register(0),
2480 i.InputSimd128Register(1), E64, m1);
2481 break;
2482 }
2483 case kRiscvI8x16GeS: {
2484 __ WasmRvvGeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2485 i.InputSimd128Register(1), E8, m1);
2486 break;
2487 }
2488 case kRiscvI16x8GeS: {
2489 __ WasmRvvGeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2490 i.InputSimd128Register(1), E16, m1);
2491 break;
2492 }
2493 case kRiscvI32x4GeS: {
2494 __ WasmRvvGeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2495 i.InputSimd128Register(1), E32, m1);
2496 break;
2497 }
2498 case kRiscvI64x2GeS: {
2499 __ WasmRvvGeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2500 i.InputSimd128Register(1), E64, m1);
2501 break;
2502 }
2503 case kRiscvI8x16GeU: {
2504 __ WasmRvvGeU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2505 i.InputSimd128Register(1), E8, m1);
2506 break;
2507 }
2508 case kRiscvI16x8GeU: {
2509 __ WasmRvvGeU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2510 i.InputSimd128Register(1), E16, m1);
2511 break;
2512 }
2513 case kRiscvI32x4GeU: {
2514 __ WasmRvvGeU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2515 i.InputSimd128Register(1), E32, m1);
2516 break;
2517 }
2518 case kRiscvI8x16GtS: {
2519 __ WasmRvvGtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2520 i.InputSimd128Register(1), E8, m1);
2521 break;
2522 }
2523 case kRiscvI16x8GtS: {
2524 __ WasmRvvGtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2525 i.InputSimd128Register(1), E16, m1);
2526 break;
2527 }
2528 case kRiscvI32x4GtS: {
2529 __ WasmRvvGtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2530 i.InputSimd128Register(1), E32, m1);
2531 break;
2532 }
2533 case kRiscvI64x2GtS: {
2534 __ WasmRvvGtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2535 i.InputSimd128Register(1), E64, m1);
2536 break;
2537 }
2538 case kRiscvI8x16GtU: {
2539 __ WasmRvvGtU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2540 i.InputSimd128Register(1), E8, m1);
2541 break;
2542 }
2543 case kRiscvI16x8GtU: {
2544 __ WasmRvvGtU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2545 i.InputSimd128Register(1), E16, m1);
2546 break;
2547 }
2548 case kRiscvI32x4GtU: {
2549 __ WasmRvvGtU(i.OutputSimd128Register(), i.InputSimd128Register(0),
2550 i.InputSimd128Register(1), E32, m1);
2551 break;
2552 }
2553 case kRiscvI8x16Shl: {
2554 __ VU.set(kScratchReg, E8, m1);
2555 if (instr->InputAt(1)->IsRegister()) {
2556 __ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
2557 __ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2558 i.InputRegister(1));
2559 } else {
2560 __ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2561 i.InputInt5(1) % 8);
2562 }
2563 break;
2564 }
2565 case kRiscvI16x8Shl: {
2566 __ VU.set(kScratchReg, E16, m1);
2567 if (instr->InputAt(1)->IsRegister()) {
2568 __ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
2569 __ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2570 i.InputRegister(1));
2571 } else {
2572 __ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2573 i.InputInt5(1) % 16);
2574 }
2575 break;
2576 }
2577 case kRiscvI32x4Shl: {
2578 __ VU.set(kScratchReg, E32, m1);
2579 if (instr->InputAt(1)->IsRegister()) {
2580 __ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
2581 __ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2582 i.InputRegister(1));
2583 } else {
2584 __ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2585 i.InputInt5(1) % 32);
2586 }
2587 break;
2588 }
2589 case kRiscvI64x2Shl: {
2590 __ VU.set(kScratchReg, E64, m1);
2591 if (instr->InputAt(1)->IsRegister()) {
2592 __ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
2593 __ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2594 i.InputRegister(1));
2595 } else {
2596 if (is_int5(i.InputInt6(1) % 64)) {
2597 __ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
2598 i.InputInt6(1) % 64);
2599 } else {
2600 __ li(kScratchReg, i.InputInt6(1) % 64);
2601 __ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
2602 kScratchReg);
2603 }
2604 }
2605 break;
2606 }
2607 case kRiscvI8x16ReplaceLane: {
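      // Build a one-hot lane mask in v0, then vmerge the scalar into just
      // that lane while copying the remaining lanes from src.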
2608 Simd128Register src = i.InputSimd128Register(0);
2609 Simd128Register dst = i.OutputSimd128Register();
2610 __ VU.set(kScratchReg, E64, m1);
2611 __ li(kScratchReg, 0x1 << i.InputInt8(1));
2612 __ vmv_sx(v0, kScratchReg);
2613 __ VU.set(kScratchReg, E8, m1);
2614 __ vmerge_vx(dst, i.InputRegister(2), src);
2615 break;
2616 }
2617 case kRiscvI16x8ReplaceLane: {
2618 Simd128Register src = i.InputSimd128Register(0);
2619 Simd128Register dst = i.OutputSimd128Register();
2620 __ VU.set(kScratchReg, E16, m1);
2621 __ li(kScratchReg, 0x1 << i.InputInt8(1));
2622 __ vmv_sx(v0, kScratchReg);
2623 __ vmerge_vx(dst, i.InputRegister(2), src);
2624 break;
2625 }
2626 case kRiscvI64x2ReplaceLane: {
2627 Simd128Register src = i.InputSimd128Register(0);
2628 Simd128Register dst = i.OutputSimd128Register();
2629 __ VU.set(kScratchReg, E64, m1);
2630 __ li(kScratchReg, 0x1 << i.InputInt8(1));
2631 __ vmv_sx(v0, kScratchReg);
2632 __ vmerge_vx(dst, i.InputRegister(2), src);
2633 break;
2634 }
2635 case kRiscvI32x4ReplaceLane: {
2636 Simd128Register src = i.InputSimd128Register(0);
2637 Simd128Register dst = i.OutputSimd128Register();
2638 __ VU.set(kScratchReg, E32, m1);
2639 __ li(kScratchReg, 0x1 << i.InputInt8(1));
2640 __ vmv_sx(v0, kScratchReg);
2641 __ vmerge_vx(dst, i.InputRegister(2), src);
2642 break;
2643 }
2644 case kRiscvI8x16BitMask: {
2645 Register dst = i.OutputRegister();
2646 Simd128Register src = i.InputSimd128Register(0);
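      // Compare each lane against zero to collect the sign bits into a mask
      // register, then move the low mask bits into the scalar result.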
2647 __ VU.set(kScratchReg, E8, m1);
2648 __ vmv_vx(kSimd128RegZero, zero_reg);
2649 __ vmslt_vv(kSimd128ScratchReg, src, kSimd128RegZero);
2650 __ VU.set(kScratchReg, E32, m1);
2651 __ vmv_xs(dst, kSimd128ScratchReg);
2652 break;
2653 }
2654 case kRiscvI16x8BitMask: {
2655 Register dst = i.OutputRegister();
2656 Simd128Register src = i.InputSimd128Register(0);
2657 __ VU.set(kScratchReg, E16, m1);
2658 __ vmv_vx(kSimd128RegZero, zero_reg);
2659 __ vmslt_vv(kSimd128ScratchReg, src, kSimd128RegZero);
2660 __ VU.set(kScratchReg, E32, m1);
2661 __ vmv_xs(dst, kSimd128ScratchReg);
2662 break;
2663 }
2664 case kRiscvI32x4BitMask: {
2665 Register dst = i.OutputRegister();
2666 Simd128Register src = i.InputSimd128Register(0);
2667 __ VU.set(kScratchReg, E32, m1);
2668 __ vmv_vx(kSimd128RegZero, zero_reg);
2669 __ vmslt_vv(kSimd128ScratchReg, src, kSimd128RegZero);
2670 __ vmv_xs(dst, kSimd128ScratchReg);
2671 break;
2672 }
2673 case kRiscvI64x2BitMask: {
2674 Register dst = i.OutputRegister();
2675 Simd128Register src = i.InputSimd128Register(0);
2676 __ VU.set(kScratchReg, E64, m1);
2677 __ vmv_vx(kSimd128RegZero, zero_reg);
2678 __ vmslt_vv(kSimd128ScratchReg, src, kSimd128RegZero);
2679 __ VU.set(kScratchReg, E32, m1);
2680 __ vmv_xs(dst, kSimd128ScratchReg);
2681 break;
2682 }
2683 case kRiscvV128AnyTrue: {
2684 __ VU.set(kScratchReg, E8, m1);
2685 Register dst = i.OutputRegister();
2686 Label t;
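      // AnyTrue: unsigned-max reduction over all lanes; any non-zero result
      // is normalized to 1.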
2687 __ vmv_sx(kSimd128ScratchReg, zero_reg);
2688 __ vredmaxu_vs(kSimd128ScratchReg, i.InputSimd128Register(0),
2689 kSimd128ScratchReg);
2690 __ vmv_xs(dst, kSimd128ScratchReg);
2691 __ beq(dst, zero_reg, &t);
2692 __ li(dst, 1);
2693 __ bind(&t);
2694 break;
2695 }
2696 case kRiscvI64x2AllTrue: {
2697 __ VU.set(kScratchReg, E64, m1);
2698 Register dst = i.OutputRegister();
2699 Label all_true;
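      // AllTrue: unsigned-min reduction seeded with all ones; the result is 0
      // iff some lane is zero, otherwise it is normalized to 1.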
2700 __ li(kScratchReg, -1);
2701 __ vmv_sx(kSimd128ScratchReg, kScratchReg);
2702 __ vredminu_vs(kSimd128ScratchReg, i.InputSimd128Register(0),
2703 kSimd128ScratchReg);
2704 __ vmv_xs(dst, kSimd128ScratchReg);
2705 __ beqz(dst, &all_true);
2706 __ li(dst, 1);
2707 __ bind(&all_true);
2708 break;
2709 }
2710 case kRiscvI32x4AllTrue: {
2711 __ VU.set(kScratchReg, E32, m1);
2712 Register dst = i.OutputRegister();
2713 Label all_true;
2714 __ li(kScratchReg, -1);
2715 __ vmv_sx(kSimd128ScratchReg, kScratchReg);
2716 __ vredminu_vs(kSimd128ScratchReg, i.InputSimd128Register(0),
2717 kSimd128ScratchReg);
2718 __ vmv_xs(dst, kSimd128ScratchReg);
2719 __ beqz(dst, &all_true);
2720 __ li(dst, 1);
2721 __ bind(&all_true);
2722 break;
2723 }
2724 case kRiscvI16x8AllTrue: {
2725 __ VU.set(kScratchReg, E16, m1);
2726 Register dst = i.OutputRegister();
2727 Label all_true;
2728 __ li(kScratchReg, -1);
2729 __ vmv_sx(kSimd128ScratchReg, kScratchReg);
2730 __ vredminu_vs(kSimd128ScratchReg, i.InputSimd128Register(0),
2731 kSimd128ScratchReg);
2732 __ vmv_xs(dst, kSimd128ScratchReg);
2733 __ beqz(dst, &all_true);
2734 __ li(dst, 1);
2735 __ bind(&all_true);
2736 break;
2737 }
2738 case kRiscvI8x16AllTrue: {
2739 __ VU.set(kScratchReg, E8, m1);
2740 Register dst = i.OutputRegister();
2741 Label all_true;
2742 __ li(kScratchReg, -1);
2743 __ vmv_sx(kSimd128ScratchReg, kScratchReg);
2744 __ vredminu_vs(kSimd128ScratchReg, i.InputSimd128Register(0),
2745 kSimd128ScratchReg);
2746 __ vmv_xs(dst, kSimd128ScratchReg);
2747 __ beqz(dst, &all_true);
2748 __ li(dst, 1);
2749 __ bind(&all_true);
2750 break;
2751 }
2752 case kRiscvI8x16Shuffle: {
2753 VRegister dst = i.OutputSimd128Register(),
2754 src0 = i.InputSimd128Register(0),
2755 src1 = i.InputSimd128Register(1);
2756
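      // Materialize the 16 shuffle indices from the two 64-bit immediates,
      // gather the bytes selected from src0, subtract 16 from the indices and
      // gather the bytes selected from src1, then OR the two halves together.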
2757 int64_t imm1 = make_uint64(i.InputInt32(3), i.InputInt32(2));
2758 int64_t imm2 = make_uint64(i.InputInt32(5), i.InputInt32(4));
2759 __ VU.set(kScratchReg, VSew::E64, Vlmul::m1);
2760 __ li(kScratchReg, imm2);
2761 __ vmv_sx(kSimd128ScratchReg2, kScratchReg);
2762 __ vslideup_vi(kSimd128ScratchReg, kSimd128ScratchReg2, 1);
2763 __ li(kScratchReg, imm1);
2764 __ vmv_sx(kSimd128ScratchReg, kScratchReg);
2765
2766 __ VU.set(kScratchReg, E8, m1);
2767 if (dst == src0) {
2768 __ vmv_vv(kSimd128ScratchReg2, src0);
2769 src0 = kSimd128ScratchReg2;
2770 } else if (dst == src1) {
2771 __ vmv_vv(kSimd128ScratchReg2, src1);
2772 src1 = kSimd128ScratchReg2;
2773 }
2774 __ vrgather_vv(dst, src0, kSimd128ScratchReg);
2775 __ vadd_vi(kSimd128ScratchReg, kSimd128ScratchReg, -16);
2776 __ vrgather_vv(kSimd128ScratchReg3, src1, kSimd128ScratchReg);
2777 __ vor_vv(dst, dst, kSimd128ScratchReg3);
2778 break;
2779 }
2780 case kRiscvI8x16Popcnt: {
2781 VRegister dst = i.OutputSimd128Register(),
2782 src = i.InputSimd128Register(0);
2783 Label t;
2784
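      // Per-lane popcount via the v &= v - 1 trick: each iteration increments
      // the count of lanes that are still non-zero and clears their lowest
      // set bit, looping until every lane reaches zero.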
2785 __ VU.set(kScratchReg, E8, m1);
2786 __ vmv_vv(kSimd128ScratchReg, src);
2787 __ vmv_vv(dst, kSimd128RegZero);
2788
2789 __ bind(&t);
2790 __ vmsne_vv(v0, kSimd128ScratchReg, kSimd128RegZero);
2791 __ vadd_vi(dst, dst, 1, Mask);
2792 __ vadd_vi(kSimd128ScratchReg2, kSimd128ScratchReg, -1, Mask);
2793 __ vand_vv(kSimd128ScratchReg, kSimd128ScratchReg, kSimd128ScratchReg2);
      // kScratchReg = -1 if kSimd128ScratchReg == 0, i.e. no active element
      // remains, which ends the loop.
2795 __ vfirst_m(kScratchReg, kSimd128ScratchReg);
2796 __ bgez(kScratchReg, &t);
2797 break;
2798 }
2799 case kRiscvF64x2NearestInt: {
2800 __ Round_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
2801 kScratchReg, kSimd128ScratchReg);
2802 break;
2803 }
2804 case kRiscvF64x2Trunc: {
2805 __ Trunc_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
2806 kScratchReg, kSimd128ScratchReg);
2807 break;
2808 }
2809 case kRiscvF64x2Sqrt: {
2810 __ VU.set(kScratchReg, E64, m1);
2811 __ vfsqrt_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
2812 break;
2813 }
2814 case kRiscvF64x2Splat: {
2815 (__ VU).set(kScratchReg, E64, m1);
2816 __ fmv_x_d(kScratchReg, i.InputDoubleRegister(0));
2817 __ vmv_vx(i.OutputSimd128Register(), kScratchReg);
2818 break;
2819 }
2820 case kRiscvF64x2Abs: {
2821 __ VU.set(kScratchReg, VSew::E64, Vlmul::m1);
2822 __ vfabs_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2823 break;
2824 }
2825 case kRiscvF64x2Neg: {
2826 __ VU.set(kScratchReg, VSew::E64, Vlmul::m1);
2827 __ vfneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
2828 break;
2829 }
2830 case kRiscvF64x2Add: {
2831 __ VU.set(kScratchReg, E64, m1);
2832 __ vfadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2833 i.InputSimd128Register(1));
2834 break;
2835 }
2836 case kRiscvF64x2Sub: {
2837 __ VU.set(kScratchReg, E64, m1);
2838 __ vfsub_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2839 i.InputSimd128Register(1));
2840 break;
2841 }
2842 case kRiscvF64x2Ceil: {
2843 __ Ceil_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
2844 kScratchReg, kSimd128ScratchReg);
2845 break;
2846 }
2847 case kRiscvF64x2Floor: {
2848 __ Floor_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
2849 kScratchReg, kSimd128ScratchReg);
2850 break;
2851 }
2852 case kRiscvF64x2Ne: {
2853 __ VU.set(kScratchReg, E64, m1);
2854 __ vmfne_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
2855 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
2856 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
2857 break;
2858 }
2859 case kRiscvF64x2Eq: {
2860 __ VU.set(kScratchReg, E64, m1);
2861 __ vmfeq_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
2862 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
2863 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
2864 break;
2865 }
2866 case kRiscvF64x2ReplaceLane: {
2867 __ VU.set(kScratchReg, E64, m1);
2868 __ li(kScratchReg, 0x1 << i.InputInt8(1));
2869 __ vmv_sx(v0, kScratchReg);
      __ fmv_x_d(kScratchReg, i.InputDoubleRegister(2));
2871 __ vmerge_vx(i.OutputSimd128Register(), kScratchReg,
2872 i.InputSimd128Register(0));
2873 break;
2874 }
2875 case kRiscvF64x2Lt: {
2876 __ VU.set(kScratchReg, E64, m1);
2877 __ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
2878 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
2879 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
2880 break;
2881 }
2882 case kRiscvF64x2Le: {
2883 __ VU.set(kScratchReg, E64, m1);
2884 __ vmfle_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
2885 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
2886 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
2887 break;
2888 }
2889 case kRiscvF64x2Pmax: {
2890 __ VU.set(kScratchReg, E64, m1);
2891 __ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
2892 __ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
2893 i.InputSimd128Register(0));
2894 break;
2895 }
2896 case kRiscvF64x2Pmin: {
2897 __ VU.set(kScratchReg, E64, m1);
2898 __ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
2899 __ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
2900 i.InputSimd128Register(0));
2901 break;
2902 }
2903 case kRiscvF64x2Min: {
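      // NaN-propagating min: v0 marks lanes where both inputs are ordered;
      // the result is pre-filled with the canonical quiet NaN and vfmin only
      // overwrites the ordered lanes.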
2904 __ VU.set(kScratchReg, E64, m1);
2905 const int64_t kNaN = 0x7ff8000000000000L;
2906 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
2907 __ vmfeq_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
2908 i.InputSimd128Register(1));
2909 __ vand_vv(v0, v0, kSimd128ScratchReg);
2910 __ li(kScratchReg, kNaN);
2911 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
2912 __ vfmin_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
2913 i.InputSimd128Register(0), Mask);
2914 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
2915 break;
2916 }
2917 case kRiscvF64x2Max: {
2918 __ VU.set(kScratchReg, E64, m1);
2919 const int64_t kNaN = 0x7ff8000000000000L;
2920 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
2921 __ vmfeq_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
2922 i.InputSimd128Register(1));
2923 __ vand_vv(v0, v0, kSimd128ScratchReg);
2924 __ li(kScratchReg, kNaN);
2925 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
2926 __ vfmax_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
2927 i.InputSimd128Register(0), Mask);
2928 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
2929 break;
2930 }
2931 case kRiscvF64x2Div: {
2932 __ VU.set(kScratchReg, E64, m1);
2933 __ vfdiv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2934 i.InputSimd128Register(1));
2935 break;
2936 }
2937 case kRiscvF64x2Mul: {
2938 __ VU.set(kScratchReg, E64, m1);
2939 __ VU.set(RoundingMode::RTZ);
2940 __ vfmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
2941 i.InputSimd128Register(1));
2942 break;
2943 }
2944 case kRiscvF64x2ExtractLane: {
2945 __ VU.set(kScratchReg, E64, m1);
2946 if (is_uint5(i.InputInt8(1))) {
2947 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
2948 i.InputInt8(1));
2949 } else {
2950 __ li(kScratchReg, i.InputInt8(1));
2951 __ vslidedown_vx(kSimd128ScratchReg, i.InputSimd128Register(0),
2952 kScratchReg);
2953 }
2954 __ vfmv_fs(i.OutputDoubleRegister(), kSimd128ScratchReg);
2955 break;
2956 }
2957 case kRiscvF64x2PromoteLowF32x4: {
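      // Widening conversions may not overlap source and destination register
      // groups, so go through a scratch register when dst aliases the input.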
2958 __ VU.set(kScratchReg, E32, mf2);
2959 if (i.OutputSimd128Register() != i.InputSimd128Register(0)) {
2960 __ vfwcvt_f_f_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
2961 } else {
2962 __ vfwcvt_f_f_v(kSimd128ScratchReg3, i.InputSimd128Register(0));
2963 __ VU.set(kScratchReg, E64, m1);
2964 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg3);
2965 }
2966 break;
2967 }
2968 case kRiscvF64x2ConvertLowI32x4S: {
2969 __ VU.set(kScratchReg, E32, mf2);
2970 if (i.OutputSimd128Register() != i.InputSimd128Register(0)) {
2971 __ vfwcvt_f_x_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
2972 } else {
2973 __ vfwcvt_f_x_v(kSimd128ScratchReg3, i.InputSimd128Register(0));
2974 __ VU.set(kScratchReg, E64, m1);
2975 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg3);
2976 }
2977 break;
2978 }
2979 case kRiscvF64x2ConvertLowI32x4U: {
2980 __ VU.set(kScratchReg, E32, mf2);
2981 if (i.OutputSimd128Register() != i.InputSimd128Register(0)) {
2982 __ vfwcvt_f_xu_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
2983 } else {
2984 __ vfwcvt_f_xu_v(kSimd128ScratchReg3, i.InputSimd128Register(0));
2985 __ VU.set(kScratchReg, E64, m1);
2986 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg3);
2987 }
2988 break;
2989 }
2990 case kRiscvF64x2Qfma: {
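      // vfmadd is destructive (vd = vd * vs1 + vs2), so compute the fused
      // multiply-add in an input register and copy the result to the output.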
2991 __ VU.set(kScratchReg, E64, m1);
2992 __ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
2993 i.InputSimd128Register(0));
2994 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
2995 break;
2996 }
2997 case kRiscvF64x2Qfms: {
2998 __ VU.set(kScratchReg, E64, m1);
2999 __ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
3000 i.InputSimd128Register(0));
3001 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
3002 break;
3003 }
3004 case kRiscvF32x4ExtractLane: {
3005 __ VU.set(kScratchReg, E32, m1);
3006 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
3007 i.InputInt8(1));
3008 __ vfmv_fs(i.OutputDoubleRegister(), kSimd128ScratchReg);
3009 break;
3010 }
3011 case kRiscvF32x4Trunc: {
3012 __ Trunc_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
3013 kScratchReg, kSimd128ScratchReg);
3014 break;
3015 }
3016 case kRiscvF32x4NearestInt: {
3017 __ Round_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
3018 kScratchReg, kSimd128ScratchReg);
3019 break;
3020 }
3021 case kRiscvF32x4DemoteF64x2Zero: {
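      // Demote the two doubles into the low two float lanes, then zero the
      // upper two lanes by merging in zero_reg under mask 0b1100.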
3022 __ VU.set(kScratchReg, E32, mf2);
3023 __ vfncvt_f_f_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
3024 __ VU.set(kScratchReg, E32, m1);
3025 __ vmv_vi(v0, 12);
3026 __ vmerge_vx(i.OutputSimd128Register(), zero_reg,
3027 i.OutputSimd128Register());
3028 break;
3029 }
3030 case kRiscvF32x4Neg: {
3031 __ VU.set(kScratchReg, VSew::E32, Vlmul::m1);
3032 __ vfneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
3033 break;
3034 }
3035 case kRiscvF32x4Abs: {
3036 __ VU.set(kScratchReg, VSew::E32, Vlmul::m1);
3037 __ vfabs_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
3038 break;
3039 }
3040 case kRiscvF32x4Splat: {
3041 (__ VU).set(kScratchReg, E32, m1);
3042 __ fmv_x_w(kScratchReg, i.InputSingleRegister(0));
3043 __ vmv_vx(i.OutputSimd128Register(), kScratchReg);
3044 break;
3045 }
3046 case kRiscvF32x4Add: {
3047 __ VU.set(kScratchReg, E32, m1);
3048 __ vfadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3049 i.InputSimd128Register(1));
3050 break;
3051 }
3052 case kRiscvF32x4Sub: {
3053 __ VU.set(kScratchReg, E32, m1);
3054 __ vfsub_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3055 i.InputSimd128Register(1));
3056 break;
3057 }
3058 case kRiscvF32x4Ceil: {
3059 __ Ceil_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
3060 kScratchReg, kSimd128ScratchReg);
3061 break;
3062 }
3063 case kRiscvF32x4Floor: {
3064 __ Floor_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
3065 kScratchReg, kSimd128ScratchReg);
3066 break;
3067 }
3068 case kRiscvF32x4UConvertI32x4: {
3069 __ VU.set(kScratchReg, E32, m1);
3070 __ VU.set(RoundingMode::RTZ);
3071 __ vfcvt_f_xu_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
3072 break;
3073 }
3074 case kRiscvF32x4SConvertI32x4: {
3075 __ VU.set(kScratchReg, E32, m1);
3076 __ VU.set(RoundingMode::RTZ);
3077 __ vfcvt_f_x_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
3078 break;
3079 }
3080 case kRiscvF32x4Div: {
3081 __ VU.set(kScratchReg, E32, m1);
3082 __ VU.set(RoundingMode::RTZ);
3083 __ vfdiv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3084 i.InputSimd128Register(1));
3085 break;
3086 }
3087 case kRiscvF32x4Mul: {
3088 __ VU.set(kScratchReg, E32, m1);
3089 __ VU.set(RoundingMode::RTZ);
3090 __ vfmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
3091 i.InputSimd128Register(0));
3092 break;
3093 }
3094 case kRiscvF32x4Eq: {
3095 __ VU.set(kScratchReg, E32, m1);
3096 __ vmfeq_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
3097 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3098 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
3099 break;
3100 }
3101 case kRiscvF32x4Ne: {
3102 __ VU.set(kScratchReg, E32, m1);
3103 __ vmfne_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
3104 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3105 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
3106 break;
3107 }
3108 case kRiscvF32x4ReplaceLane: {
3109 __ VU.set(kScratchReg, E32, m1);
3110 __ li(kScratchReg, 0x1 << i.InputInt8(1));
3111 __ vmv_sx(v0, kScratchReg);
3112 __ fmv_x_w(kScratchReg, i.InputSingleRegister(2));
3113 __ vmerge_vx(i.OutputSimd128Register(), kScratchReg,
3114 i.InputSimd128Register(0));
3115 break;
3116 }
3117 case kRiscvF32x4Lt: {
3118 __ VU.set(kScratchReg, E32, m1);
3119 __ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
3120 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3121 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
3122 break;
3123 }
3124 case kRiscvF32x4Le: {
3125 __ VU.set(kScratchReg, E32, m1);
3126 __ vmfle_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
3127 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3128 __ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
3129 break;
3130 }
3131 case kRiscvF32x4Pmax: {
3132 __ VU.set(kScratchReg, E32, m1);
3133 __ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
3134 __ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
3135 i.InputSimd128Register(0));
3136 break;
3137 }
3138 case kRiscvF32x4Pmin: {
3139 __ VU.set(kScratchReg, E32, m1);
3140 __ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
3141 __ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
3142 i.InputSimd128Register(0));
3143 break;
3144 }
3145 case kRiscvF32x4Sqrt: {
3146 __ VU.set(kScratchReg, E32, m1);
3147 __ vfsqrt_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
3148 break;
3149 }
3150 case kRiscvF32x4Max: {
3151 __ VU.set(kScratchReg, E32, m1);
3152 const int32_t kNaN = 0x7FC00000;
3153 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
3154 __ vmfeq_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
3155 i.InputSimd128Register(1));
3156 __ vand_vv(v0, v0, kSimd128ScratchReg);
3157 __ li(kScratchReg, kNaN);
3158 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
3159 __ vfmax_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
3160 i.InputSimd128Register(0), Mask);
3161 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
3162 break;
3163 }
3164 case kRiscvF32x4Min: {
3165 __ VU.set(kScratchReg, E32, m1);
3166 const int32_t kNaN = 0x7FC00000;
3167 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
3168 __ vmfeq_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
3169 i.InputSimd128Register(1));
3170 __ vand_vv(v0, v0, kSimd128ScratchReg);
3171 __ li(kScratchReg, kNaN);
3172 __ vmv_vx(kSimd128ScratchReg, kScratchReg);
3173 __ vfmin_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
3174 i.InputSimd128Register(0), Mask);
3175 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
3176 break;
3177 }
3178 case kRiscvF32x4RecipApprox: {
3179 __ VU.set(kScratchReg, E32, m1);
3180 __ vfrec7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
3181 break;
3182 }
3183 case kRiscvF32x4RecipSqrtApprox: {
3184 __ VU.set(kScratchReg, E32, m1);
3185 __ vfrsqrt7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
3186 break;
3187 }
3188 case kRiscvF32x4Qfma: {
3189 __ VU.set(kScratchReg, E32, m1);
3190 __ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
3191 i.InputSimd128Register(0));
3192 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
3193 break;
3194 }
3195 case kRiscvF32x4Qfms: {
3196 __ VU.set(kScratchReg, E32, m1);
3197 __ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
3198 i.InputSimd128Register(0));
3199 __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
3200 break;
3201 }
3202 case kRiscvI64x2SConvertI32x4Low: {
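      // Sign-extend the low two 32-bit lanes to 64 bits. The copy through
      // kSimd128ScratchReg presumably avoids the widening instruction's
      // source/destination overlap restriction when input and output alias.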
3203 __ VU.set(kScratchReg, E64, m1);
3204 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3205 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3206
3207 break;
3208 }
3209 case kRiscvI64x2SConvertI32x4High: {
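      // Slide the upper half of the vector down to element 0 at the narrow
      // element width, then widen it; the other *High conversions below use
      // the same pattern.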
3210 __ VU.set(kScratchReg, E32, m1);
3211 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 2);
3212 __ VU.set(kScratchReg, E64, m1);
3213 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3214 break;
3215 }
3216 case kRiscvI64x2UConvertI32x4Low: {
3217 __ VU.set(kScratchReg, E64, m1);
3218 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3219 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3220 break;
3221 }
3222 case kRiscvI64x2UConvertI32x4High: {
3223 __ VU.set(kScratchReg, E32, m1);
3224 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 2);
3225 __ VU.set(kScratchReg, E64, m1);
3226 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3227 break;
3228 }
3229 case kRiscvI32x4SConvertI16x8Low: {
3230 __ VU.set(kScratchReg, E32, m1);
3231 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3232 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3233 break;
3234 }
3235 case kRiscvI32x4SConvertI16x8High: {
3236 __ VU.set(kScratchReg, E16, m1);
3237 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 4);
3238 __ VU.set(kScratchReg, E32, m1);
3239 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3240 break;
3241 }
3242 case kRiscvI32x4SConvertF32x4: {
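      // Truncating (RTZ) float-to-int conversion. NaN lanes are masked out by
      // the self-compare in v0 and stay zero, matching Wasm's saturating
      // truncation semantics; out-of-range lanes saturate in vfcvt.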
3243 __ VU.set(kScratchReg, E32, m1);
3244 __ VU.set(RoundingMode::RTZ);
3245 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
3246 if (i.OutputSimd128Register() != i.InputSimd128Register(0)) {
3247 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3248 __ vfcvt_x_f_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
3249 Mask);
3250 } else {
3251 __ vmv_vx(kSimd128ScratchReg, zero_reg);
3252 __ vfcvt_x_f_v(kSimd128ScratchReg, i.InputSimd128Register(0), Mask);
3253 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
3254 }
3255 break;
3256 }
3257 case kRiscvI32x4UConvertF32x4: {
3258 __ VU.set(kScratchReg, E32, m1);
3259 __ VU.set(RoundingMode::RTZ);
3260 __ vmfeq_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(0));
3261 if (i.OutputSimd128Register() != i.InputSimd128Register(0)) {
3262 __ vmv_vx(i.OutputSimd128Register(), zero_reg);
3263 __ vfcvt_xu_f_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
3264 Mask);
3265 } else {
3266 __ vmv_vx(kSimd128ScratchReg, zero_reg);
3267 __ vfcvt_xu_f_v(kSimd128ScratchReg, i.InputSimd128Register(0), Mask);
3268 __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
3269 }
3270 break;
3271 }
3272 case kRiscvI32x4UConvertI16x8Low: {
3273 __ VU.set(kScratchReg, E32, m1);
3274 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3275 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3276 break;
3277 }
3278 case kRiscvI32x4UConvertI16x8High: {
3279 __ VU.set(kScratchReg, E16, m1);
3280 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 4);
3281 __ VU.set(kScratchReg, E32, m1);
3282 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3283 break;
3284 }
3285 case kRiscvI16x8SConvertI8x16Low: {
3286 __ VU.set(kScratchReg, E16, m1);
3287 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3288 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3289 break;
3290 }
3291 case kRiscvI16x8SConvertI8x16High: {
3292 __ VU.set(kScratchReg, E8, m1);
3293 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 8);
3294 __ VU.set(kScratchReg, E16, m1);
3295 __ vsext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3296 break;
3297 }
3298 case kRiscvI16x8UConvertI8x16Low: {
3299 __ VU.set(kScratchReg, E16, m1);
3300 __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
3301 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3302 break;
3303 }
3304 case kRiscvI16x8UConvertI8x16High: {
3305 __ VU.set(kScratchReg, E8, m1);
3306 __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), 8);
3307 __ VU.set(kScratchReg, E16, m1);
3308 __ vzext_vf2(i.OutputSimd128Register(), kSimd128ScratchReg);
3309 break;
3310 }
3311 case kRiscvI8x16SConvertI16x8: {
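      // Copy both inputs into the adjacent pair v26:v27 so they form one
      // wide (EMUL=2) source group, then narrow with signed saturation
      // (vnclip, shift amount 0) into 16 bytes.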
3312 __ VU.set(kScratchReg, E16, m1);
3313 __ vmv_vv(v26, i.InputSimd128Register(0));
3314 __ vmv_vv(v27, i.InputSimd128Register(1));
3315 __ VU.set(kScratchReg, E8, m1);
3316 __ VU.set(RoundingMode::RNE);
3317 __ vnclip_vi(i.OutputSimd128Register(), v26, 0);
3318 break;
3319 }
3320 case kRiscvI8x16UConvertI16x8: {
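      // As above, but clamp negative lanes to zero (vmax with zero across the
      // whole v26:v27 group) before the unsigned saturating narrow.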
3321 __ VU.set(kScratchReg, E16, m1);
3322 __ vmv_vv(v26, i.InputSimd128Register(0));
3323 __ vmv_vv(v27, i.InputSimd128Register(1));
3324 __ VU.set(kScratchReg, E16, m2);
3325 __ vmax_vx(v26, v26, zero_reg);
3326 __ VU.set(kScratchReg, E8, m1);
3327 __ VU.set(RoundingMode::RNE);
3328 __ vnclipu_vi(i.OutputSimd128Register(), v26, 0);
3329 break;
3330 }
3331 case kRiscvI16x8SConvertI32x4: {
3332 __ VU.set(kScratchReg, E32, m1);
3333 __ vmv_vv(v26, i.InputSimd128Register(0));
3334 __ vmv_vv(v27, i.InputSimd128Register(1));
3335 __ VU.set(kScratchReg, E16, m1);
3336 __ VU.set(RoundingMode::RNE);
3337 __ vnclip_vi(i.OutputSimd128Register(), v26, 0);
3338 break;
3339 }
3340 case kRiscvI16x8UConvertI32x4: {
3341 __ VU.set(kScratchReg, E32, m1);
3342 __ vmv_vv(v26, i.InputSimd128Register(0));
3343 __ vmv_vv(v27, i.InputSimd128Register(1));
3344 __ VU.set(kScratchReg, E32, m2);
3345 __ vmax_vx(v26, v26, zero_reg);
3346 __ VU.set(kScratchReg, E16, m1);
3347 __ VU.set(RoundingMode::RNE);
3348 __ vnclipu_vi(i.OutputSimd128Register(), v26, 0);
3349 break;
3350 }
3351 ASSEMBLE_RVV_UNOP_INTEGER_VV(Neg, vneg_vv)
3352 ASSEMBLE_RVV_BINOP_INTEGER(MaxU, vmaxu_vv)
3353 ASSEMBLE_RVV_BINOP_INTEGER(MaxS, vmax_vv)
3354 ASSEMBLE_RVV_BINOP_INTEGER(MinU, vminu_vv)
3355 ASSEMBLE_RVV_BINOP_INTEGER(MinS, vmin_vv)
3356 ASSEMBLE_RVV_UNOP_INTEGER_VR(Splat, vmv_vx)
3357 ASSEMBLE_RVV_BINOP_INTEGER(Add, vadd_vv)
3358 ASSEMBLE_RVV_BINOP_INTEGER(Sub, vsub_vv)
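    // For the generic vector opcodes below, instruction inputs 2 and 3 carry
    // the element width (SEW) and group multiplier (LMUL) used to configure
    // the vector unit.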
3359 case kRiscvVwadd: {
3360 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3361 __ vwadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3362 i.InputSimd128Register(1));
3363 break;
3364 }
3365 case kRiscvVwaddu: {
3366 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3367 __ vwaddu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3368 i.InputSimd128Register(1));
3369 break;
3370 }
3371 case kRiscvVwmul: {
3372 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3373 __ vwmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3374 i.InputSimd128Register(1));
3375 break;
3376 }
3377 case kRiscvVwmulu: {
3378 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3379 __ vwmulu_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3380 i.InputSimd128Register(1));
3381 break;
3382 }
3383 case kRiscvVmvSx: {
3384 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3385 if (instr->InputAt(0)->IsRegister()) {
3386 __ vmv_sx(i.OutputSimd128Register(), i.InputRegister(0));
3387 } else {
3388 DCHECK(instr->InputAt(0)->IsImmediate());
3389 __ li(kScratchReg, i.InputInt64(0));
3390 __ vmv_sx(i.OutputSimd128Register(), kScratchReg);
3391 }
3392 break;
3393 }
3394 case kRiscvVcompress: {
3395 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3396 if (instr->InputAt(1)->IsSimd128Register()) {
3397 __ vcompress_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3398 i.InputSimd128Register(1));
3399 } else {
3400 DCHECK(instr->InputAt(1)->IsImmediate());
3401 __ li(kScratchReg, i.InputInt64(1));
3402 __ vmv_sx(v0, kScratchReg);
3403 __ vcompress_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3404 v0);
3405 }
3406 break;
3407 }
3408 case kRiscvVaddVv: {
3409 __ VU.set(kScratchReg, i.InputInt8(2), i.InputInt8(3));
3410 __ vadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
3411 i.InputSimd128Register(1));
3412 break;
3413 }
3414 default:
3415 #ifdef DEBUG
3416 switch (arch_opcode) {
3417 #define Print(name) \
3418 case k##name: \
3419 printf("k%s", #name); \
3420 break;
3421 TARGET_ARCH_OPCODE_LIST(Print);
3422 #undef Print
3423 default:
3424 break;
3425 }
3426 #endif
3427 UNIMPLEMENTED();
3428 }
3429 return kSuccess;
3430 }
3431
3432 #define UNSUPPORTED_COND(opcode, condition) \
3433 StdoutStream{} << "Unsupported " << #opcode << " condition: \"" << condition \
3434 << "\""; \
3435 UNIMPLEMENTED();
3436
3437 bool IsIncludeEqual(Condition cc) {
3438 switch (cc) {
3439 case equal:
3440 case greater_equal:
3441 case less_equal:
3442 case Uless_equal:
3443 case Ugreater_equal:
3444 return true;
3445 default:
3446 return false;
3447 }
3448 }
3449
3450 void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
3451 Instruction* instr, FlagsCondition condition,
3452 Label* tlabel, Label* flabel, bool fallthru) {
3453 #undef __
3454 #define __ tasm->
3455 RiscvOperandConverter i(gen, instr);
3456
3457 Condition cc = kNoCondition;
3458   // RISC-V does not have condition code flags, so compare and branch are
3459   // implemented differently from other architectures. The compare operations
3460   // emit riscv64 pseudo-instructions, which are handled here by branch
3461   // instructions that do the actual comparison. It is essential that the
3462   // input registers to the compare pseudo-op are not modified before this
3463   // branch op, as they are tested here.
3464
3465 if (instr->arch_opcode() == kRiscvTst) {
3466 cc = FlagsConditionToConditionTst(condition);
3467 __ Branch(tlabel, cc, kScratchReg, Operand(zero_reg));
3468 } else if (instr->arch_opcode() == kRiscvAdd64 ||
3469 instr->arch_opcode() == kRiscvSub64) {
3470 cc = FlagsConditionToConditionOvf(condition);
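    // Overflow is detected by checking whether the 64-bit result still
    // sign-extends from bit 31: (result >> 32) and (result >> 31) differ
    // exactly when it does not.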
3471 __ Sra64(kScratchReg, i.OutputRegister(), 32);
3472 __ Sra64(kScratchReg2, i.OutputRegister(), 31);
3473 __ Branch(tlabel, cc, kScratchReg2, Operand(kScratchReg));
3474 } else if (instr->arch_opcode() == kRiscvAddOvf64 ||
3475 instr->arch_opcode() == kRiscvSubOvf64) {
3476 switch (condition) {
3477 // Overflow occurs if overflow register is negative
3478 case kOverflow:
3479 __ Branch(tlabel, lt, kScratchReg, Operand(zero_reg));
3480 break;
3481 case kNotOverflow:
3482 __ Branch(tlabel, ge, kScratchReg, Operand(zero_reg));
3483 break;
3484 default:
3485 UNSUPPORTED_COND(instr->arch_opcode(), condition);
3486 }
3487 } else if (instr->arch_opcode() == kRiscvMulOvf32) {
3488 // Overflow occurs if overflow register is not zero
3489 switch (condition) {
3490 case kOverflow:
3491 __ Branch(tlabel, ne, kScratchReg, Operand(zero_reg));
3492 break;
3493 case kNotOverflow:
3494 __ Branch(tlabel, eq, kScratchReg, Operand(zero_reg));
3495 break;
3496 default:
3497 UNSUPPORTED_COND(kRiscvMulOvf32, condition);
3498 }
3499 } else if (instr->arch_opcode() == kRiscvCmp) {
3500 cc = FlagsConditionToConditionCmp(condition);
3501 __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1));
3502 } else if (instr->arch_opcode() == kRiscvCmpZero) {
3503 cc = FlagsConditionToConditionCmp(condition);
3504     if (i.InputOrZeroRegister(0) == zero_reg && IsIncludeEqual(cc)) {
3505 __ Branch(tlabel);
3506 } else if (i.InputOrZeroRegister(0) != zero_reg) {
3507 __ Branch(tlabel, cc, i.InputRegister(0), Operand(zero_reg));
3508 }
3509 } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) {
3510 cc = FlagsConditionToConditionCmp(condition);
3511 Register lhs_register = sp;
3512 uint32_t offset;
3513 if (gen->ShouldApplyOffsetToStackCheck(instr, &offset)) {
3514 lhs_register = i.TempRegister(0);
3515 __ Sub64(lhs_register, sp, offset);
3516 }
3517 __ Branch(tlabel, cc, lhs_register, Operand(i.InputRegister(0)));
3518 } else if (instr->arch_opcode() == kRiscvCmpS ||
3519 instr->arch_opcode() == kRiscvCmpD) {
3520 bool predicate;
3521 FlagsConditionToConditionCmpFPU(&predicate, condition);
3522 // floating-point compare result is set in kScratchReg
3523 if (predicate) {
3524 __ BranchTrueF(kScratchReg, tlabel);
3525 } else {
3526 __ BranchFalseF(kScratchReg, tlabel);
3527 }
3528 } else {
3529 PrintF("AssembleArchBranch Unimplemented arch_opcode: %d\n",
3530 instr->arch_opcode());
3531 UNIMPLEMENTED();
3532 }
3533 if (!fallthru) __ Branch(flabel); // no fallthru to flabel.
3534 #undef __
3535 #define __ tasm()->
3536 }
3537
3538 // Assembles branches after an instruction.
3539 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3540 Label* tlabel = branch->true_label;
3541 Label* flabel = branch->false_label;
3542
3543 AssembleBranchToLabels(this, tasm(), instr, branch->condition, tlabel, flabel,
3544 branch->fallthru);
3545 }
3546
3547 #undef UNSUPPORTED_COND
3548
3549 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3550 BranchInfo* branch) {
3551 AssembleArchBranch(instr, branch);
3552 }
3553
3554 void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
3555 RpoNumber target) {
3556 __ Branch(GetLabel(target));
3557 }
3558
3559 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3560 FlagsCondition condition) {
3561 class OutOfLineTrap final : public OutOfLineCode {
3562 public:
3563 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3564 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3565 void Generate() final {
3566 RiscvOperandConverter i(gen_, instr_);
3567 TrapId trap_id =
3568 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3569 GenerateCallToTrap(trap_id);
3570 }
3571
3572 private:
3573 void GenerateCallToTrap(TrapId trap_id) {
3574 if (trap_id == TrapId::kInvalid) {
3575 // We cannot test calls to the runtime in cctest/test-run-wasm.
3576 // Therefore we emit a call to C here instead of a call to the runtime.
3577 // We use the context register as the scratch register, because we do
3578 // not have a context here.
3579 __ PrepareCallCFunction(0, 0, cp);
3580 __ CallCFunction(
3581 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3582 __ LeaveFrame(StackFrame::WASM);
3583 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
3584 int pop_count = static_cast<int>(call_descriptor->ParameterSlotCount());
3585 pop_count += (pop_count & 1); // align
3586 __ Drop(pop_count);
3587 __ Ret();
3588 } else {
3589 gen_->AssembleSourcePosition(instr_);
3590 // A direct call to a wasm runtime stub defined in this module.
3591 // Just encode the stub index. This will be patched when the code
3592 // is added to the native module and copied into wasm code space.
3593 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3594 ReferenceMap* reference_map =
3595 gen_->zone()->New<ReferenceMap>(gen_->zone());
3596 gen_->RecordSafepoint(reference_map);
3597 if (FLAG_debug_code) {
3598 __ stop();
3599 }
3600 }
3601 }
3602 Instruction* instr_;
3603 CodeGenerator* gen_;
3604 };
3605 auto ool = zone()->New<OutOfLineTrap>(this, instr);
3606 Label* tlabel = ool->entry();
3607 AssembleBranchToLabels(this, tasm(), instr, condition, tlabel, nullptr, true);
3608 }
3609
3610 // Assembles boolean materializations after an instruction.
3611 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3612 FlagsCondition condition) {
3613 RiscvOperandConverter i(this, instr);
3614
3615 // Materialize a full 32-bit 1 or 0 value. The result register is always the
3616 // last output of the instruction.
3617 DCHECK_NE(0u, instr->OutputCount());
3618 Register result = i.OutputRegister(instr->OutputCount() - 1);
3619 Condition cc = kNoCondition;
3620 // RISC-V does not have condition code flags, so compare and branch are
3621   // implemented differently from other architectures. The compare operations
3622 // emit riscv64 pseudo-instructions, which are checked and handled here.
3623
3624 if (instr->arch_opcode() == kRiscvTst) {
3625 cc = FlagsConditionToConditionTst(condition);
3626 if (cc == eq) {
3627 __ Sltu(result, kScratchReg, 1);
3628 } else {
3629 __ Sltu(result, zero_reg, kScratchReg);
3630 }
3631 return;
3632 } else if (instr->arch_opcode() == kRiscvAdd64 ||
3633 instr->arch_opcode() == kRiscvSub64) {
3634 cc = FlagsConditionToConditionOvf(condition);
3635 // Check for overflow creates 1 or 0 for result.
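    // The sign bit of the 64-bit result (>> 63) and bit 31 of its low 32 bits
    // (Srl32 by 31) differ exactly when the 32-bit result overflowed; XOR
    // materializes that as 0 or 1.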
3636 __ Srl64(kScratchReg, i.OutputRegister(), 63);
3637 __ Srl32(kScratchReg2, i.OutputRegister(), 31);
3638 __ Xor(result, kScratchReg, kScratchReg2);
3639 if (cc == eq) // Toggle result for not overflow.
3640 __ Xor(result, result, 1);
3641 return;
3642 } else if (instr->arch_opcode() == kRiscvAddOvf64 ||
3643 instr->arch_opcode() == kRiscvSubOvf64) {
3644 // Overflow occurs if overflow register is negative
3645 __ Slt(result, kScratchReg, zero_reg);
3646 } else if (instr->arch_opcode() == kRiscvMulOvf32) {
3647 // Overflow occurs if overflow register is not zero
3648 __ Sgtu(result, kScratchReg, zero_reg);
3649 } else if (instr->arch_opcode() == kRiscvCmp) {
3650 cc = FlagsConditionToConditionCmp(condition);
3651 switch (cc) {
3652 case eq:
3653 case ne: {
3654 Register left = i.InputOrZeroRegister(0);
3655 Operand right = i.InputOperand(1);
3656 if (instr->InputAt(1)->IsImmediate()) {
3657 if (is_int12(-right.immediate())) {
3658 if (right.immediate() == 0) {
3659 if (cc == eq) {
3660 __ Sltu(result, left, 1);
3661 } else {
3662 __ Sltu(result, zero_reg, left);
3663 }
3664 } else {
3665 __ Add64(result, left, Operand(-right.immediate()));
3666 if (cc == eq) {
3667 __ Sltu(result, result, 1);
3668 } else {
3669 __ Sltu(result, zero_reg, result);
3670 }
3671 }
3672 } else {
3673 if (is_uint12(right.immediate())) {
3674 __ Xor(result, left, right);
3675 } else {
3676 __ li(kScratchReg, right);
3677 __ Xor(result, left, kScratchReg);
3678 }
3679 if (cc == eq) {
3680 __ Sltu(result, result, 1);
3681 } else {
3682 __ Sltu(result, zero_reg, result);
3683 }
3684 }
3685 } else {
3686 __ Xor(result, left, right);
3687 if (cc == eq) {
3688 __ Sltu(result, result, 1);
3689 } else {
3690 __ Sltu(result, zero_reg, result);
3691 }
3692 }
3693 } break;
3694 case lt:
3695 case ge: {
3696 Register left = i.InputOrZeroRegister(0);
3697 Operand right = i.InputOperand(1);
3698 __ Slt(result, left, right);
3699 if (cc == ge) {
3700 __ Xor(result, result, 1);
3701 }
3702 } break;
3703 case gt:
3704 case le: {
3705 Register left = i.InputOrZeroRegister(1);
3706 Operand right = i.InputOperand(0);
3707 __ Slt(result, left, right);
3708 if (cc == le) {
3709 __ Xor(result, result, 1);
3710 }
3711 } break;
3712 case Uless:
3713 case Ugreater_equal: {
3714 Register left = i.InputOrZeroRegister(0);
3715 Operand right = i.InputOperand(1);
3716 __ Sltu(result, left, right);
3717 if (cc == Ugreater_equal) {
3718 __ Xor(result, result, 1);
3719 }
3720 } break;
3721 case Ugreater:
3722 case Uless_equal: {
3723 Register left = i.InputRegister(1);
3724 Operand right = i.InputOperand(0);
3725 __ Sltu(result, left, right);
3726 if (cc == Uless_equal) {
3727 __ Xor(result, result, 1);
3728 }
3729 } break;
3730 default:
3731 UNREACHABLE();
3732 }
3733 return;
3734 } else if (instr->arch_opcode() == kRiscvCmpZero) {
3735 cc = FlagsConditionToConditionCmp(condition);
3736 switch (cc) {
3737 case eq: {
3738 Register left = i.InputOrZeroRegister(0);
3739 __ Sltu(result, left, 1);
3740 break;
3741 }
3742 case ne: {
3743 Register left = i.InputOrZeroRegister(0);
3744 __ Sltu(result, zero_reg, left);
3745 break;
3746 }
3747 case lt:
3748 case ge: {
3749 Register left = i.InputOrZeroRegister(0);
3750 Operand right = Operand(zero_reg);
3751 __ Slt(result, left, right);
3752 if (cc == ge) {
3753 __ Xor(result, result, 1);
3754 }
3755 } break;
3756 case gt:
3757 case le: {
3758 Operand left = i.InputOperand(0);
3759 __ Slt(result, zero_reg, left);
3760 if (cc == le) {
3761 __ Xor(result, result, 1);
3762 }
3763 } break;
3764 case Uless:
3765 case Ugreater_equal: {
3766 Register left = i.InputOrZeroRegister(0);
3767 Operand right = Operand(zero_reg);
3768 __ Sltu(result, left, right);
3769 if (cc == Ugreater_equal) {
3770 __ Xor(result, result, 1);
3771 }
3772 } break;
3773 case Ugreater:
3774 case Uless_equal: {
3775 Register left = zero_reg;
3776 Operand right = i.InputOperand(0);
3777 __ Sltu(result, left, right);
3778 if (cc == Uless_equal) {
3779 __ Xor(result, result, 1);
3780 }
3781 } break;
3782 default:
3783 UNREACHABLE();
3784 }
3785 return;
3786 } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) {
3787 cc = FlagsConditionToConditionCmp(condition);
3788 Register lhs_register = sp;
3789 uint32_t offset;
3790 if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
3791 lhs_register = i.TempRegister(0);
3792 __ Sub64(lhs_register, sp, offset);
3793 }
3794 __ Sgtu(result, lhs_register, Operand(i.InputRegister(0)));
3795 return;
3796 } else if (instr->arch_opcode() == kRiscvCmpD ||
3797 instr->arch_opcode() == kRiscvCmpS) {
3798 FPURegister left = i.InputOrZeroDoubleRegister(0);
3799 FPURegister right = i.InputOrZeroDoubleRegister(1);
3800 if ((instr->arch_opcode() == kRiscvCmpD) &&
3801 (left == kDoubleRegZero || right == kDoubleRegZero) &&
3802 !__ IsDoubleZeroRegSet()) {
3803 __ LoadFPRImmediate(kDoubleRegZero, 0.0);
3804 } else if ((instr->arch_opcode() == kRiscvCmpS) &&
3805 (left == kDoubleRegZero || right == kDoubleRegZero) &&
3806 !__ IsSingleZeroRegSet()) {
3807 __ LoadFPRImmediate(kDoubleRegZero, 0.0f);
3808 }
3809 bool predicate;
3810 FlagsConditionToConditionCmpFPU(&predicate, condition);
3811     // The RISC-V compare pseudo-instruction leaves 0 or 1 in kScratchReg. If
3812     // the predicate holds, use it as-is; otherwise toggle it (0 -> 1, 1 -> 0).
3813 if (predicate) {
3814 __ Move(result, kScratchReg);
3815 } else {
3816 __ Xor(result, kScratchReg, 1);
3817 }
3818 return;
3819 } else {
3820     PrintF("AssembleArchBoolean Unimplemented arch_opcode is: %d\n",
3821 instr->arch_opcode());
3822 TRACE_UNIMPL();
3823 UNIMPLEMENTED();
3824 }
3825 }
3826
3827 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3828 RiscvOperandConverter i(this, instr);
3829 Register input = i.InputRegister(0);
3830 std::vector<std::pair<int32_t, Label*>> cases;
3831 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3832 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3833 }
3834 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3835 cases.data() + cases.size());
3836 }
3837
3838 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3839 RiscvOperandConverter i(this, instr);
3840 Register input = i.InputRegister(0);
3841 size_t const case_count = instr->InputCount() - 2;
3842
3843 __ Branch(GetLabel(i.InputRpo(1)), Ugreater_equal, input,
3844 Operand(case_count));
3845 __ GenerateSwitchTable(input, case_count, [&i, this](size_t index) {
3846 return GetLabel(i.InputRpo(index + 2));
3847 });
3848 }
3849
3850 void CodeGenerator::FinishFrame(Frame* frame) {
3851 auto call_descriptor = linkage()->GetIncomingDescriptor();
3852
3853 const DoubleRegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
3854 if (!saves_fpu.is_empty()) {
3855 int count = saves_fpu.Count();
3856 DCHECK_EQ(kNumCalleeSavedFPU, count);
3857 frame->AllocateSavedCalleeRegisterSlots(count *
3858 (kDoubleSize / kSystemPointerSize));
3859 }
3860
3861 const RegList saves = call_descriptor->CalleeSavedRegisters();
3862 if (!saves.is_empty()) {
3863 int count = saves.Count();
3864 frame->AllocateSavedCalleeRegisterSlots(count);
3865 }
3866 }
3867
3868 void CodeGenerator::AssembleConstructFrame() {
3869 auto call_descriptor = linkage()->GetIncomingDescriptor();
3870
3871 if (frame_access_state()->has_frame()) {
3872 if (call_descriptor->IsCFunctionCall()) {
3873 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
3874 __ StubPrologue(StackFrame::C_WASM_ENTRY);
3875 // Reserve stack space for saving the c_entry_fp later.
3876 __ Sub64(sp, sp, Operand(kSystemPointerSize));
3877 } else {
3878 __ Push(ra, fp);
3879 __ Move(fp, sp);
3880 }
3881 } else if (call_descriptor->IsJSFunctionCall()) {
3882 __ Prologue();
3883 } else {
3884 __ StubPrologue(info()->GetOutputStackFrameType());
3885 if (call_descriptor->IsWasmFunctionCall() ||
3886 call_descriptor->IsWasmImportWrapper() ||
3887 call_descriptor->IsWasmCapiFunction()) {
3888 __ Push(kWasmInstanceRegister);
3889 }
3890 if (call_descriptor->IsWasmCapiFunction()) {
3891 // Reserve space for saving the PC later.
3892 __ Sub64(sp, sp, Operand(kSystemPointerSize));
3893 }
3894 }
3895 }
3896
3897 int required_slots =
3898 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
3899
3900 if (info()->is_osr()) {
3901 // TurboFan OSR-compiled functions cannot be entered directly.
3902 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3903
3904 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3905 // frame is still on the stack. Optimized code uses OSR values directly from
3906 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3907 // remaining stack slots.
3908 __ RecordComment("-- OSR entrypoint --");
3909 osr_pc_offset_ = __ pc_offset();
3910 required_slots -= osr_helper()->UnoptimizedFrameSlots();
3911 }
3912
3913 const RegList saves = call_descriptor->CalleeSavedRegisters();
3914 const DoubleRegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
3915
3916 if (required_slots > 0) {
3917 DCHECK(frame_access_state()->has_frame());
3918 if (info()->IsWasm() && required_slots > 128) {
3919 // For WebAssembly functions with big frames we have to do the stack
3920 // overflow check before we construct the frame. Otherwise we may not
3921 // have enough space on the stack to call the runtime for the stack
3922 // overflow.
3923 Label done;
3924
3925 // If the frame is bigger than the stack, we throw the stack overflow
3926 // exception unconditionally. Thereby we can avoid the integer overflow
3927 // check in the condition code.
3928 if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
3929 __ Ld(
3930 kScratchReg,
3931 FieldMemOperand(kWasmInstanceRegister,
3932 WasmInstanceObject::kRealStackLimitAddressOffset));
3933 __ Ld(kScratchReg, MemOperand(kScratchReg));
3934 __ Add64(kScratchReg, kScratchReg,
3935 Operand(required_slots * kSystemPointerSize));
3936 __ BranchShort(&done, uge, sp, Operand(kScratchReg));
3937 }
3938
3939 __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
3940 // We come from WebAssembly, there are no references for the GC.
3941 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3942 RecordSafepoint(reference_map);
3943 if (FLAG_debug_code) {
3944 __ stop();
3945 }
3946
3947 __ bind(&done);
3948 }
3949 }
3950
3951 const int returns = frame()->GetReturnSlotCount();
3952
3953 // Skip callee-saved and return slots, which are pushed below.
3954 required_slots -= saves.Count();
3955 required_slots -= saves_fpu.Count();
3956 required_slots -= returns;
3957 if (required_slots > 0) {
3958 __ Sub64(sp, sp, Operand(required_slots * kSystemPointerSize));
3959 }
3960
3961 if (!saves_fpu.is_empty()) {
3962 // Save callee-saved FPU registers.
3963 __ MultiPushFPU(saves_fpu);
3964 DCHECK_EQ(kNumCalleeSavedFPU, saves_fpu.Count());
3965 }
3966
3967 if (!saves.is_empty()) {
3968 // Save callee-saved registers.
3969 __ MultiPush(saves);
3970 }
3971
3972 if (returns != 0) {
3973 // Create space for returns.
3974 __ Sub64(sp, sp, Operand(returns * kSystemPointerSize));
3975 }
3976 }
3977
3978 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
3979 auto call_descriptor = linkage()->GetIncomingDescriptor();
3980
3981 const int returns = frame()->GetReturnSlotCount();
3982 if (returns != 0) {
3983 __ Add64(sp, sp, Operand(returns * kSystemPointerSize));
3984 }
3985
3986 // Restore GP registers.
3987 const RegList saves = call_descriptor->CalleeSavedRegisters();
3988 if (!saves.is_empty()) {
3989 __ MultiPop(saves);
3990 }
3991
3992 // Restore FPU registers.
3993 const DoubleRegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
3994 if (!saves_fpu.is_empty()) {
3995 __ MultiPopFPU(saves_fpu);
3996 }
3997
3998 RiscvOperandConverter g(this, nullptr);
3999
4000 const int parameter_slots =
4001 static_cast<int>(call_descriptor->ParameterSlotCount());
4002
4003   // {additional_pop_count} is only greater than zero if {parameter_slots} == 0.
4004 // Check RawMachineAssembler::PopAndReturn.
4005 if (parameter_slots != 0) {
4006 if (additional_pop_count->IsImmediate()) {
4007 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4008 } else if (FLAG_debug_code) {
4009 __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue,
4010 g.ToRegister(additional_pop_count),
4011 Operand(static_cast<int64_t>(0)));
4012 }
4013 }
4014
4015 // Functions with JS linkage have at least one parameter (the receiver).
4016 // If {parameter_slots} == 0, it means it is a builtin with
4017 // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
4018 // itself.
4019 const bool drop_jsargs = frame_access_state()->has_frame() &&
4020 call_descriptor->IsJSFunctionCall() &&
4021 parameter_slots != 0;
4022
4023 if (call_descriptor->IsCFunctionCall()) {
4024 AssembleDeconstructFrame();
4025 } else if (frame_access_state()->has_frame()) {
4026     // Canonicalize JSFunction return sites for now unless they have a variable
4027 // number of stack slot pops.
4028 if (additional_pop_count->IsImmediate() &&
4029 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4030 if (return_label_.is_bound()) {
4031 __ Branch(&return_label_);
4032 return;
4033 } else {
4034 __ bind(&return_label_);
4035 }
4036 }
4037 if (drop_jsargs) {
4038 // Get the actual argument count
4039 __ Ld(t0, MemOperand(fp, StandardFrameConstants::kArgCOffset));
4040 }
4041 AssembleDeconstructFrame();
4042 }
4043 if (drop_jsargs) {
4044 // We must pop all arguments from the stack (including the receiver). This
4045 // number of arguments is given by max(1 + argc_reg, parameter_slots).
4046 if (parameter_slots > 1) {
4047 Label done;
4048 __ li(kScratchReg, parameter_slots);
4049 __ BranchShort(&done, ge, t0, Operand(kScratchReg));
4050 __ Move(t0, kScratchReg);
4051 __ bind(&done);
4052 }
4053 __ Sll64(t0, t0, kSystemPointerSizeLog2);
4054 __ Add64(sp, sp, t0);
4055 } else if (additional_pop_count->IsImmediate()) {
4056     // It should be a kInt32 or a kInt64.
4057 DCHECK_LE(g.ToConstant(additional_pop_count).type(), Constant::kInt64);
4058 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4059 __ Drop(parameter_slots + additional_count);
4060 } else {
4061 Register pop_reg = g.ToRegister(additional_pop_count);
4062 __ Drop(parameter_slots);
4063 __ Sll64(pop_reg, pop_reg, kSystemPointerSizeLog2);
4064 __ Add64(sp, sp, pop_reg);
4065 }
4066 __ Ret();
4067 }
4068
4069 void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }
4070
4071 void CodeGenerator::PrepareForDeoptimizationExits(
4072 ZoneDeque<DeoptimizationExit*>* exits) {
4073 __ ForceConstantPoolEmissionWithoutJump();
4074 int total_size = 0;
4075 for (DeoptimizationExit* exit : deoptimization_exits_) {
4076 total_size += (exit->kind() == DeoptimizeKind::kLazy)
4077 ? Deoptimizer::kLazyDeoptExitSize
4078 : Deoptimizer::kEagerDeoptExitSize;
4079 }
4080
4081 __ CheckTrampolinePoolQuick(total_size);
4082 }
4083
4084 void CodeGenerator::AssembleMove(InstructionOperand* source,
4085 InstructionOperand* destination) {
4086 RiscvOperandConverter g(this, nullptr);
4087 // Dispatch on the source and destination operand kinds. Not all
4088 // combinations are possible.
4089 if (source->IsRegister()) {
4090 DCHECK(destination->IsRegister() || destination->IsStackSlot());
4091 Register src = g.ToRegister(source);
4092 if (destination->IsRegister()) {
4093 __ Move(g.ToRegister(destination), src);
4094 } else {
4095 __ Sd(src, g.ToMemOperand(destination));
4096 }
4097 } else if (source->IsStackSlot()) {
4098 DCHECK(destination->IsRegister() || destination->IsStackSlot());
4099 MemOperand src = g.ToMemOperand(source);
4100 if (destination->IsRegister()) {
4101 __ Ld(g.ToRegister(destination), src);
4102 } else {
4103 Register temp = kScratchReg;
4104 __ Ld(temp, src);
4105 __ Sd(temp, g.ToMemOperand(destination));
4106 }
4107 } else if (source->IsConstant()) {
4108 Constant src = g.ToConstant(source);
4109 if (destination->IsRegister() || destination->IsStackSlot()) {
4110 Register dst =
4111 destination->IsRegister() ? g.ToRegister(destination) : kScratchReg;
4112 switch (src.type()) {
4113 case Constant::kInt32:
4114 if (src.ToInt32() == 0 && destination->IsStackSlot()) {
4115 dst = zero_reg;
4116 } else {
4117 __ li(dst, Operand(src.ToInt32()));
4118 }
4119 break;
4120 case Constant::kFloat32:
4121 __ li(dst, Operand::EmbeddedNumber(src.ToFloat32()));
4122 break;
4123 case Constant::kInt64:
4124 if (RelocInfo::IsWasmReference(src.rmode())) {
4125 __ li(dst, Operand(src.ToInt64(), src.rmode()));
4126 } else {
4127 if (src.ToInt64() == 0 && destination->IsStackSlot()) {
4128 dst = zero_reg;
4129 } else {
4130 __ li(dst, Operand(src.ToInt64()));
4131 }
4132 }
4133 break;
4134 case Constant::kFloat64:
4135 __ li(dst, Operand::EmbeddedNumber(src.ToFloat64().value()));
4136 break;
4137 case Constant::kExternalReference:
4138 __ li(dst, src.ToExternalReference());
4139 break;
4140 case Constant::kDelayedStringConstant:
4141 __ li(dst, src.ToDelayedStringConstant());
4142 break;
4143 case Constant::kHeapObject: {
4144 Handle<HeapObject> src_object = src.ToHeapObject();
4145 RootIndex index;
4146 if (IsMaterializableFromRoot(src_object, &index)) {
4147 __ LoadRoot(dst, index);
4148 } else {
4149 __ li(dst, src_object);
4150 }
4151 break;
4152 }
4153 case Constant::kCompressedHeapObject: {
4154 Handle<HeapObject> src_object = src.ToHeapObject();
4155 RootIndex index;
4156 if (IsMaterializableFromRoot(src_object, &index)) {
4157 __ LoadRoot(dst, index);
4158 } else {
4159 __ li(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4160 }
4161 break;
4162 }
4163 case Constant::kRpoNumber:
4164 UNREACHABLE(); // TODO(titzer): loading RPO numbers
4165 }
4166 if (destination->IsStackSlot()) __ Sd(dst, g.ToMemOperand(destination));
4167 } else if (src.type() == Constant::kFloat32) {
4168 if (destination->IsFPStackSlot()) {
4169 MemOperand dst = g.ToMemOperand(destination);
4170 if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
4171 __ Sw(zero_reg, dst);
4172 } else {
4173 __ li(kScratchReg, Operand(bit_cast<int32_t>(src.ToFloat32())));
4174 __ Sw(kScratchReg, dst);
4175 }
4176 } else {
4177 DCHECK(destination->IsFPRegister());
4178 FloatRegister dst = g.ToSingleRegister(destination);
4179 __ LoadFPRImmediate(dst, src.ToFloat32());
4180 }
4181 } else {
4182 DCHECK_EQ(Constant::kFloat64, src.type());
4183 DoubleRegister dst = destination->IsFPRegister()
4184 ? g.ToDoubleRegister(destination)
4185 : kScratchDoubleReg;
4186 __ LoadFPRImmediate(dst, src.ToFloat64().value());
4187 if (destination->IsFPStackSlot()) {
4188 __ StoreDouble(dst, g.ToMemOperand(destination));
4189 }
4190 }
4191 } else if (source->IsFPRegister()) {
4192 MachineRepresentation rep = LocationOperand::cast(source)->representation();
4193 if (rep == MachineRepresentation::kSimd128) {
4194 VRegister src = g.ToSimd128Register(source);
4195 if (destination->IsSimd128Register()) {
4196 VRegister dst = g.ToSimd128Register(destination);
4197 __ VU.set(kScratchReg, E8, m1);
4198 __ vmv_vv(dst, src);
4199 } else {
4200 DCHECK(destination->IsSimd128StackSlot());
4201 __ VU.set(kScratchReg, E8, m1);
4202 MemOperand dst = g.ToMemOperand(destination);
4203 Register dst_r = dst.rm();
4204 if (dst.offset() != 0) {
4205 dst_r = kScratchReg;
4206 __ Add64(dst_r, dst.rm(), dst.offset());
4207 }
4208 __ vs(src, dst_r, 0, E8);
4209 }
4210 } else {
4211 FPURegister src = g.ToDoubleRegister(source);
4212 if (destination->IsFPRegister()) {
4213 FPURegister dst = g.ToDoubleRegister(destination);
4214 __ Move(dst, src);
4215 } else {
4216 DCHECK(destination->IsFPStackSlot());
4217 if (rep == MachineRepresentation::kFloat32) {
4218 __ StoreFloat(src, g.ToMemOperand(destination));
4219 } else {
4220 DCHECK_EQ(rep, MachineRepresentation::kFloat64);
4221 __ StoreDouble(src, g.ToMemOperand(destination));
4222 }
4223 }
4224 }
4225 } else if (source->IsFPStackSlot()) {
4226 DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
4227 MemOperand src = g.ToMemOperand(source);
4228 MachineRepresentation rep = LocationOperand::cast(source)->representation();
4229 if (rep == MachineRepresentation::kSimd128) {
4230 __ VU.set(kScratchReg, E8, m1);
4231 Register src_r = src.rm();
4232 if (src.offset() != 0) {
4233 src_r = kScratchReg;
4234 __ Add64(src_r, src.rm(), src.offset());
4235 }
4236 if (destination->IsSimd128Register()) {
4237 __ vl(g.ToSimd128Register(destination), src_r, 0, E8);
4238 } else {
4239 DCHECK(destination->IsSimd128StackSlot());
4240 VRegister temp = kSimd128ScratchReg;
4241 MemOperand dst = g.ToMemOperand(destination);
4242 Register dst_r = dst.rm();
4243 if (dst.offset() != 0) {
4244 dst_r = kScratchReg2;
4245 __ Add64(dst_r, dst.rm(), dst.offset());
4246 }
4247 __ vl(temp, src_r, 0, E8);
4248 __ vs(temp, dst_r, 0, E8);
4249 }
4250 } else {
4251 if (destination->IsFPRegister()) {
4252 if (rep == MachineRepresentation::kFloat32) {
4253 __ LoadFloat(g.ToDoubleRegister(destination), src);
4254 } else {
4255 DCHECK_EQ(rep, MachineRepresentation::kFloat64);
4256 __ LoadDouble(g.ToDoubleRegister(destination), src);
4257 }
4258 } else {
4259 DCHECK(destination->IsFPStackSlot());
4260 FPURegister temp = kScratchDoubleReg;
4261 if (rep == MachineRepresentation::kFloat32) {
4262 __ LoadFloat(temp, src);
4263 __ StoreFloat(temp, g.ToMemOperand(destination));
4264 } else {
4265 DCHECK_EQ(rep, MachineRepresentation::kFloat64);
4266 __ LoadDouble(temp, src);
4267 __ StoreDouble(temp, g.ToMemOperand(destination));
4268 }
4269 }
4270 }
4271 } else {
4272 UNREACHABLE();
4273 }
4274 }
4275
4276 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4277 InstructionOperand* destination) {
4278 RiscvOperandConverter g(this, nullptr);
4279 switch (MoveType::InferSwap(source, destination)) {
4280 case MoveType::kRegisterToRegister:
4281 if (source->IsRegister()) {
4282 Register temp = kScratchReg;
4283 Register src = g.ToRegister(source);
4284 Register dst = g.ToRegister(destination);
4285 __ Move(temp, src);
4286 __ Move(src, dst);
4287 __ Move(dst, temp);
4288 } else {
4289 if (source->IsFloatRegister() || source->IsDoubleRegister()) {
4290 FPURegister temp = kScratchDoubleReg;
4291 FPURegister src = g.ToDoubleRegister(source);
4292 FPURegister dst = g.ToDoubleRegister(destination);
4293 __ Move(temp, src);
4294 __ Move(src, dst);
4295 __ Move(dst, temp);
4296 } else {
4297 DCHECK(source->IsSimd128Register());
4298 VRegister src = g.ToDoubleRegister(source).toV();
4299 VRegister dst = g.ToDoubleRegister(destination).toV();
4300 VRegister temp = kSimd128ScratchReg;
4301 __ VU.set(kScratchReg, E8, m1);
4302 __ vmv_vv(temp, src);
4303 __ vmv_vv(src, dst);
4304 __ vmv_vv(dst, temp);
4305 }
4306 }
4307 return;
4308 case MoveType::kRegisterToStack: {
4309 MemOperand dst = g.ToMemOperand(destination);
4310 if (source->IsRegister()) {
4311 Register temp = kScratchReg;
4312 Register src = g.ToRegister(source);
4313 __ mv(temp, src);
4314 __ Ld(src, dst);
4315 __ Sd(temp, dst);
4316 } else {
4317 MemOperand dst = g.ToMemOperand(destination);
4318 if (source->IsFloatRegister()) {
4319 DoubleRegister src = g.ToDoubleRegister(source);
4320 DoubleRegister temp = kScratchDoubleReg;
4321 __ fmv_s(temp, src);
4322 __ LoadFloat(src, dst);
4323 __ StoreFloat(temp, dst);
4324 } else if (source->IsDoubleRegister()) {
4325 DoubleRegister src = g.ToDoubleRegister(source);
4326 DoubleRegister temp = kScratchDoubleReg;
4327 __ fmv_d(temp, src);
4328 __ LoadDouble(src, dst);
4329 __ StoreDouble(temp, dst);
4330 } else {
4331 DCHECK(source->IsSimd128Register());
4332 VRegister src = g.ToDoubleRegister(source).toV();
4333 VRegister temp = kSimd128ScratchReg;
4334 __ VU.set(kScratchReg, E8, m1);
4335 __ vmv_vv(temp, src);
4336 Register dst_v = dst.rm();
4337 if (dst.offset() != 0) {
4338 dst_v = kScratchReg2;
4339 __ Add64(dst_v, dst.rm(), Operand(dst.offset()));
4340 }
4341 __ vl(src, dst_v, 0, E8);
4342 __ vs(temp, dst_v, 0, E8);
4343 }
4344 }
4345 return;
4346 }
4347 case MoveType::kStackToStack: {
4348 MemOperand src = g.ToMemOperand(source);
4349 MemOperand dst = g.ToMemOperand(destination);
4350 if (source->IsSimd128StackSlot()) {
4351 __ VU.set(kScratchReg, E8, m1);
4352 Register src_v = src.rm();
4353 Register dst_v = dst.rm();
4354 if (src.offset() != 0) {
4355 src_v = kScratchReg;
4356 __ Add64(src_v, src.rm(), Operand(src.offset()));
4357 }
4358 if (dst.offset() != 0) {
4359 dst_v = kScratchReg2;
4360 __ Add64(dst_v, dst.rm(), Operand(dst.offset()));
4361 }
4362 __ vl(kSimd128ScratchReg, src_v, 0, E8);
4363 __ vl(kSimd128ScratchReg2, dst_v, 0, E8);
4364 __ vs(kSimd128ScratchReg, dst_v, 0, E8);
4365 __ vs(kSimd128ScratchReg2, src_v, 0, E8);
4366 } else {
4367 UseScratchRegisterScope scope(tasm());
4368 Register temp_0 = kScratchReg;
4369 Register temp_1 = kScratchReg2;
4370 __ Ld(temp_0, src);
4371 __ Ld(temp_1, dst);
4372 __ Sd(temp_0, dst);
4373 __ Sd(temp_1, src);
4374 }
4375 return;
4376 }
4377 default:
4378 UNREACHABLE();
4379 }
4380 }
4381
4382 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4383 // On 64-bit RISC-V we emit the jump tables inline.
4384 UNREACHABLE();
4385 }
4386
4387 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
4388 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
4389 #undef ASSEMBLE_ATOMIC_BINOP
4390 #undef ASSEMBLE_ATOMIC_BINOP_EXT
4391 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
4392 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT
4393 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
4394 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT
4395 #undef ASSEMBLE_IEEE754_BINOP
4396 #undef ASSEMBLE_IEEE754_UNOP
4397
4398 #undef TRACE_MSG
4399 #undef TRACE_UNIMPL
4400 #undef __
4401
4402 } // namespace compiler
4403 } // namespace internal
4404 } // namespace v8
4405