1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_WASM_BASELINE_RISCV64_LIFTOFF_ASSEMBLER_RISCV64_H_
6 #define V8_WASM_BASELINE_RISCV64_LIFTOFF_ASSEMBLER_RISCV64_H_
7
8 #include "src/base/platform/wrappers.h"
9 #include "src/heap/memory-chunk.h"
10 #include "src/wasm/baseline/liftoff-assembler.h"
11 #include "src/wasm/wasm-objects.h"
12
13 namespace v8 {
14 namespace internal {
15 namespace wasm {
16
17 namespace liftoff {
18
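// Maps the platform-independent Liftoff condition codes onto the RISC-V
// branch conditions used by TurboAssembler::Branch and CompareI.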
inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
20 switch (liftoff_cond) {
21 case kEqual:
22 return eq;
23 case kUnequal:
24 return ne;
25 case kSignedLessThan:
26 return lt;
27 case kSignedLessEqual:
28 return le;
29 case kSignedGreaterThan:
30 return gt;
31 case kSignedGreaterEqual:
32 return ge;
33 case kUnsignedLessThan:
34 return ult;
35 case kUnsignedLessEqual:
36 return ule;
37 case kUnsignedGreaterThan:
38 return ugt;
39 case kUnsignedGreaterEqual:
40 return uge;
41 }
42 }
43
// Liftoff Frames.
//
//  slot      Frame
//       +--------------------+---------------------------
//  n+4  | optional padding slot to keep the stack 16 byte aligned.
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (ra)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | StackFrame::WASM   |
//  -2   |     instance       |
//  -3   |  feedback vector   |
//  -4   |  tiering budget    |
//  -----+--------------------+---------------------------
//  -5   |      slot 0        |   ^
//  -6   |      slot 1        |   |
//       |                    | Frame slots
//       |                    |   |
//       |                    |   v
//       | optional padding slot to keep the stack 16 byte aligned.
//  -----+--------------------+  <-- stack ptr (sp)
//

// fp-8 holds the stack marker (StackFrame::WASM), fp-16 the instance
// parameter, fp-24 the feedback vector, and fp-32 the tiering budget.
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
76
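// Example (illustrative, based on the diagram above): the first Liftoff value
// slot ("slot 0") lives one pointer below the tiering budget, so it can be
// addressed as GetStackSlot(kTierupBudgetOffset + kSystemPointerSize), i.e.
// MemOperand(fp, -5 * kSystemPointerSize).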
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
78
inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }
80
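// Computes addr + offset + offset_imm as a MemOperand. If the immediate does
// not fit in 31 bits, or an offset register is given, the address is
// materialized in kScratchReg2, so callers must not rely on kScratchReg2
// surviving this helper.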
inline MemOperand GetMemOp(LiftoffAssembler* assm, Register addr,
                           Register offset, uintptr_t offset_imm) {
83 if (is_uint31(offset_imm)) {
84 int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
85 if (offset == no_reg) return MemOperand(addr, offset_imm32);
86 assm->Add64(kScratchReg2, addr, offset);
87 return MemOperand(kScratchReg2, offset_imm32);
88 }
89 // Offset immediate does not fit in 31 bits.
90 assm->li(kScratchReg2, offset_imm);
91 assm->Add64(kScratchReg2, kScratchReg2, addr);
92 if (offset != no_reg) {
93 assm->Add64(kScratchReg2, kScratchReg2, offset);
94 }
95 return MemOperand(kScratchReg2, 0);
96 }
97
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
                 ValueKind kind) {
100 switch (kind) {
101 case kI32:
102 assm->Lw(dst.gp(), src);
103 break;
104 case kI64:
105 case kRef:
106 case kOptRef:
107 case kRtt:
108 assm->Ld(dst.gp(), src);
109 break;
110 case kF32:
111 assm->LoadFloat(dst.fp(), src);
112 break;
113 case kF64:
114 assm->LoadDouble(dst.fp(), src);
115 break;
116 default:
117 UNREACHABLE();
118 }
119 }
120
inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
                  LiftoffRegister src, ValueKind kind) {
123 MemOperand dst(base, offset);
124 switch (kind) {
125 case kI32:
126 assm->Sw(src.gp(), dst);
127 break;
128 case kI64:
129 case kOptRef:
130 case kRef:
131 case kRtt:
132 assm->Sd(src.gp(), dst);
133 break;
134 case kF32:
135 assm->StoreFloat(src.fp(), dst);
136 break;
137 case kF64:
138 assm->StoreDouble(src.fp(), dst);
139 break;
140 default:
141 UNREACHABLE();
142 }
143 }
144
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind) {
146 switch (kind) {
147 case kI32:
148 assm->addi(sp, sp, -kSystemPointerSize);
149 assm->Sw(reg.gp(), MemOperand(sp, 0));
150 break;
151 case kI64:
152 case kOptRef:
153 case kRef:
154 case kRtt:
155 assm->push(reg.gp());
156 break;
157 case kF32:
158 assm->addi(sp, sp, -kSystemPointerSize);
159 assm->StoreFloat(reg.fp(), MemOperand(sp, 0));
160 break;
161 case kF64:
162 assm->addi(sp, sp, -kSystemPointerSize);
163 assm->StoreDouble(reg.fp(), MemOperand(sp, 0));
164 break;
165 default:
166 UNREACHABLE();
167 }
168 }
169
170 #if defined(V8_TARGET_BIG_ENDIAN)
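// Wasm memory is little-endian; on big-endian targets the value just loaded
// (or about to be stored) has to be byte-swapped. Floating-point values are
// first reinterpreted as integers so the GP byte-swap helpers can be used.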
inline void ChangeEndiannessLoad(LiftoffAssembler* assm, LiftoffRegister dst,
                                 LoadType type, LiftoffRegList pinned) {
173 bool is_float = false;
174 LiftoffRegister tmp = dst;
175 switch (type.value()) {
176 case LoadType::kI64Load8U:
177 case LoadType::kI64Load8S:
178 case LoadType::kI32Load8U:
179 case LoadType::kI32Load8S:
180 // No need to change endianness for byte size.
181 return;
182 case LoadType::kF32Load:
183 is_float = true;
184 tmp = assm->GetUnusedRegister(kGpReg, pinned);
185 assm->emit_type_conversion(kExprI32ReinterpretF32, tmp, dst);
186 V8_FALLTHROUGH;
187 case LoadType::kI64Load32U:
188 assm->TurboAssembler::ByteSwapUnsigned(tmp.gp(), tmp.gp(), 4);
189 break;
190 case LoadType::kI32Load:
191 case LoadType::kI64Load32S:
192 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 4);
193 break;
194 case LoadType::kI32Load16S:
195 case LoadType::kI64Load16S:
196 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 2);
197 break;
198 case LoadType::kI32Load16U:
199 case LoadType::kI64Load16U:
200 assm->TurboAssembler::ByteSwapUnsigned(tmp.gp(), tmp.gp(), 2);
201 break;
202 case LoadType::kF64Load:
203 is_float = true;
204 tmp = assm->GetUnusedRegister(kGpReg, pinned);
205 assm->emit_type_conversion(kExprI64ReinterpretF64, tmp, dst);
206 V8_FALLTHROUGH;
207 case LoadType::kI64Load:
208 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 8);
209 break;
210 default:
211 UNREACHABLE();
212 }
213
214 if (is_float) {
215 switch (type.value()) {
216 case LoadType::kF32Load:
217 assm->emit_type_conversion(kExprF32ReinterpretI32, dst, tmp);
218 break;
219 case LoadType::kF64Load:
220 assm->emit_type_conversion(kExprF64ReinterpretI64, dst, tmp);
221 break;
222 default:
223 UNREACHABLE();
224 }
225 }
226 }
227
inline void ChangeEndiannessStore(LiftoffAssembler* assm, LiftoffRegister src,
                                  StoreType type, LiftoffRegList pinned) {
230 bool is_float = false;
231 LiftoffRegister tmp = src;
232 switch (type.value()) {
233 case StoreType::kI64Store8:
234 case StoreType::kI32Store8:
235 // No need to change endianness for byte size.
236 return;
237 case StoreType::kF32Store:
238 is_float = true;
239 tmp = assm->GetUnusedRegister(kGpReg, pinned);
240 assm->emit_type_conversion(kExprI32ReinterpretF32, tmp, src);
241 V8_FALLTHROUGH;
242 case StoreType::kI32Store:
243 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 4);
244 break;
245 case StoreType::kI32Store16:
246 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 2);
247 break;
248 case StoreType::kF64Store:
249 is_float = true;
250 tmp = assm->GetUnusedRegister(kGpReg, pinned);
251 assm->emit_type_conversion(kExprI64ReinterpretF64, tmp, src);
252 V8_FALLTHROUGH;
253 case StoreType::kI64Store:
254 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 8);
255 break;
256 case StoreType::kI64Store32:
257 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 4);
258 break;
259 case StoreType::kI64Store16:
260 assm->TurboAssembler::ByteSwapSigned(tmp.gp(), tmp.gp(), 2);
261 break;
262 default:
263 UNREACHABLE();
264 }
265
266 if (is_float) {
267 switch (type.value()) {
268 case StoreType::kF32Store:
269 assm->emit_type_conversion(kExprF32ReinterpretI32, src, tmp);
270 break;
271 case StoreType::kF64Store:
272 assm->emit_type_conversion(kExprF64ReinterpretI64, src, tmp);
273 break;
274 default:
275 UNREACHABLE();
276 }
277 }
278 }
279 #endif // V8_TARGET_BIG_ENDIAN
280
281 } // namespace liftoff
282
int LiftoffAssembler::PrepareStackFrame() {
284 int offset = pc_offset();
285 // When the frame size is bigger than 4KB, we need two instructions for
286 // stack checking, so we reserve space for this case.
287 addi(sp, sp, 0);
288 nop();
289 nop();
290 return offset;
291 }
292
void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
295 UseScratchRegisterScope temps(this);
296 Register scratch = temps.Acquire();
297
298 // Push the return address and frame pointer to complete the stack frame.
299 Ld(scratch, MemOperand(fp, 8));
300 Push(scratch);
301 Ld(scratch, MemOperand(fp, 0));
302 Push(scratch);
303
304 // Shift the whole frame upwards.
305 int slot_count = num_callee_stack_params + 2;
306 for (int i = slot_count - 1; i >= 0; --i) {
307 Ld(scratch, MemOperand(sp, i * 8));
308 Sd(scratch, MemOperand(fp, (i - stack_param_delta) * 8));
309 }
310
311 // Set the new stack and frame pointer.
312 Add64(sp, fp, -stack_param_delta * 8);
313 Pop(ra, fp);
314 }
315
void LiftoffAssembler::AlignFrameSize() {}
317
void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
320 // The frame_size includes the frame marker and the instance slot. Both are
321 // pushed as part of frame construction, so we don't need to allocate memory
322 // for them anymore.
323 int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
324 // We can't run out of space, just pass anything big enough to not cause the
325 // assembler to try to grow the buffer.
326 constexpr int kAvailableSpace = 256;
327 TurboAssembler patching_assembler(
328 nullptr, AssemblerOptions{}, CodeObjectRequired::kNo,
329 ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));
330
331 if (V8_LIKELY(frame_size < 4 * KB)) {
332 // This is the standard case for small frames: just subtract from SP and be
333 // done with it.
334 patching_assembler.Add64(sp, sp, Operand(-frame_size));
335 return;
336 }
337
338 // The frame size is bigger than 4KB, so we might overflow the available stack
339 // space if we first allocate the frame and then do the stack check (we will
340 // need some remaining stack space for throwing the exception). That's why we
341 // check the available stack space before we allocate the frame. To do this we
342 // replace the {__ Add64(sp, sp, -frame_size)} with a jump to OOL code that
343 // does this "extended stack check".
344 //
345 // The OOL code can simply be generated here with the normal assembler,
346 // because all other code generation, including OOL code, has already finished
347 // when {PatchPrepareStackFrame} is called. The function prologue then jumps
348 // to the current {pc_offset()} to execute the OOL code for allocating the
349 // large frame.
350 // Emit the unconditional branch in the function prologue (from {offset} to
351 // {pc_offset()}).
352
353 int imm32 = pc_offset() - offset;
354 patching_assembler.GenPCRelativeJump(kScratchReg, imm32);
355
356 // If the frame is bigger than the stack, we throw the stack overflow
357 // exception unconditionally. Thereby we can avoid the integer overflow
358 // check in the condition code.
359 RecordComment("OOL: stack check for large frame");
360 Label continuation;
361 if (frame_size < FLAG_stack_size * 1024) {
362 Register stack_limit = kScratchReg;
363 Ld(stack_limit,
364 FieldMemOperand(kWasmInstanceRegister,
365 WasmInstanceObject::kRealStackLimitAddressOffset));
366 Ld(stack_limit, MemOperand(stack_limit));
367 Add64(stack_limit, stack_limit, Operand(frame_size));
368 Branch(&continuation, uge, sp, Operand(stack_limit));
369 }
370
371 Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
372 // The call will not return; just define an empty safepoint.
373 safepoint_table_builder->DefineSafepoint(this);
374 if (FLAG_debug_code) stop();
375
376 bind(&continuation);
377
  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP.
380 Add64(sp, sp, Operand(-frame_size));
381
382 // Jump back to the start of the function, from {pc_offset()} to
383 // right after the reserved space for the {__ Add64(sp, sp, -framesize)}
384 // (which is a Branch now).
385 int func_start_offset = offset + 2 * kInstrSize;
386 imm32 = func_start_offset - pc_offset();
387 GenPCRelativeJump(kScratchReg, imm32);
388 }
389
void LiftoffAssembler::FinishCode() { ForceConstantPoolEmissionWithoutJump(); }
391
void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
393
394 // static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
396 return liftoff::kTierupBudgetOffset;
397 }
398
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
400 switch (kind) {
401 case kS128:
402 return value_kind_size(kind);
403 default:
404 return kStackSlotSize;
405 }
406 }
407
bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
409 switch (kind) {
410 case kS128:
411 return true;
412 default:
413 // No alignment because all other types are kStackSlotSize.
414 return false;
415 }
416 }
417
void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
420 switch (value.type().kind()) {
421 case kI32:
422 TurboAssembler::li(reg.gp(), Operand(value.to_i32(), rmode));
423 break;
424 case kI64:
425 TurboAssembler::li(reg.gp(), Operand(value.to_i64(), rmode));
426 break;
427 case kF32:
428 TurboAssembler::LoadFPRImmediate(reg.fp(),
429 value.to_f32_boxed().get_bits());
430 break;
431 case kF64:
432 TurboAssembler::LoadFPRImmediate(reg.fp(),
433 value.to_f64_boxed().get_bits());
434 break;
435 default:
436 UNREACHABLE();
437 }
438 }
439
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
441 Ld(dst, liftoff::GetInstanceOperand());
442 }
443
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
446 DCHECK_LE(0, offset);
447 MemOperand src{instance, offset};
448 switch (size) {
449 case 1:
450 Lb(dst, MemOperand(src));
451 break;
452 case 4:
453 Lw(dst, MemOperand(src));
454 break;
455 case 8:
456 Ld(dst, MemOperand(src));
457 break;
458 default:
459 UNIMPLEMENTED();
460 }
461 }
462
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
466 DCHECK_LE(0, offset);
467 LoadTaggedPointerField(dst, MemOperand{instance, offset});
468 }
469
void LiftoffAssembler::SpillInstance(Register instance) {
471 Sd(instance, liftoff::GetInstanceOperand());
472 }
473
void LiftoffAssembler::ResetOSRTarget() {}
475
void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
480 MemOperand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
481 LoadTaggedPointerField(dst, src_op);
482 }
483
void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
486 MemOperand src_op = liftoff::GetMemOp(this, src_addr, no_reg, offset_imm);
487 Ld(dst, src_op);
488 }
489
void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
496 Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
497 MemOperand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
498 StoreTaggedField(src.gp(), dst_op);
499
500 if (skip_write_barrier || FLAG_disable_write_barriers) return;
501
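  // Emit the generational write barrier: it is skipped entirely if the
  // destination page does not track outgoing pointers, if the stored value is
  // a Smi, or if the value does not live on a page that tracks incoming
  // pointers.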
502 Label write_barrier;
503 Label exit;
504 CheckPageFlag(dst_addr, scratch,
505 MemoryChunk::kPointersFromHereAreInterestingMask, ne,
506 &write_barrier);
507 Branch(&exit);
508 bind(&write_barrier);
509 JumpIfSmi(src.gp(), &exit);
510 CheckPageFlag(src.gp(), scratch,
511 MemoryChunk::kPointersToHereAreInterestingMask, eq, &exit);
512 Add64(scratch, dst_op.rm(), dst_op.offset());
513 CallRecordWriteStubSaveRegisters(
514 dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
515 StubCallMode::kCallWasmRuntimeStub);
516 bind(&exit);
517 }
518
void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
524 MemOperand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
525
526 if (protected_load_pc) *protected_load_pc = pc_offset();
527 switch (type.value()) {
528 case LoadType::kI32Load8U:
529 case LoadType::kI64Load8U:
530 Lbu(dst.gp(), src_op);
531 break;
532 case LoadType::kI32Load8S:
533 case LoadType::kI64Load8S:
534 Lb(dst.gp(), src_op);
535 break;
536 case LoadType::kI32Load16U:
537 case LoadType::kI64Load16U:
538 TurboAssembler::Lhu(dst.gp(), src_op);
539 break;
540 case LoadType::kI32Load16S:
541 case LoadType::kI64Load16S:
542 TurboAssembler::Lh(dst.gp(), src_op);
543 break;
544 case LoadType::kI64Load32U:
545 TurboAssembler::Lwu(dst.gp(), src_op);
546 break;
547 case LoadType::kI32Load:
548 case LoadType::kI64Load32S:
549 TurboAssembler::Lw(dst.gp(), src_op);
550 break;
551 case LoadType::kI64Load:
552 TurboAssembler::Ld(dst.gp(), src_op);
553 break;
554 case LoadType::kF32Load:
555 TurboAssembler::LoadFloat(dst.fp(), src_op);
556 break;
557 case LoadType::kF64Load:
558 TurboAssembler::LoadDouble(dst.fp(), src_op);
559 break;
560 case LoadType::kS128Load: {
561 VU.set(kScratchReg, E8, m1);
562 Register src_reg = src_op.offset() == 0 ? src_op.rm() : kScratchReg;
563 if (src_op.offset() != 0) {
564 TurboAssembler::Add64(src_reg, src_op.rm(), src_op.offset());
565 }
566 vl(dst.fp().toV(), src_reg, 0, E8);
567 break;
568 }
569 default:
570 UNREACHABLE();
571 }
572
573 #if defined(V8_TARGET_BIG_ENDIAN)
574 if (is_load_mem) {
575 pinned.set(src_op.rm());
576 liftoff::ChangeEndiannessLoad(this, dst, type, pinned);
577 }
578 #endif
579 }
580
void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
585 MemOperand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
586
587 #if defined(V8_TARGET_BIG_ENDIAN)
588 if (is_store_mem) {
589 pinned.set(dst_op.rm());
590 LiftoffRegister tmp = GetUnusedRegister(src.reg_class(), pinned);
591 // Save original value.
592 Move(tmp, src, type.value_type());
593
594 src = tmp;
595 pinned.set(tmp);
596 liftoff::ChangeEndiannessStore(this, src, type, pinned);
597 }
598 #endif
599
600 if (protected_store_pc) *protected_store_pc = pc_offset();
601
602 switch (type.value()) {
603 case StoreType::kI32Store8:
604 case StoreType::kI64Store8:
605 Sb(src.gp(), dst_op);
606 break;
607 case StoreType::kI32Store16:
608 case StoreType::kI64Store16:
609 TurboAssembler::Sh(src.gp(), dst_op);
610 break;
611 case StoreType::kI32Store:
612 case StoreType::kI64Store32:
613 TurboAssembler::Sw(src.gp(), dst_op);
614 break;
615 case StoreType::kI64Store:
616 TurboAssembler::Sd(src.gp(), dst_op);
617 break;
618 case StoreType::kF32Store:
619 TurboAssembler::StoreFloat(src.fp(), dst_op);
620 break;
621 case StoreType::kF64Store:
622 TurboAssembler::StoreDouble(src.fp(), dst_op);
623 break;
624 case StoreType::kS128Store: {
625 VU.set(kScratchReg, E8, m1);
626 Register dst_reg = dst_op.offset() == 0 ? dst_op.rm() : kScratchReg;
627 if (dst_op.offset() != 0) {
628 Add64(kScratchReg, dst_op.rm(), dst_op.offset());
629 }
630 vs(src.fp().toV(), dst_reg, 0, VSew::E8);
631 break;
632 }
633 default:
634 UNREACHABLE();
635 }
636 }
637
638 namespace liftoff {
639 #define __ lasm->
640
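// Materializes addr_reg + offset_reg + offset_imm in result_reg and returns
// it; used by the atomic helpers below, which need the full address in a
// single register.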
inline Register CalculateActualAddress(LiftoffAssembler* lasm,
                                       Register addr_reg, Register offset_reg,
                                       uintptr_t offset_imm,
                                       Register result_reg) {
645 DCHECK_NE(offset_reg, no_reg);
646 DCHECK_NE(addr_reg, no_reg);
647 __ Add64(result_reg, addr_reg, Operand(offset_reg));
648 if (offset_imm != 0) {
649 __ Add64(result_reg, result_reg, Operand(offset_imm));
650 }
651 return result_reg;
652 }
653
654 enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };
655
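// Generic read-modify-write loop for the atomic binops. 32/64-bit accesses use
// an LR/SC retry loop; 8/16-bit accesses fall back to plain loads and stores
// bracketed by fences, with {store_result} forced to zero so the retry branch
// falls through.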
inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr,
                        Register offset_reg, uintptr_t offset_imm,
                        LiftoffRegister value, LiftoffRegister result,
                        StoreType type, Binop op) {
660 LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
661 Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
662
663 // Make sure that {result} is unique.
664 Register result_reg = result.gp();
665 if (result_reg == value.gp() || result_reg == dst_addr ||
666 result_reg == offset_reg) {
667 result_reg = __ GetUnusedRegister(kGpReg, pinned).gp();
668 }
669
670 UseScratchRegisterScope temps(lasm);
671 Register actual_addr = liftoff::CalculateActualAddress(
672 lasm, dst_addr, offset_reg, offset_imm, temps.Acquire());
673
674 // Allocate an additional {temp} register to hold the result that should be
675 // stored to memory. Note that {temp} and {store_result} are not allowed to be
676 // the same register.
677 Register temp = temps.Acquire();
678
679 Label retry;
680 __ bind(&retry);
681 switch (type.value()) {
682 case StoreType::kI64Store8:
683 case StoreType::kI32Store8:
684 __ lbu(result_reg, actual_addr, 0);
685 __ sync();
686 break;
687 case StoreType::kI64Store16:
688 case StoreType::kI32Store16:
689 __ lhu(result_reg, actual_addr, 0);
690 __ sync();
691 break;
692 case StoreType::kI64Store32:
693 case StoreType::kI32Store:
694 __ lr_w(true, false, result_reg, actual_addr);
695 break;
696 case StoreType::kI64Store:
697 __ lr_d(true, false, result_reg, actual_addr);
698 break;
699 default:
700 UNREACHABLE();
701 }
702
703 switch (op) {
704 case Binop::kAdd:
705 __ add(temp, result_reg, value.gp());
706 break;
707 case Binop::kSub:
708 __ sub(temp, result_reg, value.gp());
709 break;
710 case Binop::kAnd:
711 __ and_(temp, result_reg, value.gp());
712 break;
713 case Binop::kOr:
714 __ or_(temp, result_reg, value.gp());
715 break;
716 case Binop::kXor:
717 __ xor_(temp, result_reg, value.gp());
718 break;
719 case Binop::kExchange:
720 __ mv(temp, value.gp());
721 break;
722 }
723 switch (type.value()) {
724 case StoreType::kI64Store8:
725 case StoreType::kI32Store8:
726 __ sync();
727 __ sb(temp, actual_addr, 0);
728 __ sync();
729 __ mv(store_result, zero_reg);
730 break;
731 case StoreType::kI64Store16:
732 case StoreType::kI32Store16:
733 __ sync();
734 __ sh(temp, actual_addr, 0);
735 __ sync();
736 __ mv(store_result, zero_reg);
737 break;
738 case StoreType::kI64Store32:
739 case StoreType::kI32Store:
740 __ sc_w(false, true, store_result, actual_addr, temp);
741 break;
742 case StoreType::kI64Store:
      __ sc_d(false, true, store_result, actual_addr, temp);
744 break;
745 default:
746 UNREACHABLE();
747 }
748
749 __ bnez(store_result, &retry);
750 if (result_reg != result.gp()) {
751 __ mv(result.gp(), result_reg);
752 }
753 }
754
755 #undef __
756 } // namespace liftoff
757
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
761 UseScratchRegisterScope temps(this);
762 Register src_reg = liftoff::CalculateActualAddress(
763 this, src_addr, offset_reg, offset_imm, temps.Acquire());
764 switch (type.value()) {
765 case LoadType::kI32Load8U:
766 case LoadType::kI64Load8U:
767 fence(PSR | PSW, PSR | PSW);
768 lbu(dst.gp(), src_reg, 0);
769 fence(PSR, PSR | PSW);
770 return;
771 case LoadType::kI32Load16U:
772 case LoadType::kI64Load16U:
773 fence(PSR | PSW, PSR | PSW);
774 lhu(dst.gp(), src_reg, 0);
775 fence(PSR, PSR | PSW);
776 return;
777 case LoadType::kI32Load:
778 case LoadType::kI64Load32U:
779 fence(PSR | PSW, PSR | PSW);
780 lw(dst.gp(), src_reg, 0);
781 fence(PSR, PSR | PSW);
782 return;
783 case LoadType::kI64Load:
784 fence(PSR | PSW, PSR | PSW);
785 ld(dst.gp(), src_reg, 0);
786 fence(PSR, PSR | PSW);
787 return;
788 default:
789 UNREACHABLE();
790 }
791 }
792
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
796 UseScratchRegisterScope temps(this);
797 Register dst_reg = liftoff::CalculateActualAddress(
798 this, dst_addr, offset_reg, offset_imm, temps.Acquire());
799 switch (type.value()) {
800 case StoreType::kI64Store8:
801 case StoreType::kI32Store8:
802 fence(PSR | PSW, PSW);
803 sb(src.gp(), dst_reg, 0);
804 return;
805 case StoreType::kI64Store16:
806 case StoreType::kI32Store16:
807 fence(PSR | PSW, PSW);
808 sh(src.gp(), dst_reg, 0);
809 return;
810 case StoreType::kI64Store32:
811 case StoreType::kI32Store:
812 fence(PSR | PSW, PSW);
813 sw(src.gp(), dst_reg, 0);
814 return;
815 case StoreType::kI64Store:
816 fence(PSR | PSW, PSW);
817 sd(src.gp(), dst_reg, 0);
818 return;
819 default:
820 UNREACHABLE();
821 }
822 }
823
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
827 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
828 type, liftoff::Binop::kAdd);
829 }
830
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
834 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
835 type, liftoff::Binop::kSub);
836 }
837
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
841 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
842 type, liftoff::Binop::kAnd);
843 }
844
void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
848 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
849 type, liftoff::Binop::kOr);
850 }
851
void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
855 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
856 type, liftoff::Binop::kXor);
857 }
858
void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
863 liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
864 type, liftoff::Binop::kExchange);
865 }
866
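// Word-sized compare-exchange: a standard LL/SC loop that exits early if the
// loaded value does not match {expected}.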
867 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_linked, \
868 store_conditional) \
869 do { \
870 Label compareExchange; \
871 Label exit; \
872 sync(); \
873 bind(&compareExchange); \
874 load_linked(result.gp(), MemOperand(temp0, 0)); \
875 BranchShort(&exit, ne, expected.gp(), Operand(result.gp())); \
876 mv(temp2, new_value.gp()); \
877 store_conditional(temp2, MemOperand(temp0, 0)); \
878 BranchShort(&compareExchange, eq, temp2, Operand(zero_reg)); \
879 bind(&exit); \
880 sync(); \
881 } while (0)
882
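// Sub-word compare-exchange: align the address down to the containing word,
// compute the bit position of the sub-word value, and use ExtractBits /
// InsertBits inside the LL/SC loop so only the addressed byte or halfword is
// replaced.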
883 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT( \
884 load_linked, store_conditional, size, aligned) \
885 do { \
886 Label compareExchange; \
887 Label exit; \
888 andi(temp1, temp0, aligned); \
889 Sub64(temp0, temp0, Operand(temp1)); \
890 Sll32(temp1, temp1, 3); \
891 sync(); \
892 bind(&compareExchange); \
893 load_linked(temp2, MemOperand(temp0, 0)); \
894 ExtractBits(result.gp(), temp2, temp1, size, false); \
895 ExtractBits(temp2, expected.gp(), zero_reg, size, false); \
896 BranchShort(&exit, ne, temp2, Operand(result.gp())); \
897 InsertBits(temp2, new_value.gp(), temp1, size); \
898 store_conditional(temp2, MemOperand(temp0, 0)); \
899 BranchShort(&compareExchange, eq, temp2, Operand(zero_reg)); \
900 bind(&exit); \
901 sync(); \
902 } while (0)
903
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
908 LiftoffRegList pinned = {dst_addr, offset_reg, expected, new_value, result};
909 Register temp0 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
910 Register temp1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
911 Register temp2 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
912 MemOperand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
913 Add64(temp0, dst_op.rm(), dst_op.offset());
914 switch (type.value()) {
915 case StoreType::kI64Store8:
916 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, 8, 7);
917 break;
918 case StoreType::kI32Store8:
919 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, 8, 3);
920 break;
921 case StoreType::kI64Store16:
922 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, 16, 7);
923 break;
924 case StoreType::kI32Store16:
925 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, 16, 3);
926 break;
927 case StoreType::kI64Store32:
928 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, 32, 7);
929 break;
930 case StoreType::kI32Store:
931 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll, Sc);
932 break;
933 case StoreType::kI64Store:
934 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Lld, Scd);
935 break;
936 default:
937 UNREACHABLE();
938 }
939 }
940 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
941 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT
942
void LiftoffAssembler::AtomicFence() { sync(); }
944
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
948 MemOperand src(fp, kSystemPointerSize * (caller_slot_idx + 1));
949 liftoff::Load(this, dst, src, kind);
950 }
951
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
955 int32_t offset = kSystemPointerSize * (caller_slot_idx + 1);
956 liftoff::Store(this, fp, offset, src, kind);
957 }
958
void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueKind kind) {
961 liftoff::Load(this, dst, MemOperand(sp, offset), kind);
962 }
963
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
966 DCHECK_NE(dst_offset, src_offset);
967 LiftoffRegister reg = GetUnusedRegister(reg_class_for(kind), {});
968 Fill(reg, src_offset, kind);
969 Spill(dst_offset, reg, kind);
970 }
971
void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
973 DCHECK_NE(dst, src);
974 // TODO(ksreten): Handle different sizes here.
975 TurboAssembler::Move(dst, src);
976 }
977
void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
980 DCHECK_NE(dst, src);
981 if (kind != kS128) {
982 TurboAssembler::Move(dst, src);
983 } else {
    TurboAssembler::vmv_vv(dst.toV(), src.toV());
985 }
986 }
987
void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
989 RecordUsedSpillOffset(offset);
990 MemOperand dst = liftoff::GetStackSlot(offset);
991 switch (kind) {
992 case kI32:
993 Sw(reg.gp(), dst);
994 break;
995 case kI64:
996 case kRef:
997 case kOptRef:
998 case kRtt:
999 Sd(reg.gp(), dst);
1000 break;
1001 case kF32:
1002 StoreFloat(reg.fp(), dst);
1003 break;
1004 case kF64:
1005 TurboAssembler::StoreDouble(reg.fp(), dst);
1006 break;
1007 case kS128: {
1008 VU.set(kScratchReg, E8, m1);
1009 Register dst_reg = dst.offset() == 0 ? dst.rm() : kScratchReg;
1010 if (dst.offset() != 0) {
1011 Add64(kScratchReg, dst.rm(), dst.offset());
1012 }
1013 vs(reg.fp().toV(), dst_reg, 0, VSew::E8);
1014 break;
1015 }
1016 default:
1017 UNREACHABLE();
1018 }
1019 }
1020
void LiftoffAssembler::Spill(int offset, WasmValue value) {
1022 RecordUsedSpillOffset(offset);
1023 MemOperand dst = liftoff::GetStackSlot(offset);
1024 switch (value.type().kind()) {
1025 case kI32: {
1026 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
1027 TurboAssembler::li(tmp.gp(), Operand(value.to_i32()));
1028 Sw(tmp.gp(), dst);
1029 break;
1030 }
1031 case kI64:
1032 case kRef:
1033 case kOptRef: {
1034 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
1035 TurboAssembler::li(tmp.gp(), value.to_i64());
1036 Sd(tmp.gp(), dst);
1037 break;
1038 }
1039 default:
1040 // kWasmF32 and kWasmF64 are unreachable, since those
1041 // constants are not tracked.
1042 UNREACHABLE();
1043 }
1044 }
1045
void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
1047 MemOperand src = liftoff::GetStackSlot(offset);
1048 switch (kind) {
1049 case kI32:
1050 Lw(reg.gp(), src);
1051 break;
1052 case kI64:
1053 case kRef:
1054 case kOptRef:
1055 Ld(reg.gp(), src);
1056 break;
1057 case kF32:
1058 LoadFloat(reg.fp(), src);
1059 break;
1060 case kF64:
1061 TurboAssembler::LoadDouble(reg.fp(), src);
1062 break;
1063 case kS128: {
1064 VU.set(kScratchReg, E8, m1);
1065 Register src_reg = src.offset() == 0 ? src.rm() : kScratchReg;
1066 if (src.offset() != 0) {
1067 TurboAssembler::Add64(src_reg, src.rm(), src.offset());
1068 }
1069 vl(reg.fp().toV(), src_reg, 0, E8);
1070 break;
1071 }
1072 default:
1073 UNREACHABLE();
1074 }
1075 }
1076
void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
1078 UNREACHABLE();
1079 }
1080
void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
1082 DCHECK_LT(0, size);
1083 RecordUsedSpillOffset(start + size);
1084
1085 if (size <= 12 * kStackSlotSize) {
1086 // Special straight-line code for up to 12 slots. Generates one
1087 // instruction per slot (<= 12 instructions total).
1088 uint32_t remainder = size;
1089 for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) {
1090 Sd(zero_reg, liftoff::GetStackSlot(start + remainder));
1091 }
1092 DCHECK(remainder == 4 || remainder == 0);
1093 if (remainder) {
1094 Sw(zero_reg, liftoff::GetStackSlot(start + remainder));
1095 }
1096 } else {
1097 // General case for bigger counts (12 instructions).
1098 // Use a0 for start address (inclusive), a1 for end address (exclusive).
1099 Push(a1, a0);
1100 Add64(a0, fp, Operand(-start - size));
1101 Add64(a1, fp, Operand(-start));
1102
1103 Label loop;
1104 bind(&loop);
1105 Sd(zero_reg, MemOperand(a0));
1106 addi(a0, a0, kSystemPointerSize);
1107 BranchShort(&loop, ne, a0, Operand(a1));
1108
1109 Pop(a1, a0);
1110 }
1111 }
1112
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1114 TurboAssembler::Clz64(dst.gp(), src.gp());
1115 }
1116
void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1118 TurboAssembler::Ctz64(dst.gp(), src.gp());
1119 }
1120
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
1123 TurboAssembler::Popcnt64(dst.gp(), src.gp(), kScratchReg);
1124 return true;
1125 }
1126
void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
1128 TurboAssembler::Mul32(dst, lhs, rhs);
1129 }
1130
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
1134 TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg));
1135
1136 // Check if lhs == kMinInt and rhs == -1, since this case is unrepresentable.
1137 TurboAssembler::CompareI(kScratchReg, lhs, Operand(kMinInt), ne);
1138 TurboAssembler::CompareI(kScratchReg2, rhs, Operand(-1), ne);
1139 add(kScratchReg, kScratchReg, kScratchReg2);
1140 TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg,
1141 Operand(zero_reg));
1142
1143 TurboAssembler::Div32(dst, lhs, rhs);
1144 }
1145
void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
1148 TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg));
1149 TurboAssembler::Divu32(dst, lhs, rhs);
1150 }
1151
void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
1154 TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg));
1155 TurboAssembler::Mod32(dst, lhs, rhs);
1156 }
1157
void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
1160 TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg));
1161 TurboAssembler::Modu32(dst, lhs, rhs);
1162 }
1163
1164 #define I32_BINOP(name, instruction) \
1165 void LiftoffAssembler::emit_i32_##name(Register dst, Register lhs, \
1166 Register rhs) { \
1167 instruction(dst, lhs, rhs); \
1168 }
1169
1170 // clang-format off
I32_BINOP(add, addw)
1172 I32_BINOP(sub, subw)
1173 I32_BINOP(and, and_)
1174 I32_BINOP(or, or_)
1175 I32_BINOP(xor, xor_)
1176 // clang-format on
1177
1178 #undef I32_BINOP
1179
1180 #define I32_BINOP_I(name, instruction) \
1181 void LiftoffAssembler::emit_i32_##name##i(Register dst, Register lhs, \
1182 int32_t imm) { \
1183 instruction(dst, lhs, Operand(imm)); \
1184 }
1185
1186 // clang-format off
1187 I32_BINOP_I(add, Add32)
1188 I32_BINOP_I(sub, Sub32)
1189 I32_BINOP_I(and, And)
1190 I32_BINOP_I(or, Or)
1191 I32_BINOP_I(xor, Xor)
1192 // clang-format on
1193
1194 #undef I32_BINOP_I
1195
1196 void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1197 TurboAssembler::Clz32(dst, src);
1198 }
1199
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1201 TurboAssembler::Ctz32(dst, src);
1202 }
1203
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1205 TurboAssembler::Popcnt32(dst, src, kScratchReg);
1206 return true;
1207 }
1208
1209 #define I32_SHIFTOP(name, instruction) \
1210 void LiftoffAssembler::emit_i32_##name(Register dst, Register src, \
1211 Register amount) { \
1212 instruction(dst, src, amount); \
1213 }
1214 #define I32_SHIFTOP_I(name, instruction) \
1215 void LiftoffAssembler::emit_i32_##name##i(Register dst, Register src, \
1216 int amount) { \
1217 instruction(dst, src, amount & 31); \
1218 }
1219
I32_SHIFTOP(shl, sllw)
1221 I32_SHIFTOP(sar, sraw)
1222 I32_SHIFTOP(shr, srlw)
1223
1224 I32_SHIFTOP_I(shl, slliw)
1225 I32_SHIFTOP_I(sar, sraiw)
1226 I32_SHIFTOP_I(shr, srliw)
1227
1228 #undef I32_SHIFTOP
1229 #undef I32_SHIFTOP_I
1230
1231 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1232 LiftoffRegister rhs) {
1233 TurboAssembler::Mul64(dst.gp(), lhs.gp(), rhs.gp());
1234 }
1235
bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
1240 TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg));
1241
1242 // Check if lhs == MinInt64 and rhs == -1, since this case is unrepresentable.
1243 TurboAssembler::CompareI(kScratchReg, lhs.gp(),
1244 Operand(std::numeric_limits<int64_t>::min()), ne);
1245 TurboAssembler::CompareI(kScratchReg2, rhs.gp(), Operand(-1), ne);
1246 add(kScratchReg, kScratchReg, kScratchReg2);
1247 TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg,
1248 Operand(zero_reg));
1249
1250 TurboAssembler::Div64(dst.gp(), lhs.gp(), rhs.gp());
1251 return true;
1252 }
1253
bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
1257 TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg));
1258 TurboAssembler::Divu64(dst.gp(), lhs.gp(), rhs.gp());
1259 return true;
1260 }
1261
bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
1265 TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg));
1266 TurboAssembler::Mod64(dst.gp(), lhs.gp(), rhs.gp());
1267 return true;
1268 }
1269
bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
1273 TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg));
1274 TurboAssembler::Modu64(dst.gp(), lhs.gp(), rhs.gp());
1275 return true;
1276 }
1277
1278 #define I64_BINOP(name, instruction) \
1279 void LiftoffAssembler::emit_i64_##name( \
1280 LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \
1281 instruction(dst.gp(), lhs.gp(), rhs.gp()); \
1282 }
1283
1284 // clang-format off
I64_BINOP(add, add)
1286 I64_BINOP(sub, sub)
1287 I64_BINOP(and, and_)
1288 I64_BINOP(or, or_)
1289 I64_BINOP(xor, xor_)
1290 // clang-format on
1291
1292 #undef I64_BINOP
1293
1294 #define I64_BINOP_I(name, instruction) \
1295 void LiftoffAssembler::emit_i64_##name##i( \
1296 LiftoffRegister dst, LiftoffRegister lhs, int32_t imm) { \
1297 instruction(dst.gp(), lhs.gp(), Operand(imm)); \
1298 }
1299
1300 // clang-format off
1301 I64_BINOP_I(and, And)
1302 I64_BINOP_I(or, Or)
1303 I64_BINOP_I(xor, Xor)
1304 // clang-format on
1305
1306 #undef I64_BINOP_I
1307
1308 #define I64_SHIFTOP(name, instruction) \
1309 void LiftoffAssembler::emit_i64_##name( \
1310 LiftoffRegister dst, LiftoffRegister src, Register amount) { \
1311 instruction(dst.gp(), src.gp(), amount); \
1312 }
1313
1314 I64_SHIFTOP(shl, sll)
1315 I64_SHIFTOP(sar, sra)
1316 I64_SHIFTOP(shr, srl)
1317 #undef I64_SHIFTOP
1318
1319 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1320 int amount) {
1321 if (is_uint6(amount)) {
1322 slli(dst.gp(), src.gp(), amount);
1323 } else {
1324 li(kScratchReg, amount);
1325 sll(dst.gp(), src.gp(), kScratchReg);
1326 }
1327 }
1328
void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
                                     int amount) {
1331 if (is_uint6(amount)) {
1332 srai(dst.gp(), src.gp(), amount);
1333 } else {
1334 li(kScratchReg, amount);
1335 sra(dst.gp(), src.gp(), kScratchReg);
1336 }
1337 }
1338
void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
                                     int amount) {
1341 if (is_uint6(amount)) {
1342 srli(dst.gp(), src.gp(), amount);
1343 } else {
1344 li(kScratchReg, amount);
1345 srl(dst.gp(), src.gp(), kScratchReg);
1346 }
1347 }
1348
void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
1351 TurboAssembler::Add64(dst.gp(), lhs.gp(), Operand(imm));
1352 }

void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
1354 addw(dst, src, zero_reg);
1355 }
1356
void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
1358 TurboAssembler::Neg_s(dst, src);
1359 }
1360
void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
1362 TurboAssembler::Neg_d(dst, src);
1363 }
1364
void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
1367 TurboAssembler::Float32Min(dst, lhs, rhs);
1368 }
1369
void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
1372 TurboAssembler::Float32Max(dst, lhs, rhs);
1373 }
1374
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
1377 fsgnj_s(dst, lhs, rhs);
1378 }
1379
void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
1382 TurboAssembler::Float64Min(dst, lhs, rhs);
1383 }
1384
void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
1387 TurboAssembler::Float64Max(dst, lhs, rhs);
1388 }
1389
void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
1392 fsgnj_d(dst, lhs, rhs);
1393 }
1394
1395 #define FP_BINOP(name, instruction) \
1396 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1397 DoubleRegister rhs) { \
1398 instruction(dst, lhs, rhs); \
1399 }
1400 #define FP_UNOP(name, instruction) \
1401 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1402 instruction(dst, src); \
1403 }
1404 #define FP_UNOP_RETURN_TRUE(name, instruction) \
1405 bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1406 instruction(dst, src, kScratchDoubleReg); \
1407 return true; \
1408 }
1409
FP_BINOP(f32_add, fadd_s)
1411 FP_BINOP(f32_sub, fsub_s)
1412 FP_BINOP(f32_mul, fmul_s)
1413 FP_BINOP(f32_div, fdiv_s)
1414 FP_UNOP(f32_abs, fabs_s)
1415 FP_UNOP_RETURN_TRUE(f32_ceil, Ceil_s_s)
1416 FP_UNOP_RETURN_TRUE(f32_floor, Floor_s_s)
1417 FP_UNOP_RETURN_TRUE(f32_trunc, Trunc_s_s)
1418 FP_UNOP_RETURN_TRUE(f32_nearest_int, Round_s_s)
1419 FP_UNOP(f32_sqrt, fsqrt_s)
1420 FP_BINOP(f64_add, fadd_d)
1421 FP_BINOP(f64_sub, fsub_d)
1422 FP_BINOP(f64_mul, fmul_d)
1423 FP_BINOP(f64_div, fdiv_d)
1424 FP_UNOP(f64_abs, fabs_d)
1425 FP_UNOP_RETURN_TRUE(f64_ceil, Ceil_d_d)
1426 FP_UNOP_RETURN_TRUE(f64_floor, Floor_d_d)
1427 FP_UNOP_RETURN_TRUE(f64_trunc, Trunc_d_d)
1428 FP_UNOP_RETURN_TRUE(f64_nearest_int, Round_d_d)
1429 FP_UNOP(f64_sqrt, fsqrt_d)
1430
1431 #undef FP_BINOP
1432 #undef FP_UNOP
1433 #undef FP_UNOP_RETURN_TRUE
1434
1435 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
1436 LiftoffRegister dst,
1437 LiftoffRegister src, Label* trap) {
1438 switch (opcode) {
1439 case kExprI32ConvertI64:
      // According to the WebAssembly spec, I64-to-I32 conversion keeps only
      // the low 32 bits (wrapping). Sign-extending the low word implements
      // this truncation on RISC-V64.
1443 TurboAssembler::SignExtendWord(dst.gp(), src.gp());
1444 return true;
1445 case kExprI32SConvertF32:
1446 case kExprI32UConvertF32:
1447 case kExprI32SConvertF64:
1448 case kExprI32UConvertF64:
1449 case kExprI64SConvertF32:
1450 case kExprI64UConvertF32:
1451 case kExprI64SConvertF64:
1452 case kExprI64UConvertF64:
1453 case kExprF32ConvertF64: {
      // Real conversion. If src is out of the range of the target integer
      // type, kScratchReg is set to 0.
1456 switch (opcode) {
1457 case kExprI32SConvertF32:
1458 Trunc_w_s(dst.gp(), src.fp(), kScratchReg);
1459 break;
1460 case kExprI32UConvertF32:
1461 Trunc_uw_s(dst.gp(), src.fp(), kScratchReg);
1462 break;
1463 case kExprI32SConvertF64:
1464 Trunc_w_d(dst.gp(), src.fp(), kScratchReg);
1465 break;
1466 case kExprI32UConvertF64:
1467 Trunc_uw_d(dst.gp(), src.fp(), kScratchReg);
1468 break;
1469 case kExprI64SConvertF32:
1470 Trunc_l_s(dst.gp(), src.fp(), kScratchReg);
1471 break;
1472 case kExprI64UConvertF32:
1473 Trunc_ul_s(dst.gp(), src.fp(), kScratchReg);
1474 break;
1475 case kExprI64SConvertF64:
1476 Trunc_l_d(dst.gp(), src.fp(), kScratchReg);
1477 break;
1478 case kExprI64UConvertF64:
1479 Trunc_ul_d(dst.gp(), src.fp(), kScratchReg);
1480 break;
1481 case kExprF32ConvertF64:
1482 fcvt_s_d(dst.fp(), src.fp());
1483 break;
1484 default:
1485 UNREACHABLE();
1486 }
1487
      // Trap if the conversion was out of range (kScratchReg == 0).
1489 if (trap != nullptr) {
1490 TurboAssembler::Branch(trap, eq, kScratchReg, Operand(zero_reg));
1491 }
1492
1493 return true;
1494 }
1495 case kExprI32ReinterpretF32:
1496 TurboAssembler::ExtractLowWordFromF64(dst.gp(), src.fp());
1497 return true;
1498 case kExprI64SConvertI32:
1499 TurboAssembler::SignExtendWord(dst.gp(), src.gp());
1500 return true;
1501 case kExprI64UConvertI32:
1502 TurboAssembler::ZeroExtendWord(dst.gp(), src.gp());
1503 return true;
1504 case kExprI64ReinterpretF64:
1505 fmv_x_d(dst.gp(), src.fp());
1506 return true;
1507 case kExprF32SConvertI32: {
1508 TurboAssembler::Cvt_s_w(dst.fp(), src.gp());
1509 return true;
1510 }
1511 case kExprF32UConvertI32:
1512 TurboAssembler::Cvt_s_uw(dst.fp(), src.gp());
1513 return true;
1514 case kExprF32ReinterpretI32:
1515 fmv_w_x(dst.fp(), src.gp());
1516 return true;
1517 case kExprF64SConvertI32: {
1518 TurboAssembler::Cvt_d_w(dst.fp(), src.gp());
1519 return true;
1520 }
1521 case kExprF64UConvertI32:
1522 TurboAssembler::Cvt_d_uw(dst.fp(), src.gp());
1523 return true;
1524 case kExprF64ConvertF32:
1525 fcvt_d_s(dst.fp(), src.fp());
1526 return true;
1527 case kExprF64ReinterpretI64:
1528 fmv_d_x(dst.fp(), src.gp());
1529 return true;
1530 case kExprI32SConvertSatF32: {
1531 fcvt_w_s(dst.gp(), src.fp(), RTZ);
1532 Clear_if_nan_s(dst.gp(), src.fp());
1533 return true;
1534 }
1535 case kExprI32UConvertSatF32: {
1536 fcvt_wu_s(dst.gp(), src.fp(), RTZ);
1537 Clear_if_nan_s(dst.gp(), src.fp());
1538 return true;
1539 }
1540 case kExprI32SConvertSatF64: {
1541 fcvt_w_d(dst.gp(), src.fp(), RTZ);
1542 Clear_if_nan_d(dst.gp(), src.fp());
1543 return true;
1544 }
1545 case kExprI32UConvertSatF64: {
1546 fcvt_wu_d(dst.gp(), src.fp(), RTZ);
1547 Clear_if_nan_d(dst.gp(), src.fp());
1548 return true;
1549 }
1550 case kExprI64SConvertSatF32: {
1551 fcvt_l_s(dst.gp(), src.fp(), RTZ);
1552 Clear_if_nan_s(dst.gp(), src.fp());
1553 return true;
1554 }
1555 case kExprI64UConvertSatF32: {
1556 fcvt_lu_s(dst.gp(), src.fp(), RTZ);
1557 Clear_if_nan_s(dst.gp(), src.fp());
1558 return true;
1559 }
1560 case kExprI64SConvertSatF64: {
1561 fcvt_l_d(dst.gp(), src.fp(), RTZ);
1562 Clear_if_nan_d(dst.gp(), src.fp());
1563 return true;
1564 }
1565 case kExprI64UConvertSatF64: {
1566 fcvt_lu_d(dst.gp(), src.fp(), RTZ);
1567 Clear_if_nan_d(dst.gp(), src.fp());
1568 return true;
1569 }
1570 default:
1571 return false;
1572 }
1573 }
1574
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
1576 slliw(dst, src, 32 - 8);
1577 sraiw(dst, dst, 32 - 8);
1578 }
1579
void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
1581 slliw(dst, src, 32 - 16);
1582 sraiw(dst, dst, 32 - 16);
1583 }
1584
void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
1587 slli(dst.gp(), src.gp(), 64 - 8);
1588 srai(dst.gp(), dst.gp(), 64 - 8);
1589 }
1590
void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
1593 slli(dst.gp(), src.gp(), 64 - 16);
1594 srai(dst.gp(), dst.gp(), 64 - 16);
1595 }
1596
void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
1599 slli(dst.gp(), src.gp(), 64 - 32);
1600 srai(dst.gp(), dst.gp(), 64 - 32);
1601 }
1602
void LiftoffAssembler::emit_jump(Label* label) {
1604 TurboAssembler::Branch(label);
1605 }
1606
void LiftoffAssembler::emit_jump(Register target) {
1608 TurboAssembler::Jump(target);
1609 }
1610
1611 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
1612 Label* label, ValueKind kind,
1613 Register lhs, Register rhs) {
1614 Condition cond = liftoff::ToCondition(liftoff_cond);
1615 if (rhs == no_reg) {
1616 DCHECK(kind == kI32 || kind == kI64);
1617 TurboAssembler::Branch(label, cond, lhs, Operand(zero_reg));
1618 } else {
1619 DCHECK((kind == kI32 || kind == kI64) ||
1620 (is_reference(kind) &&
1621 (liftoff_cond == kEqual || liftoff_cond == kUnequal)));
1622 TurboAssembler::Branch(label, cond, lhs, Operand(rhs));
1623 }
1624 }
1625
1626 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
1627 Label* label, Register lhs,
1628 int32_t imm) {
1629 Condition cond = liftoff::ToCondition(liftoff_cond);
1630 TurboAssembler::Branch(label, cond, lhs, Operand(imm));
1631 }
1632
1633 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
1634 int subtrahend,
1635 Label* result_negative) {
1636 Sub64(value, value, Operand(subtrahend));
1637 TurboAssembler::Branch(result_negative, lt, value, Operand(zero_reg));
1638 }
1639
1640 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
1641 TurboAssembler::Sltu(dst, src, 1);
1642 }
1643
1644 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
1645 Register dst, Register lhs,
1646 Register rhs) {
1647 Condition cond = liftoff::ToCondition(liftoff_cond);
1648 TurboAssembler::CompareI(dst, lhs, Operand(rhs), cond);
1649 }
1650
1651 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
1652 TurboAssembler::Sltu(dst, src.gp(), 1);
1653 }
1654
1655 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
1656 Register dst, LiftoffRegister lhs,
1657 LiftoffRegister rhs) {
1658 Condition cond = liftoff::ToCondition(liftoff_cond);
1659 TurboAssembler::CompareI(dst, lhs.gp(), Operand(rhs.gp()), cond);
1660 }
1661
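// Maps Liftoff conditions to FPU compare conditions. Only equality and the
// unsigned condition slots are mapped; the signed variants are not expected
// for floating-point compares and fall through to UNREACHABLE().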
1662 static FPUCondition ConditionToConditionCmpFPU(LiftoffCondition condition) {
1663 switch (condition) {
1664 case kEqual:
1665 return EQ;
1666 case kUnequal:
1667 return NE;
1668 case kUnsignedLessThan:
1669 return LT;
1670 case kUnsignedGreaterEqual:
1671 return GE;
1672 case kUnsignedLessEqual:
1673 return LE;
1674 case kUnsignedGreaterThan:
1675 return GT;
1676 default:
1677 break;
1678 }
1679 UNREACHABLE();
1680 }
1681
1682 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
1683 Register dst, DoubleRegister lhs,
1684 DoubleRegister rhs) {
1685 FPUCondition fcond = ConditionToConditionCmpFPU(liftoff_cond);
1686 TurboAssembler::CompareF32(dst, fcond, lhs, rhs);
1687 }
1688
1689 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
1690 Register dst, DoubleRegister lhs,
1691 DoubleRegister rhs) {
1692 FPUCondition fcond = ConditionToConditionCmpFPU(liftoff_cond);
1693 TurboAssembler::CompareF64(dst, fcond, lhs, rhs);
1694 }
1695
1696 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
1697 LiftoffRegister true_value,
1698 LiftoffRegister false_value,
1699 ValueKind kind) {
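  // No RISC-V specific select sequence is emitted; returning false makes the
  // caller fall back to the generic select lowering.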
1700 return false;
1701 }
1702
1703 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
1704 SmiCheckMode mode) {
1705 UseScratchRegisterScope temps(this);
1706 Register scratch = temps.Acquire();
1707 And(scratch, obj, Operand(kSmiTagMask));
1708 Condition condition = mode == kJumpOnSmi ? eq : ne;
1709 Branch(target, condition, scratch, Operand(zero_reg));
1710 }
1711
1712 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1713 UseScratchRegisterScope temps(this);
1714 if (COMPRESS_POINTERS_BOOL) {
1715 DCHECK(SmiValuesAre31Bits());
1716 Register scratch = temps.Acquire();
1717 Lw(scratch, MemOperand(dst.gp(), offset));
1718 Add32(scratch, scratch, Operand(Smi::FromInt(1)));
1719 Sw(scratch, MemOperand(dst.gp(), offset));
1720 } else {
1721 Register scratch = temps.Acquire();
1722 SmiUntag(scratch, MemOperand(dst.gp(), offset));
1723 Add64(scratch, scratch, Operand(1));
1724 SmiTag(scratch);
1725 Sd(scratch, MemOperand(dst.gp(), offset));
1726 }
1727 }
1728
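// Loads a value and transforms it into a SIMD register: kExtend widens each
// loaded lane to twice its width (signed or unsigned), kZeroExtend loads a
// single scalar into lane 0 with the remaining lanes cleared, and kSplat
// broadcasts the loaded scalar to every lane.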
1729 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
1730 Register offset_reg, uintptr_t offset_imm,
1731 LoadType type,
1732 LoadTransformationKind transform,
1733 uint32_t* protected_load_pc) {
1734 UseScratchRegisterScope temps(this);
1735 Register scratch = temps.Acquire();
1736 MemOperand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
1737 VRegister dst_v = dst.fp().toV();
1738 *protected_load_pc = pc_offset();
1739
1740 MachineType memtype = type.mem_type();
1741 if (transform == LoadTransformationKind::kExtend) {
1742 Ld(scratch, src_op);
1743 if (memtype == MachineType::Int8()) {
1744 VU.set(kScratchReg, E64, m1);
1745 vmv_vx(kSimd128ScratchReg, scratch);
1746 VU.set(kScratchReg, E16, m1);
1747 vsext_vf2(dst_v, kSimd128ScratchReg);
1748 } else if (memtype == MachineType::Uint8()) {
1749 VU.set(kScratchReg, E64, m1);
1750 vmv_vx(kSimd128ScratchReg, scratch);
1751 VU.set(kScratchReg, E16, m1);
1752 vzext_vf2(dst_v, kSimd128ScratchReg);
1753 } else if (memtype == MachineType::Int16()) {
1754 VU.set(kScratchReg, E64, m1);
1755 vmv_vx(kSimd128ScratchReg, scratch);
1756 VU.set(kScratchReg, E32, m1);
1757 vsext_vf2(dst_v, kSimd128ScratchReg);
1758 } else if (memtype == MachineType::Uint16()) {
1759 VU.set(kScratchReg, E64, m1);
1760 vmv_vx(kSimd128ScratchReg, scratch);
1761 VU.set(kScratchReg, E32, m1);
1762 vzext_vf2(dst_v, kSimd128ScratchReg);
1763 } else if (memtype == MachineType::Int32()) {
1764 VU.set(kScratchReg, E64, m1);
1765 vmv_vx(kSimd128ScratchReg, scratch);
1766 vsext_vf2(dst_v, kSimd128ScratchReg);
1767 } else if (memtype == MachineType::Uint32()) {
1768 VU.set(kScratchReg, E64, m1);
1769 vmv_vx(kSimd128ScratchReg, scratch);
1770 vzext_vf2(dst_v, kSimd128ScratchReg);
1771 }
1772 } else if (transform == LoadTransformationKind::kZeroExtend) {
1773 vxor_vv(dst_v, dst_v, dst_v);
1774 if (memtype == MachineType::Int32()) {
1775 VU.set(kScratchReg, E32, m1);
1776 Lwu(scratch, src_op);
1777 vmv_sx(dst_v, scratch);
1778 } else {
1779 DCHECK_EQ(MachineType::Int64(), memtype);
1780 VU.set(kScratchReg, E64, m1);
1781 Ld(scratch, src_op);
1782 vmv_sx(dst_v, scratch);
1783 }
1784 } else {
1785 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
1786 if (memtype == MachineType::Int8()) {
1787 VU.set(kScratchReg, E8, m1);
1788 Lb(scratch, src_op);
1789 vmv_vx(dst_v, scratch);
1790 } else if (memtype == MachineType::Int16()) {
1791 VU.set(kScratchReg, E16, m1);
1792 Lh(scratch, src_op);
1793 vmv_vx(dst_v, scratch);
1794 } else if (memtype == MachineType::Int32()) {
1795 VU.set(kScratchReg, E32, m1);
1796 Lw(scratch, src_op);
1797 vmv_vx(dst_v, scratch);
1798 } else if (memtype == MachineType::Int64()) {
1799 VU.set(kScratchReg, E64, m1);
1800 Ld(scratch, src_op);
1801 vmv_vx(dst_v, scratch);
1802 }
1803 }
1804 }
1805
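// Loads a single scalar and merges it into lane `laneidx` of dst: a one-hot
// lane mask is materialized in v0 and vmerge_vx overwrites only the selected
// lane, leaving the other lanes of dst unchanged.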
1806 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
1807 Register addr, Register offset_reg,
1808 uintptr_t offset_imm, LoadType type,
1809 uint8_t laneidx, uint32_t* protected_load_pc) {
1810 MemOperand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);
1811 MachineType mem_type = type.mem_type();
1812 UseScratchRegisterScope temps(this);
1813 Register scratch = temps.Acquire();
1814 *protected_load_pc = pc_offset();
1815 if (mem_type == MachineType::Int8()) {
1816 Lbu(scratch, src_op);
1817 VU.set(kScratchReg, E64, m1);
1818 li(kScratchReg, 0x1 << laneidx);
1819 vmv_sx(v0, kScratchReg);
1820 VU.set(kScratchReg, E8, m1);
1821 vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
1822 } else if (mem_type == MachineType::Int16()) {
1823 Lhu(scratch, src_op);
1824 VU.set(kScratchReg, E16, m1);
1825 li(kScratchReg, 0x1 << laneidx);
1826 vmv_sx(v0, kScratchReg);
1827 vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
1828 } else if (mem_type == MachineType::Int32()) {
1829 Lwu(scratch, src_op);
1830 VU.set(kScratchReg, E32, m1);
1831 li(kScratchReg, 0x1 << laneidx);
1832 vmv_sx(v0, kScratchReg);
1833 vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
1834 } else if (mem_type == MachineType::Int64()) {
1835 Ld(scratch, src_op);
1836 VU.set(kScratchReg, E64, m1);
1837 li(kScratchReg, 0x1 << laneidx);
1838 vmv_sx(v0, kScratchReg);
1839 vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
1840 } else {
1841 UNREACHABLE();
1842 }
1843 }
1844
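// Stores a single lane: the lane is slid down to element 0, moved to a GPR
// with vmv_xs, and written back with an ordinary scalar store.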
1845 void LiftoffAssembler::StoreLane(Register dst, Register offset,
1846 uintptr_t offset_imm, LiftoffRegister src,
1847 StoreType type, uint8_t lane,
1848 uint32_t* protected_store_pc) {
1849 MemOperand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
1850 if (protected_store_pc) *protected_store_pc = pc_offset();
1851 MachineRepresentation rep = type.mem_rep();
1852 if (rep == MachineRepresentation::kWord8) {
1853 VU.set(kScratchReg, E8, m1);
1854 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
1855 vmv_xs(kScratchReg, kSimd128ScratchReg);
1856 Sb(kScratchReg, dst_op);
1857 } else if (rep == MachineRepresentation::kWord16) {
1858 VU.set(kScratchReg, E16, m1);
1859 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
1860 vmv_xs(kScratchReg, kSimd128ScratchReg);
1861 Sh(kScratchReg, dst_op);
1862 } else if (rep == MachineRepresentation::kWord32) {
1863 VU.set(kScratchReg, E32, m1);
1864 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
1865 vmv_xs(kScratchReg, kSimd128ScratchReg);
1866 Sw(kScratchReg, dst_op);
1867 } else {
1868 DCHECK_EQ(MachineRepresentation::kWord64, rep);
1869 VU.set(kScratchReg, E64, m1);
1870 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
1871 vmv_xs(kScratchReg, kSimd128ScratchReg);
1872 Sd(kScratchReg, dst_op);
1873 }
1874 }
1875
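// The 16 shuffle indices are packed into two 64-bit immediates and written
// into kSimd128ScratchReg. Indices 0-15 gather from lhs; indices 16-31 are
// rebased by -16 and gather from rhs; the two gather results are OR'd.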
1876 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
1877 LiftoffRegister lhs,
1878 LiftoffRegister rhs,
1879 const uint8_t shuffle[16],
1880 bool is_swizzle) {
1881 VRegister dst_v = dst.fp().toV();
1882 VRegister lhs_v = lhs.fp().toV();
1883 VRegister rhs_v = rhs.fp().toV();
1884
1885 uint64_t imm1 = *(reinterpret_cast<const uint64_t*>(shuffle));
1886 uint64_t imm2 = *((reinterpret_cast<const uint64_t*>(shuffle)) + 1);
1887 VU.set(kScratchReg, VSew::E64, Vlmul::m1);
1888 li(kScratchReg, imm2);
1889 vmv_sx(kSimd128ScratchReg2, kScratchReg);
1890 vslideup_vi(kSimd128ScratchReg, kSimd128ScratchReg2, 1);
1891 li(kScratchReg, imm1);
1892 vmv_sx(kSimd128ScratchReg, kScratchReg);
1893
1894 VU.set(kScratchReg, E8, m1);
1895 VRegister temp =
1896 GetUnusedRegister(kFpReg, LiftoffRegList{lhs, rhs}).fp().toV();
1897 if (dst_v == lhs_v) {
1898 vmv_vv(temp, lhs_v);
1899 lhs_v = temp;
1900 } else if (dst_v == rhs_v) {
1901 vmv_vv(temp, rhs_v);
1902 rhs_v = temp;
1903 }
1904 vrgather_vv(dst_v, lhs_v, kSimd128ScratchReg);
1905 vadd_vi(kSimd128ScratchReg, kSimd128ScratchReg,
1906 -16); // The indices in range [16, 31] select the i - 16-th element
1907 // of rhs
1908 vrgather_vv(kSimd128ScratchReg2, rhs_v, kSimd128ScratchReg);
1909 vor_vv(dst_v, dst_v, kSimd128ScratchReg2);
1910 }
1911
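// Per-lane popcount: each loop iteration clears the lowest set bit in every
// still-nonzero lane (x &= x - 1) and increments that lane's count under
// mask, until vfirst_m reports that no active lane remains.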
1912 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
1913 LiftoffRegister src) {
1914 VRegister src_v = src.fp().toV();
1915 VRegister dst_v = dst.fp().toV();
1916 Label t;
1917
1918 VU.set(kScratchReg, E8, m1);
1919 vmv_vv(kSimd128ScratchReg, src_v);
1920 vmv_vv(dst_v, kSimd128RegZero);
1921
1922 bind(&t);
1923 vmsne_vv(v0, kSimd128ScratchReg, kSimd128RegZero);
1924 vadd_vi(dst_v, dst_v, 1, Mask);
1925 vadd_vi(kSimd128ScratchReg2, kSimd128ScratchReg, -1, Mask);
1926 vand_vv(kSimd128ScratchReg, kSimd128ScratchReg, kSimd128ScratchReg2);
1927   // kScratchReg is -1 if kSimd128ScratchReg == 0, i.e. no lane is still active.
1928 vfirst_m(kScratchReg, kSimd128ScratchReg);
1929 bgez(kScratchReg, &t);
1930 }
1931
1932 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
1933 LiftoffRegister lhs,
1934 LiftoffRegister rhs) {
1935 VU.set(kScratchReg, E8, m1);
1936 if (dst == lhs) {
1937 vrgather_vv(kSimd128ScratchReg, lhs.fp().toV(), rhs.fp().toV());
1938 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
1939 } else {
1940 vrgather_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
1941 }
1942 }
1943
1944 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
1945 LiftoffRegister src) {
1946 VU.set(kScratchReg, E8, m1);
1947 vmv_vx(dst.fp().toV(), src.gp());
1948 }
1949
1950 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
1951 LiftoffRegister src) {
1952 VU.set(kScratchReg, E16, m1);
1953 vmv_vx(dst.fp().toV(), src.gp());
1954 }
1955
1956 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
1957 LiftoffRegister src) {
1958 VU.set(kScratchReg, E32, m1);
1959 vmv_vx(dst.fp().toV(), src.gp());
1960 }
1961
1962 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
1963 LiftoffRegister src) {
1964 VU.set(kScratchReg, E64, m1);
1965 vmv_vx(dst.fp().toV(), src.gp());
1966 }
1967
1968 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
1969 LiftoffRegister rhs) {
1970 WasmRvvEq(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E64, m1);
1971 }
1972
1973 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
1974 LiftoffRegister rhs) {
1975 WasmRvvNe(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E64, m1);
1976 }
1977
1978 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
1979 LiftoffRegister rhs) {
1980 WasmRvvGtS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E64, m1);
1981 }
1982
1983 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
1984 LiftoffRegister rhs) {
1985 WasmRvvGeS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E64, m1);
1986 }
1987
1988 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
1989 LiftoffRegister src) {
1990 VU.set(kScratchReg, E32, m1);
1991 fmv_x_w(kScratchReg, src.fp());
1992 vmv_vx(dst.fp().toV(), kScratchReg);
1993 }
1994
1995 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
1996 LiftoffRegister src) {
1997 VU.set(kScratchReg, E64, m1);
1998 fmv_x_d(kScratchReg, src.fp());
1999 vmv_vx(dst.fp().toV(), kScratchReg);
2000 }
2001
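// Extended multiplies: vwmul/vwmulu at half LMUL (mf2) widen the products to
// the destination element width. When dst aliases a source operand, the
// product is computed into a scratch register and copied afterwards.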
2002 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
2003 LiftoffRegister src1,
2004 LiftoffRegister src2) {
2005 VU.set(kScratchReg, E32, mf2);
2006 VRegister dst_v = dst.fp().toV();
2007 if (dst == src1 || dst == src2) {
2008 dst_v = kSimd128ScratchReg3;
2009 }
2010 vwmul_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2011 if (dst == src1 || dst == src2) {
2012 VU.set(kScratchReg, E64, m1);
2013 vmv_vv(dst.fp().toV(), dst_v);
2014 }
2015 }
2016
2017 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
2018 LiftoffRegister src1,
2019 LiftoffRegister src2) {
2020 VU.set(kScratchReg, E32, mf2);
2021 VRegister dst_v = dst.fp().toV();
2022 if (dst == src1 || dst == src2) {
2023 dst_v = kSimd128ScratchReg3;
2024 }
2025 vwmulu_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2026 if (dst == src1 || dst == src2) {
2027 VU.set(kScratchReg, E64, m1);
2028 vmv_vv(dst.fp().toV(), dst_v);
2029 }
2030 }
2031
2032 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
2033 LiftoffRegister src1,
2034 LiftoffRegister src2) {
2035 VU.set(kScratchReg, E32, m1);
2036 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 2);
2037 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 2);
2038 VU.set(kScratchReg, E32, mf2);
2039 vwmul_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2040 }
2041
2042 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
2043 LiftoffRegister src1,
2044 LiftoffRegister src2) {
2045 VU.set(kScratchReg, E32, m1);
2046 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 2);
2047 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 2);
2048 VU.set(kScratchReg, E32, mf2);
2049 vwmulu_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2050 }
2051
2052 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
2053 LiftoffRegister src1,
2054 LiftoffRegister src2) {
2055 VU.set(kScratchReg, E16, mf2);
2056 VRegister dst_v = dst.fp().toV();
2057 if (dst == src1 || dst == src2) {
2058 dst_v = kSimd128ScratchReg3;
2059 }
2060 vwmul_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2061 if (dst == src1 || dst == src2) {
2062 VU.set(kScratchReg, E16, m1);
2063 vmv_vv(dst.fp().toV(), dst_v);
2064 }
2065 }
2066
2067 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
2068 LiftoffRegister src1,
2069 LiftoffRegister src2) {
2070 VU.set(kScratchReg, E16, mf2);
2071 VRegister dst_v = dst.fp().toV();
2072 if (dst == src1 || dst == src2) {
2073 dst_v = kSimd128ScratchReg3;
2074 }
2075 vwmulu_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2076 if (dst == src1 || dst == src2) {
2077 VU.set(kScratchReg, E16, m1);
2078 vmv_vv(dst.fp().toV(), dst_v);
2079 }
2080 }
2081
2082 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
2083 LiftoffRegister src1,
2084 LiftoffRegister src2) {
2085 VU.set(kScratchReg, E16, m1);
2086 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 4);
2087 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 4);
2088 VU.set(kScratchReg, E16, mf2);
2089 vwmul_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2090 }
2091
2092 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
2093 LiftoffRegister src1,
2094 LiftoffRegister src2) {
2095 VU.set(kScratchReg, E16, m1);
2096 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 4);
2097 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 4);
2098 VU.set(kScratchReg, E16, mf2);
2099 vwmulu_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2100 }
2101
2102 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
2103 LiftoffRegister src1,
2104 LiftoffRegister src2) {
2105 VU.set(kScratchReg, E8, mf2);
2106 VRegister dst_v = dst.fp().toV();
2107 if (dst == src1 || dst == src2) {
2108 dst_v = kSimd128ScratchReg3;
2109 }
2110 vwmul_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2111 if (dst == src1 || dst == src2) {
2112 VU.set(kScratchReg, E8, m1);
2113 vmv_vv(dst.fp().toV(), dst_v);
2114 }
2115 }
2116
2117 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
2118 LiftoffRegister src1,
2119 LiftoffRegister src2) {
2120 VU.set(kScratchReg, E8, mf2);
2121 VRegister dst_v = dst.fp().toV();
2122 if (dst == src1 || dst == src2) {
2123 dst_v = kSimd128ScratchReg3;
2124 }
2125 vwmulu_vv(dst_v, src2.fp().toV(), src1.fp().toV());
2126 if (dst == src1 || dst == src2) {
2127 VU.set(kScratchReg, E8, m1);
2128 vmv_vv(dst.fp().toV(), dst_v);
2129 }
2130 }
2131
2132 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
2133 LiftoffRegister src1,
2134 LiftoffRegister src2) {
2135 VU.set(kScratchReg, E8, m1);
2136 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 8);
2137 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 8);
2138 VU.set(kScratchReg, E8, mf2);
2139 vwmul_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2140 }
2141
2142 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
2143 LiftoffRegister src1,
2144 LiftoffRegister src2) {
2145 VU.set(kScratchReg, E8, m1);
2146 vslidedown_vi(kSimd128ScratchReg, src1.fp().toV(), 8);
2147 vslidedown_vi(kSimd128ScratchReg2, src2.fp().toV(), 8);
2148 VU.set(kScratchReg, E8, mf2);
2149 vwmulu_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2150 }
2151
2152 #undef SIMD_BINOP
2153
2154 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
2155 LiftoffRegister src1,
2156 LiftoffRegister src2) {
2157 VU.set(kScratchReg, E16, m1);
2158 vsmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
2159 }
2160
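// Bitmask: compare each lane against zero (vmslt), which produces one mask
// bit per lane in the mask register, then read the mask bits back into a GPR.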
2161 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
2162 LiftoffRegister src) {
2163 VU.set(kScratchReg, E64, m1);
2164 vmv_vx(kSimd128RegZero, zero_reg);
2165 vmslt_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128RegZero);
2166 VU.set(kScratchReg, E32, m1);
2167 vmv_xs(dst.gp(), kSimd128ScratchReg);
2168 }
2169
2170 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
2171 LiftoffRegister src) {
2172 VU.set(kScratchReg, E64, m1);
2173 vmv_vv(kSimd128ScratchReg, src.fp().toV());
2174 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
2175 }
2176
2177 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
2178 LiftoffRegister src) {
2179 VU.set(kScratchReg, E32, m1);
2180 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 2);
2181 VU.set(kScratchReg, E64, m1);
2182 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
2183 }
2184
2185 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
2186 LiftoffRegister src) {
2187 VU.set(kScratchReg, E64, m1);
2188 vmv_vv(kSimd128ScratchReg, src.fp().toV());
2189 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
2190 }
2191
2192 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
2193 LiftoffRegister src) {
2194 VU.set(kScratchReg, E32, m1);
2195 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 2);
2196 VU.set(kScratchReg, E64, m1);
2197 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
2198 }
2199
2200 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2201 LiftoffRegister rhs) {
2202 WasmRvvEq(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2203 }
2204
2205 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2206 LiftoffRegister rhs) {
2207 WasmRvvNe(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2208 }
2209
2210 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2211 LiftoffRegister rhs) {
2212 WasmRvvGtS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2213 }
2214
2215 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2216 LiftoffRegister rhs) {
2217 WasmRvvGtU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2218 }
2219
2220 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2221 LiftoffRegister rhs) {
2222 WasmRvvGeS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2223 }
2224
2225 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2226 LiftoffRegister rhs) {
2227 WasmRvvGeU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
2228 }
2229
2230 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2231 LiftoffRegister rhs) {
2232 WasmRvvEq(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2233 }
2234
2235 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2236 LiftoffRegister rhs) {
2237 WasmRvvNe(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2238 }
2239
2240 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2241 LiftoffRegister rhs) {
2242 WasmRvvGtS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2243 }
2244
2245 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2246 LiftoffRegister rhs) {
2247 WasmRvvGtU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2248 }
2249
2250 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2251 LiftoffRegister rhs) {
2252 WasmRvvGeS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2253 }
2254
2255 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2256 LiftoffRegister rhs) {
2257 WasmRvvGeU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
2258 }
2259
2260 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2261 LiftoffRegister rhs) {
2262 WasmRvvEq(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2263 }
2264
2265 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2266 LiftoffRegister rhs) {
2267 WasmRvvNe(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2268 }
2269
2270 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2271 LiftoffRegister rhs) {
2272 WasmRvvGtS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2273 }
2274
2275 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2276 LiftoffRegister rhs) {
2277 WasmRvvGtU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2278 }
2279
2280 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2281 LiftoffRegister rhs) {
2282 WasmRvvGeS(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2283 }
2284
2285 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2286 LiftoffRegister rhs) {
2287 WasmRvvGeU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E32, m1);
2288 }
2289
2290 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2291 LiftoffRegister rhs) {
2292 VU.set(kScratchReg, E32, m1);
2293 vmfeq_vv(v0, rhs.fp().toV(), lhs.fp().toV());
2294 vmv_vx(dst.fp().toV(), zero_reg);
2295 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2296 }
2297
2298 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2299 LiftoffRegister rhs) {
2300 VU.set(kScratchReg, E32, m1);
2301 vmfne_vv(v0, rhs.fp().toV(), lhs.fp().toV());
2302 vmv_vx(dst.fp().toV(), zero_reg);
2303 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2304 }
2305
2306 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
2307 LiftoffRegister rhs) {
2308 VU.set(kScratchReg, E32, m1);
2309 vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
2310 vmv_vx(dst.fp().toV(), zero_reg);
2311 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2312 }
2313
2314 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
2315 LiftoffRegister rhs) {
2316 VU.set(kScratchReg, E32, m1);
2317 vmfle_vv(v0, lhs.fp().toV(), rhs.fp().toV());
2318 vmv_vx(dst.fp().toV(), zero_reg);
2319 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2320 }
2321
2322 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
2323 LiftoffRegister src) {
2324 VU.set(kScratchReg, E32, mf2);
2325 if (dst.fp().toV() != src.fp().toV()) {
2326 vfwcvt_f_x_v(dst.fp().toV(), src.fp().toV());
2327 } else {
2328 vfwcvt_f_x_v(kSimd128ScratchReg3, src.fp().toV());
2329 VU.set(kScratchReg, E64, m1);
2330 vmv_vv(dst.fp().toV(), kSimd128ScratchReg3);
2331 }
2332 }
2333
2334 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
2335 LiftoffRegister src) {
2336 VU.set(kScratchReg, E32, mf2);
2337 if (dst.fp().toV() != src.fp().toV()) {
2338 vfwcvt_f_xu_v(dst.fp().toV(), src.fp().toV());
2339 } else {
2340 vfwcvt_f_xu_v(kSimd128ScratchReg3, src.fp().toV());
2341 VU.set(kScratchReg, E64, m1);
2342 vmv_vv(dst.fp().toV(), kSimd128ScratchReg3);
2343 }
2344 }
2345
2346 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
2347 LiftoffRegister src) {
2348 VU.set(kScratchReg, E32, mf2);
2349 if (dst.fp().toV() != src.fp().toV()) {
2350 vfwcvt_f_f_v(dst.fp().toV(), src.fp().toV());
2351 } else {
2352 vfwcvt_f_f_v(kSimd128ScratchReg3, src.fp().toV());
2353 VU.set(kScratchReg, E64, m1);
2354 vmv_vv(dst.fp().toV(), kSimd128ScratchReg3);
2355 }
2356 }
2357
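// Narrow the two f64 lanes to f32 in the low half, then use the 0b1100 lane
// mask to zero the upper two f32 lanes.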
2358 void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
2359 LiftoffRegister src) {
2360 VU.set(kScratchReg, E32, mf2);
2361 vfncvt_f_f_w(dst.fp().toV(), src.fp().toV());
2362 VU.set(kScratchReg, E32, m1);
2363 vmv_vi(v0, 12);
2364 vmerge_vx(dst.fp().toV(), zero_reg, dst.fp().toV());
2365 }
2366
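// Truncate the two f64 lanes to i32 with RTZ under a mask that excludes NaN
// lanes (vmfeq of src with itself), so NaN lanes keep the pre-cleared zero;
// the upper two result lanes also remain zero.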
2367 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
2368 LiftoffRegister src) {
2369 VU.set(kScratchReg, E64, m1);
2370 vmv_vx(kSimd128ScratchReg, zero_reg);
2371 vmfeq_vv(v0, src.fp().toV(), src.fp().toV());
2372 vmv_vv(kSimd128ScratchReg3, src.fp().toV());
2373 VU.set(kScratchReg, E32, m1);
2374 VU.set(RoundingMode::RTZ);
2375 vfncvt_x_f_w(kSimd128ScratchReg, kSimd128ScratchReg3, MaskType::Mask);
2376 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
2377 }
2378
2379 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
2380 LiftoffRegister src) {
2381 VU.set(kScratchReg, E64, m1);
2382 vmv_vx(kSimd128ScratchReg, zero_reg);
2383 vmfeq_vv(v0, src.fp().toV(), src.fp().toV());
2384 vmv_vv(kSimd128ScratchReg3, src.fp().toV());
2385 VU.set(kScratchReg, E32, m1);
2386 VU.set(RoundingMode::RTZ);
2387 vfncvt_xu_f_w(kSimd128ScratchReg, kSimd128ScratchReg3, MaskType::Mask);
2388 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
2389 }
2390
2391 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2392 LiftoffRegister rhs) {
2393 VU.set(kScratchReg, E64, m1);
2394 vmfeq_vv(v0, rhs.fp().toV(), lhs.fp().toV());
2395 vmv_vx(dst.fp().toV(), zero_reg);
2396 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2397 }
2398
2399 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2400 LiftoffRegister rhs) {
2401 VU.set(kScratchReg, E64, m1);
2402 vmfne_vv(v0, rhs.fp().toV(), lhs.fp().toV());
2403 vmv_vx(dst.fp().toV(), zero_reg);
2404 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2405 }
2406
2407 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
2408 LiftoffRegister rhs) {
2409 VU.set(kScratchReg, E64, m1);
2410 vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
2411 vmv_vx(dst.fp().toV(), zero_reg);
2412 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2413 }
2414
2415 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
2416 LiftoffRegister rhs) {
2417 VU.set(kScratchReg, E64, m1);
2418 vmfle_vv(v0, lhs.fp().toV(), rhs.fp().toV());
2419 vmv_vx(dst.fp().toV(), zero_reg);
2420 vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
2421 }
2422
2423 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
2424 const uint8_t imms[16]) {
2425 WasmRvvS128const(dst.fp().toV(), imms);
2426 }
2427
2428 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
2429 VU.set(kScratchReg, E8, m1);
2430 vnot_vv(dst.fp().toV(), src.fp().toV());
2431 }
2432
2433 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
2434 LiftoffRegister rhs) {
2435 VU.set(kScratchReg, E8, m1);
2436 vand_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2437 }
2438
2439 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
2440 LiftoffRegister rhs) {
2441 VU.set(kScratchReg, E8, m1);
2442 vor_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2443 }
2444
2445 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
2446 LiftoffRegister rhs) {
2447 VU.set(kScratchReg, E8, m1);
2448 vxor_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2449 }
2450
2451 void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
2452 LiftoffRegister lhs,
2453 LiftoffRegister rhs) {
2454 VU.set(kScratchReg, E8, m1);
2455 vnot_vv(dst.fp().toV(), rhs.fp().toV());
2456 vand_vv(dst.fp().toV(), lhs.fp().toV(), dst.fp().toV());
2457 }
2458
2459 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
2460 LiftoffRegister src1,
2461 LiftoffRegister src2,
2462 LiftoffRegister mask) {
2463 VU.set(kScratchReg, E8, m1);
2464 vand_vv(kSimd128ScratchReg, src1.fp().toV(), mask.fp().toV());
2465 vnot_vv(kSimd128ScratchReg2, mask.fp().toV());
2466 vand_vv(kSimd128ScratchReg2, src2.fp().toV(), kSimd128ScratchReg2);
2467 vor_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
2468 }
2469
2470 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
2471 LiftoffRegister src) {
2472 VU.set(kScratchReg, E8, m1);
2473 vneg_vv(dst.fp().toV(), src.fp().toV());
2474 }
2475
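// anytrue: an unsigned max reduction over all lanes; the reduced value is
// then normalized to 0 or 1.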
2476 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
2477 LiftoffRegister src) {
2478 VU.set(kScratchReg, E8, m1);
2479 Label t;
2480 vmv_sx(kSimd128ScratchReg, zero_reg);
2481 vredmaxu_vs(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg);
2482 vmv_xs(dst.gp(), kSimd128ScratchReg);
2483 beq(dst.gp(), zero_reg, &t);
2484 li(dst.gp(), 1);
2485 bind(&t);
2486 }
2487
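// alltrue: an unsigned min reduction seeded with -1; any zero lane leaves dst
// at 0, otherwise dst is set to 1. The same pattern is used for the wider
// lane sizes below.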
2488 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
2489 LiftoffRegister src) {
2490 VU.set(kScratchReg, E8, m1);
2491 Label alltrue;
2492 li(kScratchReg, -1);
2493 vmv_sx(kSimd128ScratchReg, kScratchReg);
2494 vredminu_vs(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg);
2495 vmv_xs(dst.gp(), kSimd128ScratchReg);
2496 beqz(dst.gp(), &alltrue);
2497 li(dst.gp(), 1);
2498 bind(&alltrue);
2499 }
2500
2501 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
2502 LiftoffRegister src) {
2503 VU.set(kScratchReg, E8, m1);
2504 vmv_vx(kSimd128RegZero, zero_reg);
2505 vmslt_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128RegZero);
2506 VU.set(kScratchReg, E32, m1);
2507 vmv_xs(dst.gp(), kSimd128ScratchReg);
2508 }
2509
2510 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
2511 LiftoffRegister rhs) {
2512 VU.set(kScratchReg, E8, m1);
2513 andi(rhs.gp(), rhs.gp(), 8 - 1);
2514 vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2515 }
2516
2517 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
2518 int32_t rhs) {
2519 DCHECK(is_uint5(rhs));
2520 VU.set(kScratchReg, E8, m1);
2521 vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
2522 }
2523
2524 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
2525 LiftoffRegister lhs,
2526 LiftoffRegister rhs) {
2527 VU.set(kScratchReg, E8, m1);
2528 andi(rhs.gp(), rhs.gp(), 8 - 1);
2529 vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2530 }
2531
2532 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
2533 LiftoffRegister lhs, int32_t rhs) {
2534 VU.set(kScratchReg, E8, m1);
2535 vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
2536 }
2537
2538 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
2539 LiftoffRegister lhs,
2540 LiftoffRegister rhs) {
2541 VU.set(kScratchReg, E8, m1);
2542 andi(rhs.gp(), rhs.gp(), 8 - 1);
2543 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2544 }
2545
2546 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
2547 LiftoffRegister lhs, int32_t rhs) {
2548 VU.set(kScratchReg, E8, m1);
2549 vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
2550 }
2551
2552 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
2553 LiftoffRegister rhs) {
2554 VU.set(kScratchReg, E8, m1);
2555 vadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2556 }
2557
2558 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
2559 LiftoffRegister lhs,
2560 LiftoffRegister rhs) {
2561 VU.set(kScratchReg, E8, m1);
2562 vsadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2563 }
2564
2565 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
2566 LiftoffRegister lhs,
2567 LiftoffRegister rhs) {
2568 VU.set(kScratchReg, E8, m1);
2569 vsaddu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2570 }
2571
2572 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
2573 LiftoffRegister rhs) {
2574 VU.set(kScratchReg, E8, m1);
2575 vsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2576 }
2577
2578 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
2579 LiftoffRegister lhs,
2580 LiftoffRegister rhs) {
2581 VU.set(kScratchReg, E8, m1);
2582 vssub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2583 }
2584
2585 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
2586 LiftoffRegister lhs,
2587 LiftoffRegister rhs) {
2588 VU.set(kScratchReg, E8, m1);
2589 vssubu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2590 }
2591
2592 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
2593 LiftoffRegister lhs,
2594 LiftoffRegister rhs) {
2595 VU.set(kScratchReg, E8, m1);
2596 vmin_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2597 }
2598
2599 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
2600 LiftoffRegister lhs,
2601 LiftoffRegister rhs) {
2602 VU.set(kScratchReg, E8, m1);
2603 vminu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2604 }
2605
2606 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
2607 LiftoffRegister lhs,
2608 LiftoffRegister rhs) {
2609 VU.set(kScratchReg, E8, m1);
2610 vmax_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2611 }
2612
2613 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
2614 LiftoffRegister lhs,
2615 LiftoffRegister rhs) {
2616 VU.set(kScratchReg, E8, m1);
2617 vmaxu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2618 }
2619
2620 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
2621 LiftoffRegister src) {
2622 VU.set(kScratchReg, E16, m1);
2623 vneg_vv(dst.fp().toV(), src.fp().toV());
2624 }
2625
2626 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
2627 LiftoffRegister src) {
2628 VU.set(kScratchReg, E16, m1);
2629 Label alltrue;
2630 li(kScratchReg, -1);
2631 vmv_sx(kSimd128ScratchReg, kScratchReg);
2632 vredminu_vs(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg);
2633 vmv_xs(dst.gp(), kSimd128ScratchReg);
2634 beqz(dst.gp(), &alltrue);
2635 li(dst.gp(), 1);
2636 bind(&alltrue);
2637 }
2638
2639 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
2640 LiftoffRegister src) {
2641 VU.set(kScratchReg, E16, m1);
2642 vmv_vx(kSimd128RegZero, zero_reg);
2643 vmslt_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128RegZero);
2644 VU.set(kScratchReg, E32, m1);
2645 vmv_xs(dst.gp(), kSimd128ScratchReg);
2646 }
2647
2648 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
2649 LiftoffRegister rhs) {
2650 VU.set(kScratchReg, E16, m1);
2651 andi(rhs.gp(), rhs.gp(), 16 - 1);
2652 vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2653 }
2654
2655 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
2656 int32_t rhs) {
2657 VU.set(kScratchReg, E16, m1);
2658 vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
2659 }
2660
2661 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
2662 LiftoffRegister lhs,
2663 LiftoffRegister rhs) {
2664 VU.set(kScratchReg, E16, m1);
2665 andi(rhs.gp(), rhs.gp(), 16 - 1);
2666 vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2667 }
2668
2669 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
2670 LiftoffRegister lhs, int32_t rhs) {
2671 VU.set(kScratchReg, E16, m1);
2672 vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
2673 }
2674
2675 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
2676 LiftoffRegister lhs,
2677 LiftoffRegister rhs) {
2678 VU.set(kScratchReg, E16, m1);
2679 andi(rhs.gp(), rhs.gp(), 16 - 1);
2680 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2681 }
2682
2683 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
2684 LiftoffRegister lhs, int32_t rhs) {
2685 DCHECK(is_uint5(rhs));
2686 VU.set(kScratchReg, E16, m1);
2687 vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
2688 }
2689
2690 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
2691 LiftoffRegister rhs) {
2692 VU.set(kScratchReg, E16, m1);
2693 vadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2694 }
2695
2696 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
2697 LiftoffRegister lhs,
2698 LiftoffRegister rhs) {
2699 VU.set(kScratchReg, E16, m1);
2700 vsadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2701 }
2702
2703 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
2704 LiftoffRegister lhs,
2705 LiftoffRegister rhs) {
2706 VU.set(kScratchReg, E16, m1);
2707 vsaddu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2708 }
2709
2710 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
2711 LiftoffRegister rhs) {
2712 VU.set(kScratchReg, E16, m1);
2713 vsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2714 }
2715
2716 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
2717 LiftoffRegister lhs,
2718 LiftoffRegister rhs) {
2719 VU.set(kScratchReg, E16, m1);
2720 vssub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2721 }
2722
2723 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
2724 LiftoffRegister lhs,
2725 LiftoffRegister rhs) {
2726 VU.set(kScratchReg, E16, m1);
2727 vssubu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2728 }
2729
2730 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
2731 LiftoffRegister rhs) {
2732 VU.set(kScratchReg, E16, m1);
2733 vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2734 }
2735
2736 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
2737 LiftoffRegister lhs,
2738 LiftoffRegister rhs) {
2739 VU.set(kScratchReg, E16, m1);
2740 vmin_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2741 }
2742
2743 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
2744 LiftoffRegister lhs,
2745 LiftoffRegister rhs) {
2746 VU.set(kScratchReg, E16, m1);
2747 vminu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2748 }
2749
2750 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
2751 LiftoffRegister lhs,
2752 LiftoffRegister rhs) {
2753 VU.set(kScratchReg, E16, m1);
2754 vmax_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2755 }
2756
2757 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
2758 LiftoffRegister lhs,
2759 LiftoffRegister rhs) {
2760 VU.set(kScratchReg, E16, m1);
2761 vmaxu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2762 }
2763
2764 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
2765 LiftoffRegister src) {
2766 VU.set(kScratchReg, E32, m1);
2767 vneg_vv(dst.fp().toV(), src.fp().toV());
2768 }
2769
2770 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
2771 LiftoffRegister src) {
2772 VU.set(kScratchReg, E32, m1);
2773 Label alltrue;
2774 li(kScratchReg, -1);
2775 vmv_sx(kSimd128ScratchReg, kScratchReg);
2776 vredminu_vs(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg);
2777 vmv_xs(dst.gp(), kSimd128ScratchReg);
2778 beqz(dst.gp(), &alltrue);
2779 li(dst.gp(), 1);
2780 bind(&alltrue);
2781 }
2782
2783 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
2784 LiftoffRegister src) {
2785 VU.set(kScratchReg, E32, m1);
2786 vmv_vx(kSimd128RegZero, zero_reg);
2787 vmslt_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128RegZero);
2788 vmv_xs(dst.gp(), kSimd128ScratchReg);
2789 }
2790
2791 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
2792 LiftoffRegister rhs) {
2793 VU.set(kScratchReg, E32, m1);
2794 andi(rhs.gp(), rhs.gp(), 32 - 1);
2795 vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2796 }
2797
2798 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
2799 int32_t rhs) {
  // Select SEW=32/LMUL=1 explicitly, as the other i32x4 shift helpers do
  // (assumed missing here); otherwise the shift runs with whatever vtype the
  // previous vector instruction left active.
  VU.set(kScratchReg, E32, m1);
2800   if (is_uint5(rhs % 32)) {
2801 vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
2802 } else {
2803 li(kScratchReg, rhs % 32);
2804 vsll_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2805 }
2806 }
2807
2808 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
2809 LiftoffRegister lhs,
2810 LiftoffRegister rhs) {
2811 VU.set(kScratchReg, E32, m1);
2812 andi(rhs.gp(), rhs.gp(), 32 - 1);
2813 vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2814 }
2815
2816 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
2817 LiftoffRegister lhs, int32_t rhs) {
2818 VU.set(kScratchReg, E32, m1);
2819 if (is_uint5(rhs % 32)) {
2820 vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
2821 } else {
2822 li(kScratchReg, rhs % 32);
2823 vsra_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2824 }
2825 }
2826
2827 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
2828 LiftoffRegister lhs,
2829 LiftoffRegister rhs) {
2830 VU.set(kScratchReg, E32, m1);
2831 andi(rhs.gp(), rhs.gp(), 32 - 1);
2832 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2833 }
2834
2835 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
2836 LiftoffRegister lhs, int32_t rhs) {
2837 VU.set(kScratchReg, E32, m1);
2838 if (is_uint5(rhs % 32)) {
2839 vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
2840 } else {
2841 li(kScratchReg, rhs % 32);
2842 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2843 }
2844 }
2845
2846 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2847 LiftoffRegister rhs) {
2848 VU.set(kScratchReg, E32, m1);
2849 vadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2850 }
2851
2852 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2853 LiftoffRegister rhs) {
2854 VU.set(kScratchReg, E32, m1);
2855 vsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2856 }
2857
2858 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2859 LiftoffRegister rhs) {
2860 VU.set(kScratchReg, E32, m1);
2861 vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2862 }
2863
2864 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
2865 LiftoffRegister lhs,
2866 LiftoffRegister rhs) {
2867 VU.set(kScratchReg, E32, m1);
2868 vmin_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2869 }
2870
2871 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
2872 LiftoffRegister lhs,
2873 LiftoffRegister rhs) {
2874 VU.set(kScratchReg, E32, m1);
2875 vminu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2876 }
2877
2878 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
2879 LiftoffRegister lhs,
2880 LiftoffRegister rhs) {
2881 VU.set(kScratchReg, E32, m1);
2882 vmax_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2883 }
2884
2885 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
2886 LiftoffRegister lhs,
2887 LiftoffRegister rhs) {
2888 VU.set(kScratchReg, E32, m1);
2889 vmaxu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2890 }
2891
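// i32x4.dot_i16x8_s: widen-multiply the eight i16 pairs to i32, then use
// vcompress with the 0b01010101 and 0b10101010 masks to separate even- and
// odd-indexed products and add them pairwise.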
2892 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
2893 LiftoffRegister lhs,
2894 LiftoffRegister rhs) {
2895 VU.set(kScratchReg, E16, m1);
2896 vwmul_vv(kSimd128ScratchReg3, lhs.fp().toV(), rhs.fp().toV());
2897 VU.set(kScratchReg, E32, m2);
2898 li(kScratchReg, 0b01010101);
2899 vmv_sx(v0, kScratchReg);
2900 vcompress_vv(kSimd128ScratchReg, kSimd128ScratchReg3, v0);
2901
2902 li(kScratchReg, 0b10101010);
2903 vmv_sx(kSimd128ScratchReg2, kScratchReg);
2904 vcompress_vv(v0, kSimd128ScratchReg3, kSimd128ScratchReg2);
2905 VU.set(kScratchReg, E32, m1);
2906 vadd_vv(dst.fp().toV(), kSimd128ScratchReg, v0);
2907 }
2908
2909 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
2910 LiftoffRegister src) {
2911 VU.set(kScratchReg, E64, m1);
2912 vneg_vv(dst.fp().toV(), src.fp().toV());
2913 }
2914
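// All-true: an unsigned min reduction seeded with all ones is zero iff some
// lane is zero; dst is then left at 0, otherwise it is set to 1.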
2915 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
2916 LiftoffRegister src) {
2917 VU.set(kScratchReg, E64, m1);
2918 Label alltrue;
2919 li(kScratchReg, -1);
2920 vmv_sx(kSimd128ScratchReg, kScratchReg);
2921 vredminu_vs(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg);
2922 vmv_xs(dst.gp(), kSimd128ScratchReg);
2923 beqz(dst.gp(), &alltrue);
2924 li(dst.gp(), 1);
2925 bind(&alltrue);
2926 }
2927
2928 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
2929 LiftoffRegister rhs) {
2930 VU.set(kScratchReg, E64, m1);
2931 andi(rhs.gp(), rhs.gp(), 64 - 1);
2932 vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2933 }
2934
2935 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
2936 int32_t rhs) {
2937 VU.set(kScratchReg, E64, m1);
2938 if (is_uint5(rhs % 64)) {
2939 vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
2940 } else {
2941 li(kScratchReg, rhs % 64);
2942 vsll_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2943 }
2944 }
2945
2946 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
2947 LiftoffRegister lhs,
2948 LiftoffRegister rhs) {
2949 VU.set(kScratchReg, E64, m1);
2950 andi(rhs.gp(), rhs.gp(), 64 - 1);
2951 vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2952 }
2953
2954 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
2955 LiftoffRegister lhs, int32_t rhs) {
2956 VU.set(kScratchReg, E64, m1);
2957 if (is_uint5(rhs % 64)) {
2958 vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
2959 } else {
2960 li(kScratchReg, rhs % 64);
2961 vsra_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2962 }
2963 }
2964
2965 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
2966 LiftoffRegister lhs,
2967 LiftoffRegister rhs) {
2968 VU.set(kScratchReg, E64, m1);
2969 andi(rhs.gp(), rhs.gp(), 64 - 1);
2970 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
2971 }
2972
2973 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
2974 LiftoffRegister lhs, int32_t rhs) {
2975 VU.set(kScratchReg, E64, m1);
2976 if (is_uint5(rhs % 64)) {
2977 vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
2978 } else {
2979 li(kScratchReg, rhs % 64);
2980 vsrl_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
2981 }
2982 }
2983
2984 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2985 LiftoffRegister rhs) {
2986 VU.set(kScratchReg, E64, m1);
2987 vadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2988 }
2989
2990 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2991 LiftoffRegister rhs) {
2992 VU.set(kScratchReg, E64, m1);
2993 vsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
2994 }
2995
2996 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2997 LiftoffRegister rhs) {
2998 VU.set(kScratchReg, E64, m1);
2999 vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3000 }
3001
3002 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
3003 LiftoffRegister src) {
3004 VU.set(kScratchReg, E32, m1);
3005 vfabs_vv(dst.fp().toV(), src.fp().toV());
3006 }
3007
3008 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
3009 LiftoffRegister src) {
3010 VU.set(kScratchReg, E32, m1);
3011 vfneg_vv(dst.fp().toV(), src.fp().toV());
3012 }
3013
3014 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
3015 LiftoffRegister src) {
3016 VU.set(kScratchReg, E32, m1);
3017 vfsqrt_v(dst.fp().toV(), src.fp().toV());
3018 }
3019
3020 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
3021 LiftoffRegister src) {
3022 Ceil_f(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3023 return true;
3024 }
3025
3026 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
3027 LiftoffRegister src) {
3028 Floor_f(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3029 return true;
3030 }
3031
3032 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
3033 LiftoffRegister src) {
3034 Trunc_f(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3035 return true;
3036 }
3037
3038 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
3039 LiftoffRegister src) {
3040 Round_f(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3041 return true;
3042 }
3043
3044 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3045 LiftoffRegister rhs) {
3046 VU.set(kScratchReg, E32, m1);
3047 vfadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3048 }
3049
3050 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3051 LiftoffRegister rhs) {
3052 VU.set(kScratchReg, E32, m1);
3053 vfsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3054 }
3055
3056 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3057 LiftoffRegister rhs) {
3058 VU.set(kScratchReg, E32, m1);
3059 VU.set(RoundingMode::RTZ);
3060 vfmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3061 }
3062
3063 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
3064 LiftoffRegister rhs) {
3065 VU.set(kScratchReg, E32, m1);
3066 vfdiv_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3067 }
3068
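// Wasm f32x4.min/max must produce a canonical NaN whenever either input lane
// is NaN. A mask of lanes where both inputs are ordered is built in v0, the
// result is pre-filled with the canonical NaN pattern, and vfmin/vfmax only
// updates the masked (ordered) lanes. The f64x2 variants below use the same
// scheme with the 64-bit canonical NaN.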
3069 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
3070 LiftoffRegister rhs) {
3071 const int32_t kNaN = 0x7FC00000;
3072 VU.set(kScratchReg, E32, m1);
3073 vmfeq_vv(v0, lhs.fp().toV(), lhs.fp().toV());
3074 vmfeq_vv(kSimd128ScratchReg, rhs.fp().toV(), rhs.fp().toV());
3075 vand_vv(v0, v0, kSimd128ScratchReg);
3076 li(kScratchReg, kNaN);
3077 vmv_vx(kSimd128ScratchReg, kScratchReg);
3078 vfmin_vv(kSimd128ScratchReg, rhs.fp().toV(), lhs.fp().toV(), Mask);
3079 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
3080 }
3081
3082 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
3083 LiftoffRegister rhs) {
3084 const int32_t kNaN = 0x7FC00000;
3085 VU.set(kScratchReg, E32, m1);
3086 vmfeq_vv(v0, lhs.fp().toV(), lhs.fp().toV());
3087 vmfeq_vv(kSimd128ScratchReg, rhs.fp().toV(), rhs.fp().toV());
3088 vand_vv(v0, v0, kSimd128ScratchReg);
3089 li(kScratchReg, kNaN);
3090 vmv_vx(kSimd128ScratchReg, kScratchReg);
3091 vfmax_vv(kSimd128ScratchReg, rhs.fp().toV(), lhs.fp().toV(), Mask);
3092 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
3093 }
3094
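// Pseudo-min/max follow the Wasm select semantics (b < a ? b : a and
// a < b ? b : a): the compare writes a lane mask into v0 and vmerge then
// selects between the two inputs per lane accordingly.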
3095 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
3096 LiftoffRegister rhs) {
3097 VU.set(kScratchReg, E32, m1);
3098 // b < a ? b : a
3099 vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
3100 vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
3101 }
3102
3103 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
3104 LiftoffRegister rhs) {
3105 VU.set(kScratchReg, E32, m1);
3106 // a < b ? b : a
3107 vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
3108 vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
3109 }
3110
3111 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
3112 LiftoffRegister src) {
3113 VU.set(kScratchReg, E64, m1);
3114 vfabs_vv(dst.fp().toV(), src.fp().toV());
3115 }
3116
3117 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
3118 LiftoffRegister src) {
3119 VU.set(kScratchReg, E64, m1);
3120 vfneg_vv(dst.fp().toV(), src.fp().toV());
3121 }
3122
3123 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
3124 LiftoffRegister src) {
3125 VU.set(kScratchReg, E64, m1);
3126 vfsqrt_v(dst.fp().toV(), src.fp().toV());
3127 }
3128
3129 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
3130 LiftoffRegister src) {
3131 Ceil_d(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3132 return true;
3133 }
3134
3135 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
3136 LiftoffRegister src) {
3137 Floor_d(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3138 return true;
3139 }
3140
3141 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
3142 LiftoffRegister src) {
3143 Trunc_d(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3144 return true;
3145 }
3146
3147 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
3148 LiftoffRegister src) {
3149 Round_d(dst.fp().toV(), src.fp().toV(), kScratchReg, kSimd128ScratchReg);
3150 return true;
3151 }
3152
3153 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
3154 LiftoffRegister rhs) {
3155 VU.set(kScratchReg, E64, m1);
3156 vfadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3157 }
3158
3159 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
3160 LiftoffRegister rhs) {
3161 VU.set(kScratchReg, E64, m1);
3162 vfsub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3163 }
3164
3165 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
3166 LiftoffRegister rhs) {
3167 VU.set(kScratchReg, E64, m1);
3168 vfmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3169 }
3170
3171 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
3172 LiftoffRegister rhs) {
3173 VU.set(kScratchReg, E64, m1);
3174 vfdiv_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
3175 }
3176
3177 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
3178 LiftoffRegister rhs) {
3179 VU.set(kScratchReg, E64, m1);
3180 const int64_t kNaN = 0x7ff8000000000000L;
3181 vmfeq_vv(v0, lhs.fp().toV(), lhs.fp().toV());
3182 vmfeq_vv(kSimd128ScratchReg, rhs.fp().toV(), rhs.fp().toV());
3183 vand_vv(v0, v0, kSimd128ScratchReg);
3184 li(kScratchReg, kNaN);
3185 vmv_vx(kSimd128ScratchReg, kScratchReg);
3186 vfmin_vv(kSimd128ScratchReg, rhs.fp().toV(), lhs.fp().toV(), Mask);
3187 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
3188 }
3189
3190 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
3191 LiftoffRegister rhs) {
3192 VU.set(kScratchReg, E64, m1);
3193 const int64_t kNaN = 0x7ff8000000000000L;
3194 vmfeq_vv(v0, lhs.fp().toV(), lhs.fp().toV());
3195 vmfeq_vv(kSimd128ScratchReg, rhs.fp().toV(), rhs.fp().toV());
3196 vand_vv(v0, v0, kSimd128ScratchReg);
3197 li(kScratchReg, kNaN);
3198 vmv_vx(kSimd128ScratchReg, kScratchReg);
3199 vfmax_vv(kSimd128ScratchReg, rhs.fp().toV(), lhs.fp().toV(), Mask);
3200 vmv_vv(dst.fp().toV(), kSimd128ScratchReg);
3201 }
3202
3203 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
3204 LiftoffRegister rhs) {
3205 VU.set(kScratchReg, E64, m1);
3206 // b < a ? b : a
3207 vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
3208 vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
3209 }
3210
3211 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
3212 LiftoffRegister rhs) {
3213 VU.set(kScratchReg, E64, m1);
3214 // a < b ? b : a
3215 vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
3216 vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
3217 }
3218
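// trunc_sat conversions: NaN lanes must become zero, so a mask of ordered
// lanes is built with vmfeq, dst is cleared, and the conversion is applied
// only under the mask, truncating toward zero (RTZ).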
3219 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
3220 LiftoffRegister src) {
3221 VU.set(kScratchReg, E32, m1);
3222 VU.set(RoundingMode::RTZ);
3223 vmfeq_vv(v0, src.fp().toV(), src.fp().toV());
3224 vmv_vx(dst.fp().toV(), zero_reg);
3225 vfcvt_x_f_v(dst.fp().toV(), src.fp().toV(), Mask);
3226 }
3227
3228 void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
3229 LiftoffRegister src) {
3230 VU.set(kScratchReg, E32, m1);
3231 VU.set(RoundingMode::RTZ);
3232 vmfeq_vv(v0, src.fp().toV(), src.fp().toV());
3233 vmv_vx(dst.fp().toV(), zero_reg);
3234 vfcvt_xu_f_v(dst.fp().toV(), src.fp().toV(), Mask);
3235 }
3236
3237 void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
3238 LiftoffRegister src) {
3239 VU.set(kScratchReg, E32, m1);
3240 VU.set(RoundingMode::RTZ);
3241 vfcvt_f_x_v(dst.fp().toV(), src.fp().toV());
3242 }
3243
3244 void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
3245 LiftoffRegister src) {
3246 VU.set(kScratchReg, E32, m1);
3247 VU.set(RoundingMode::RTZ);
3248 vfcvt_f_xu_v(dst.fp().toV(), src.fp().toV());
3249 }
3250
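// Narrowing packs: v26/v27 form an LMUL=2 source group (low half from lhs,
// high half from rhs) and vnclip/vnclipu narrow the group back to one
// register with signed or unsigned saturation; the unsigned variants clamp
// negative lanes to zero first.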
3251 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
3252 LiftoffRegister lhs,
3253 LiftoffRegister rhs) {
3254 VU.set(kScratchReg, E16, m1);
3255 vmv_vv(v26, lhs.fp().toV());
3256 vmv_vv(v27, rhs.fp().toV());
3257 VU.set(kScratchReg, E8, m1);
3258 VU.set(RoundingMode::RNE);
3259 vnclip_vi(dst.fp().toV(), v26, 0);
3260 }
3261
3262 void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
3263 LiftoffRegister lhs,
3264 LiftoffRegister rhs) {
3265 VU.set(kScratchReg, E16, m1);
3266 vmv_vv(v26, lhs.fp().toV());
3267 vmv_vv(v27, rhs.fp().toV());
3268 VU.set(kScratchReg, E16, m2);
3269 vmax_vx(v26, v26, zero_reg);
3270 VU.set(kScratchReg, E8, m1);
3271 VU.set(RoundingMode::RNE);
3272 vnclipu_vi(dst.fp().toV(), v26, 0);
3273 }
3274
3275 void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
3276 LiftoffRegister lhs,
3277 LiftoffRegister rhs) {
3278 VU.set(kScratchReg, E32, m1);
3279 vmv_vv(v26, lhs.fp().toV());
3280 vmv_vv(v27, rhs.fp().toV());
3281 VU.set(kScratchReg, E16, m1);
3282 VU.set(RoundingMode::RNE);
3283 vnclip_vi(dst.fp().toV(), v26, 0);
3284 }
3285
3286 void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
3287 LiftoffRegister lhs,
3288 LiftoffRegister rhs) {
3289 VU.set(kScratchReg, E32, m1);
3290 vmv_vv(v26, lhs.fp().toV());
3291 vmv_vv(v27, rhs.fp().toV());
3292 VU.set(kScratchReg, E32, m2);
3293 vmax_vx(v26, v26, zero_reg);
3294 VU.set(kScratchReg, E16, m1);
3295 VU.set(RoundingMode::RNE);
3296 vnclipu_vi(dst.fp().toV(), v26, 0);
3297 }
3298
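// Lane extensions: the *_low variants copy the source and widen it in place
// with vsext_vf2/vzext_vf2; the *_high variants first slide the upper half
// down to element 0 and then widen.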
3299 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
3300 LiftoffRegister src) {
3301 VU.set(kScratchReg, E16, m1);
3302 vmv_vv(kSimd128ScratchReg, src.fp().toV());
3303 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3304 }
3305
3306 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
3307 LiftoffRegister src) {
3308 VU.set(kScratchReg, E8, m1);
3309 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 8);
3310 VU.set(kScratchReg, E16, m1);
3311 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3312 }
3313
3314 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
3315 LiftoffRegister src) {
3316 VU.set(kScratchReg, E16, m1);
3317 vmv_vv(kSimd128ScratchReg, src.fp().toV());
3318 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3319 }
3320
3321 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
3322 LiftoffRegister src) {
3323 VU.set(kScratchReg, E8, m1);
3324 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 8);
3325 VU.set(kScratchReg, E16, m1);
3326 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3327 }
3328
3329 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
3330 LiftoffRegister src) {
3331 VU.set(kScratchReg, E32, m1);
3332 vmv_vv(kSimd128ScratchReg, src.fp().toV());
3333 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3334 }
3335
3336 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
3337 LiftoffRegister src) {
3338 VU.set(kScratchReg, E16, m1);
3339 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 4);
3340 VU.set(kScratchReg, E32, m1);
3341 vsext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3342 }
3343
3344 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
3345 LiftoffRegister src) {
3346 VU.set(kScratchReg, E32, m1);
3347 vmv_vv(kSimd128ScratchReg, src.fp().toV());
3348 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3349 }
3350
3351 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
3352 LiftoffRegister src) {
3353 VU.set(kScratchReg, E16, m1);
3354 vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), 4);
3355 VU.set(kScratchReg, E32, m1);
3356 vzext_vf2(dst.fp().toV(), kSimd128ScratchReg);
3357 }
3358
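// Unsigned rounding average: widen, compute lhs + rhs + 1, divide by two in
// the wider element size, then narrow back with unsigned saturation.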
3359 void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
3360 LiftoffRegister lhs,
3361 LiftoffRegister rhs) {
3362 VU.set(kScratchReg, E8, m1);
3363 vwaddu_vv(kSimd128ScratchReg, lhs.fp().toV(), rhs.fp().toV());
3364 li(kScratchReg, 1);
3365 vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
3366 li(kScratchReg, 2);
3367 VU.set(kScratchReg2, E16, m2);
3368 vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
3369 VU.set(kScratchReg2, E8, m1);
3370 vnclipu_vi(dst.fp().toV(), kSimd128ScratchReg3, 0);
3371 }
3372 void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
3373 LiftoffRegister lhs,
3374 LiftoffRegister rhs) {
3375 VU.set(kScratchReg2, E16, m1);
3376 vwaddu_vv(kSimd128ScratchReg, lhs.fp().toV(), rhs.fp().toV());
3377 li(kScratchReg, 1);
3378 vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
3379 li(kScratchReg, 2);
3380 VU.set(kScratchReg2, E32, m2);
3381 vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
3382 VU.set(kScratchReg2, E16, m1);
3383 vnclipu_vi(dst.fp().toV(), kSimd128ScratchReg3, 0);
3384 }
3385
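// Integer abs: copy src to dst, mark negative lanes with vmslt against zero,
// and negate only the masked lanes.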
3386 void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
3387 LiftoffRegister src) {
3388 VU.set(kScratchReg, E8, m1);
3389 vmv_vx(kSimd128RegZero, zero_reg);
3390 vmv_vv(dst.fp().toV(), src.fp().toV());
3391 vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
3392 vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
3393 }
3394
3395 void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
3396 LiftoffRegister src) {
3397 VU.set(kScratchReg, E16, m1);
3398 vmv_vx(kSimd128RegZero, zero_reg);
3399 vmv_vv(dst.fp().toV(), src.fp().toV());
3400 vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
3401 vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
3402 }
3403
3404 void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
3405 LiftoffRegister src) {
3406 VU.set(kScratchReg, E64, m1);
3407 vmv_vx(kSimd128RegZero, zero_reg);
3408 vmv_vv(dst.fp().toV(), src.fp().toV());
3409 vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
3410 vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
3411 }
3412
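// Extended pairwise additions: the 64-bit constants are lane-index tables
// used by vrgather to deinterleave the even- and odd-indexed lanes; the two
// halves are then widen-added pairwise into dst.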
3413 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3414 LiftoffRegister src) {
3415 VU.set(kScratchReg, E64, m1);
3416 li(kScratchReg, 0x0006000400020000);
3417 vmv_sx(kSimd128ScratchReg, kScratchReg);
3418 li(kScratchReg, 0x0007000500030001);
3419 vmv_sx(kSimd128ScratchReg3, kScratchReg);
3420 VU.set(kScratchReg, E16, m1);
3421 vrgather_vv(kSimd128ScratchReg2, src.fp().toV(), kSimd128ScratchReg);
3422 vrgather_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg3);
3423 VU.set(kScratchReg, E16, mf2);
3424 vwadd_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
3425 }
3426
3427 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3428 LiftoffRegister src) {
3429 VU.set(kScratchReg, E64, m1);
3430 li(kScratchReg, 0x0006000400020000);
3431 vmv_sx(kSimd128ScratchReg, kScratchReg);
3432 li(kScratchReg, 0x0007000500030001);
3433 vmv_sx(kSimd128ScratchReg3, kScratchReg);
3434 VU.set(kScratchReg, E16, m1);
3435 vrgather_vv(kSimd128ScratchReg2, src.fp().toV(), kSimd128ScratchReg);
3436 vrgather_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg3);
3437 VU.set(kScratchReg, E16, mf2);
3438 vwaddu_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
3439 }
3440
3441 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3442 LiftoffRegister src) {
3443 VU.set(kScratchReg, E64, m1);
3444 li(kScratchReg, 0x0E0C0A0806040200);
3445 vmv_sx(kSimd128ScratchReg, kScratchReg);
3446 li(kScratchReg, 0x0F0D0B0907050301);
3447 vmv_sx(kSimd128ScratchReg3, kScratchReg);
3448 VU.set(kScratchReg, E8, m1);
3449 vrgather_vv(kSimd128ScratchReg2, src.fp().toV(), kSimd128ScratchReg);
3450 vrgather_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg3);
3451 VU.set(kScratchReg, E8, mf2);
3452 vwadd_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
3453 }
3454
3455 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3456 LiftoffRegister src) {
3457 VU.set(kScratchReg, E64, m1);
3458 li(kScratchReg, 0x0E0C0A0806040200);
3459 vmv_sx(kSimd128ScratchReg, kScratchReg);
3460 li(kScratchReg, 0x0F0D0B0907050301);
3461 vmv_sx(kSimd128ScratchReg3, kScratchReg);
3462 VU.set(kScratchReg, E8, m1);
3463 vrgather_vv(kSimd128ScratchReg2, src.fp().toV(), kSimd128ScratchReg);
3464 vrgather_vv(kSimd128ScratchReg, src.fp().toV(), kSimd128ScratchReg3);
3465 VU.set(kScratchReg, E8, mf2);
3466 vwaddu_vv(dst.fp().toV(), kSimd128ScratchReg, kSimd128ScratchReg2);
3467 }
3468
3469 void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
3470 LiftoffRegister src) {
3471 VU.set(kScratchReg, E32, m1);
3472 vmv_vx(kSimd128RegZero, zero_reg);
3473 vmv_vv(dst.fp().toV(), src.fp().toV());
3474 vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
3475 vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
3476 }
3477
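// Lane extraction slides the requested lane down to element 0 and moves it
// to a scalar register; the unsigned integer variants clear the upper bits
// with a shift pair.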
3478 void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
3479 LiftoffRegister lhs,
3480 uint8_t imm_lane_idx) {
3481 VU.set(kScratchReg, E8, m1);
3482 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3483 vmv_xs(dst.gp(), kSimd128ScratchReg);
3484 }
3485
3486 void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
3487 LiftoffRegister lhs,
3488 uint8_t imm_lane_idx) {
3489 VU.set(kScratchReg, E8, m1);
3490 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3491 vmv_xs(dst.gp(), kSimd128ScratchReg);
3492 slli(dst.gp(), dst.gp(), 64 - 8);
3493 srli(dst.gp(), dst.gp(), 64 - 8);
3494 }
3495
3496 void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
3497 LiftoffRegister lhs,
3498 uint8_t imm_lane_idx) {
3499 VU.set(kScratchReg, E16, m1);
3500 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3501 vmv_xs(dst.gp(), kSimd128ScratchReg);
3502 }
3503
3504 void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
3505 LiftoffRegister lhs,
3506 uint8_t imm_lane_idx) {
3507 VU.set(kScratchReg, E16, m1);
3508 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3509 vmv_xs(dst.gp(), kSimd128ScratchReg);
3510 slli(dst.gp(), dst.gp(), 64 - 16);
3511 srli(dst.gp(), dst.gp(), 64 - 16);
3512 }
3513
3514 void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
3515 LiftoffRegister lhs,
3516 uint8_t imm_lane_idx) {
3517 VU.set(kScratchReg, E32, m1);
3518 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3519 vmv_xs(dst.gp(), kSimd128ScratchReg);
3520 }
3521
3522 void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
3523 LiftoffRegister lhs,
3524 uint8_t imm_lane_idx) {
3525 VU.set(kScratchReg, E64, m1);
3526 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3527 vmv_xs(dst.gp(), kSimd128ScratchReg);
3528 }
3529
3530 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
3531 LiftoffRegister lhs,
3532 uint8_t imm_lane_idx) {
3533 VU.set(kScratchReg, E32, m1);
3534 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3535 vfmv_fs(dst.fp(), kSimd128ScratchReg);
3536 }
3537
3538 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
3539 LiftoffRegister lhs,
3540 uint8_t imm_lane_idx) {
3541 VU.set(kScratchReg, E64, m1);
3542 vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
3543 vfmv_fs(dst.fp(), kSimd128ScratchReg);
3544 }
3545
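// Lane replacement builds a one-hot mask for the lane in v0 and merges the
// new scalar over the original vector; float values are first moved to a GP
// register with fmv_x_w/fmv_x_d.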
3546 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
3547 LiftoffRegister src1,
3548 LiftoffRegister src2,
3549 uint8_t imm_lane_idx) {
3550 VU.set(kScratchReg, E64, m1);
3551 li(kScratchReg, 0x1 << imm_lane_idx);
3552 vmv_sx(v0, kScratchReg);
3553 VU.set(kScratchReg, E8, m1);
3554 vmerge_vx(dst.fp().toV(), src2.gp(), src1.fp().toV());
3555 }
3556
3557 void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
3558 LiftoffRegister src1,
3559 LiftoffRegister src2,
3560 uint8_t imm_lane_idx) {
3561 VU.set(kScratchReg, E16, m1);
3562 li(kScratchReg, 0x1 << imm_lane_idx);
3563 vmv_sx(v0, kScratchReg);
3564 vmerge_vx(dst.fp().toV(), src2.gp(), src1.fp().toV());
3565 }
3566
3567 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
3568 LiftoffRegister src1,
3569 LiftoffRegister src2,
3570 uint8_t imm_lane_idx) {
3571 VU.set(kScratchReg, E32, m1);
3572 li(kScratchReg, 0x1 << imm_lane_idx);
3573 vmv_sx(v0, kScratchReg);
3574 vmerge_vx(dst.fp().toV(), src2.gp(), src1.fp().toV());
3575 }
3576
3577 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
3578 LiftoffRegister src1,
3579 LiftoffRegister src2,
3580 uint8_t imm_lane_idx) {
3581 VU.set(kScratchReg, E64, m1);
3582 li(kScratchReg, 0x1 << imm_lane_idx);
3583 vmv_sx(v0, kScratchReg);
3584 vmerge_vx(dst.fp().toV(), src2.gp(), src1.fp().toV());
3585 }
3586
3587 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
3588 LiftoffRegister src1,
3589 LiftoffRegister src2,
3590 uint8_t imm_lane_idx) {
3591 VU.set(kScratchReg, E32, m1);
3592 li(kScratchReg, 0x1 << imm_lane_idx);
3593 vmv_sx(v0, kScratchReg);
3594 fmv_x_w(kScratchReg, src2.fp());
3595 vmerge_vx(dst.fp().toV(), kScratchReg, src1.fp().toV());
3596 }
3597
3598 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
3599 LiftoffRegister src1,
3600 LiftoffRegister src2,
3601 uint8_t imm_lane_idx) {
3602 VU.set(kScratchReg, E64, m1);
3603 li(kScratchReg, 0x1 << imm_lane_idx);
3604 vmv_sx(v0, kScratchReg);
3605 fmv_x_d(kScratchReg, src2.fp());
3606 vmerge_vx(dst.fp().toV(), kScratchReg, src1.fp().toV());
3607 }
3608
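// NaN detection for SIMD results: reduce the lanes with vfredmax, then test
// the scalar result against itself with feq (which is 0 exactly when the
// value is NaN) and store the resulting flag.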
3609 void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
3610 Register tmp_gp,
3611 LiftoffRegister tmp_s128,
3612 ValueKind lane_kind) {
3613 DoubleRegister tmp_fp = tmp_s128.fp();
VU.set(kScratchReg, lane_kind == kF32 ? E32 : E64, m1);
3614 vfredmax_vs(kSimd128ScratchReg, src.fp().toV(), src.fp().toV());
3615 vfmv_fs(tmp_fp, kSimd128ScratchReg);
3616 if (lane_kind == kF32) {
3617 feq_s(kScratchReg, tmp_fp, tmp_fp); // scratch <- !IsNan(tmp_fp)
3618 } else {
3619 DCHECK_EQ(lane_kind, kF64);
3620 feq_d(kScratchReg, tmp_fp, tmp_fp); // scratch <- !IsNan(tmp_fp)
3621 }
3622 sltiu(kScratchReg, kScratchReg, 1); // scratch <- IsNan(tmp_fp)
3623 Sw(kScratchReg, MemOperand(dst));
3624 }
3625
3626 void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
3627 TurboAssembler::Ld(limit_address, MemOperand(limit_address));
3628 TurboAssembler::Branch(ool_code, ule, sp, Operand(limit_address));
3629 }
3630
3631 void LiftoffAssembler::CallTrapCallbackForTesting() {
3632 PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp());
3633 CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3634 }
3635
3636 void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
3637 if (FLAG_debug_code) Abort(reason);
3638 }
3639
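// Spill/restore around runtime calls: GP registers are pushed first, FP
// registers are stored as doubles below them, and PopRegisters walks the
// same layout in reverse.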
3640 void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
3641 LiftoffRegList gp_regs = regs & kGpCacheRegList;
3642 int32_t num_gp_regs = gp_regs.GetNumRegsSet();
3643 if (num_gp_regs) {
3644 int32_t offset = num_gp_regs * kSystemPointerSize;
3645 Add64(sp, sp, Operand(-offset));
3646 while (!gp_regs.is_empty()) {
3647 LiftoffRegister reg = gp_regs.GetFirstRegSet();
3648 offset -= kSystemPointerSize;
3649 Sd(reg.gp(), MemOperand(sp, offset));
3650 gp_regs.clear(reg);
3651 }
3652 DCHECK_EQ(offset, 0);
3653 }
3654 LiftoffRegList fp_regs = regs & kFpCacheRegList;
3655 int32_t num_fp_regs = fp_regs.GetNumRegsSet();
3656 if (num_fp_regs) {
3657 Add64(sp, sp, Operand(-(num_fp_regs * kStackSlotSize)));
3658 int32_t offset = 0;
3659 while (!fp_regs.is_empty()) {
3660 LiftoffRegister reg = fp_regs.GetFirstRegSet();
3661 TurboAssembler::StoreDouble(reg.fp(), MemOperand(sp, offset));
3662 fp_regs.clear(reg);
3663 offset += sizeof(double);
3664 }
3665 DCHECK_EQ(offset, num_fp_regs * sizeof(double));
3666 }
3667 }
3668
3669 void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
3670 LiftoffRegList fp_regs = regs & kFpCacheRegList;
3671 int32_t fp_offset = 0;
3672 while (!fp_regs.is_empty()) {
3673 LiftoffRegister reg = fp_regs.GetFirstRegSet();
3674 TurboAssembler::LoadDouble(reg.fp(), MemOperand(sp, fp_offset));
3675 fp_regs.clear(reg);
3676 fp_offset += sizeof(double);
3677 }
3678 if (fp_offset) Add64(sp, sp, Operand(fp_offset));
3679 LiftoffRegList gp_regs = regs & kGpCacheRegList;
3680 int32_t gp_offset = 0;
3681 while (!gp_regs.is_empty()) {
3682 LiftoffRegister reg = gp_regs.GetLastRegSet();
3683 Ld(reg.gp(), MemOperand(sp, gp_offset));
3684 gp_regs.clear(reg);
3685 gp_offset += kSystemPointerSize;
3686 }
3687 Add64(sp, sp, Operand(gp_offset));
3688 }
3689
3690 void LiftoffAssembler::RecordSpillsInSafepoint(
3691 SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
3692 LiftoffRegList ref_spills, int spill_offset) {
3693 int spill_space_size = 0;
3694 while (!all_spills.is_empty()) {
3695 LiftoffRegister reg = all_spills.GetFirstRegSet();
3696 if (ref_spills.has(reg)) {
3697 safepoint.DefineTaggedStackSlot(spill_offset);
3698 }
3699 all_spills.clear(reg);
3700 ++spill_offset;
3701 spill_space_size += kSystemPointerSize;
3702 }
3703 // Record the number of additional spill slots.
3704 RecordOolSpillSpaceSize(spill_space_size);
3705 }
3706
3707 void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
3708 TurboAssembler::DropAndRet(static_cast<int>(num_stack_slots));
3709 }
3710
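// C calls: the arguments are spilled to a scratch buffer on the stack and a
// pointer to that buffer is passed as the single C argument in a0; the
// register result (if any) is taken from a0 and an out-argument is reloaded
// from the buffer before the stack space is released.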
3711 void LiftoffAssembler::CallC(const ValueKindSig* sig,
3712 const LiftoffRegister* args,
3713 const LiftoffRegister* rets,
3714 ValueKind out_argument_kind, int stack_bytes,
3715 ExternalReference ext_ref) {
3716 Add64(sp, sp, Operand(-stack_bytes));
3717
3718 int arg_bytes = 0;
3719 for (ValueKind param_kind : sig->parameters()) {
3720 liftoff::Store(this, sp, arg_bytes, *args++, param_kind);
3721 arg_bytes += value_kind_size(param_kind);
3722 }
3723 DCHECK_LE(arg_bytes, stack_bytes);
3724
3725 // Pass a pointer to the buffer with the arguments to the C function.
3726 // On RISC-V, the first argument is passed in {a0}.
3727 constexpr Register kFirstArgReg = a0;
3728 mv(kFirstArgReg, sp);
3729
3730 // Now call the C function.
3731 constexpr int kNumCCallArgs = 1;
3732 PrepareCallCFunction(kNumCCallArgs, kScratchReg);
3733 CallCFunction(ext_ref, kNumCCallArgs);
3734
3735 // Move return value to the right register.
3736 const LiftoffRegister* next_result_reg = rets;
3737 if (sig->return_count() > 0) {
3738 DCHECK_EQ(1, sig->return_count());
3739 constexpr Register kReturnReg = a0;
3740 if (kReturnReg != next_result_reg->gp()) {
3741 Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
3742 }
3743 ++next_result_reg;
3744 }
3745
3746 // Load potential output value from the buffer on the stack.
3747 if (out_argument_kind != kVoid) {
3748 liftoff::Load(this, *next_result_reg, MemOperand(sp, 0), out_argument_kind);
3749 }
3750
3751 Add64(sp, sp, Operand(stack_bytes));
3752 }
3753
3754 void LiftoffAssembler::CallNativeWasmCode(Address addr) {
3755 Call(addr, RelocInfo::WASM_CALL);
3756 }
3757
3758 void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
3759 Jump(addr, RelocInfo::WASM_CALL);
3760 }
3761
3762 void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
3763 compiler::CallDescriptor* call_descriptor,
3764 Register target) {
3765 if (target == no_reg) {
3766 pop(t6);
3767 Call(t6);
3768 } else {
3769 Call(target);
3770 }
3771 }
3772
3773 void LiftoffAssembler::TailCallIndirect(Register target) {
3774 if (target == no_reg) {
3775 Pop(t6);
3776 Jump(t6);
3777 } else {
3778 Jump(target);
3779 }
3780 }
3781
3782 void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
3783 // A direct call to a wasm runtime stub defined in this module.
3784 // Just encode the stub index. This will be patched at relocation.
3785 Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
3786 }
3787
3788 void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
3789 Add64(sp, sp, Operand(-size));
3790 TurboAssembler::Move(addr, sp);
3791 }
3792
3793 void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
3794 Add64(sp, sp, Operand(size));
3795 }
3796
3797 void LiftoffAssembler::MaybeOSR() {}
3798
3799 void LiftoffAssembler::emit_set_if_nan(Register dst, FPURegister src,
3800 ValueKind kind) {
3801 UseScratchRegisterScope temps(this);
3802 Register scratch = temps.Acquire();
3803 li(scratch, 1);
3804 if (kind == kF32) {
3805 feq_s(scratch, src, src); // rd <- !isNan(src)
3806 } else {
3807 DCHECK_EQ(kind, kF64);
3808 feq_d(scratch, src, src); // rd <- !isNan(src)
3809 }
3810 sltiu(scratch, scratch, 1); // scratch <- isNan(src)
3811 Sd(scratch, MemOperand(dst));
3812 }
3813
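// Builds the outgoing stack parameter area for a call: slots are visited in
// push order, padding between them is allocated explicitly, and each value
// is pushed from its stack slot, register, or constant.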
3814 void LiftoffStackSlots::Construct(int param_slots) {
3815 DCHECK_LT(0, slots_.size());
3816 SortInPushOrder();
3817 int last_stack_slot = param_slots;
3818 for (auto& slot : slots_) {
3819 const int stack_slot = slot.dst_slot_;
3820 int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
3821 DCHECK_LT(0, stack_decrement);
3822 last_stack_slot = stack_slot;
3823 const LiftoffAssembler::VarState& src = slot.src_;
3824 switch (src.loc()) {
3825 case LiftoffAssembler::VarState::kStack:
3826 if (src.kind() != kS128) {
3827 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
3828 asm_->Ld(kScratchReg, liftoff::GetStackSlot(slot.src_offset_));
3829 asm_->push(kScratchReg);
3830 } else {
3831 asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
3832 asm_->Ld(kScratchReg, liftoff::GetStackSlot(slot.src_offset_ - 8));
3833 asm_->push(kScratchReg);
3834 asm_->Ld(kScratchReg, liftoff::GetStackSlot(slot.src_offset_));
3835 asm_->push(kScratchReg);
3836 }
3837 break;
3838 case LiftoffAssembler::VarState::kRegister: {
3839 int pushed_bytes = SlotSizeInBytes(slot);
3840 asm_->AllocateStackSpace(stack_decrement - pushed_bytes);
3841 liftoff::push(asm_, src.reg(), src.kind());
3842 break;
3843 }
3844 case LiftoffAssembler::VarState::kIntConst: {
3845 asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
3846 asm_->li(kScratchReg, Operand(src.i32_const()));
3847 asm_->push(kScratchReg);
3848 break;
3849 }
3850 }
3851 }
3852 }
3853 } // namespace wasm
3854 } // namespace internal
3855 } // namespace v8
3856
3857 #endif // V8_WASM_BASELINE_RISCV64_LIFTOFF_ASSEMBLER_RISCV64_H_
3858