// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_
#define V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/simd-shuffle.h"
#include "src/wasm/value-type.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}

// ebp-4 holds the stack marker, ebp-8 is the instance parameter.
constexpr int kInstanceOffset = 8;
constexpr int kFeedbackVectorOffset = 12;  // ebp-12 is the feedback vector.
constexpr int kTierupBudgetOffset = 16;    // ebp-16 is the tiering budget.

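// Summary of the constants above: the static part of a Liftoff frame on ia32
// holds the frame marker (ebp-4), the instance (ebp-8), the feedback vector
// (ebp-12) and the tiering budget (ebp-16); Wasm value spill slots are
// allocated below that (see {StaticStackFrameSize} below).
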
inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }

inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
  int32_t half_offset =
      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
  return Operand(offset > 0 ? ebp : esp, -offset + half_offset);
}

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

static constexpr LiftoffRegList kByteRegs =
    LiftoffRegList::FromBits<RegList{eax, ecx, edx}.bits()>();

inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Register base,
                 int32_t offset, ValueKind kind) {
  Operand src(base, offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->mov(dst.gp(), src);
      break;
    case kI64:
      assm->mov(dst.low_gp(), src);
      assm->mov(dst.high_gp(), Operand(base, offset + 4));
      break;
    case kF32:
      assm->movss(dst.fp(), src);
      break;
    case kF64:
      assm->movsd(dst.fp(), src);
      break;
    case kS128:
      assm->movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}

inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
                  LiftoffRegister src, ValueKind kind) {
  Operand dst(base, offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->mov(dst, src.gp());
      break;
    case kI64:
      assm->mov(dst, src.low_gp());
      assm->mov(Operand(base, offset + 4), src.high_gp());
      break;
    case kF32:
      assm->movss(dst, src.fp());
      break;
    case kF64:
      assm->movsd(dst, src.fp());
      break;
    case kS128:
      assm->movdqu(dst, src.fp());
      break;
    case kVoid:
    case kBottom:
    case kI8:
    case kI16:
      UNREACHABLE();
  }
}

inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kRef:
    case kOptRef:
    case kRtt:
      assm->AllocateStackSpace(padding);
      assm->push(reg.gp());
      break;
    case kI64:
      assm->AllocateStackSpace(padding);
      assm->push(reg.high_gp());
      assm->push(reg.low_gp());
      break;
    case kF32:
      assm->AllocateStackSpace(sizeof(float) + padding);
      assm->movss(Operand(esp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(sizeof(double) + padding);
      assm->movsd(Operand(esp, 0), reg.fp());
      break;
    case kS128:
      assm->AllocateStackSpace(sizeof(double) * 2 + padding);
      assm->movdqu(Operand(esp, 0), reg.fp());
      break;
    case kVoid:
    case kBottom:
    case kI8:
    case kI16:
      UNREACHABLE();
  }
}

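// Sign-extend the low word of {reg} into the high word: copy the low word,
// then arithmetically shift it right by 31 so the high word becomes 0 or -1
// depending on the sign bit (e.g. low = -5 yields high = 0xFFFFFFFF).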
inline void SignExtendI32ToI64(Assembler* assm, LiftoffRegister reg) {
  assm->mov(reg.high_gp(), reg.low_gp());
  assm->sar(reg.high_gp(), 31);
}

// Get a temporary byte register, using {candidate} if possible.
// Might spill, but always keeps status flags intact.
inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) {
  if (candidate.is_byte_register()) return candidate;
  // {GetUnusedRegister()} may insert move instructions to spill registers to
  // the stack. This is OK because {mov} does not change the status flags.
  return assm->GetUnusedRegister(liftoff::kByteRegs).gp();
}

inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src,
                           const Operand& dst) {
  if (assm->cache_state()->has_unused_register(kGpReg)) {
    Register tmp = assm->cache_state()->unused_register(kGpReg).gp();
    assm->mov(tmp, src);
    assm->mov(dst, tmp);
  } else {
    // No free register, move via the stack.
    assm->push(src);
    assm->pop(dst);
  }
}

constexpr DoubleRegister kScratchDoubleReg = xmm7;

constexpr int kSubSpSize = 6;  // 6 bytes for "sub esp, <imm32>"

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub_sp_32(0);
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  push(Operand(ebp, 4));
  push(Operand(ebp, 0));

  // Shift the whole frame upwards.
  Register scratch = eax;
  push(scratch);
  const int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count; i > 0; --i) {
    mov(scratch, Operand(esp, i * 4));
    mov(Operand(ebp, (i - stack_param_delta - 1) * 4), scratch);
  }
  pop(scratch);

  // Set the new stack and frame pointers.
  lea(esp, Operand(ebp, -stack_param_delta * 4));
  pop(ebp);
}

void LiftoffAssembler::AlignFrameSize() {}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We can't run out of space when patching, just pass anything big enough to
  // not cause the assembler to try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
  // "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    // We do not have a scratch register, so pick any and push it first.
    Register stack_limit = eax;
    push(stack_limit);
    mov(stack_limit,
        FieldOperand(kWasmInstanceRegister,
                     WasmInstanceObject::kRealStackLimitAddressOffset));
    mov(stack_limit, Operand(stack_limit, 0));
    add(stack_limit, Immediate(frame_size));
    cmp(esp, stack_limit);
    pop(stack_limit);
    j(above_equal, &continuation, Label::kNear);
  }

  wasm_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}

void LiftoffAssembler::FinishCode() {}

void LiftoffAssembler::AbortCompilation() {}

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kTierupBudgetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  return value_kind_full_size(kind);
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      TurboAssembler::Move(reg.gp(), Immediate(value.to_i32(), rmode));
      break;
    case kI64: {
      DCHECK(RelocInfo::IsNoInfo(rmode));
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      TurboAssembler::Move(reg.low_gp(), Immediate(low_word));
      TurboAssembler::Move(reg.high_gp(), Immediate(high_word));
      break;
    }
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  mov(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzx_b(dst, src);
      break;
    case 4:
      mov(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
  mov(dst, Operand{instance, offset});
}

void LiftoffAssembler::SpillInstance(Register instance) {
  mov(liftoff::GetInstanceOperand(), instance);
}

void LiftoffAssembler::ResetOSRTarget() {}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Load(LiftoffRegister(dst), src_addr, offset_reg,
       static_cast<uint32_t>(offset_imm), LoadType::kI32Load, pinned);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  mov(dst, Operand(src_addr, offset_imm));
}

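// StoreTaggedPointer performs the store and then the write barrier: the
// record-write stub is skipped if the destination page does not track outgoing
// pointers, if the stored value is a Smi, or if the value's page is not of
// interest; otherwise the stub is called with the slot address in a scratch
// register.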
void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Operand dst_op = offset_reg == no_reg
                       ? Operand(dst_addr, offset_imm)
                       : Operand(dst_addr, offset_reg, times_1, offset_imm);
  mov(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, scratch,
                MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
                &write_barrier, Label::kNear);
  jmp(&exit, Label::kNear);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit, Label::kNear);
  CheckPageFlag(src.gp(), scratch,
                MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
                Label::kNear);
  lea(scratch, dst_op);
  CallRecordWriteStubSaveRegisters(
      dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

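// For the memory accesses below, {protected_load_pc} / {protected_store_pc}
// (when non-null) record the code offset of the first instruction that touches
// memory, so that a fault at that instruction can be mapped back to a Wasm
// out-of-bounds trap by the trap handler.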
void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
  Operand src_op = offset_reg == no_reg
                       ? Operand(src_addr, offset_imm)
                       : Operand(src_addr, offset_reg, times_1, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();

  switch (type.value()) {
    case LoadType::kI32Load8U:
      movzx_b(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsx_b(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8U:
      movzx_b(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load8S:
      movsx_b(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load16U:
      movzx_w(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsx_w(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16U:
      movzx_w(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load16S:
      movsx_w(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load:
      mov(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32U:
      mov(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load32S:
      mov(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI64Load: {
      // Compute the operand for the load of the upper half.
      Operand upper_src_op =
          offset_reg == no_reg
              ? Operand(src_addr, bit_cast<int32_t>(offset_imm + 4))
              : Operand(src_addr, offset_reg, times_1, offset_imm + 4);
      // The high word has to be mov'ed first, such that this is the protected
      // instruction. The mov of the low word cannot segfault.
      mov(dst.high_gp(), upper_src_op);
      mov(dst.low_gp(), src_op);
      break;
    }
    case LoadType::kF32Load:
      movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      movdqu(dst.fp(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  DCHECK_EQ(type.value_type() == kWasmI64, src.is_gp_pair());
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand dst_op = offset_reg == no_reg
                       ? Operand(dst_addr, offset_imm)
                       : Operand(dst_addr, offset_reg, times_1, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();

  switch (type.value()) {
    case StoreType::kI64Store8:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      // Only the lower 4 registers can be addressed as 8-bit registers.
      if (src.gp().is_byte_register()) {
        mov_b(dst_op, src.gp());
      } else {
        // We know that {src} is not a byte register, so the only pinned byte
        // registers (beside the outer {pinned}) are {dst_addr} and potentially
        // {offset_reg}.
        LiftoffRegList pinned_byte = pinned | LiftoffRegList{dst_addr};
        if (offset_reg != no_reg) pinned_byte.set(offset_reg);
        Register byte_src =
            GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned_byte)).gp();
        mov(byte_src, src.gp());
        mov_b(dst_op, byte_src);
      }
      break;
    case StoreType::kI64Store16:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      mov_w(dst_op, src.gp());
      break;
    case StoreType::kI64Store32:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      mov(dst_op, src.gp());
      break;
    case StoreType::kI64Store: {
      // Compute the operand for the store of the upper half.
      Operand upper_dst_op =
          offset_reg == no_reg
              ? Operand(dst_addr, bit_cast<int32_t>(offset_imm + 4))
              : Operand(dst_addr, offset_reg, times_1, offset_imm + 4);
      // The high word has to be mov'ed first, such that this is the protected
      // instruction. The mov of the low word cannot segfault.
      mov(upper_dst_op, src.high_gp());
      mov(dst_op, src.low_gp());
      break;
    }
    case StoreType::kF32Store:
      movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  if (type.value() != LoadType::kI64Load) {
    Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
    return;
  }

  DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand src_op = offset_reg == no_reg
                       ? Operand(src_addr, offset_imm)
                       : Operand(src_addr, offset_reg, times_1, offset_imm);

  movsd(liftoff::kScratchDoubleReg, src_op);
  Pextrd(dst.low().gp(), liftoff::kScratchDoubleReg, 0);
  Pextrd(dst.high().gp(), liftoff::kScratchDoubleReg, 1);
}

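// For 64-bit atomic stores, the code below assembles the value pair into an
// XMM register, stores it with a single 8-byte SSE store, and then executes
// {lock or [esp], 0} as a full memory barrier to obtain sequentially
// consistent ordering. Narrower atomic stores use {xchg}, which is implicitly
// locked on x86.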
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  DCHECK_NE(offset_reg, no_reg);
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);

  // i64 store uses a totally different approach, hence implement it separately.
  if (type.value() == StoreType::kI64Store) {
    auto scratch2 = GetUnusedRegister(kFpReg, pinned).fp();
    movd(liftoff::kScratchDoubleReg, src.low().gp());
    movd(scratch2, src.high().gp());
    Punpckldq(liftoff::kScratchDoubleReg, scratch2);
    movsd(dst_op, liftoff::kScratchDoubleReg);
    // This lock+or is needed to achieve sequential consistency.
    lock();
    or_(Operand(esp, 0), Immediate(0));
    return;
  }

  // Other i64 stores actually only use the low word.
  if (src.is_pair()) src = src.low();
  Register src_gp = src.gp();

  bool is_byte_store = type.size() == 1;
  LiftoffRegList src_candidates =
      is_byte_store ? liftoff::kByteRegs : kGpCacheRegList;
  pinned = pinned | LiftoffRegList{dst_addr, src, offset_reg};

  // Ensure that {src} is a valid and otherwise unused register.
  if (!src_candidates.has(src) || cache_state_.is_used(src)) {
    // If there are no unused candidate registers, but {src} is a candidate,
    // then spill other uses of {src}. Otherwise spill any candidate register
    // and use that.
    LiftoffRegList unpinned_candidates = src_candidates.MaskOut(pinned);
    if (!cache_state_.has_unused_register(unpinned_candidates) &&
        src_candidates.has(src)) {
      SpillRegister(src);
    } else {
      Register safe_src = GetUnusedRegister(unpinned_candidates).gp();
      mov(safe_src, src_gp);
      src_gp = safe_src;
    }
  }

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      xchg_b(src_gp, dst_op);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      xchg_w(src_gp, dst_op);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      xchg(src_gp, dst_op);
      return;
    default:
      UNREACHABLE();
  }
}

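// The helpers below implement the atomic read-modify-write operations. Add,
// sub, and exchange on 32 bits and narrower use {xadd}/{xchg} directly; and,
// or, and xor fall back to a {cmpxchg} retry loop; all 64-bit variants use a
// {cmpxchg8b} retry loop with the value pair spilled to the stack.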
namespace liftoff {
#define __ lasm->

enum Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };

inline void AtomicAddOrSubOrExchange32(LiftoffAssembler* lasm, Binop binop,
                                       Register dst_addr, Register offset_reg,
                                       uint32_t offset_imm,
                                       LiftoffRegister value,
                                       LiftoffRegister result, StoreType type) {
  DCHECK_EQ(value, result);
  DCHECK(!__ cache_state()->is_used(result));
  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
  Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();

  bool is_byte_store = type.size() == 1;
  LiftoffRegList pinned = {dst_addr, value_reg, offset_reg};

  // Ensure that {value_reg} is a valid register.
  if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
    Register safe_value_reg =
        __ GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)).gp();
    __ mov(safe_value_reg, value_reg);
    value_reg = safe_value_reg;
  }

  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);
  if (binop == kSub) {
    __ neg(value_reg);
  }
  if (binop != kExchange) {
    __ lock();
  }
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      if (binop == kExchange) {
        __ xchg_b(value_reg, dst_op);
      } else {
        __ xadd_b(dst_op, value_reg);
      }
      __ movzx_b(result_reg, value_reg);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      if (binop == kExchange) {
        __ xchg_w(value_reg, dst_op);
      } else {
        __ xadd_w(dst_op, value_reg);
      }
      __ movzx_w(result_reg, value_reg);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      if (binop == kExchange) {
        __ xchg(value_reg, dst_op);
      } else {
        __ xadd(dst_op, value_reg);
      }
      if (value_reg != result_reg) {
        __ mov(result_reg, value_reg);
      }
      break;
    default:
      UNREACHABLE();
  }
  if (is_64_bit_op) {
    __ xor_(result.high_gp(), result.high_gp());
  }
}

inline void AtomicBinop32(LiftoffAssembler* lasm, Binop op, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result,
                          StoreType type) {
  DCHECK_EQ(value, result);
  DCHECK(!__ cache_state()->is_used(result));
  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
  Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();

  // The cmpxchg instruction uses eax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  __ ClearRegister(eax, {&dst_addr, &offset_reg, &value_reg},
                   LiftoffRegList{dst_addr, offset_reg, value_reg});

  bool is_byte_store = type.size() == 1;
  Register scratch = no_reg;
  if (is_byte_store) {
    // The scratch register has to be a byte register. As we are already tight
    // on registers, we just use the root register here.
    static_assert(!kLiftoffAssemblerGpCacheRegs.has(kRootRegister),
                  "root register is not Liftoff cache register");
    DCHECK(kRootRegister.is_byte_register());
    __ push(kRootRegister);
    scratch = kRootRegister;
  } else {
    scratch = __ GetUnusedRegister(
                  kGpReg, LiftoffRegList{dst_addr, offset_reg, value_reg, eax})
                  .gp();
  }

  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ xor_(eax, eax);
      __ mov_b(eax, dst_op);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ xor_(eax, eax);
      __ mov_w(eax, dst_op);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      __ mov(eax, dst_op);
      break;
    }
    default:
      UNREACHABLE();
  }

  Label binop;
  __ bind(&binop);
  __ mov(scratch, eax);

  switch (op) {
    case kAnd: {
      __ and_(scratch, value_reg);
      break;
    }
    case kOr: {
      __ or_(scratch, value_reg);
      break;
    }
    case kXor: {
      __ xor_(scratch, value_reg);
      break;
    }
    default:
      UNREACHABLE();
  }

  __ lock();

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ cmpxchg_b(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ cmpxchg_w(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      __ cmpxchg(dst_op, scratch);
      break;
    }
    default:
      UNREACHABLE();
  }
  __ j(not_equal, &binop);

  if (is_byte_store) {
    __ pop(kRootRegister);
  }
  if (result_reg != eax) {
    __ mov(result_reg, eax);
  }
  if (is_64_bit_op) {
    __ xor_(result.high_gp(), result.high_gp());
  }
}

inline void AtomicBinop64(LiftoffAssembler* lasm, Binop op, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result) {
  // We need {ebx} here, which is the root register. As the root register it
  // needs special treatment. As we use {ebx} directly in the code below, we
  // have to make sure here that the root register is actually {ebx}.
  static_assert(kRootRegister == ebx,
                "The following code assumes that kRootRegister == ebx");
  __ push(ebx);

  // Store the value on the stack, so that we can use it for retries.
  __ AllocateStackSpace(8);
  Operand value_op_hi = Operand(esp, 0);
  Operand value_op_lo = Operand(esp, 4);
  __ mov(value_op_lo, value.low_gp());
  __ mov(value_op_hi, value.high_gp());

  // We want to use the compare-exchange instruction here. It uses registers
  // as follows: old-value = EDX:EAX; new-value = ECX:EBX.
  Register old_hi = edx;
  Register old_lo = eax;
  Register new_hi = ecx;
  Register new_lo = ebx;
  // Base and offset need separate registers that do not alias with the
  // ones above.
  Register base = esi;
  Register offset = edi;

  // Swap base and offset register if necessary to avoid unnecessary
  // moves.
  if (dst_addr == offset || offset_reg == base) {
    std::swap(dst_addr, offset_reg);
  }
  // Spill all these registers if they are still holding other values.
  __ SpillRegisters(old_hi, old_lo, new_hi, base, offset);
  __ ParallelRegisterMove(
      {{LiftoffRegister::ForPair(base, offset),
        LiftoffRegister::ForPair(dst_addr, offset_reg), kI64}});

  Operand dst_op_lo = Operand(base, offset, times_1, offset_imm);
  Operand dst_op_hi = Operand(base, offset, times_1, offset_imm + 4);

  // Load the old value from memory.
  __ mov(old_lo, dst_op_lo);
  __ mov(old_hi, dst_op_hi);
  Label retry;
  __ bind(&retry);
  __ mov(new_lo, old_lo);
  __ mov(new_hi, old_hi);
  switch (op) {
    case kAdd:
      __ add(new_lo, value_op_lo);
      __ adc(new_hi, value_op_hi);
      break;
    case kSub:
      __ sub(new_lo, value_op_lo);
      __ sbb(new_hi, value_op_hi);
      break;
    case kAnd:
      __ and_(new_lo, value_op_lo);
      __ and_(new_hi, value_op_hi);
      break;
    case kOr:
      __ or_(new_lo, value_op_lo);
      __ or_(new_hi, value_op_hi);
      break;
    case kXor:
      __ xor_(new_lo, value_op_lo);
      __ xor_(new_hi, value_op_hi);
      break;
    case kExchange:
      __ mov(new_lo, value_op_lo);
      __ mov(new_hi, value_op_hi);
      break;
  }
  __ lock();
  __ cmpxchg8b(dst_op_lo);
  __ j(not_equal, &retry);

  // Deallocate the stack space again.
  __ add(esp, Immediate(8));
  // Restore the root register, and we are done.
  __ pop(kRootRegister);

  // Move the result into the correct registers.
  __ ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(old_lo, old_hi), kI64}});
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAdd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kAdd, dst_addr, offset_reg,
                                      offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kSub, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }
  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kSub, dst_addr, offset_reg,
                                      offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAnd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kAnd, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
                           value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kXor, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kXor, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kExchange, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }
  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kExchange, dst_addr,
                                      offset_reg, offset_imm, value, result,
                                      type);
}

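// AtomicCompareExchange expects the offset to already be folded into
// {dst_addr} (no {offset_reg}) to free up registers. 32-bit and narrower cases
// use {lock cmpxchg} with the expected value in eax; the 64-bit case uses
// {lock cmpxchg8b}, which needs EDX:EAX and ECX:EBX and therefore saves and
// restores the root register (ebx) around the operation.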
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  // We expect that the offset has already been added to {dst_addr}, and no
  // {offset_reg} is provided. This is to save registers.
  DCHECK_EQ(offset_reg, no_reg);

  DCHECK_EQ(result, expected);

  if (type.value() != StoreType::kI64Store) {
    bool is_64_bit_op = type.value_type() == kWasmI64;

    Register value_reg = is_64_bit_op ? new_value.low_gp() : new_value.gp();
    Register expected_reg = is_64_bit_op ? expected.low_gp() : expected.gp();
    Register result_reg = expected_reg;

    // The cmpxchg instruction uses eax to store the old value of the
    // compare-exchange primitive. Therefore we have to spill the register and
    // move any use to another register.
    ClearRegister(eax, {&dst_addr, &value_reg},
                  LiftoffRegList{dst_addr, value_reg, expected_reg});
    if (expected_reg != eax) {
      mov(eax, expected_reg);
      expected_reg = eax;
    }

    bool is_byte_store = type.size() == 1;
    LiftoffRegList pinned = {dst_addr, value_reg, expected_reg};

    // Ensure that {value_reg} is a valid register.
    if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
      Register safe_value_reg =
          pinned.set(GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)))
              .gp();
      mov(safe_value_reg, value_reg);
      value_reg = safe_value_reg;
      pinned.clear(LiftoffRegister(value_reg));
    }

    Operand dst_op = Operand(dst_addr, offset_imm);

    lock();
    switch (type.value()) {
      case StoreType::kI32Store8:
      case StoreType::kI64Store8: {
        cmpxchg_b(dst_op, value_reg);
        movzx_b(result_reg, eax);
        break;
      }
      case StoreType::kI32Store16:
      case StoreType::kI64Store16: {
        cmpxchg_w(dst_op, value_reg);
        movzx_w(result_reg, eax);
        break;
      }
      case StoreType::kI32Store:
      case StoreType::kI64Store32: {
        cmpxchg(dst_op, value_reg);
        if (result_reg != eax) {
          mov(result_reg, eax);
        }
        break;
      }
      default:
        UNREACHABLE();
    }
    if (is_64_bit_op) {
      xor_(result.high_gp(), result.high_gp());
    }
    return;
  }

  // The following code handles kExprI64AtomicCompareExchange.

  // We need {ebx} here, which is the root register. As the root register it
  // needs special treatment. As we use {ebx} directly in the code below, we
  // have to make sure here that the root register is actually {ebx}.
  static_assert(kRootRegister == ebx,
                "The following code assumes that kRootRegister == ebx");
  push(kRootRegister);

  // The compare-exchange instruction uses registers as follows:
  // old-value = EDX:EAX; new-value = ECX:EBX.
  Register expected_hi = edx;
  Register expected_lo = eax;
  Register new_hi = ecx;
  Register new_lo = ebx;
  // The address needs a separate register that does not alias with the
  // ones above.
  Register address = esi;

  // Spill all these registers if they are still holding other values.
  SpillRegisters(expected_hi, expected_lo, new_hi, address);

  // We have to set new_lo specially, because it's the root register. We do it
  // before setting all other registers so that the original value does not get
  // overwritten.
  mov(new_lo, new_value.low_gp());

  // Move all other values into the right register.
  ParallelRegisterMove(
      {{LiftoffRegister(address), LiftoffRegister(dst_addr), kI32},
       {LiftoffRegister::ForPair(expected_lo, expected_hi), expected, kI64},
       {LiftoffRegister(new_hi), new_value.high(), kI32}});

  Operand dst_op = Operand(address, offset_imm);

  lock();
  cmpxchg8b(dst_op);

  // Restore the root register, and we are done.
  pop(kRootRegister);

  // Move the result into the correct registers.
  ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(expected_lo, expected_hi), kI64}});
}

void LiftoffAssembler::AtomicFence() { mfence(); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  liftoff::Load(this, dst, ebp, kSystemPointerSize * (caller_slot_idx + 1),
                kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  liftoff::Load(this, reg, esp, offset, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  liftoff::Store(this, ebp, kSystemPointerSize * (caller_slot_idx + 1), src,
                 kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_EQ(0, SlotSizeForType(kind) % kSystemPointerSize);
  int words = SlotSizeForType(kind) / kSystemPointerSize;
  DCHECK_LE(1, words);
  // Make sure we move the words in the correct order in case there is an
  // overlap between src and dst.
  if (src_offset < dst_offset) {
    do {
      liftoff::MoveStackValue(this, liftoff::GetStackSlot(src_offset),
                              liftoff::GetStackSlot(dst_offset));
      dst_offset -= kSystemPointerSize;
      src_offset -= kSystemPointerSize;
    } while (--words);
  } else {
    while (words--) {
      liftoff::MoveStackValue(
          this, liftoff::GetStackSlot(src_offset - words * kSystemPointerSize),
          liftoff::GetStackSlot(dst_offset - words * kSystemPointerSize));
    }
  }
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  DCHECK(kI32 == kind || is_reference(kind));
  mov(dst, src);
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    movss(dst, src);
  } else if (kind == kF64) {
    movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movaps(dst, src);
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      mov(dst, reg.gp());
      break;
    case kI64:
      mov(liftoff::GetHalfStackSlot(offset, kLowWord), reg.low_gp());
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), reg.high_gp());
      break;
    case kF32:
      movss(dst, reg.fp());
      break;
    case kF64:
      movsd(dst, reg.fp());
      break;
    case kS128:
      movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      mov(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      mov(liftoff::GetHalfStackSlot(offset, kLowWord), Immediate(low_word));
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), Immediate(high_word));
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, ebp, -offset, kind);
}

void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
  mov(reg, liftoff::GetHalfStackSlot(offset, half));
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  if (size <= 12) {
    // Special straight-line code for up to three words (6-9 bytes per word:
    // C7 <1-4 bytes operand> <4 bytes imm>, makes 18-27 bytes total).
    for (int offset = 4; offset <= size; offset += 4) {
      mov(liftoff::GetHalfStackSlot(start + offset, kLowWord), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 3-6 for lea, 2 for xor, 5
    // for mov, 3 for rep_stos, 3 for pops).
    // Note: rep_stos fills ECX doublewords at [EDI] with EAX.
    push(eax);
    push(ecx);
    push(edi);
    lea(edi, liftoff::GetStackSlot(start + size));
    xor_(eax, eax);
    // Size is in bytes, convert to doublewords (4-bytes).
    mov(ecx, Immediate(size / 4));
    rep_stos();
    pop(edi);
    pop(ecx);
    pop(eax);
  }
}

void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
  if (lhs != dst) {
    lea(dst, Operand(lhs, rhs, times_1, 0));
  } else {
    add(dst, rhs);
  }
}

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
  if (lhs != dst) {
    lea(dst, Operand(lhs, imm));
  } else {
    add(dst, Immediate(imm));
  }
}

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
  if (dst != rhs) {
    // Default path.
    if (dst != lhs) mov(dst, lhs);
    sub(dst, rhs);
  } else if (lhs == rhs) {
    // Degenerate case.
    xor_(dst, dst);
  } else {
    // Emit {dst = lhs + -rhs} if dst == rhs.
    neg(dst);
    add(dst, lhs);
  }
}

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    lea(dst, Operand(lhs, -imm));
  } else {
    sub(dst, Immediate(imm));
  }
}

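// Helpers for commutative binary operations: if {dst} aliases {rhs}, the
// operation can simply be applied with {lhs} as the second operand; otherwise
// {lhs} is moved into {dst} first (a no-op if they alias) and {rhs} (or the
// immediate) is applied to it.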
namespace liftoff {
template <void (Assembler::*op)(Register, Register)>
void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
                          Register rhs) {
  if (dst == rhs) {
    (assm->*op)(dst, lhs);
  } else {
    if (dst != lhs) assm->mov(dst, lhs);
    (assm->*op)(dst, rhs);
  }
}

template <void (Assembler::*op)(Register, int32_t)>
void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
                             int32_t imm) {
  if (dst != lhs) assm->mov(dst, lhs);
  (assm->*op)(dst, imm);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imul>(this, dst, lhs, rhs);
}

namespace liftoff {
enum class DivOrRem : uint8_t { kDiv, kRem };
template <bool is_signed, DivOrRem div_or_rem>
void EmitInt32DivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
                       Register rhs, Label* trap_div_by_zero,
                       Label* trap_div_unrepresentable) {
  constexpr bool needs_unrepresentable_check =
      is_signed && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      is_signed && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);

  // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
  // these registers are unused. If {rhs} is stored in one of them, move it to
  // another temporary register.
  // Do all this before any branch, such that the code is executed
  // unconditionally, as the cache state will also be modified unconditionally.
  assm->SpillRegisters(eax, edx);
  if (rhs == eax || rhs == edx) {
    LiftoffRegList unavailable{eax, edx, lhs};
    Register tmp = assm->GetUnusedRegister(kGpReg, unavailable).gp();
    assm->mov(tmp, rhs);
    rhs = tmp;
  }

  // Check for division by zero.
  assm->test(rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  Label done;
  if (needs_unrepresentable_check) {
    // Check for {kMinInt / -1}. This is unrepresentable.
    Label do_div;
    assm->cmp(rhs, -1);
    assm->j(not_equal, &do_div);
    assm->cmp(lhs, kMinInt);
    assm->j(equal, trap_div_unrepresentable);
    assm->bind(&do_div);
  } else if (special_case_minus_1) {
    // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
    // cannot be computed).
    Label do_rem;
    assm->cmp(rhs, -1);
    assm->j(not_equal, &do_rem);
    assm->xor_(dst, dst);
    assm->jmp(&done);
    assm->bind(&do_rem);
  }

  // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
  // do the division.
  if (lhs != eax) assm->mov(eax, lhs);
  if (is_signed) {
    assm->cdq();
    assm->idiv(rhs);
  } else {
    assm->xor_(edx, edx);
    assm->div(rhs);
  }

  // Move back the result (in {eax} or {edx}) into the {dst} register.
  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? eax : edx;
  if (dst != kResultReg) assm->mov(dst, kResultReg);
  if (special_case_minus_1) assm->bind(&done);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::and_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::and_>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::or_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::or_>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xor_>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xor_>(this, dst, lhs, imm);
}

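// Variable shifts on ia32 take the shift count in cl, so the helper below
// moves {amount} into ecx (saving the previous ecx contents in a temporary
// register if ecx is live) and, if {dst} is ecx itself, computes the result in
// a temporary register first.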
namespace liftoff {
inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
                               Register src, Register amount,
                               void (Assembler::*emit_shift)(Register)) {
  LiftoffRegList pinned = {dst, src, amount};
  // If dst is ecx, compute into a tmp register first, then move to ecx.
  if (dst == ecx) {
    Register tmp = assm->GetUnusedRegister(kGpReg, pinned).gp();
    assm->mov(tmp, src);
    if (amount != ecx) assm->mov(ecx, amount);
    (assm->*emit_shift)(tmp);
    assm->mov(ecx, tmp);
    return;
  }

  // Move amount into ecx. If ecx is in use, move its content to a tmp register
  // first. If src is ecx, src is now the tmp register.
  Register tmp_reg = no_reg;
  if (amount != ecx) {
    if (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
        pinned.has(LiftoffRegister(ecx))) {
      tmp_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
      assm->mov(tmp_reg, ecx);
      if (src == ecx) src = tmp_reg;
    }
    assm->mov(ecx, amount);
  }

  // Do the actual shift.
  if (dst != src) assm->mov(dst, src);
  (assm->*emit_shift)(dst);

  // Restore ecx if needed.
  if (tmp_reg.is_valid()) assm->mov(ecx, tmp_reg);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shl_cl);
}

void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) mov(dst, src);
  shl(dst, amount & 31);
}

void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::sar_cl);
}

void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) mov(dst, src);
  sar(dst, amount & 31);
}

void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shr_cl);
}

void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) mov(dst, src);
  shr(dst, amount & 31);
}

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Lzcnt(dst, src);
}

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Tzcnt(dst, src);
}

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcnt(dst, src);
  return true;
}

1553 namespace liftoff {
1554 template <void (Assembler::*op)(Register, Register),
1555 void (Assembler::*op_with_carry)(Register, Register)>
OpWithCarry(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1556 inline void OpWithCarry(LiftoffAssembler* assm, LiftoffRegister dst,
1557 LiftoffRegister lhs, LiftoffRegister rhs) {
1558 // First, compute the low half of the result, potentially into a temporary dst
1559 // register if {dst.low_gp()} equals {rhs.low_gp()} or any register we need to
1560 // keep alive for computing the upper half.
1561 LiftoffRegList keep_alive{lhs.high_gp(), rhs};
1562 Register dst_low = keep_alive.has(dst.low_gp())
1563 ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1564 : dst.low_gp();
1565
1566 if (dst_low != lhs.low_gp()) assm->mov(dst_low, lhs.low_gp());
1567 (assm->*op)(dst_low, rhs.low_gp());
1568
1569 // Now compute the upper half, while keeping alive the previous result.
1570 keep_alive = LiftoffRegList{dst_low, rhs.high_gp()};
1571 Register dst_high = keep_alive.has(dst.high_gp())
1572 ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1573 : dst.high_gp();
1574
1575 if (dst_high != lhs.high_gp()) assm->mov(dst_high, lhs.high_gp());
1576 (assm->*op_with_carry)(dst_high, rhs.high_gp());
1577
1578 // If necessary, move result into the right registers.
1579 LiftoffRegister tmp_result = LiftoffRegister::ForPair(dst_low, dst_high);
1580 if (tmp_result != dst) assm->Move(dst, tmp_result, kI64);
1581 }
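// For reference, a scalar sketch (illustration only, not emitted code) of what
// OpWithCarry computes when instantiated with add/adc; sub/sbb is analogous:
//   uint32_t result_lo = lhs_lo + rhs_lo;       // add: sets CF on unsigned overflow
//   uint32_t result_hi = lhs_hi + rhs_hi + CF;  // adc: consumes the carry of the low half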
1582
1583 template <void (Assembler::*op)(Register, const Immediate&),
1584 void (Assembler::*op_with_carry)(Register, int32_t)>
OpWithCarryI(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister lhs,int64_t imm)1585 inline void OpWithCarryI(LiftoffAssembler* assm, LiftoffRegister dst,
1586 LiftoffRegister lhs, int64_t imm) {
1587 // The compiler allocated registers such that either {dst == lhs} or there is
1588 // no overlap between the two.
1589 DCHECK_NE(dst.low_gp(), lhs.high_gp());
1590
1591 int32_t imm_low_word = static_cast<int32_t>(imm);
1592 int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
1593
1594 // First, compute the low half of the result.
1595 if (dst.low_gp() != lhs.low_gp()) assm->mov(dst.low_gp(), lhs.low_gp());
1596 (assm->*op)(dst.low_gp(), Immediate(imm_low_word));
1597
1598 // Now compute the upper half.
1599 if (dst.high_gp() != lhs.high_gp()) assm->mov(dst.high_gp(), lhs.high_gp());
1600 (assm->*op_with_carry)(dst.high_gp(), imm_high_word);
1601 }
1602 } // namespace liftoff
1603
emit_i64_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1604 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1605 LiftoffRegister rhs) {
1606 liftoff::OpWithCarry<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1607 }
1608
emit_i64_addi(LiftoffRegister dst,LiftoffRegister lhs,int64_t imm)1609 void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1610 int64_t imm) {
1611 liftoff::OpWithCarryI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1612 }
1613
emit_i64_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1614 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1615 LiftoffRegister rhs) {
1616 liftoff::OpWithCarry<&Assembler::sub, &Assembler::sbb>(this, dst, lhs, rhs);
1617 }
1618
emit_i64_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1619 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1620 LiftoffRegister rhs) {
1621 // Idea:
1622 // [ lhs_hi | lhs_lo ] * [ rhs_hi | rhs_lo ]
1623 // = [ lhs_hi * rhs_lo | ] (32 bit mul, shift 32)
1624 // + [ lhs_lo * rhs_hi | ] (32 bit mul, shift 32)
1625 // + [ lhs_lo * rhs_lo ] (32x32->64 mul, shift 0)
1626
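// Worked example (illustrative numbers): lhs = 0x0000'0002'0000'0003, rhs = 5:
//   lhs_hi * rhs_lo = 2 * 5 = 10, contributes 10 << 32
//   lhs_lo * rhs_hi = 3 * 0 = 0
//   lhs_lo * rhs_lo = 3 * 5 = 15 (full 64-bit product, high half 0)
//   sum = (10 << 32) + 15 = 0x0000'000A'0000'000F = lhs * rhs mod 2^64.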
1627 // For simplicity, we move lhs and rhs into fixed registers.
1628 Register dst_hi = edx;
1629 Register dst_lo = eax;
1630 Register lhs_hi = ecx;
1631 Register lhs_lo = dst_lo;
1632 Register rhs_hi = dst_hi;
1633 Register rhs_lo = esi;
1634
1635 // Spill all these registers if they are still holding other values.
1636 SpillRegisters(dst_hi, dst_lo, lhs_hi, rhs_lo);
1637
1638 // Move lhs and rhs into the respective registers.
1639 ParallelRegisterMove({{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kI64},
1640 {LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kI64}});
1641
1642 // First mul: lhs_hi' = lhs_hi * rhs_lo.
1643 imul(lhs_hi, rhs_lo);
1644 // Second mul: rhs_hi' = rhs_hi * lhs_lo.
1645 imul(rhs_hi, lhs_lo);
1646 // Add them: lhs_hi'' = lhs_hi' + rhs_hi' = lhs_hi * rhs_lo + rhs_hi * lhs_lo.
1647 add(lhs_hi, rhs_hi);
1648 // Third mul: edx:eax (dst_hi:dst_lo) = eax * esi (lhs_lo * rhs_lo).
1649 mul(rhs_lo);
1650 // Add lhs_hi'' to dst_hi.
1651 add(dst_hi, lhs_hi);
1652
1653 // Finally, move back the temporary result to the actual dst register pair.
1654 LiftoffRegister dst_tmp = LiftoffRegister::ForPair(dst_lo, dst_hi);
1655 if (dst != dst_tmp) Move(dst, dst_tmp, kI64);
1656 }
1657
emit_i64_divs(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero,Label * trap_div_unrepresentable)1658 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1659 LiftoffRegister rhs,
1660 Label* trap_div_by_zero,
1661 Label* trap_div_unrepresentable) {
1662 return false;
1663 }
1664
emit_i64_divu(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1665 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1666 LiftoffRegister rhs,
1667 Label* trap_div_by_zero) {
1668 return false;
1669 }
1670
emit_i64_rems(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1671 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1672 LiftoffRegister rhs,
1673 Label* trap_div_by_zero) {
1674 return false;
1675 }
1676
emit_i64_remu(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1677 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1678 LiftoffRegister rhs,
1679 Label* trap_div_by_zero) {
1680 return false;
1681 }
1682
1683 namespace liftoff {
PairContains(LiftoffRegister pair,Register reg)1684 inline bool PairContains(LiftoffRegister pair, Register reg) {
1685 return pair.low_gp() == reg || pair.high_gp() == reg;
1686 }
1687
ReplaceInPair(LiftoffRegister pair,Register old_reg,Register new_reg)1688 inline LiftoffRegister ReplaceInPair(LiftoffRegister pair, Register old_reg,
1689 Register new_reg) {
1690 if (pair.low_gp() == old_reg) {
1691 return LiftoffRegister::ForPair(new_reg, pair.high_gp());
1692 }
1693 if (pair.high_gp() == old_reg) {
1694 return LiftoffRegister::ForPair(pair.low_gp(), new_reg);
1695 }
1696 return pair;
1697 }
1698
Emit64BitShiftOperation(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister src,Register amount,void (TurboAssembler::* emit_shift)(Register,Register))1699 inline void Emit64BitShiftOperation(
1700 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister src,
1701 Register amount, void (TurboAssembler::*emit_shift)(Register, Register)) {
1702 // Temporary registers cannot overlap with {dst}.
1703 LiftoffRegList pinned = {dst};
1704
1705 constexpr size_t kMaxRegMoves = 3;
1706 base::SmallVector<LiftoffAssembler::ParallelRegisterMoveTuple, kMaxRegMoves>
1707 reg_moves;
1708
1709 // If {dst} contains {ecx}, replace it by an unused register, which is then
1710 // moved to {ecx} in the end.
1711 Register ecx_replace = no_reg;
1712 if (PairContains(dst, ecx)) {
1713 ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1714 dst = ReplaceInPair(dst, ecx, ecx_replace);
1715 // If {amount} needs to be moved to {ecx}, but {ecx} is in use (and not part
1716 // of {dst}, hence overwritten anyway), move {ecx} to a tmp register and
1717 // restore it at the end.
1718 } else if (amount != ecx &&
1719 (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
1720 pinned.has(LiftoffRegister(ecx)))) {
1721 ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1722 reg_moves.emplace_back(ecx_replace, ecx, kI32);
1723 }
1724
1725 reg_moves.emplace_back(dst, src, kI64);
1726 reg_moves.emplace_back(ecx, amount, kI32);
1727 assm->ParallelRegisterMove(base::VectorOf(reg_moves));
1728
1729 // Do the actual shift.
1730 (assm->*emit_shift)(dst.high_gp(), dst.low_gp());
1731
1732 // Restore {ecx} if needed.
1733 if (ecx_replace != no_reg) assm->mov(ecx, ecx_replace);
1734 }
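// For reference (sketch, not emitted code): for shift amounts n in [1, 31] a
// 64-bit left shift over a register pair is
//   high = (high << n) | (low >> (32 - n));
//   low  = low << n;
// (sar/shr are analogous); the immediate variants below show how amounts of
// 32 and above simply move one half across and fill the other half.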
1735 } // namespace liftoff
1736
emit_i64_shl(LiftoffRegister dst,LiftoffRegister src,Register amount)1737 void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1738 Register amount) {
1739 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1740 &TurboAssembler::ShlPair_cl);
1741 }
1742
emit_i64_shli(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1743 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1744 int32_t amount) {
1745 amount &= 63;
1746 if (amount >= 32) {
1747 if (dst.high_gp() != src.low_gp()) mov(dst.high_gp(), src.low_gp());
1748 if (amount != 32) shl(dst.high_gp(), amount - 32);
1749 xor_(dst.low_gp(), dst.low_gp());
1750 } else {
1751 if (dst != src) Move(dst, src, kI64);
1752 ShlPair(dst.high_gp(), dst.low_gp(), amount);
1753 }
1754 }
1755
emit_i64_sar(LiftoffRegister dst,LiftoffRegister src,Register amount)1756 void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1757 Register amount) {
1758 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1759 &TurboAssembler::SarPair_cl);
1760 }
1761
emit_i64_sari(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1762 void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1763 int32_t amount) {
1764 amount &= 63;
1765 if (amount >= 32) {
1766 if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1767 if (dst.high_gp() != src.high_gp()) mov(dst.high_gp(), src.high_gp());
1768 if (amount != 32) sar(dst.low_gp(), amount - 32);
1769 sar(dst.high_gp(), 31);
1770 } else {
1771 if (dst != src) Move(dst, src, kI64);
1772 SarPair(dst.high_gp(), dst.low_gp(), amount);
1773 }
1774 }
emit_i64_shr(LiftoffRegister dst,LiftoffRegister src,Register amount)1775 void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1776 Register amount) {
1777 liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1778 &TurboAssembler::ShrPair_cl);
1779 }
1780
emit_i64_shri(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1781 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1782 int32_t amount) {
1783 amount &= 63;
1784 if (amount >= 32) {
1785 if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1786 if (amount != 32) shr(dst.low_gp(), amount - 32);
1787 xor_(dst.high_gp(), dst.high_gp());
1788 } else {
1789 if (dst != src) Move(dst, src, kI64);
1790 ShrPair(dst.high_gp(), dst.low_gp(), amount);
1791 }
1792 }
1793
emit_i64_clz(LiftoffRegister dst,LiftoffRegister src)1794 void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1795 // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
1796 Label done;
1797 Register safe_dst = dst.low_gp();
1798 if (src.low_gp() == safe_dst) safe_dst = dst.high_gp();
1799 if (CpuFeatures::IsSupported(LZCNT)) {
1800 CpuFeatureScope scope(this, LZCNT);
1801 lzcnt(safe_dst, src.high_gp()); // Sets CF if high == 0.
1802 j(not_carry, &done, Label::kNear);
1803 lzcnt(safe_dst, src.low_gp());
1804 add(safe_dst, Immediate(32)); // 32 + CLZ32(low)
1805 } else {
1806 // CLZ32(x) == (x == 0 ? 32 : 31 - BSR32(x))
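// (e.g. x == 1: BSR32(1) == 0 and 31 ^ 0 == 31 == CLZ32(1))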
1807 Label high_is_zero;
1808 bsr(safe_dst, src.high_gp()); // Sets ZF if high == 0.
1809 j(zero, &high_is_zero, Label::kNear);
1810 xor_(safe_dst, Immediate(31)); // for x in [0..31], 31^x == 31-x.
1811 jmp(&done, Label::kNear);
1812
1813 bind(&high_is_zero);
1814 Label low_not_zero;
1815 bsr(safe_dst, src.low_gp());
1816 j(not_zero, &low_not_zero, Label::kNear);
1817 mov(safe_dst, Immediate(64 ^ 63)); // 64, after the xor below.
1818 bind(&low_not_zero);
1819 xor_(safe_dst, 63); // for x in [0..31], 63^x == 63-x.
1820 }
1821
1822 bind(&done);
1823 if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
1824 xor_(dst.high_gp(), dst.high_gp()); // High word of result is always 0.
1825 }
1826
emit_i64_ctz(LiftoffRegister dst,LiftoffRegister src)1827 void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1828 // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
1829 Label done;
1830 Register safe_dst = dst.low_gp();
1831 if (src.high_gp() == safe_dst) safe_dst = dst.high_gp();
1832 if (CpuFeatures::IsSupported(BMI1)) {
1833 CpuFeatureScope scope(this, BMI1);
1834 tzcnt(safe_dst, src.low_gp()); // Sets CF if low == 0.
1835 j(not_carry, &done, Label::kNear);
1836 tzcnt(safe_dst, src.high_gp());
1837 add(safe_dst, Immediate(32)); // 32 + CTZ32(high)
1838 } else {
1839 // CTZ32(x) == (x == 0 ? 32 : BSF32(x))
1840 bsf(safe_dst, src.low_gp()); // Sets ZF if low == 0.
1841 j(not_zero, &done, Label::kNear);
1842
1843 Label high_not_zero;
1844 bsf(safe_dst, src.high_gp());
1845 j(not_zero, &high_not_zero, Label::kNear);
1846 mov(safe_dst, 64); // low == 0 and high == 0
1847 jmp(&done);
1848 bind(&high_not_zero);
1849 add(safe_dst, Immediate(32)); // 32 + CTZ32(high)
1850 }
1851
1852 bind(&done);
1853 if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
1854 xor_(dst.high_gp(), dst.high_gp()); // High word of result is always 0.
1855 }
1856
emit_i64_popcnt(LiftoffRegister dst,LiftoffRegister src)1857 bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1858 LiftoffRegister src) {
1859 if (!CpuFeatures::IsSupported(POPCNT)) return false;
1860 CpuFeatureScope scope(this, POPCNT);
1861 // Produce partial popcnts in the two dst registers.
1862 Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
1863 Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
1864 popcnt(dst.low_gp(), src1);
1865 popcnt(dst.high_gp(), src2);
1866 // Add the two into the lower dst reg, clear the higher dst reg.
1867 add(dst.low_gp(), dst.high_gp());
1868 xor_(dst.high_gp(), dst.high_gp());
1869 return true;
1870 }
1871
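// Note on the Smi encoding assumed here: on ia32 a Smi stores the integer
// shifted left by one (tag bit 0), so adding the tagged constant
// Smi::FromInt(1) below increments the stored integer by one without
// untagging.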
IncrementSmi(LiftoffRegister dst,int offset)1872 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1873 add(Operand(dst.gp(), offset), Immediate(Smi::FromInt(1)));
1874 }
1875
emit_f32_add(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1876 void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
1877 DoubleRegister rhs) {
1878 if (CpuFeatures::IsSupported(AVX)) {
1879 CpuFeatureScope scope(this, AVX);
1880 vaddss(dst, lhs, rhs);
1881 } else if (dst == rhs) {
1882 addss(dst, lhs);
1883 } else {
1884 if (dst != lhs) movss(dst, lhs);
1885 addss(dst, rhs);
1886 }
1887 }
1888
emit_f32_sub(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1889 void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
1890 DoubleRegister rhs) {
1891 if (CpuFeatures::IsSupported(AVX)) {
1892 CpuFeatureScope scope(this, AVX);
1893 vsubss(dst, lhs, rhs);
1894 } else if (dst == rhs) {
1895 movss(liftoff::kScratchDoubleReg, rhs);
1896 movss(dst, lhs);
1897 subss(dst, liftoff::kScratchDoubleReg);
1898 } else {
1899 if (dst != lhs) movss(dst, lhs);
1900 subss(dst, rhs);
1901 }
1902 }
1903
emit_f32_mul(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1904 void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
1905 DoubleRegister rhs) {
1906 if (CpuFeatures::IsSupported(AVX)) {
1907 CpuFeatureScope scope(this, AVX);
1908 vmulss(dst, lhs, rhs);
1909 } else if (dst == rhs) {
1910 mulss(dst, lhs);
1911 } else {
1912 if (dst != lhs) movss(dst, lhs);
1913 mulss(dst, rhs);
1914 }
1915 }
1916
emit_f32_div(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1917 void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
1918 DoubleRegister rhs) {
1919 if (CpuFeatures::IsSupported(AVX)) {
1920 CpuFeatureScope scope(this, AVX);
1921 vdivss(dst, lhs, rhs);
1922 } else if (dst == rhs) {
1923 movss(liftoff::kScratchDoubleReg, rhs);
1924 movss(dst, lhs);
1925 divss(dst, liftoff::kScratchDoubleReg);
1926 } else {
1927 if (dst != lhs) movss(dst, lhs);
1928 divss(dst, rhs);
1929 }
1930 }
1931
1932 namespace liftoff {
1933 enum class MinOrMax : uint8_t { kMin, kMax };
1934 template <typename type>
EmitFloatMinOrMax(LiftoffAssembler * assm,DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs,MinOrMax min_or_max)1935 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
1936 DoubleRegister lhs, DoubleRegister rhs,
1937 MinOrMax min_or_max) {
1938 Label is_nan;
1939 Label lhs_below_rhs;
1940 Label lhs_above_rhs;
1941 Label done;
1942
1943 // We need one tmp register to extract the sign bit. Get it right at the
1944 // beginning, such that the spilling code is not accidentally jumped over.
1945 Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp();
1946
1947 #define dop(name, ...) \
1948 do { \
1949 if (sizeof(type) == 4) { \
1950 assm->name##s(__VA_ARGS__); \
1951 } else { \
1952 assm->name##d(__VA_ARGS__); \
1953 } \
1954 } while (false)
1955
1956 // Check the easy cases first: NaN (i.e. unordered), smaller and greater.
1957 // NaN has to be checked first, because PF=1 implies CF=1.
1958 dop(ucomis, lhs, rhs);
1959 assm->j(parity_even, &is_nan, Label::kNear); // PF=1
1960 assm->j(below, &lhs_below_rhs, Label::kNear); // CF=1
1961 assm->j(above, &lhs_above_rhs, Label::kNear); // CF=0 && ZF=0
1962
1963 // If we get here, then either
1964 // a) {lhs == rhs},
1965 // b) {lhs == -0.0} and {rhs == 0.0}, or
1966 // c) {lhs == 0.0} and {rhs == -0.0}.
1967 // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
1968 // bit of {rhs} to differentiate b) and c).
1969 dop(movmskp, tmp, rhs);
1970 assm->test(tmp, Immediate(1));
1971 assm->j(zero, &lhs_below_rhs, Label::kNear);
1972 assm->jmp(&lhs_above_rhs, Label::kNear);
1973
1974 assm->bind(&is_nan);
1975 // Create a NaN output.
1976 dop(xorp, dst, dst);
1977 dop(divs, dst, dst);
1978 assm->jmp(&done, Label::kNear);
1979
1980 assm->bind(&lhs_below_rhs);
1981 DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
1982 if (dst != lhs_below_rhs_src) dop(movs, dst, lhs_below_rhs_src);
1983 assm->jmp(&done, Label::kNear);
1984
1985 assm->bind(&lhs_above_rhs);
1986 DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
1987 if (dst != lhs_above_rhs_src) dop(movs, dst, lhs_above_rhs_src);
1988
1989 assm->bind(&done);
1990 }
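// Scalar reference for the sequence above (sketch; kMin shown, kMax mirrors
// the two final branches):
//   float WasmMin(float l, float r) {
//     if (std::isnan(l) || std::isnan(r))
//       return std::numeric_limits<float>::quiet_NaN();  // the 0/0 above
//     if (l < r) return l;
//     if (l > r) return r;
//     // l == r, possibly +0.0 vs -0.0: the sign bit of r picks the result.
//     return std::signbit(r) ? r : l;
//   }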
1991 } // namespace liftoff
1992
emit_f32_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1993 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1994 DoubleRegister rhs) {
1995 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1996 liftoff::MinOrMax::kMin);
1997 }
1998
emit_f32_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1999 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
2000 DoubleRegister rhs) {
2001 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
2002 liftoff::MinOrMax::kMax);
2003 }
2004
emit_f32_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2005 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
2006 DoubleRegister rhs) {
2007 static constexpr int kF32SignBit = 1 << 31;
2008 LiftoffRegList pinned;
2009 Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2010 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2011 Movd(scratch, lhs); // move {lhs} into {scratch}.
2012 and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
2013 Movd(scratch2, rhs); // move {rhs} into {scratch2}.
2014 and_(scratch2, Immediate(kF32SignBit)); // isolate sign bit in {scratch2}.
2015 or_(scratch, scratch2); // combine {scratch2} into {scratch}.
2016 Movd(dst, scratch); // move result into {dst}.
2017 }
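// I.e. the sequence above computes, on the raw f32 bit patterns (sketch):
//   bits(dst) = (bits(lhs) & 0x7fff'ffff) | (bits(rhs) & 0x8000'0000)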
2018
emit_f32_abs(DoubleRegister dst,DoubleRegister src)2019 void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
2020 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
2021 if (dst == src) {
2022 TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2023 Andps(dst, liftoff::kScratchDoubleReg);
2024 } else {
2025 TurboAssembler::Move(dst, kSignBit - 1);
2026 Andps(dst, src);
2027 }
2028 }
2029
emit_f32_neg(DoubleRegister dst,DoubleRegister src)2030 void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
2031 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
2032 if (dst == src) {
2033 TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2034 Xorps(dst, liftoff::kScratchDoubleReg);
2035 } else {
2036 TurboAssembler::Move(dst, kSignBit);
2037 Xorps(dst, src);
2038 }
2039 }
2040
emit_f32_ceil(DoubleRegister dst,DoubleRegister src)2041 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
2042 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2043 roundss(dst, src, kRoundUp);
2044 return true;
2045 }
2046
emit_f32_floor(DoubleRegister dst,DoubleRegister src)2047 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
2048 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2049 roundss(dst, src, kRoundDown);
2050 return true;
2051 }
2052
emit_f32_trunc(DoubleRegister dst,DoubleRegister src)2053 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
2054 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2055 roundss(dst, src, kRoundToZero);
2056 return true;
2057 }
2058
emit_f32_nearest_int(DoubleRegister dst,DoubleRegister src)2059 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
2060 DoubleRegister src) {
2061 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2062 roundss(dst, src, kRoundToNearest);
2063 return true;
2064 }
2065
emit_f32_sqrt(DoubleRegister dst,DoubleRegister src)2066 void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
2067 Sqrtss(dst, src);
2068 }
2069
emit_f64_add(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2070 void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
2071 DoubleRegister rhs) {
2072 if (CpuFeatures::IsSupported(AVX)) {
2073 CpuFeatureScope scope(this, AVX);
2074 vaddsd(dst, lhs, rhs);
2075 } else if (dst == rhs) {
2076 addsd(dst, lhs);
2077 } else {
2078 if (dst != lhs) movsd(dst, lhs);
2079 addsd(dst, rhs);
2080 }
2081 }
2082
emit_f64_sub(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2083 void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
2084 DoubleRegister rhs) {
2085 if (CpuFeatures::IsSupported(AVX)) {
2086 CpuFeatureScope scope(this, AVX);
2087 vsubsd(dst, lhs, rhs);
2088 } else if (dst == rhs) {
2089 movsd(liftoff::kScratchDoubleReg, rhs);
2090 movsd(dst, lhs);
2091 subsd(dst, liftoff::kScratchDoubleReg);
2092 } else {
2093 if (dst != lhs) movsd(dst, lhs);
2094 subsd(dst, rhs);
2095 }
2096 }
2097
emit_f64_mul(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2098 void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
2099 DoubleRegister rhs) {
2100 if (CpuFeatures::IsSupported(AVX)) {
2101 CpuFeatureScope scope(this, AVX);
2102 vmulsd(dst, lhs, rhs);
2103 } else if (dst == rhs) {
2104 mulsd(dst, lhs);
2105 } else {
2106 if (dst != lhs) movsd(dst, lhs);
2107 mulsd(dst, rhs);
2108 }
2109 }
2110
emit_f64_div(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2111 void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
2112 DoubleRegister rhs) {
2113 if (CpuFeatures::IsSupported(AVX)) {
2114 CpuFeatureScope scope(this, AVX);
2115 vdivsd(dst, lhs, rhs);
2116 } else if (dst == rhs) {
2117 movsd(liftoff::kScratchDoubleReg, rhs);
2118 movsd(dst, lhs);
2119 divsd(dst, liftoff::kScratchDoubleReg);
2120 } else {
2121 if (dst != lhs) movsd(dst, lhs);
2122 divsd(dst, rhs);
2123 }
2124 }
2125
emit_f64_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2126 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
2127 DoubleRegister rhs) {
2128 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2129 liftoff::MinOrMax::kMin);
2130 }
2131
emit_f64_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2132 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
2133 DoubleRegister rhs) {
2134 static constexpr int kF32SignBit = 1 << 31;
2135 // On ia32, we cannot hold the whole f64 value in a gp register, so we just
2136 // operate on the upper half (UH).
2137 LiftoffRegList pinned;
2138 Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2139 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2140
2141 Pextrd(scratch, lhs, 1); // move UH of {lhs} into {scratch}.
2142 and_(scratch, Immediate(~kF32SignBit)); // clear sign bit in {scratch}.
2143 Pextrd(scratch2, rhs, 1); // move UH of {rhs} into {scratch2}.
2144 and_(scratch2, Immediate(kF32SignBit)); // isolate sign bit in {scratch2}.
2145 or_(scratch, scratch2); // combine {scratch2} into {scratch}.
2146 movsd(dst, lhs); // move {lhs} into {dst}.
2147 Pinsrd(dst, scratch, 1); // insert {scratch} into UH of {dst}.
2148 }
2149
emit_f64_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)2150 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
2151 DoubleRegister rhs) {
2152 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2153 liftoff::MinOrMax::kMax);
2154 }
2155
emit_f64_abs(DoubleRegister dst,DoubleRegister src)2156 void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
2157 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2158 if (dst == src) {
2159 TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2160 Andpd(dst, liftoff::kScratchDoubleReg);
2161 } else {
2162 TurboAssembler::Move(dst, kSignBit - 1);
2163 Andpd(dst, src);
2164 }
2165 }
2166
emit_f64_neg(DoubleRegister dst,DoubleRegister src)2167 void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
2168 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2169 if (dst == src) {
2170 TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2171 Xorpd(dst, liftoff::kScratchDoubleReg);
2172 } else {
2173 TurboAssembler::Move(dst, kSignBit);
2174 Xorpd(dst, src);
2175 }
2176 }
2177
emit_f64_ceil(DoubleRegister dst,DoubleRegister src)2178 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
2179 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2180 roundsd(dst, src, kRoundUp);
2181 return true;
2182 }
2183
emit_f64_floor(DoubleRegister dst,DoubleRegister src)2184 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
2185 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2186 roundsd(dst, src, kRoundDown);
2187 return true;
2188 }
2189
emit_f64_trunc(DoubleRegister dst,DoubleRegister src)2190 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
2191 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2192 roundsd(dst, src, kRoundToZero);
2193 return true;
2194 }
2195
emit_f64_nearest_int(DoubleRegister dst,DoubleRegister src)2196 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
2197 DoubleRegister src) {
2198 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2199 roundsd(dst, src, kRoundToNearest);
2200 return true;
2201 }
2202
emit_f64_sqrt(DoubleRegister dst,DoubleRegister src)2203 void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
2204 Sqrtsd(dst, src);
2205 }
2206
2207 namespace liftoff {
2208 #define __ assm->
2209 // Used for float to int conversions. If the value in {converted_back} equals
2210 // {src} afterwards, the conversion succeeded.
2211 template <typename dst_type, typename src_type>
ConvertFloatToIntAndBack(LiftoffAssembler * assm,Register dst,DoubleRegister src,DoubleRegister converted_back,LiftoffRegList pinned)2212 inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
2213 DoubleRegister src,
2214 DoubleRegister converted_back,
2215 LiftoffRegList pinned) {
2216 if (std::is_same<double, src_type>::value) { // f64
2217 if (std::is_signed<dst_type>::value) { // f64 -> i32
2218 __ cvttsd2si(dst, src);
2219 __ Cvtsi2sd(converted_back, dst);
2220 } else { // f64 -> u32
2221 __ Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg);
2222 __ Cvtui2sd(converted_back, dst,
2223 __ GetUnusedRegister(kGpReg, pinned).gp());
2224 }
2225 } else { // f32
2226 if (std::is_signed<dst_type>::value) { // f32 -> i32
2227 __ cvttss2si(dst, src);
2228 __ Cvtsi2ss(converted_back, dst);
2229 } else { // f32 -> u32
2230 __ Cvttss2ui(dst, src, liftoff::kScratchDoubleReg);
2231 __ Cvtui2ss(converted_back, dst,
2232 __ GetUnusedRegister(kGpReg, pinned).gp());
2233 }
2234 }
2235 }
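// The callers below use this for a round-trip range check; sketched for the
// signed f64 -> i32 case (illustration only):
//   double rounded = trunc(src);                 // roundsd
//   int32_t i = static_cast<int32_t>(rounded);   // cvttsd2si
//   double back = static_cast<double>(i);        // cvtsi2sd
//   bool in_range = !std::isnan(rounded) && back == rounded;
// NaN and out-of-range inputs fail the comparison and are trapped on or
// saturated by the callers.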
2236
2237 template <typename dst_type, typename src_type>
EmitTruncateFloatToInt(LiftoffAssembler * assm,Register dst,DoubleRegister src,Label * trap)2238 inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2239 DoubleRegister src, Label* trap) {
2240 if (!CpuFeatures::IsSupported(SSE4_1)) {
2241 __ bailout(kMissingCPUFeature, "no SSE4.1");
2242 return true;
2243 }
2244 CpuFeatureScope feature(assm, SSE4_1);
2245
2246 LiftoffRegList pinned = {src, dst};
2247 DoubleRegister rounded =
2248 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2249 DoubleRegister converted_back =
2250 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2251
2252 if (std::is_same<double, src_type>::value) { // f64
2253 __ roundsd(rounded, src, kRoundToZero);
2254 } else { // f32
2255 __ roundss(rounded, src, kRoundToZero);
2256 }
2257 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2258 converted_back, pinned);
2259 if (std::is_same<double, src_type>::value) { // f64
2260 __ ucomisd(converted_back, rounded);
2261 } else { // f32
2262 __ ucomiss(converted_back, rounded);
2263 }
2264
2265 // Jump to trap if PF is 1 (one of the operands was NaN) or if the values
2266 // are not equal.
2267 __ j(parity_even, trap);
2268 __ j(not_equal, trap);
2269 return true;
2270 }
2271
2272 template <typename dst_type, typename src_type>
EmitSatTruncateFloatToInt(LiftoffAssembler * assm,Register dst,DoubleRegister src)2273 inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2274 DoubleRegister src) {
2275 if (!CpuFeatures::IsSupported(SSE4_1)) {
2276 __ bailout(kMissingCPUFeature, "no SSE4.1");
2277 return true;
2278 }
2279 CpuFeatureScope feature(assm, SSE4_1);
2280
2281 Label done;
2282 Label not_nan;
2283 Label src_positive;
2284
2285 LiftoffRegList pinned = {src, dst};
2286 DoubleRegister rounded =
2287 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2288 DoubleRegister converted_back =
2289 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2290 DoubleRegister zero_reg =
2291 pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2292
2293 if (std::is_same<double, src_type>::value) { // f64
2294 __ roundsd(rounded, src, kRoundToZero);
2295 } else { // f32
2296 __ roundss(rounded, src, kRoundToZero);
2297 }
2298
2299 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2300 converted_back, pinned);
2301 if (std::is_same<double, src_type>::value) { // f64
2302 __ ucomisd(converted_back, rounded);
2303 } else { // f32
2304 __ ucomiss(converted_back, rounded);
2305 }
2306
2307 // Return 0 if PF is 1 (one of the operands was NaN)
2308 __ j(parity_odd, ¬_nan);
2309 __ xor_(dst, dst);
2310 __ jmp(&done);
2311
2312 __ bind(¬_nan);
2313 // If rounding is as expected, return result
2314 __ j(equal, &done);
2315
2316 __ Xorpd(zero_reg, zero_reg);
2317
2318 // if out-of-bounds, check if src is positive
2319 if (std::is_same<double, src_type>::value) { // f64
2320 __ ucomisd(src, zero_reg);
2321 } else { // f32
2322 __ ucomiss(src, zero_reg);
2323 }
2324 __ j(above, &src_positive);
2325 __ mov(dst, Immediate(std::numeric_limits<dst_type>::min()));
2326 __ jmp(&done);
2327
2328 __ bind(&src_positive);
2329
2330 __ mov(dst, Immediate(std::numeric_limits<dst_type>::max()));
2331
2332 __ bind(&done);
2333 return true;
2334 }
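// Net effect of the saturating variant (sketch): NaN -> 0, inputs below the
// target range -> numeric_limits<dst_type>::min(), inputs above the range ->
// numeric_limits<dst_type>::max(), everything else -> trunc(src).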
2335 #undef __
2336 } // namespace liftoff
2337
emit_type_conversion(WasmOpcode opcode,LiftoffRegister dst,LiftoffRegister src,Label * trap)2338 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2339 LiftoffRegister dst,
2340 LiftoffRegister src, Label* trap) {
2341 switch (opcode) {
2342 case kExprI32ConvertI64:
2343 if (dst.gp() != src.low_gp()) mov(dst.gp(), src.low_gp());
2344 return true;
2345 case kExprI32SConvertF32:
2346 return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2347 src.fp(), trap);
2348 case kExprI32UConvertF32:
2349 return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2350 src.fp(), trap);
2351 case kExprI32SConvertF64:
2352 return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2353 src.fp(), trap);
2354 case kExprI32UConvertF64:
2355 return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2356 src.fp(), trap);
2357 case kExprI32SConvertSatF32:
2358 return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2359 src.fp());
2360 case kExprI32UConvertSatF32:
2361 return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2362 src.fp());
2363 case kExprI32SConvertSatF64:
2364 return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2365 src.fp());
2366 case kExprI32UConvertSatF64:
2367 return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
2368 this, dst.gp(), src.fp());
2369 case kExprI32ReinterpretF32:
2370 Movd(dst.gp(), src.fp());
2371 return true;
2372 case kExprI64SConvertI32:
2373 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2374 if (dst.high_gp() != src.gp()) mov(dst.high_gp(), src.gp());
2375 sar(dst.high_gp(), 31);
2376 return true;
2377 case kExprI64UConvertI32:
2378 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2379 xor_(dst.high_gp(), dst.high_gp());
2380 return true;
2381 case kExprI64ReinterpretF64:
2382 // Push src to the stack.
2383 AllocateStackSpace(8);
2384 movsd(Operand(esp, 0), src.fp());
2385 // Pop to dst.
2386 pop(dst.low_gp());
2387 pop(dst.high_gp());
2388 return true;
2389 case kExprF32SConvertI32:
2390 cvtsi2ss(dst.fp(), src.gp());
2391 return true;
2392 case kExprF32UConvertI32: {
2393 LiftoffRegList pinned = {dst, src};
2394 Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2395 Cvtui2ss(dst.fp(), src.gp(), scratch);
2396 return true;
2397 }
2398 case kExprF32ConvertF64:
2399 cvtsd2ss(dst.fp(), src.fp());
2400 return true;
2401 case kExprF32ReinterpretI32:
2402 Movd(dst.fp(), src.gp());
2403 return true;
2404 case kExprF64SConvertI32:
2405 Cvtsi2sd(dst.fp(), src.gp());
2406 return true;
2407 case kExprF64UConvertI32: {
2408 LiftoffRegList pinned = {dst, src};
2409 Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2410 Cvtui2sd(dst.fp(), src.gp(), scratch);
2411 return true;
2412 }
2413 case kExprF64ConvertF32:
2414 cvtss2sd(dst.fp(), src.fp());
2415 return true;
2416 case kExprF64ReinterpretI64:
2417 // Push src to the stack.
2418 push(src.high_gp());
2419 push(src.low_gp());
2420 // Pop to dst.
2421 movsd(dst.fp(), Operand(esp, 0));
2422 add(esp, Immediate(8));
2423 return true;
2424 default:
2425 return false;
2426 }
2427 }
2428
emit_i32_signextend_i8(Register dst,Register src)2429 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2430 Register byte_reg = liftoff::GetTmpByteRegister(this, src);
2431 if (byte_reg != src) mov(byte_reg, src);
2432 movsx_b(dst, byte_reg);
2433 }
2434
emit_i32_signextend_i16(Register dst,Register src)2435 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2436 movsx_w(dst, src);
2437 }
2438
emit_i64_signextend_i8(LiftoffRegister dst,LiftoffRegister src)2439 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2440 LiftoffRegister src) {
2441 Register byte_reg = liftoff::GetTmpByteRegister(this, src.low_gp());
2442 if (byte_reg != src.low_gp()) mov(byte_reg, src.low_gp());
2443 movsx_b(dst.low_gp(), byte_reg);
2444 liftoff::SignExtendI32ToI64(this, dst);
2445 }
2446
emit_i64_signextend_i16(LiftoffRegister dst,LiftoffRegister src)2447 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2448 LiftoffRegister src) {
2449 movsx_w(dst.low_gp(), src.low_gp());
2450 liftoff::SignExtendI32ToI64(this, dst);
2451 }
2452
emit_i64_signextend_i32(LiftoffRegister dst,LiftoffRegister src)2453 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2454 LiftoffRegister src) {
2455 if (dst.low_gp() != src.low_gp()) mov(dst.low_gp(), src.low_gp());
2456 liftoff::SignExtendI32ToI64(this, dst);
2457 }
2458
emit_jump(Label * label)2459 void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
2460
emit_jump(Register target)2461 void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2462
emit_cond_jump(LiftoffCondition liftoff_cond,Label * label,ValueKind kind,Register lhs,Register rhs)2463 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2464 Label* label, ValueKind kind,
2465 Register lhs, Register rhs) {
2466 Condition cond = liftoff::ToCondition(liftoff_cond);
2467 if (rhs != no_reg) {
2468 switch (kind) {
2469 case kRef:
2470 case kOptRef:
2471 case kRtt:
2472 DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
2473 V8_FALLTHROUGH;
2474 case kI32:
2475 cmp(lhs, rhs);
2476 break;
2477 default:
2478 UNREACHABLE();
2479 }
2480 } else {
2481 DCHECK_EQ(kind, kI32);
2482 test(lhs, lhs);
2483 }
2484
2485 j(cond, label);
2486 }
2487
emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,Label * label,Register lhs,int imm)2488 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2489 Label* label, Register lhs,
2490 int imm) {
2491 Condition cond = liftoff::ToCondition(liftoff_cond);
2492 cmp(lhs, Immediate(imm));
2493 j(cond, label);
2494 }
2495
emit_i32_subi_jump_negative(Register value,int subtrahend,Label * result_negative)2496 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2497 int subtrahend,
2498 Label* result_negative) {
2499 sub(value, Immediate(subtrahend));
2500 j(negative, result_negative);
2501 }
2502
2503 namespace liftoff {
2504
2505 // Setcc into dst register, given a scratch byte register (might be the same as
2506 // dst). Never spills.
setcc_32_no_spill(LiftoffAssembler * assm,Condition cond,Register dst,Register tmp_byte_reg)2507 inline void setcc_32_no_spill(LiftoffAssembler* assm, Condition cond,
2508 Register dst, Register tmp_byte_reg) {
2509 assm->setcc(cond, tmp_byte_reg);
2510 assm->movzx_b(dst, tmp_byte_reg);
2511 }
2512
2513 // Setcc into dst register (no constraints). Might spill.
setcc_32(LiftoffAssembler * assm,Condition cond,Register dst)2514 inline void setcc_32(LiftoffAssembler* assm, Condition cond, Register dst) {
2515 Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2516 setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2517 }
2518
2519 } // namespace liftoff
2520
emit_i32_eqz(Register dst,Register src)2521 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2522 test(src, src);
2523 liftoff::setcc_32(this, equal, dst);
2524 }
2525
emit_i32_set_cond(LiftoffCondition liftoff_cond,Register dst,Register lhs,Register rhs)2526 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2527 Register dst, Register lhs,
2528 Register rhs) {
2529 Condition cond = liftoff::ToCondition(liftoff_cond);
2530 cmp(lhs, rhs);
2531 liftoff::setcc_32(this, cond, dst);
2532 }
2533
emit_i64_eqz(Register dst,LiftoffRegister src)2534 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2535 // Compute the OR of both registers in the src pair, using dst as scratch
2536 // register. Then check whether the result is equal to zero.
2537 if (src.low_gp() == dst) {
2538 or_(dst, src.high_gp());
2539 } else {
2540 if (src.high_gp() != dst) mov(dst, src.high_gp());
2541 or_(dst, src.low_gp());
2542 }
2543 liftoff::setcc_32(this, equal, dst);
2544 }
2545
2546 namespace liftoff {
cond_make_unsigned(LiftoffCondition cond)2547 inline LiftoffCondition cond_make_unsigned(LiftoffCondition cond) {
2548 switch (cond) {
2549 case kSignedLessThan:
2550 return kUnsignedLessThan;
2551 case kSignedLessEqual:
2552 return kUnsignedLessEqual;
2553 case kSignedGreaterThan:
2554 return kUnsignedGreaterThan;
2555 case kSignedGreaterEqual:
2556 return kUnsignedGreaterEqual;
2557 default:
2558 return cond;
2559 }
2560 }
2561 } // namespace liftoff
2562
emit_i64_set_cond(LiftoffCondition liftoff_cond,Register dst,LiftoffRegister lhs,LiftoffRegister rhs)2563 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2564 Register dst, LiftoffRegister lhs,
2565 LiftoffRegister rhs) {
2566 Condition cond = liftoff::ToCondition(liftoff_cond);
2567 Condition unsigned_cond =
2568 liftoff::ToCondition(liftoff::cond_make_unsigned(liftoff_cond));
2569
2570 // Get the tmp byte register out here, such that we don't conditionally spill
2571 // (this cannot be reflected in the cache state).
2572 Register tmp_byte_reg = liftoff::GetTmpByteRegister(this, dst);
2573
2574 // For signed i64 comparisons, we still need to use unsigned comparison for
2575 // the low word (the only bit carrying signedness information is the MSB in
2576 // the high word).
2577 Label setcc;
2578 Label cont;
2579 // Compare high word first. If it differs, use it for the setcc. If it's
2580 // equal, compare the low word and use that for setcc.
2581 cmp(lhs.high_gp(), rhs.high_gp());
2582 j(not_equal, &setcc, Label::kNear);
2583 cmp(lhs.low_gp(), rhs.low_gp());
2584 if (unsigned_cond != cond) {
2585 // If the condition predicate for the low differs from that for the high
2586 // word, emit a separate setcc sequence for the low word.
2587 liftoff::setcc_32_no_spill(this, unsigned_cond, dst, tmp_byte_reg);
2588 jmp(&cont);
2589 }
2590 bind(&setcc);
2591 liftoff::setcc_32_no_spill(this, cond, dst, tmp_byte_reg);
2592 bind(&cont);
2593 }
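// Scalar sketch of the comparison above (illustration only):
//   if (lhs_hi != rhs_hi) return cond(lhs_hi, rhs_hi);   // signed or unsigned
//   return unsigned_cond(lhs_lo, rhs_lo);                // low words are unsigned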
2594
2595 namespace liftoff {
2596 template <void (Assembler::*cmp_op)(DoubleRegister, DoubleRegister)>
EmitFloatSetCond(LiftoffAssembler * assm,Condition cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2597 void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
2598 DoubleRegister lhs, DoubleRegister rhs) {
2599 Label cont;
2600 Label not_nan;
2601
2602 // Get the tmp byte register out here, such that we don't conditionally spill
2603 // (this cannot be reflected in the cache state).
2604 Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2605
2606 (assm->*cmp_op)(lhs, rhs);
2607 // If PF is one, one of the operands was NaN. This needs special handling.
2608 assm->j(parity_odd, ¬_nan, Label::kNear);
2609 // Return 1 for f32.ne, 0 for all other cases.
2610 if (cond == not_equal) {
2611 assm->mov(dst, Immediate(1));
2612 } else {
2613 assm->xor_(dst, dst);
2614 }
2615 assm->jmp(&cont, Label::kNear);
2616 assm->bind(¬_nan);
2617
2618 setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2619 assm->bind(&cont);
2620 }
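// Rationale: per IEEE 754 (and the Wasm float comparisons), if either operand
// is NaN every comparison is false except "not equal", which is true; the
// early-out above encodes exactly that before the regular setcc.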
2621 } // namespace liftoff
2622
emit_f32_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2623 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2624 Register dst, DoubleRegister lhs,
2625 DoubleRegister rhs) {
2626 Condition cond = liftoff::ToCondition(liftoff_cond);
2627 liftoff::EmitFloatSetCond<&Assembler::ucomiss>(this, cond, dst, lhs, rhs);
2628 }
2629
emit_f64_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2630 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2631 Register dst, DoubleRegister lhs,
2632 DoubleRegister rhs) {
2633 Condition cond = liftoff::ToCondition(liftoff_cond);
2634 liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
2635 }
2636
emit_select(LiftoffRegister dst,Register condition,LiftoffRegister true_value,LiftoffRegister false_value,ValueKind kind)2637 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2638 LiftoffRegister true_value,
2639 LiftoffRegister false_value,
2640 ValueKind kind) {
2641 return false;
2642 }
2643
emit_smi_check(Register obj,Label * target,SmiCheckMode mode)2644 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2645 SmiCheckMode mode) {
2646 test_b(obj, Immediate(kSmiTagMask));
2647 Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2648 j(condition, target);
2649 }
2650
2651 namespace liftoff {
2652 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2653 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2654 void EmitSimdCommutativeBinOp(
2655 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2656 LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2657 if (CpuFeatures::IsSupported(AVX)) {
2658 CpuFeatureScope scope(assm, AVX);
2659 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2660 return;
2661 }
2662
2663 base::Optional<CpuFeatureScope> sse_scope;
2664 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2665
2666 if (dst.fp() == rhs.fp()) {
2667 (assm->*sse_op)(dst.fp(), lhs.fp());
2668 } else {
2669 if (dst.fp() != lhs.fp()) (assm->movaps)(dst.fp(), lhs.fp());
2670 (assm->*sse_op)(dst.fp(), rhs.fp());
2671 }
2672 }
2673
2674 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2675 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2676 void EmitSimdNonCommutativeBinOp(
2677 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2678 LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2679 if (CpuFeatures::IsSupported(AVX)) {
2680 CpuFeatureScope scope(assm, AVX);
2681 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2682 return;
2683 }
2684
2685 base::Optional<CpuFeatureScope> sse_scope;
2686 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2687
2688 if (dst.fp() == rhs.fp()) {
2689 assm->movaps(kScratchDoubleReg, rhs.fp());
2690 assm->movaps(dst.fp(), lhs.fp());
2691 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2692 } else {
2693 if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2694 (assm->*sse_op)(dst.fp(), rhs.fp());
2695 }
2696 }
2697
2698 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2699 void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
EmitSimdShiftOp(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister operand,LiftoffRegister count)2700 void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
2701 LiftoffRegister operand, LiftoffRegister count) {
2702 static constexpr RegClass tmp_rc = reg_class_for(kI32);
2703 LiftoffRegister tmp = assm->GetUnusedRegister(tmp_rc, LiftoffRegList{count});
2704 constexpr int mask = (1 << width) - 1;
2705
2706 assm->mov(tmp.gp(), count.gp());
2707 assm->and_(tmp.gp(), Immediate(mask));
2708 assm->Movd(kScratchDoubleReg, tmp.gp());
2709 if (CpuFeatures::IsSupported(AVX)) {
2710 CpuFeatureScope scope(assm, AVX);
2711 (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
2712 } else {
2713 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2714 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2715 }
2716 }
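// The and_ with {mask} above implements the Wasm rule that SIMD shift counts
// are taken modulo the lane width, e.g. an i32x4 shift by 33 shifts by 1.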
2717
2718 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
2719 void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
EmitSimdShiftOpImm(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister operand,int32_t count)2720 void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
2721 LiftoffRegister operand, int32_t count) {
2722 constexpr int mask = (1 << width) - 1;
2723 byte shift = static_cast<byte>(count & mask);
2724 if (CpuFeatures::IsSupported(AVX)) {
2725 CpuFeatureScope scope(assm, AVX);
2726 (assm->*avx_op)(dst.fp(), operand.fp(), shift);
2727 } else {
2728 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2729 (assm->*sse_op)(dst.fp(), shift);
2730 }
2731 }
2732
EmitAnyTrue(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister src)2733 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2734 LiftoffRegister src) {
2735 Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2736 assm->xor_(tmp, tmp);
2737 assm->mov(dst.gp(), Immediate(1));
2738 assm->Ptest(src.fp(), src.fp());
2739 assm->cmov(zero, dst.gp(), tmp);
2740 }
2741
2742 template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
2743 inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2744 LiftoffRegister src,
2745 base::Optional<CpuFeature> feature = base::nullopt) {
2746 base::Optional<CpuFeatureScope> sse_scope;
2747 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2748
2749 Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2750 XMMRegister tmp_simd = liftoff::kScratchDoubleReg;
2751 assm->mov(tmp, Immediate(1));
2752 assm->xor_(dst.gp(), dst.gp());
2753 assm->Pxor(tmp_simd, tmp_simd);
2754 (assm->*pcmp)(tmp_simd, src.fp());
2755 assm->Ptest(tmp_simd, tmp_simd);
2756 assm->cmov(zero, dst.gp(), tmp);
2757 }
2758
2759 } // namespace liftoff
2760
LoadTransform(LiftoffRegister dst,Register src_addr,Register offset_reg,uintptr_t offset_imm,LoadType type,LoadTransformationKind transform,uint32_t * protected_load_pc)2761 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2762 Register offset_reg, uintptr_t offset_imm,
2763 LoadType type,
2764 LoadTransformationKind transform,
2765 uint32_t* protected_load_pc) {
2766 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2767 Operand src_op{src_addr, offset_reg, times_1,
2768 static_cast<int32_t>(offset_imm)};
2769 *protected_load_pc = pc_offset();
2770
2771 MachineType memtype = type.mem_type();
2772 if (transform == LoadTransformationKind::kExtend) {
2773 if (memtype == MachineType::Int8()) {
2774 Pmovsxbw(dst.fp(), src_op);
2775 } else if (memtype == MachineType::Uint8()) {
2776 Pmovzxbw(dst.fp(), src_op);
2777 } else if (memtype == MachineType::Int16()) {
2778 Pmovsxwd(dst.fp(), src_op);
2779 } else if (memtype == MachineType::Uint16()) {
2780 Pmovzxwd(dst.fp(), src_op);
2781 } else if (memtype == MachineType::Int32()) {
2782 Pmovsxdq(dst.fp(), src_op);
2783 } else if (memtype == MachineType::Uint32()) {
2784 Pmovzxdq(dst.fp(), src_op);
2785 }
2786 } else if (transform == LoadTransformationKind::kZeroExtend) {
2787 if (memtype == MachineType::Int32()) {
2788 Movss(dst.fp(), src_op);
2789 } else {
2790 DCHECK_EQ(MachineType::Int64(), memtype);
2791 Movsd(dst.fp(), src_op);
2792 }
2793 } else {
2794 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2795 if (memtype == MachineType::Int8()) {
2796 S128Load8Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2797 } else if (memtype == MachineType::Int16()) {
2798 S128Load16Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2799 } else if (memtype == MachineType::Int32()) {
2800 S128Load32Splat(dst.fp(), src_op);
2801 } else if (memtype == MachineType::Int64()) {
2802 Movddup(dst.fp(), src_op);
2803 }
2804 }
2805 }
2806
LoadLane(LiftoffRegister dst,LiftoffRegister src,Register addr,Register offset_reg,uintptr_t offset_imm,LoadType type,uint8_t laneidx,uint32_t * protected_load_pc)2807 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2808 Register addr, Register offset_reg,
2809 uintptr_t offset_imm, LoadType type,
2810 uint8_t laneidx, uint32_t* protected_load_pc) {
2811 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2812 Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
2813 *protected_load_pc = pc_offset();
2814
2815 MachineType mem_type = type.mem_type();
2816 if (mem_type == MachineType::Int8()) {
2817 Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
2818 } else if (mem_type == MachineType::Int16()) {
2819 Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
2820 } else if (mem_type == MachineType::Int32()) {
2821 Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
2822 } else {
2823 DCHECK_EQ(MachineType::Int64(), mem_type);
2824 if (laneidx == 0) {
2825 Movlps(dst.fp(), src.fp(), src_op);
2826 } else {
2827 DCHECK_EQ(1, laneidx);
2828 Movhps(dst.fp(), src.fp(), src_op);
2829 }
2830 }
2831 }
2832
StoreLane(Register dst,Register offset,uintptr_t offset_imm,LiftoffRegister src,StoreType type,uint8_t lane,uint32_t * protected_store_pc)2833 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2834 uintptr_t offset_imm, LiftoffRegister src,
2835 StoreType type, uint8_t lane,
2836 uint32_t* protected_store_pc) {
2837 DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2838 Operand dst_op = Operand(dst, offset, times_1, offset_imm);
2839 if (protected_store_pc) *protected_store_pc = pc_offset();
2840
2841 MachineRepresentation rep = type.mem_rep();
2842 if (rep == MachineRepresentation::kWord8) {
2843 Pextrb(dst_op, src.fp(), lane);
2844 } else if (rep == MachineRepresentation::kWord16) {
2845 Pextrw(dst_op, src.fp(), lane);
2846 } else if (rep == MachineRepresentation::kWord32) {
2847 S128Store32Lane(dst_op, src.fp(), lane);
2848 } else {
2849 DCHECK_EQ(MachineRepresentation::kWord64, rep);
2850 S128Store64Lane(dst_op, src.fp(), lane);
2851 }
2852 }
2853
emit_i8x16_shuffle(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,const uint8_t shuffle[16],bool is_swizzle)2854 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2855 LiftoffRegister lhs,
2856 LiftoffRegister rhs,
2857 const uint8_t shuffle[16],
2858 bool is_swizzle) {
2859 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
2860 // Prepare 16 byte aligned buffer for shuffle control mask.
2861 mov(tmp.gp(), esp);
2862 and_(esp, -16);
2863
2864 if (is_swizzle) {
2865 uint32_t imms[4];
2866 // Shuffles that use just one operand are called swizzles; rhs can be ignored.
2867 wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
2868 for (int i = 3; i >= 0; i--) {
2869 push_imm32(imms[i]);
2870 }
2871 Pshufb(dst.fp(), lhs.fp(), Operand(esp, 0));
2872 mov(esp, tmp.gp());
2873 return;
2874 }
2875
2876 movups(liftoff::kScratchDoubleReg, lhs.fp());
2877 for (int i = 3; i >= 0; i--) {
2878 uint32_t mask = 0;
2879 for (int j = 3; j >= 0; j--) {
2880 uint8_t lane = shuffle[i * 4 + j];
2881 mask <<= 8;
2882 mask |= lane < kSimd128Size ? lane : 0x80;
2883 }
2884 push(Immediate(mask));
2885 }
2886 Pshufb(liftoff::kScratchDoubleReg, lhs.fp(), Operand(esp, 0));
2887
2888 for (int i = 3; i >= 0; i--) {
2889 uint32_t mask = 0;
2890 for (int j = 3; j >= 0; j--) {
2891 uint8_t lane = shuffle[i * 4 + j];
2892 mask <<= 8;
2893 mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
2894 }
2895 push(Immediate(mask));
2896 }
2897 Pshufb(dst.fp(), rhs.fp(), Operand(esp, 0));
2898 Por(dst.fp(), liftoff::kScratchDoubleReg);
2899 mov(esp, tmp.gp());
2900 }
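// How the masks above work (sketch): pshufb selects, per result byte, the
// source byte at the given index, and writes zero when bit 7 of the index is
// set (0x80). The first mask keeps the lanes taken from {lhs} and zeroes the
// rest; the second keeps the lanes taken from {rhs} (indices 16..31, reduced
// with & 0x0F); or-ing the two partial results gives the full shuffle.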
2901
emit_i8x16_swizzle(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2902 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2903 LiftoffRegister lhs,
2904 LiftoffRegister rhs) {
2905 Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
2906 I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg,
2907 scratch);
2908 }
2909
emit_i8x16_popcnt(LiftoffRegister dst,LiftoffRegister src)2910 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2911 LiftoffRegister src) {
2912 Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
2913 XMMRegister tmp =
2914 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, src}).fp();
2915 I8x16Popcnt(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp, scratch);
2916 }
2917
emit_i8x16_splat(LiftoffRegister dst,LiftoffRegister src)2918 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2919 LiftoffRegister src) {
2920 I8x16Splat(dst.fp(), src.gp(), liftoff::kScratchDoubleReg);
2921 }
2922
emit_i16x8_splat(LiftoffRegister dst,LiftoffRegister src)2923 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2924 LiftoffRegister src) {
2925 I16x8Splat(dst.fp(), src.gp());
2926 }
2927
emit_i32x4_splat(LiftoffRegister dst,LiftoffRegister src)2928 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2929 LiftoffRegister src) {
2930 Movd(dst.fp(), src.gp());
2931 Pshufd(dst.fp(), dst.fp(), uint8_t{0});
2932 }
2933
emit_i64x2_splat(LiftoffRegister dst,LiftoffRegister src)2934 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2935 LiftoffRegister src) {
2936 Pinsrd(dst.fp(), src.low_gp(), 0);
2937 Pinsrd(dst.fp(), src.high_gp(), 1);
2938 Pshufd(dst.fp(), dst.fp(), uint8_t{0x44});
2939 }
2940
emit_f32x4_splat(LiftoffRegister dst,LiftoffRegister src)2941 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2942 LiftoffRegister src) {
2943 F32x4Splat(dst.fp(), src.fp());
2944 }
2945
2946 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2947 LiftoffRegister src) {
2948 Movddup(dst.fp(), src.fp());
2949 }
2950
2951 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2952 LiftoffRegister rhs) {
2953 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2954 this, dst, lhs, rhs);
2955 }
2956
2957 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2958 LiftoffRegister rhs) {
2959 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2960 this, dst, lhs, rhs);
2961 Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
2962 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
2963 }
2964
2965 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2966 LiftoffRegister rhs) {
2967 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
2968 &Assembler::pcmpgtb>(this, dst, lhs,
2969 rhs);
2970 }
2971
2972 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2973 LiftoffRegister rhs) {
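// SSE has no unsigned byte greater-than; compute pmaxub(lhs, rhs) == rhs,
// which holds exactly when lhs <= rhs, then invert the result.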
2974 DoubleRegister ref = rhs.fp();
2975 if (dst == rhs) {
2976 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
2977 ref = liftoff::kScratchDoubleReg;
2978 }
2979 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
2980 this, dst, lhs, rhs);
2981 Pcmpeqb(dst.fp(), ref);
2982 Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
2983 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
2984 }
2985
2986 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2987 LiftoffRegister rhs) {
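// pminsb(lhs, rhs) == rhs holds exactly when rhs <= lhs, i.e. lhs >= rhs
// (pminsb is SSE4.1, hence the feature argument below).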
2988 DoubleRegister ref = rhs.fp();
2989 if (dst == rhs) {
2990 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
2991 ref = liftoff::kScratchDoubleReg;
2992 }
2993 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2994 this, dst, lhs, rhs, SSE4_1);
2995 Pcmpeqb(dst.fp(), ref);
2996 }
2997
2998 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2999 LiftoffRegister rhs) {
3000 DoubleRegister ref = rhs.fp();
3001 if (dst == rhs) {
3002 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3003 ref = liftoff::kScratchDoubleReg;
3004 }
3005 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3006 this, dst, lhs, rhs);
3007 Pcmpeqb(dst.fp(), ref);
3008 }
3009
3010 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
3011 LiftoffRegister rhs) {
3012 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3013 this, dst, lhs, rhs);
3014 }
3015
3016 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
3017 LiftoffRegister rhs) {
3018 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3019 this, dst, lhs, rhs);
3020 Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3021 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3022 }
3023
3024 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3025 LiftoffRegister rhs) {
3026 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
3027 &Assembler::pcmpgtw>(this, dst, lhs,
3028 rhs);
3029 }
3030
3031 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3032 LiftoffRegister rhs) {
3033 DoubleRegister ref = rhs.fp();
3034 if (dst == rhs) {
3035 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3036 ref = liftoff::kScratchDoubleReg;
3037 }
3038 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3039 this, dst, lhs, rhs, SSE4_1);
3040 Pcmpeqw(dst.fp(), ref);
3041 Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3042 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3043 }
3044
3045 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3046 LiftoffRegister rhs) {
3047 DoubleRegister ref = rhs.fp();
3048 if (dst == rhs) {
3049 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3050 ref = liftoff::kScratchDoubleReg;
3051 }
3052 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3053 this, dst, lhs, rhs);
3054 Pcmpeqw(dst.fp(), ref);
3055 }
3056
3057 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3058 LiftoffRegister rhs) {
3059 DoubleRegister ref = rhs.fp();
3060 if (dst == rhs) {
3061 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3062 ref = liftoff::kScratchDoubleReg;
3063 }
3064 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3065 this, dst, lhs, rhs, SSE4_1);
3066 Pcmpeqw(dst.fp(), ref);
3067 }
3068
3069 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3070 LiftoffRegister rhs) {
3071 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3072 this, dst, lhs, rhs);
3073 }
3074
3075 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3076 LiftoffRegister rhs) {
3077 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3078 this, dst, lhs, rhs);
3079 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3080 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3081 }
3082
3083 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3084 LiftoffRegister rhs) {
3085 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
3086 &Assembler::pcmpgtd>(this, dst, lhs,
3087 rhs);
3088 }
3089
3090 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3091 LiftoffRegister rhs) {
3092 DoubleRegister ref = rhs.fp();
3093 if (dst == rhs) {
3094 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3095 ref = liftoff::kScratchDoubleReg;
3096 }
3097 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3098 this, dst, lhs, rhs, SSE4_1);
3099 Pcmpeqd(dst.fp(), ref);
3100 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3101 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3102 }
3103
3104 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3105 LiftoffRegister rhs) {
3106 DoubleRegister ref = rhs.fp();
3107 if (dst == rhs) {
3108 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3109 ref = liftoff::kScratchDoubleReg;
3110 }
3111 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3112 this, dst, lhs, rhs, SSE4_1);
3113 Pcmpeqd(dst.fp(), ref);
3114 }
3115
3116 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3117 LiftoffRegister rhs) {
3118 DoubleRegister ref = rhs.fp();
3119 if (dst == rhs) {
3120 Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3121 ref = liftoff::kScratchDoubleReg;
3122 }
3123 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3124 this, dst, lhs, rhs, SSE4_1);
3125 Pcmpeqd(dst.fp(), ref);
3126 }
3127
3128 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3129 LiftoffRegister rhs) {
3130 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3131 this, dst, lhs, rhs, SSE4_1);
3132 }
3133
3134 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3135 LiftoffRegister rhs) {
3136 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3137 this, dst, lhs, rhs, SSE4_1);
3138 Pcmpeqq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3139 Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3140 }
3141
3142 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3143 LiftoffRegister rhs) {
3144 // Register aliasing requirements differ depending on the supported CpuFeatures:
3145 if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
3146 // 1. AVX or SSE4_2: no requirements (I64x2GtS takes care of aliasing).
3147 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3148 } else {
3149 // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3150 if (dst == lhs || dst == rhs) {
3151 LiftoffRegister tmp =
3152 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3153 I64x2GtS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3154 movaps(dst.fp(), tmp.fp());
3155 } else {
3156 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3157 }
3158 }
3159 }
3160
3161 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3162 LiftoffRegister rhs) {
3163 // Register aliasing requirements differ depending on the supported CpuFeatures:
3164 if (CpuFeatures::IsSupported(AVX)) {
3165 // 1. AVX, no requirements.
3166 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3167 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3168 // 2. SSE4_2, dst != lhs.
3169 if (dst == lhs) {
3170 LiftoffRegister tmp =
3171 GetUnusedRegister(RegClass::kFpReg, {rhs}, LiftoffRegList{lhs});
3172 // macro-assembler uses kScratchDoubleReg, so don't use it.
3173 I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3174 movaps(dst.fp(), tmp.fp());
3175 } else {
3176 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3177 }
3178 } else {
3179 // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3180 if (dst == lhs || dst == rhs) {
3181 LiftoffRegister tmp =
3182 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3183 I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3184 movaps(dst.fp(), tmp.fp());
3185 } else {
3186 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3187 }
3188 }
3189 }
3190
3191 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3192 LiftoffRegister rhs) {
3193 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
3194 this, dst, lhs, rhs);
3195 }
3196
3197 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3198 LiftoffRegister rhs) {
3199 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
3200 &Assembler::cmpneqps>(this, dst, lhs, rhs);
3201 }
3202
3203 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
3204 LiftoffRegister rhs) {
3205 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
3206 &Assembler::cmpltps>(this, dst, lhs,
3207 rhs);
3208 }
3209
3210 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
3211 LiftoffRegister rhs) {
3212 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
3213 &Assembler::cmpleps>(this, dst, lhs,
3214 rhs);
3215 }
3216
3217 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3218 LiftoffRegister rhs) {
3219 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
3220 this, dst, lhs, rhs);
3221 }
3222
3223 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3224 LiftoffRegister rhs) {
3225 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
3226 &Assembler::cmpneqpd>(this, dst, lhs, rhs);
3227 }
3228
3229 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
3230 LiftoffRegister rhs) {
3231 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
3232 &Assembler::cmpltpd>(this, dst, lhs,
3233 rhs);
3234 }
3235
3236 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
3237 LiftoffRegister rhs) {
3238 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
3239 &Assembler::cmplepd>(this, dst, lhs,
3240 rhs);
3241 }
3242
3243 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
3244 const uint8_t imms[16]) {
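// Move() materializes the low 64 bits of the constant; the high 64 bits are
// inserted 32 bits at a time through a GP temporary into lanes 2 and 3.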
3245 uint64_t vals[2];
3246 memcpy(vals, imms, sizeof(vals));
3247 TurboAssembler::Move(dst.fp(), vals[0]);
3248
3249 uint64_t high = vals[1];
3250 Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3251 TurboAssembler::Move(tmp, Immediate(high & 0xffff'ffff));
3252 Pinsrd(dst.fp(), tmp, 2);
3253
3254 TurboAssembler::Move(tmp, Immediate(high >> 32));
3255 Pinsrd(dst.fp(), tmp, 3);
3256 }
3257
3258 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
3259 S128Not(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3260 }
3261
3262 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
3263 LiftoffRegister rhs) {
3264 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
3265 this, dst, lhs, rhs);
3266 }
3267
3268 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
3269 LiftoffRegister rhs) {
3270 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
3271 this, dst, lhs, rhs);
3272 }
3273
3274 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
3275 LiftoffRegister rhs) {
3276 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
3277 this, dst, lhs, rhs);
3278 }
3279
3280 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
3281 LiftoffRegister src1,
3282 LiftoffRegister src2,
3283 LiftoffRegister mask) {
3284 // Ensure that we don't overwrite any inputs with the movaps below.
3285 DCHECK_NE(dst, src1);
3286 DCHECK_NE(dst, src2);
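// Without AVX, the S128Select macro expects the mask to be in dst, so copy
// it there first if the registers differ; dst cannot alias src1/src2, as
// asserted above.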
3287 if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
3288 movaps(dst.fp(), mask.fp());
3289 S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
3290 liftoff::kScratchDoubleReg);
3291 } else {
3292 S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
3293 liftoff::kScratchDoubleReg);
3294 }
3295 }
3296
3297 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
3298 LiftoffRegister src) {
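// If dst aliases src, dst cannot be zeroed first; instead negate in place
// with Psignb and an all-ones mask (every byte is -1, which flips the sign
// of each lane). Otherwise compute 0 - src directly.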
3299 if (dst.fp() == src.fp()) {
3300 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3301 Psignb(dst.fp(), liftoff::kScratchDoubleReg);
3302 } else {
3303 Pxor(dst.fp(), dst.fp());
3304 Psubb(dst.fp(), src.fp());
3305 }
3306 }
3307
3308 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
3309 LiftoffRegister src) {
3310 liftoff::EmitAnyTrue(this, dst, src);
3311 }
3312
3313 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
3314 LiftoffRegister src) {
3315 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
3316 }
3317
3318 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
3319 LiftoffRegister src) {
3320 Pmovmskb(dst.gp(), src.fp());
3321 }
3322
3323 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
3324 LiftoffRegister rhs) {
3325 LiftoffRegister tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs});
3326 LiftoffRegister tmp_simd =
3327 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs});
3328 I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), tmp.gp(), liftoff::kScratchDoubleReg,
3329 tmp_simd.fp());
3330 }
3331
3332 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
3333 int32_t rhs) {
3334 LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
3335 I8x16Shl(dst.fp(), lhs.fp(), rhs, tmp.gp(), liftoff::kScratchDoubleReg);
3336 }
3337
3338 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
3339 LiftoffRegister lhs,
3340 LiftoffRegister rhs) {
3341 Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3342 XMMRegister tmp_simd =
3343 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3344 I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3345 tmp_simd);
3346 }
3347
3348 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
3349 LiftoffRegister lhs, int32_t rhs) {
3350 I8x16ShrS(dst.fp(), lhs.fp(), rhs, liftoff::kScratchDoubleReg);
3351 }
3352
3353 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
3354 LiftoffRegister lhs,
3355 LiftoffRegister rhs) {
3356 Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3357 XMMRegister tmp_simd =
3358 GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3359 I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3360 tmp_simd);
3361 }
3362
3363 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
3364 LiftoffRegister lhs, int32_t rhs) {
3365 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
3366 I8x16ShrU(dst.fp(), lhs.fp(), rhs, tmp, liftoff::kScratchDoubleReg);
3367 }
3368
3369 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
3370 LiftoffRegister rhs) {
3371 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
3372 this, dst, lhs, rhs);
3373 }
3374
3375 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
3376 LiftoffRegister lhs,
3377 LiftoffRegister rhs) {
3378 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
3379 this, dst, lhs, rhs);
3380 }
3381
3382 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
3383 LiftoffRegister lhs,
3384 LiftoffRegister rhs) {
3385 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
3386 this, dst, lhs, rhs);
3387 }
3388
3389 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
3390 LiftoffRegister rhs) {
3391 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
3392 this, dst, lhs, rhs);
3393 }
3394
3395 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
3396 LiftoffRegister lhs,
3397 LiftoffRegister rhs) {
3398 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
3399 this, dst, lhs, rhs);
3400 }
3401
3402 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
3403 LiftoffRegister lhs,
3404 LiftoffRegister rhs) {
3405 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
3406 &Assembler::psubusb>(this, dst, lhs,
3407 rhs);
3408 }
3409
3410 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
3411 LiftoffRegister lhs,
3412 LiftoffRegister rhs) {
3413 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
3414 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3415 }
3416
3417 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
3418 LiftoffRegister lhs,
3419 LiftoffRegister rhs) {
3420 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3421 this, dst, lhs, rhs);
3422 }
3423
3424 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3425 LiftoffRegister lhs,
3426 LiftoffRegister rhs) {
3427 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
3428 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3429 }
3430
3431 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3432 LiftoffRegister lhs,
3433 LiftoffRegister rhs) {
3434 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3435 this, dst, lhs, rhs);
3436 }
3437
3438 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3439 LiftoffRegister src) {
3440 if (dst.fp() == src.fp()) {
3441 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3442 Psignw(dst.fp(), liftoff::kScratchDoubleReg);
3443 } else {
3444 Pxor(dst.fp(), dst.fp());
3445 Psubw(dst.fp(), src.fp());
3446 }
3447 }
3448
3449 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3450 LiftoffRegister src) {
3451 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
3452 }
3453
3454 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3455 LiftoffRegister src) {
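// Pack the eight word lanes into bytes (keeping their sign), collect the
// byte sign bits with Pmovmskb, then shift right by 8 because the packed
// src lanes land in the upper eight bytes of the scratch register.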
3456 XMMRegister tmp = liftoff::kScratchDoubleReg;
3457 Packsswb(tmp, src.fp());
3458 Pmovmskb(dst.gp(), tmp);
3459 shr(dst.gp(), 8);
3460 }
3461
3462 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3463 LiftoffRegister rhs) {
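// The last template argument (4 here, 5 for i32x4, 6 for i64x2) is log2 of
// the lane width in bits; the helper presumably uses it to mask the shift
// count, matching Wasm's shift-count-modulo-lane-width semantics.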
3464 liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
3465 lhs, rhs);
3466 }
3467
3468 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3469 int32_t rhs) {
3470 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
3471 this, dst, lhs, rhs);
3472 }
3473
3474 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3475 LiftoffRegister lhs,
3476 LiftoffRegister rhs) {
3477 liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
3478 lhs, rhs);
3479 }
3480
3481 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3482 LiftoffRegister lhs, int32_t rhs) {
3483 liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
3484 this, dst, lhs, rhs);
3485 }
3486
3487 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3488 LiftoffRegister lhs,
3489 LiftoffRegister rhs) {
3490 liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
3491 lhs, rhs);
3492 }
3493
3494 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3495 LiftoffRegister lhs, int32_t rhs) {
3496 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
3497 this, dst, lhs, rhs);
3498 }
3499
3500 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3501 LiftoffRegister rhs) {
3502 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
3503 this, dst, lhs, rhs);
3504 }
3505
3506 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3507 LiftoffRegister lhs,
3508 LiftoffRegister rhs) {
3509 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
3510 this, dst, lhs, rhs);
3511 }
3512
3513 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3514 LiftoffRegister lhs,
3515 LiftoffRegister rhs) {
3516 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
3517 this, dst, lhs, rhs);
3518 }
3519
3520 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3521 LiftoffRegister rhs) {
3522 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
3523 this, dst, lhs, rhs);
3524 }
3525
3526 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3527 LiftoffRegister lhs,
3528 LiftoffRegister rhs) {
3529 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
3530 this, dst, lhs, rhs);
3531 }
3532
3533 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3534 LiftoffRegister lhs,
3535 LiftoffRegister rhs) {
3536 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
3537 &Assembler::psubusw>(this, dst, lhs,
3538 rhs);
3539 }
3540
3541 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3542 LiftoffRegister rhs) {
3543 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
3544 this, dst, lhs, rhs);
3545 }
3546
3547 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3548 LiftoffRegister lhs,
3549 LiftoffRegister rhs) {
3550 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3551 this, dst, lhs, rhs);
3552 }
3553
3554 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3555 LiftoffRegister lhs,
3556 LiftoffRegister rhs) {
3557 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3558 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3559 }
3560
3561 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3562 LiftoffRegister lhs,
3563 LiftoffRegister rhs) {
3564 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
3565 this, dst, lhs, rhs);
3566 }
3567
3568 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3569 LiftoffRegister lhs,
3570 LiftoffRegister rhs) {
3571 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3572 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3573 }
3574
3575 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3576 LiftoffRegister src) {
3577 I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), liftoff::kScratchDoubleReg,
3578 GetUnusedRegister(kGpReg, {}).gp());
3579 }
3580
3581 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3582 LiftoffRegister src) {
3583 I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(),
3584 GetUnusedRegister(kGpReg, {}).gp());
3585 }
3586
3587 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3588 LiftoffRegister src1,
3589 LiftoffRegister src2) {
3590 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3591 /*is_signed=*/true);
3592 }
3593
3594 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3595 LiftoffRegister src1,
3596 LiftoffRegister src2) {
3597 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3598 /*is_signed=*/false);
3599 }
3600
3601 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3602 LiftoffRegister src1,
3603 LiftoffRegister src2) {
3604 I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3605 }
3606
3607 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3608 LiftoffRegister src1,
3609 LiftoffRegister src2) {
3610 I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3611 }
3612
3613 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3614 LiftoffRegister src1,
3615 LiftoffRegister src2) {
3616 I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3617 }
3618
3619 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3620 LiftoffRegister src) {
3621 if (dst.fp() == src.fp()) {
3622 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3623 Psignd(dst.fp(), liftoff::kScratchDoubleReg);
3624 } else {
3625 Pxor(dst.fp(), dst.fp());
3626 Psubd(dst.fp(), src.fp());
3627 }
3628 }
3629
3630 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3631 LiftoffRegister src) {
3632 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
3633 }
3634
3635 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3636 LiftoffRegister src) {
3637 Movmskps(dst.gp(), src.fp());
3638 }
3639
3640 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3641 LiftoffRegister rhs) {
3642 liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
3643 lhs, rhs);
3644 }
3645
3646 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3647 int32_t rhs) {
3648 liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
3649 this, dst, lhs, rhs);
3650 }
3651
3652 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3653 LiftoffRegister lhs,
3654 LiftoffRegister rhs) {
3655 liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
3656 lhs, rhs);
3657 }
3658
3659 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3660 LiftoffRegister lhs, int32_t rhs) {
3661 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
3662 this, dst, lhs, rhs);
3663 }
3664
3665 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3666 LiftoffRegister lhs,
3667 LiftoffRegister rhs) {
3668 liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
3669 lhs, rhs);
3670 }
3671
3672 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3673 LiftoffRegister lhs, int32_t rhs) {
3674 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
3675 this, dst, lhs, rhs);
3676 }
3677
3678 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3679 LiftoffRegister rhs) {
3680 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3681 this, dst, lhs, rhs);
3682 }
3683
3684 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3685 LiftoffRegister rhs) {
3686 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3687 this, dst, lhs, rhs);
3688 }
3689
3690 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3691 LiftoffRegister rhs) {
3692 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3693 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3694 }
3695
3696 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3697 LiftoffRegister lhs,
3698 LiftoffRegister rhs) {
3699 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3700 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3701 }
3702
3703 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3704 LiftoffRegister lhs,
3705 LiftoffRegister rhs) {
3706 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3707 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3708 }
3709
3710 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3711 LiftoffRegister lhs,
3712 LiftoffRegister rhs) {
3713 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
3714 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3715 }
3716
3717 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3718 LiftoffRegister lhs,
3719 LiftoffRegister rhs) {
3720 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3721 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3722 }
3723
3724 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3725 LiftoffRegister lhs,
3726 LiftoffRegister rhs) {
3727 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
3728 this, dst, lhs, rhs);
3729 }
3730
3731 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3732 LiftoffRegister src) {
3733 I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(),
3734 GetUnusedRegister(kGpReg, {}).gp());
3735 }
3736
3737 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3738 LiftoffRegister src) {
3739 I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3740 }
3741
3742 namespace liftoff {
3743 // Helper function that checks for register aliasing and AVX support, and
3744 // moves registers around before calling the actual macro-assembler function.
3745 inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
3746 XMMRegister src1, XMMRegister src2, bool low,
3747 bool is_signed) {
3748 // I32x4ExtMul requires dst == src1 if AVX is not supported.
3749 if (CpuFeatures::IsSupported(AVX) || dst == src1) {
3750 assm->I32x4ExtMul(dst, src1, src2, liftoff::kScratchDoubleReg, low,
3751 is_signed);
3752 } else if (dst != src2) {
3753 // dst != src1 && dst != src2
3754 assm->movaps(dst, src1);
3755 assm->I32x4ExtMul(dst, dst, src2, liftoff::kScratchDoubleReg, low,
3756 is_signed);
3757 } else {
3758 // dst == src2
3759 // Extended multiplication is commutative, so swap the operands.
3760 assm->movaps(dst, src2);
3761 assm->I32x4ExtMul(dst, dst, src1, liftoff::kScratchDoubleReg, low,
3762 is_signed);
3763 }
3764 }
3765 } // namespace liftoff
3766
3767 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
3768 LiftoffRegister src1,
3769 LiftoffRegister src2) {
3770 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3771 /*is_signed=*/true);
3772 }
3773
3774 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
3775 LiftoffRegister src1,
3776 LiftoffRegister src2) {
3777 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3778 /*is_signed=*/false);
3779 }
3780
3781 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
3782 LiftoffRegister src1,
3783 LiftoffRegister src2) {
3784 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3785 /*low=*/false,
3786 /*is_signed=*/true);
3787 }
3788
3789 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
3790 LiftoffRegister src1,
3791 LiftoffRegister src2) {
3792 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3793 /*low=*/false,
3794 /*is_signed=*/false);
3795 }
3796
3797 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
3798 LiftoffRegister src) {
3799 I64x2Neg(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3800 }
3801
3802 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
3803 LiftoffRegister src) {
3804 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
3805 }
3806
3807 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
3808 LiftoffRegister rhs) {
3809 liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
3810 lhs, rhs);
3811 }
3812
3813 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
3814 int32_t rhs) {
3815 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
3816 this, dst, lhs, rhs);
3817 }
3818
3819 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
3820 LiftoffRegister lhs,
3821 LiftoffRegister rhs) {
3822 XMMRegister tmp =
3823 GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, lhs}).fp();
3824 Register scratch =
3825 GetUnusedRegister(RegClass::kGpReg, LiftoffRegList{rhs}).gp();
3826
3827 I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), liftoff::kScratchDoubleReg, tmp,
3828 scratch);
3829 }
3830
3831 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
3832 LiftoffRegister lhs, int32_t rhs) {
3833 I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, liftoff::kScratchDoubleReg);
3834 }
3835
3836 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
3837 LiftoffRegister lhs,
3838 LiftoffRegister rhs) {
3839 liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
3840 lhs, rhs);
3841 }
3842
3843 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
3844 LiftoffRegister lhs, int32_t rhs) {
3845 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
3846 this, dst, lhs, rhs);
3847 }
3848
3849 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
3850 LiftoffRegister rhs) {
3851 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
3852 this, dst, lhs, rhs);
3853 }
3854
3855 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
3856 LiftoffRegister rhs) {
3857 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
3858 this, dst, lhs, rhs);
3859 }
3860
3861 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
3862 LiftoffRegister rhs) {
3863 static constexpr RegClass tmp_rc = reg_class_for(kS128);
3864 LiftoffRegister tmp1 =
3865 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
3866 LiftoffRegister tmp2 =
3867 GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
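// There is no SSE 64x64->64 multiply instruction; I64x2Mul assembles the
// result from 32-bit partial products, which is why two SIMD temporaries
// are required.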
3868 I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
3869 }
3870
3871 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
3872 LiftoffRegister src1,
3873 LiftoffRegister src2) {
3874 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3875 /*low=*/true, /*is_signed=*/true);
3876 }
3877
3878 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
3879 LiftoffRegister src1,
3880 LiftoffRegister src2) {
3881 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3882 /*low=*/true, /*is_signed=*/false);
3883 }
3884
3885 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
3886 LiftoffRegister src1,
3887 LiftoffRegister src2) {
3888 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3889 /*low=*/false, /*is_signed=*/true);
3890 }
3891
3892 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
3893 LiftoffRegister src1,
3894 LiftoffRegister src2) {
3895 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3896 /*low=*/false, /*is_signed=*/false);
3897 }
3898
3899 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
3900 LiftoffRegister src) {
3901 Movmskpd(dst.gp(), src.fp());
3902 }
3903
3904 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
3905 LiftoffRegister src) {
3906 Pmovsxdq(dst.fp(), src.fp());
3907 }
3908
3909 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
3910 LiftoffRegister src) {
3911 I64x2SConvertI32x4High(dst.fp(), src.fp());
3912 }
3913
3914 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
3915 LiftoffRegister src) {
3916 Pmovzxdq(dst.fp(), src.fp());
3917 }
3918
3919 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
3920 LiftoffRegister src) {
3921 I64x2UConvertI32x4High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3922 }
3923
3924 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
3925 LiftoffRegister src) {
3926 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
3927 Absps(dst.fp(), src.fp(), tmp);
3928 }
3929
3930 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
3931 LiftoffRegister src) {
3932 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
3933 Negps(dst.fp(), src.fp(), tmp);
3934 }
3935
3936 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
3937 LiftoffRegister src) {
3938 Sqrtps(dst.fp(), src.fp());
3939 }
3940
3941 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
3942 LiftoffRegister src) {
3943 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3944 Roundps(dst.fp(), src.fp(), kRoundUp);
3945 return true;
3946 }
3947
3948 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
3949 LiftoffRegister src) {
3950 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3951 Roundps(dst.fp(), src.fp(), kRoundDown);
3952 return true;
3953 }
3954
3955 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
3956 LiftoffRegister src) {
3957 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3958 Roundps(dst.fp(), src.fp(), kRoundToZero);
3959 return true;
3960 }
3961
3962 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
3963 LiftoffRegister src) {
3964 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3965 Roundps(dst.fp(), src.fp(), kRoundToNearest);
3966 return true;
3967 }
3968
3969 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3970 LiftoffRegister rhs) {
3971 liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
3972 this, dst, lhs, rhs);
3973 }
3974
3975 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3976 LiftoffRegister rhs) {
3977 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
3978 this, dst, lhs, rhs);
3979 }
3980
3981 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3982 LiftoffRegister rhs) {
3983 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
3984 this, dst, lhs, rhs);
3985 }
3986
3987 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
3988 LiftoffRegister rhs) {
3989 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
3990 this, dst, lhs, rhs);
3991 }
3992
3993 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
3994 LiftoffRegister rhs) {
3995 F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3996 }
3997
3998 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
3999 LiftoffRegister rhs) {
4000 F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4001 }
4002
4003 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
4004 LiftoffRegister rhs) {
4005 // Due to the way minps works, pmin(a, b) = minps(b, a).
4006 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
4007 this, dst, rhs, lhs);
4008 }
4009
4010 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
4011 LiftoffRegister rhs) {
4012 // Due to the way maxps works, pmax(a, b) = maxps(b, a).
4013 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
4014 this, dst, rhs, lhs);
4015 }
4016
4017 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
4018 LiftoffRegister src) {
4019 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4020 Abspd(dst.fp(), src.fp(), tmp);
4021 }
4022
4023 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
4024 LiftoffRegister src) {
4025 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4026 Negpd(dst.fp(), src.fp(), tmp);
4027 }
4028
4029 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
4030 LiftoffRegister src) {
4031 Sqrtpd(dst.fp(), src.fp());
4032 }
4033
4034 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
4035 LiftoffRegister src) {
4036 DCHECK(CpuFeatures::IsSupported(SSE4_1));
4037 Roundpd(dst.fp(), src.fp(), kRoundUp);
4038 return true;
4039 }
4040
4041 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
4042 LiftoffRegister src) {
4043 DCHECK(CpuFeatures::IsSupported(SSE4_1));
4044 Roundpd(dst.fp(), src.fp(), kRoundDown);
4045 return true;
4046 }
4047
4048 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
4049 LiftoffRegister src) {
4050 DCHECK(CpuFeatures::IsSupported(SSE4_1));
4051 Roundpd(dst.fp(), src.fp(), kRoundToZero);
4052 return true;
4053 }
4054
4055 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
4056 LiftoffRegister src) {
4057 DCHECK(CpuFeatures::IsSupported(SSE4_1));
4058 Roundpd(dst.fp(), src.fp(), kRoundToNearest);
4059 return true;
4060 }
4061
4062 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
4063 LiftoffRegister rhs) {
4064 liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
4065 this, dst, lhs, rhs);
4066 }
4067
4068 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
4069 LiftoffRegister rhs) {
4070 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
4071 this, dst, lhs, rhs);
4072 }
4073
4074 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
4075 LiftoffRegister rhs) {
4076 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
4077 this, dst, lhs, rhs);
4078 }
4079
4080 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
4081 LiftoffRegister rhs) {
4082 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
4083 this, dst, lhs, rhs);
4084 }
4085
4086 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
4087 LiftoffRegister rhs) {
4088 F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4089 }
4090
4091 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
4092 LiftoffRegister rhs) {
4093 F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
4094 }
4095
4096 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
4097 LiftoffRegister rhs) {
4098 // Due to the way minpd works, pmin(a, b) = minpd(b, a).
4099 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
4100 this, dst, rhs, lhs);
4101 }
4102
4103 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
4104 LiftoffRegister rhs) {
4105 // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
4106 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
4107 this, dst, rhs, lhs);
4108 }
4109
4110 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
4111 LiftoffRegister src) {
4112 Cvtdq2pd(dst.fp(), src.fp());
4113 }
4114
4115 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
4116 LiftoffRegister src) {
4117 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4118 F64x2ConvertLowI32x4U(dst.fp(), src.fp(), tmp);
4119 }
4120
4121 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
4122 LiftoffRegister src) {
4123 Cvtps2pd(dst.fp(), src.fp());
4124 }
4125
4126 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
4127 LiftoffRegister src) {
4128 Register tmp = GetUnusedRegister(kGpReg, {}).gp();
4129 I32x4SConvertF32x4(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
4130 }
4131
4132 void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
4133 LiftoffRegister src) {
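// There is no unsigned float->int conversion instruction; emulate it by
// clamping NaN and negative lanes to zero, converting as signed, and then
// fixing up lanes that were >= 2^31 (detailed in the step comments below).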
4134 static constexpr RegClass tmp_rc = reg_class_for(kS128);
4135 DoubleRegister tmp = GetUnusedRegister(tmp_rc, LiftoffRegList{dst, src}).fp();
4136 // NaN -> 0, negative -> 0.
4137 Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
4138 if (CpuFeatures::IsSupported(AVX)) {
4139 CpuFeatureScope scope(this, AVX);
4140 vmaxps(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
4141 } else {
4142 if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
4143 maxps(dst.fp(), liftoff::kScratchDoubleReg);
4144 }
4145 // scratch: float representation of max_signed.
4146 Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
4147 Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
4148 uint8_t{1}); // 0x7fffffff
4149 Cvtdq2ps(liftoff::kScratchDoubleReg,
4150 liftoff::kScratchDoubleReg); // 0x4f000000
4151 // tmp: convert (src-max_signed).
4152 // Set positive overflow lanes to 0x7FFFFFFF.
4153 // Set negative lanes to 0.
4154 if (CpuFeatures::IsSupported(AVX)) {
4155 CpuFeatureScope scope(this, AVX);
4156 vsubps(tmp, dst.fp(), liftoff::kScratchDoubleReg);
4157 } else {
4158 movaps(tmp, dst.fp());
4159 subps(tmp, liftoff::kScratchDoubleReg);
4160 }
4161 Cmpleps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp);
4162 Cvttps2dq(tmp, tmp);
4163 Pxor(tmp, liftoff::kScratchDoubleReg);
4164 Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
4165 Pmaxsd(tmp, tmp, liftoff::kScratchDoubleReg);
4166 // Convert to int. Overflow lanes above max_signed will be 0x80000000.
4167 Cvttps2dq(dst.fp(), dst.fp());
4168 // Add (src-max_signed) for overflow lanes.
4169 Paddd(dst.fp(), dst.fp(), tmp);
4170 }

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);  // Zeros.
  Pblendw(liftoff::kScratchDoubleReg, src.fp(),
          uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), liftoff::kScratchDoubleReg);
  }
  Cvtdq2ps(liftoff::kScratchDoubleReg,
           liftoff::kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), dst.fp(), byte{1});  // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());         // Convert hi, exactly.
  Addps(dst.fp(), dst.fp(), dst.fp());  // Double hi, exactly.
  Addps(dst.fp(), dst.fp(),
        liftoff::kScratchDoubleReg);  // Add hi and lo, may round.
}

void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
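  // movsx_b requires a byte-addressable source register (eax, ebx, ecx, or
  // edx), so go through a temporary byte register before sign-extending.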
  Register byte_reg = liftoff::GetTmpByteRegister(this, dst.gp());
  Pextrb(byte_reg, lhs.fp(), imm_lane_idx);
  movsx_b(dst.gp(), byte_reg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsx_w(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
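  // On ia32 an i64 value occupies a register pair, so read the 64-bit lane as
  // two consecutive 32-bit extracts.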
  Pextrd(dst.low_gp(), lhs.fp(), imm_lane_idx * 2);
  Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
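  // The 64-bit lane is written as two 32-bit inserts from the register pair
  // holding src2.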
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
    vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
    pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
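  // Bits [5:4] of the insertps immediate select the destination lane; lane 0
  // of src2 is the element being inserted.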
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
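  // Jump to the out-of-line code if esp is at or below the stack limit stored
  // at {limit_address}.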
  cmp(esp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp());
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    push(reg.gp());
    gp_regs.clear(reg);
  }
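  // SIMD registers cannot be pushed directly; reserve one block of stack space
  // and store them with movdqu.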
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(esp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(esp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) add(esp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    pop(reg.gp());
    gp_regs.clear(reg);
  }
}

void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
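  // Each spilled register occupies one pointer-sized stack slot; only slots
  // holding tagged references are recorded in the safepoint for the GC.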
  int spill_space_size = 0;
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}

void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
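  // All arguments are written into a stack buffer; a pointer to that buffer is
  // the single argument passed to the C function.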
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, esp, arg_bytes, *args++, param_kind);
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  constexpr Register kScratch = eax;
  constexpr Register kArgumentBuffer = ecx;
  constexpr int kNumCCallArgs = 1;
  mov(kArgumentBuffer, esp);
  PrepareCallCFunction(kNumCCallArgs, kScratch);

  // Pass a pointer to the buffer with the arguments to the C function. ia32
  // does not use registers here, so push to the stack.
  mov(Operand(esp, 0), kArgumentBuffer);

  // Now call the C function.
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = eax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, esp, 0, out_argument_kind);
  }

  add(esp, Immediate(stack_bytes));
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  wasm_call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  jmp(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  jmp(target);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  wasm_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  mov(addr, esp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  add(esp, Immediate(size));
}

void LiftoffAssembler::MaybeOSR() {}

void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
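  // Comparing a value against itself with ucomiss/ucomisd sets the parity flag
  // iff the value is NaN (unordered); only in that case store the flag.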
  if (kind == kF32) {
    ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    ucomisd(src, src);
  }
  Label ret;
  j(parity_odd, &ret);
  mov(Operand(dst, 0), Immediate(1));
  bind(&ret);
}

void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
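  // cmpunordps/cmpunordpd sets a lane to all ones iff that lane is NaN;
  // pmovmskb collapses the lanes into a GP bitmask which is ORed into the
  // in-memory flag at {dst}.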
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  pmovmskb(tmp_gp, tmp_s128.fp());
  or_(Operand(dst, 0), tmp_gp);
}

void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
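  // Slots are pushed in decreasing stack-slot order; {stack_decrement} is the
  // space (padding plus the slot itself) that has to be claimed before each
  // push.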
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    DCHECK_LT(0, stack_decrement);
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        // The combination of AllocateStackSpace and 2 movdqu is usually
        // smaller in code size than doing 4 pushes.
        if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement);
          asm_->movdqu(liftoff::kScratchDoubleReg,
                       liftoff::GetStackSlot(slot.src_offset_));
          asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
          break;
        }
        if (src.kind() == kF64) {
          asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
          DCHECK_EQ(kLowWord, slot.half_);
          asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
          stack_decrement = kSystemPointerSize;
        }
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
        break;
      case LiftoffAssembler::VarState::kRegister:
        if (src.kind() == kI64) {
          liftoff::push(
              asm_, slot.half_ == kLowWord ? src.reg().low() : src.reg().high(),
              kI32, stack_decrement - kSystemPointerSize);
        } else {
          int pushed_bytes = SlotSizeInBytes(slot);
          liftoff::push(asm_, src.reg(), src.kind(),
                        stack_decrement - pushed_bytes);
        }
        break;
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        // The high word is the sign extension of the low word.
        asm_->push(Immediate(slot.half_ == kLowWord ? src.i32_const()
                                                    : src.i32_const() >> 31));
        break;
    }
  }
}

#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_