• Home
  • Raw
  • Download

Lines Matching refs:dst

30 void SharedTurboAssembler::Move(Register dst, uint32_t src) {  in Move()  argument
33 mov(dst, Immediate(src)); in Move()
35 movl(dst, Immediate(src)); in Move()
41 void SharedTurboAssembler::Move(Register dst, Register src) { in Move() argument
43 if (dst != src) { in Move()
45 mov(dst, src); in Move()
47 movq(dst, src); in Move()
54 void SharedTurboAssembler::Add(Register dst, Immediate src) { in Add() argument
57 add(dst, src); in Add()
59 addq(dst, src); in Add()
65 void SharedTurboAssembler::And(Register dst, Immediate src) { in And() argument
68 and_(dst, src); in And()
71 andl(dst, src); in And()
73 andq(dst, src); in And()
80 void SharedTurboAssembler::Movhps(XMMRegister dst, XMMRegister src1, in Movhps() argument
84 vmovhps(dst, src1, src2); in Movhps()
86 if (dst != src1) { in Movhps()
87 movaps(dst, src1); in Movhps()
89 movhps(dst, src2); in Movhps()
93 void SharedTurboAssembler::Movlps(XMMRegister dst, XMMRegister src1, in Movlps() argument
97 vmovlps(dst, src1, src2); in Movlps()
99 if (dst != src1) { in Movlps()
100 movaps(dst, src1); in Movlps()
102 movlps(dst, src2); in Movlps()
106 void SharedTurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1, in Pblendvb() argument
110 vpblendvb(dst, src1, src2, mask); in Pblendvb()
114 DCHECK_EQ(dst, src1); in Pblendvb()
115 pblendvb(dst, src2); in Pblendvb()
119 void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, in Shufps() argument
123 vshufps(dst, src1, src2, imm8); in Shufps()
125 if (dst != src1) { in Shufps()
126 movaps(dst, src1); in Shufps()
128 shufps(dst, src2, imm8); in Shufps()
132 void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src, in F64x2ExtractLane() argument
136 if (dst != src) { in F64x2ExtractLane()
137 Movaps(dst, src); in F64x2ExtractLane()
144 vmovhlps(dst, src, src); in F64x2ExtractLane()
146 movhlps(dst, src); in F64x2ExtractLane()
151 void SharedTurboAssembler::F64x2ReplaceLane(XMMRegister dst, XMMRegister src, in F64x2ReplaceLane() argument
157 vmovsd(dst, src, rep); in F64x2ReplaceLane()
159 vmovlhps(dst, src, rep); in F64x2ReplaceLane()
163 if (dst != src) { in F64x2ReplaceLane()
164 DCHECK_NE(dst, rep); // Ensure rep is not overwritten. in F64x2ReplaceLane()
165 movaps(dst, src); in F64x2ReplaceLane()
168 movsd(dst, rep); in F64x2ReplaceLane()
170 movlhps(dst, rep); in F64x2ReplaceLane()
175 void SharedTurboAssembler::F32x4Min(XMMRegister dst, XMMRegister lhs, in F32x4Min() argument
183 vminps(dst, rhs, lhs); in F32x4Min()
184 } else if (dst == lhs || dst == rhs) { in F32x4Min()
185 XMMRegister src = dst == lhs ? rhs : lhs; in F32x4Min()
187 minps(scratch, dst); in F32x4Min()
188 minps(dst, src); in F32x4Min()
192 movaps(dst, rhs); in F32x4Min()
193 minps(dst, lhs); in F32x4Min()
196 Orps(scratch, dst); in F32x4Min()
198 Cmpunordps(dst, dst, scratch); in F32x4Min()
199 Orps(scratch, dst); in F32x4Min()
200 Psrld(dst, dst, byte{10}); in F32x4Min()
201 Andnps(dst, dst, scratch); in F32x4Min()
204 void SharedTurboAssembler::F32x4Max(XMMRegister dst, XMMRegister lhs, in F32x4Max() argument
212 vmaxps(dst, rhs, lhs); in F32x4Max()
213 } else if (dst == lhs || dst == rhs) { in F32x4Max()
214 XMMRegister src = dst == lhs ? rhs : lhs; in F32x4Max()
216 maxps(scratch, dst); in F32x4Max()
217 maxps(dst, src); in F32x4Max()
221 movaps(dst, rhs); in F32x4Max()
222 maxps(dst, lhs); in F32x4Max()
225 Xorps(dst, scratch); in F32x4Max()
227 Orps(scratch, dst); in F32x4Max()
229 Subps(scratch, scratch, dst); in F32x4Max()
231 Cmpunordps(dst, dst, scratch); in F32x4Max()
232 Psrld(dst, dst, byte{10}); in F32x4Max()
233 Andnps(dst, dst, scratch); in F32x4Max()
236 void SharedTurboAssembler::F64x2Min(XMMRegister dst, XMMRegister lhs, in F64x2Min() argument
244 vminpd(dst, rhs, lhs); in F64x2Min()
246 vorpd(scratch, scratch, dst); in F64x2Min()
248 vcmpunordpd(dst, dst, scratch); in F64x2Min()
249 vorpd(scratch, scratch, dst); in F64x2Min()
250 vpsrlq(dst, dst, byte{13}); in F64x2Min()
251 vandnpd(dst, dst, scratch); in F64x2Min()
255 if (dst == lhs || dst == rhs) { in F64x2Min()
256 XMMRegister src = dst == lhs ? rhs : lhs; in F64x2Min()
258 minpd(scratch, dst); in F64x2Min()
259 minpd(dst, src); in F64x2Min()
262 movaps(dst, rhs); in F64x2Min()
264 minpd(dst, lhs); in F64x2Min()
266 orpd(scratch, dst); in F64x2Min()
267 cmpunordpd(dst, scratch); in F64x2Min()
268 orpd(scratch, dst); in F64x2Min()
269 psrlq(dst, byte{13}); in F64x2Min()
270 andnpd(dst, scratch); in F64x2Min()
274 void SharedTurboAssembler::F64x2Max(XMMRegister dst, XMMRegister lhs, in F64x2Max() argument
282 vmaxpd(dst, rhs, lhs); in F64x2Max()
284 vxorpd(dst, dst, scratch); in F64x2Max()
286 vorpd(scratch, scratch, dst); in F64x2Max()
288 vsubpd(scratch, scratch, dst); in F64x2Max()
290 vcmpunordpd(dst, dst, scratch); in F64x2Max()
291 vpsrlq(dst, dst, byte{13}); in F64x2Max()
292 vandnpd(dst, dst, scratch); in F64x2Max()
294 if (dst == lhs || dst == rhs) { in F64x2Max()
295 XMMRegister src = dst == lhs ? rhs : lhs; in F64x2Max()
297 maxpd(scratch, dst); in F64x2Max()
298 maxpd(dst, src); in F64x2Max()
301 movaps(dst, rhs); in F64x2Max()
303 maxpd(dst, lhs); in F64x2Max()
305 xorpd(dst, scratch); in F64x2Max()
306 orpd(scratch, dst); in F64x2Max()
307 subpd(scratch, dst); in F64x2Max()
308 cmpunordpd(dst, scratch); in F64x2Max()
309 psrlq(dst, byte{13}); in F64x2Max()
310 andnpd(dst, scratch); in F64x2Max()
314 void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) { in F32x4Splat() argument
318 vbroadcastss(dst, src); in F32x4Splat()
321 vshufps(dst, src, src, 0); in F32x4Splat()
323 if (dst == src) { in F32x4Splat()
325 shufps(dst, src, 0); in F32x4Splat()
327 pshufd(dst, src, 0); in F32x4Splat()
332 void SharedTurboAssembler::F32x4ExtractLane(FloatRegister dst, XMMRegister src, in F32x4ExtractLane() argument
339 if (dst != src) { in F32x4ExtractLane()
340 Movaps(dst, src); in F32x4ExtractLane()
343 Movshdup(dst, src); in F32x4ExtractLane()
344 } else if (lane == 2 && dst == src) { in F32x4ExtractLane()
346 Movhlps(dst, src); in F32x4ExtractLane()
347 } else if (dst == src) { in F32x4ExtractLane()
348 Shufps(dst, src, src, lane); in F32x4ExtractLane()
350 Pshufd(dst, src, lane); in F32x4ExtractLane()
354 void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src, in S128Store32Lane() argument
358 Movss(dst, src); in S128Store32Lane()
361 Extractps(dst, src, laneidx); in S128Store32Lane()
366 void SharedTurboAssembler::I8x16SplatPreAvx2(XMMRegister dst, Op src, in I8x16SplatPreAvx2() argument
371 Movd(dst, src); in I8x16SplatPreAvx2()
373 Pshufb(dst, scratch); in I8x16SplatPreAvx2()
376 void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Register src, in I8x16Splat() argument
382 vpbroadcastb(dst, scratch); in I8x16Splat()
384 I8x16SplatPreAvx2(dst, src, scratch); in I8x16Splat()
388 void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Operand src, in I8x16Splat() argument
394 vpbroadcastb(dst, src); in I8x16Splat()
396 I8x16SplatPreAvx2(dst, src, scratch); in I8x16Splat()
400 void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1, in I8x16Shl() argument
404 DCHECK_NE(dst, tmp2); in I8x16Shl()
406 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) { in I8x16Shl()
407 movaps(dst, src1); in I8x16Shl()
408 src1 = dst; in I8x16Shl()
412 Psllw(dst, src1, byte{shift}); in I8x16Shl()
419 Pand(dst, tmp2); in I8x16Shl()
422 void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1, in I8x16Shl() argument
426 DCHECK(!AreAliased(dst, tmp2, tmp3)); in I8x16Shl()
438 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) { in I8x16Shl()
439 movaps(dst, src1); in I8x16Shl()
440 src1 = dst; in I8x16Shl()
443 Pand(dst, src1, tmp2); in I8x16Shl()
446 Psllw(dst, dst, tmp3); in I8x16Shl()
449 void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1, in I8x16ShrS() argument
453 DCHECK_NE(dst, tmp); in I8x16ShrS()
457 Punpcklbw(dst, src1); in I8x16ShrS()
459 Psraw(dst, shift); in I8x16ShrS()
460 Packsswb(dst, tmp); in I8x16ShrS()
463 void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1, in I8x16ShrS() argument
467 DCHECK(!AreAliased(dst, tmp2, tmp3)); in I8x16ShrS()
472 Punpcklbw(dst, src1); in I8x16ShrS()
480 Psraw(dst, tmp3); in I8x16ShrS()
481 Packsswb(dst, tmp2); in I8x16ShrS()
484 void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1, in I8x16ShrU() argument
488 DCHECK_NE(dst, tmp2); in I8x16ShrU()
489 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) { in I8x16ShrU()
490 movaps(dst, src1); in I8x16ShrU()
491 src1 = dst; in I8x16ShrU()
496 Psrlw(dst, src1, shift); in I8x16ShrU()
503 Pand(dst, tmp2); in I8x16ShrU()
506 void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1, in I8x16ShrU() argument
510 DCHECK(!AreAliased(dst, tmp2, tmp3)); in I8x16ShrU()
515 Punpcklbw(dst, src1); in I8x16ShrU()
523 Psrlw(dst, tmp3); in I8x16ShrU()
524 Packuswb(dst, tmp2); in I8x16ShrU()
528 void SharedTurboAssembler::I16x8SplatPreAvx2(XMMRegister dst, Op src) { in I16x8SplatPreAvx2() argument
530 Movd(dst, src); in I16x8SplatPreAvx2()
531 Pshuflw(dst, dst, uint8_t{0x0}); in I16x8SplatPreAvx2()
532 Punpcklqdq(dst, dst); in I16x8SplatPreAvx2()
535 void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Register src) { in I16x8Splat() argument
539 Movd(dst, src); in I16x8Splat()
540 vpbroadcastw(dst, dst); in I16x8Splat()
542 I16x8SplatPreAvx2(dst, src); in I16x8Splat()
546 void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Operand src) { in I16x8Splat() argument
551 vpbroadcastw(dst, src); in I16x8Splat()
553 I16x8SplatPreAvx2(dst, src); in I16x8Splat()
557 void SharedTurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, in I16x8ExtMulLow() argument
562 is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2); in I16x8ExtMulLow()
563 Pmullw(dst, scratch); in I16x8ExtMulLow()
566 void SharedTurboAssembler::I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1, in I16x8ExtMulHighS() argument
574 vpunpckhbw(dst, src2, src2); in I16x8ExtMulHighS()
575 vpsraw(dst, dst, 8); in I16x8ExtMulHighS()
576 vpmullw(dst, dst, scratch); in I16x8ExtMulHighS()
578 if (dst != src1) { in I16x8ExtMulHighS()
579 movaps(dst, src1); in I16x8ExtMulHighS()
582 punpckhbw(dst, dst); in I16x8ExtMulHighS()
583 psraw(dst, 8); in I16x8ExtMulHighS()
586 pmullw(dst, scratch); in I16x8ExtMulHighS()
590 void SharedTurboAssembler::I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1, in I16x8ExtMulHighU() argument
600 vpunpckhbw(dst, src1, scratch); in I16x8ExtMulHighU()
601 vpmullw(dst, dst, dst); in I16x8ExtMulHighU()
603 if (dst == src2) { in I16x8ExtMulHighU()
608 vpunpckhbw(dst, src1, scratch); in I16x8ExtMulHighU()
610 vpmullw(dst, dst, scratch); in I16x8ExtMulHighU()
615 if (dst != src1) { in I16x8ExtMulHighU()
616 movaps(dst, src1); in I16x8ExtMulHighU()
618 punpckhbw(dst, scratch); in I16x8ExtMulHighU()
619 pmullw(dst, scratch); in I16x8ExtMulHighU()
624 if (dst == src2) { in I16x8ExtMulHighU()
627 } else if (dst != src1) { in I16x8ExtMulHighU()
629 movaps(dst, src1); in I16x8ExtMulHighU()
632 punpckhbw(dst, scratch); in I16x8ExtMulHighU()
635 pmullw(dst, scratch); in I16x8ExtMulHighU()
640 void SharedTurboAssembler::I16x8SConvertI8x16High(XMMRegister dst, in I16x8SConvertI8x16High() argument
647 vpunpckhbw(dst, src, src); in I16x8SConvertI8x16High()
648 vpsraw(dst, dst, 8); in I16x8SConvertI8x16High()
651 if (dst == src) { in I16x8SConvertI8x16High()
653 movhlps(dst, src); in I16x8SConvertI8x16High()
654 pmovsxbw(dst, dst); in I16x8SConvertI8x16High()
657 pshufd(dst, src, 0xEE); in I16x8SConvertI8x16High()
658 pmovsxbw(dst, dst); in I16x8SConvertI8x16High()
663 void SharedTurboAssembler::I16x8UConvertI8x16High(XMMRegister dst, in I16x8UConvertI8x16High() argument
672 XMMRegister tmp = dst == src ? scratch : dst; in I16x8UConvertI8x16High()
674 vpunpckhbw(dst, src, tmp); in I16x8UConvertI8x16High()
677 if (dst == src) { in I16x8UConvertI8x16High()
680 punpckhbw(dst, scratch); in I16x8UConvertI8x16High()
683 pshufd(dst, src, 0xEE); in I16x8UConvertI8x16High()
684 pmovzxbw(dst, dst); in I16x8UConvertI8x16High()
689 void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, in I16x8Q15MulRSatS() argument
697 if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) { in I16x8Q15MulRSatS()
698 movaps(dst, src1); in I16x8Q15MulRSatS()
699 src1 = dst; in I16x8Q15MulRSatS()
702 Pmulhrsw(dst, src1, src2); in I16x8Q15MulRSatS()
703 Pcmpeqw(scratch, dst); in I16x8Q15MulRSatS()
704 Pxor(dst, scratch); in I16x8Q15MulRSatS()
707 void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst, in I32x4ExtAddPairwiseI16x8U() argument
717 vpblendw(dst, src, tmp, 0xAA); in I32x4ExtAddPairwiseI16x8U()
719 vpaddd(dst, tmp, dst); in I32x4ExtAddPairwiseI16x8U()
726 if (dst != src) { in I32x4ExtAddPairwiseI16x8U()
727 movaps(dst, src); in I32x4ExtAddPairwiseI16x8U()
729 pblendw(dst, tmp, 0xAA); in I32x4ExtAddPairwiseI16x8U()
730 paddd(dst, tmp); in I32x4ExtAddPairwiseI16x8U()
739 if (dst != src) { in I32x4ExtAddPairwiseI16x8U()
740 movaps(dst, src); in I32x4ExtAddPairwiseI16x8U()
742 psrld(dst, byte{16}); in I32x4ExtAddPairwiseI16x8U()
744 paddd(dst, tmp); in I32x4ExtAddPairwiseI16x8U()
751 void SharedTurboAssembler::I32x4ExtMul(XMMRegister dst, XMMRegister src1, in I32x4ExtMul() argument
758 is_signed ? vpmulhw(dst, src1, src2) : vpmulhuw(dst, src1, src2); in I32x4ExtMul()
759 low ? vpunpcklwd(dst, scratch, dst) : vpunpckhwd(dst, scratch, dst); in I32x4ExtMul()
761 DCHECK_EQ(dst, src1); in I32x4ExtMul()
763 pmullw(dst, src2); in I32x4ExtMul()
765 low ? punpcklwd(dst, scratch) : punpckhwd(dst, scratch); in I32x4ExtMul()
769 void SharedTurboAssembler::I32x4SConvertI16x8High(XMMRegister dst, in I32x4SConvertI16x8High() argument
776 vpunpckhwd(dst, src, src); in I32x4SConvertI16x8High()
777 vpsrad(dst, dst, 16); in I32x4SConvertI16x8High()
780 if (dst == src) { in I32x4SConvertI16x8High()
782 movhlps(dst, src); in I32x4SConvertI16x8High()
783 pmovsxwd(dst, dst); in I32x4SConvertI16x8High()
786 pshufd(dst, src, 0xEE); in I32x4SConvertI16x8High()
787 pmovsxwd(dst, dst); in I32x4SConvertI16x8High()
792 void SharedTurboAssembler::I32x4UConvertI16x8High(XMMRegister dst, in I32x4UConvertI16x8High() argument
801 XMMRegister tmp = dst == src ? scratch : dst; in I32x4UConvertI16x8High()
803 vpunpckhwd(dst, src, tmp); in I32x4UConvertI16x8High()
805 if (dst == src) { in I32x4UConvertI16x8High()
808 punpckhwd(dst, scratch); in I32x4UConvertI16x8High()
812 pshufd(dst, src, 0xEE); in I32x4UConvertI16x8High()
813 pmovzxwd(dst, dst); in I32x4UConvertI16x8High()
818 void SharedTurboAssembler::I64x2Neg(XMMRegister dst, XMMRegister src, in I64x2Neg() argument
824 vpsubq(dst, scratch, src); in I64x2Neg()
826 if (dst == src) { in I64x2Neg()
830 pxor(dst, dst); in I64x2Neg()
831 psubq(dst, src); in I64x2Neg()
835 void SharedTurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src, in I64x2Abs() argument
840 XMMRegister tmp = dst == src ? scratch : dst; in I64x2Abs()
843 vblendvpd(dst, src, tmp, src); in I64x2Abs()
847 if (dst != src) { in I64x2Abs()
848 movaps(dst, src); in I64x2Abs()
851 xorps(dst, scratch); in I64x2Abs()
852 psubq(dst, scratch); in I64x2Abs()
856 void SharedTurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0, in I64x2GtS() argument
861 vpcmpgtq(dst, src0, src1); in I64x2GtS()
864 if (dst == src0) { in I64x2GtS()
865 pcmpgtq(dst, src1); in I64x2GtS()
866 } else if (dst == src1) { in I64x2GtS()
869 movaps(dst, scratch); in I64x2GtS()
871 movaps(dst, src0); in I64x2GtS()
872 pcmpgtq(dst, src1); in I64x2GtS()
876 DCHECK_NE(dst, src0); in I64x2GtS()
877 DCHECK_NE(dst, src1); in I64x2GtS()
878 movaps(dst, src1); in I64x2GtS()
880 psubq(dst, src0); in I64x2GtS()
882 andps(dst, scratch); in I64x2GtS()
885 orps(dst, scratch); in I64x2GtS()
886 movshdup(dst, dst); in I64x2GtS()
890 void SharedTurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0, in I64x2GeS() argument
895 vpcmpgtq(dst, src1, src0); in I64x2GeS()
897 vpxor(dst, dst, scratch); in I64x2GeS()
900 DCHECK_NE(dst, src0); in I64x2GeS()
901 if (dst != src1) { in I64x2GeS()
902 movaps(dst, src1); in I64x2GeS()
904 pcmpgtq(dst, src0); in I64x2GeS()
906 xorps(dst, scratch); in I64x2GeS()
909 DCHECK_NE(dst, src0); in I64x2GeS()
910 DCHECK_NE(dst, src1); in I64x2GeS()
911 movaps(dst, src0); in I64x2GeS()
913 psubq(dst, src1); in I64x2GeS()
915 andps(dst, scratch); in I64x2GeS()
918 orps(dst, scratch); in I64x2GeS()
919 movshdup(dst, dst); in I64x2GeS()
921 xorps(dst, scratch); in I64x2GeS()
925 void SharedTurboAssembler::I64x2ShrS(XMMRegister dst, XMMRegister src, in I64x2ShrS() argument
929 DCHECK_NE(xmm_tmp, dst); in I64x2ShrS()
944 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) { in I64x2ShrS()
945 movaps(dst, src); in I64x2ShrS()
946 src = dst; in I64x2ShrS()
950 Pxor(dst, src, xmm_tmp); in I64x2ShrS()
952 Psrlq(dst, shift); in I64x2ShrS()
955 Psubq(dst, xmm_tmp); in I64x2ShrS()
958 void SharedTurboAssembler::I64x2ShrS(XMMRegister dst, XMMRegister src, in I64x2ShrS() argument
963 DCHECK_NE(xmm_tmp, dst); in I64x2ShrS()
965 DCHECK_NE(xmm_shift, dst); in I64x2ShrS()
978 if (!CpuFeatures::IsSupported(AVX) && (dst != src)) { in I64x2ShrS()
979 movaps(dst, src); in I64x2ShrS()
980 src = dst; in I64x2ShrS()
982 Pxor(dst, src, xmm_tmp); in I64x2ShrS()
983 Psrlq(dst, xmm_shift); in I64x2ShrS()
985 Psubq(dst, xmm_tmp); in I64x2ShrS()
988 void SharedTurboAssembler::I64x2Mul(XMMRegister dst, XMMRegister lhs, in I64x2Mul() argument
992 DCHECK(!AreAliased(dst, tmp1, tmp2)); in I64x2Mul()
1008 vpmuludq(dst, lhs, rhs); in I64x2Mul()
1010 vpaddq(dst, dst, tmp2); in I64x2Mul()
1021 if (dst == rhs) { in I64x2Mul()
1023 pmuludq(dst, lhs); in I64x2Mul()
1025 if (dst != lhs) { in I64x2Mul()
1026 movaps(dst, lhs); in I64x2Mul()
1028 pmuludq(dst, rhs); in I64x2Mul()
1030 paddq(dst, tmp2); in I64x2Mul()
1038 void SharedTurboAssembler::I64x2ExtMul(XMMRegister dst, XMMRegister src1, in I64x2ExtMul() argument
1046 vpunpckldq(dst, src2, src2); in I64x2ExtMul()
1049 vpunpckhdq(dst, src2, src2); in I64x2ExtMul()
1052 vpmuldq(dst, scratch, dst); in I64x2ExtMul()
1054 vpmuludq(dst, scratch, dst); in I64x2ExtMul()
1059 pshufd(dst, src2, mask); in I64x2ExtMul()
1062 pmuldq(dst, scratch); in I64x2ExtMul()
1064 pmuludq(dst, scratch); in I64x2ExtMul()
1069 void SharedTurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, in I64x2SConvertI32x4High() argument
1074 vpunpckhqdq(dst, src, src); in I64x2SConvertI32x4High()
1075 vpmovsxdq(dst, dst); in I64x2SConvertI32x4High()
1078 if (dst == src) { in I64x2SConvertI32x4High()
1079 movhlps(dst, src); in I64x2SConvertI32x4High()
1081 pshufd(dst, src, 0xEE); in I64x2SConvertI32x4High()
1083 pmovsxdq(dst, dst); in I64x2SConvertI32x4High()
1087 void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, in I64x2UConvertI32x4High() argument
1094 vpunpckhdq(dst, src, scratch); in I64x2UConvertI32x4High()
1096 if (dst == src) { in I64x2UConvertI32x4High()
1099 punpckhdq(dst, scratch); in I64x2UConvertI32x4High()
1103 pshufd(dst, src, 0xEE); in I64x2UConvertI32x4High()
1104 pmovzxdq(dst, dst); in I64x2UConvertI32x4High()
1109 void SharedTurboAssembler::S128Not(XMMRegister dst, XMMRegister src, in S128Not() argument
1112 if (dst == src) { in S128Not()
1114 Pxor(dst, scratch); in S128Not()
1116 Pcmpeqd(dst, dst); in S128Not()
1117 Pxor(dst, src); in S128Not()
1121 void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask, in S128Select() argument
1130 vpand(dst, src1, mask); in S128Select()
1131 vpor(dst, dst, scratch); in S128Select()
1133 DCHECK_EQ(dst, mask); in S128Select()
1137 andps(dst, src1); in S128Select()
1138 orps(dst, scratch); in S128Select()
1142 void SharedTurboAssembler::S128Load8Splat(XMMRegister dst, Operand src, in S128Load8Splat() argument
1150 vpbroadcastb(dst, src); in S128Load8Splat()
1154 vpinsrb(dst, scratch, src, uint8_t{0}); in S128Load8Splat()
1156 vpshufb(dst, dst, scratch); in S128Load8Splat()
1159 pinsrb(dst, src, uint8_t{0}); in S128Load8Splat()
1161 pshufb(dst, scratch); in S128Load8Splat()
1165 void SharedTurboAssembler::S128Load16Splat(XMMRegister dst, Operand src, in S128Load16Splat() argument
1173 vpbroadcastw(dst, src); in S128Load16Splat()
1177 vpinsrw(dst, scratch, src, uint8_t{0}); in S128Load16Splat()
1178 vpshuflw(dst, dst, uint8_t{0}); in S128Load16Splat()
1179 vpunpcklqdq(dst, dst, dst); in S128Load16Splat()
1181 pinsrw(dst, src, uint8_t{0}); in S128Load16Splat()
1182 pshuflw(dst, dst, uint8_t{0}); in S128Load16Splat()
1183 movlhps(dst, dst); in S128Load16Splat()
1187 void SharedTurboAssembler::S128Load32Splat(XMMRegister dst, Operand src) { in S128Load32Splat() argument
1194 vbroadcastss(dst, src); in S128Load32Splat()
1196 movss(dst, src); in S128Load32Splat()
1197 shufps(dst, dst, byte{0}); in S128Load32Splat()
1201 void SharedTurboAssembler::S128Store64Lane(Operand dst, XMMRegister src, in S128Store64Lane() argument
1205 Movlps(dst, src); in S128Store64Lane()
1208 Movhps(dst, src); in S128Store64Lane()
1217 if (dst == src1) { \
1218 vfmadd231##ps_or_pd(dst, src2, src3); \
1219 } else if (dst == src2) { \
1220 vfmadd132##ps_or_pd(dst, src1, src3); \
1221 } else if (dst == src3) { \
1222 vfmadd213##ps_or_pd(dst, src2, src1); \
1225 vmovups(dst, src1); \
1226 vfmadd231##ps_or_pd(dst, src2, src3); \
1231 vadd##ps_or_pd(dst, src1, tmp); \
1233 if (dst == src1) { \
1236 add##ps_or_pd(dst, tmp); \
1237 } else if (dst == src2) { \
1241 } else if (dst == src3) { \
1246 movaps(dst, src2); \
1247 mul##ps_or_pd(dst, src3); \
1248 add##ps_or_pd(dst, src1); \
1257 if (dst == src1) { \
1258 vfnmadd231##ps_or_pd(dst, src2, src3); \
1259 } else if (dst == src2) { \
1260 vfnmadd132##ps_or_pd(dst, src1, src3); \
1261 } else if (dst == src3) { \
1262 vfnmadd213##ps_or_pd(dst, src2, src1); \
1265 vmovups(dst, src1); \
1266 vfnmadd231##ps_or_pd(dst, src2, src3); \
1271 vsub##ps_or_pd(dst, src1, tmp); \
1275 if (dst != src1) { \
1276 movaps(dst, src1); \
1278 sub##ps_or_pd(dst, tmp); \
1281 void SharedTurboAssembler::F32x4Qfma(XMMRegister dst, XMMRegister src1, in F32x4Qfma() argument
1287 void SharedTurboAssembler::F32x4Qfms(XMMRegister dst, XMMRegister src1, in F32x4Qfms() argument
1293 void SharedTurboAssembler::F64x2Qfma(XMMRegister dst, XMMRegister src1, in F64x2Qfma() argument
1299 void SharedTurboAssembler::F64x2Qfms(XMMRegister dst, XMMRegister src1, in F64x2Qfms() argument