• Home
  • Raw
  • Download

Lines Matching refs:xmm0

15 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
20 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
28 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
33 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
41 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
46 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
54 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
59 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
72 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
80 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
85 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
93 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
98 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
106 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
111 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
120 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
125 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
134 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
139 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
147 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
152 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
160 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
165 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
173 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
178 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
186 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
191 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
199 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
204 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
212 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
217 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
225 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
230 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
238 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
243 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
251 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
256 ; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
261 ; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
266 ; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
271 ; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
279 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
284 ; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
289 ; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
294 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
299 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
308 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
313 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
322 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
323 ; SSE-NEXT: movapd %xmm1, %xmm0
328 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
337 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
338 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
343 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
344 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
349 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
350 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
356 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
362 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
368 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
376 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
377 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
382 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
383 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
388 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
389 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
395 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
396 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
402 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
403 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
409 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
410 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
418 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
419 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
420 ; SSE2-NEXT: movaps %xmm1, %xmm0
425 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
426 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
427 ; SSE3-NEXT: movaps %xmm1, %xmm0
432 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
433 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
434 ; SSSE3-NEXT: movaps %xmm1, %xmm0
440 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
441 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
447 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
448 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
454 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
455 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
463 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
464 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
465 ; SSE2-NEXT: movaps %xmm1, %xmm0
470 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
471 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
472 ; SSE3-NEXT: movaps %xmm1, %xmm0
477 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
478 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
479 ; SSSE3-NEXT: movaps %xmm1, %xmm0
484 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
485 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
490 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
491 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
496 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
497 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
505 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
510 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
518 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
519 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
524 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
525 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
530 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
531 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
537 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
538 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
544 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
545 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
551 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
552 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
560 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
561 ; SSE-NEXT: movdqa %xmm1, %xmm0
566 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
574 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
575 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
580 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
581 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
586 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
587 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
593 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
594 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
600 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
601 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
607 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
608 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
618 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
619 ; SSE2-NEXT: movaps %xmm1, %xmm0
625 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
626 ; SSE3-NEXT: movaps %xmm1, %xmm0
632 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
633 ; SSSE3-NEXT: movaps %xmm1, %xmm0
639 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
645 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
655 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
656 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
662 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
663 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
669 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
670 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
675 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
680 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
690 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
691 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
692 ; SSE2-NEXT: movaps %xmm1, %xmm0
698 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
699 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
700 ; SSE3-NEXT: movaps %xmm1, %xmm0
706 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
707 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
708 ; SSSE3-NEXT: movaps %xmm1, %xmm0
713 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
718 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
728 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
729 ; SSE2-NEXT: movaps %xmm1, %xmm0
735 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
736 ; SSE3-NEXT: movaps %xmm1, %xmm0
742 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
743 ; SSSE3-NEXT: movaps %xmm1, %xmm0
748 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
753 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
763 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
764 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
765 ; SSE2-NEXT: movaps %xmm1, %xmm0
771 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
772 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
773 ; SSE3-NEXT: movaps %xmm1, %xmm0
779 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
780 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
781 ; SSSE3-NEXT: movaps %xmm1, %xmm0
787 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
793 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
803 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
804 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
810 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
811 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
817 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
818 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
823 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
828 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
838 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
839 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
840 ; SSE2-NEXT: movaps %xmm1, %xmm0
846 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
847 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
848 ; SSE3-NEXT: movaps %xmm1, %xmm0
854 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
855 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
856 ; SSSE3-NEXT: movaps %xmm1, %xmm0
862 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
868 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
878 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
879 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
885 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
886 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
892 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
893 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
899 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
905 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
915 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
916 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
922 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
923 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
929 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
930 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
936 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
942 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
952 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
953 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
959 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
960 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
966 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
967 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
973 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
979 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
989 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
990 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
996 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
997 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1003 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
1004 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1010 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1016 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1025 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1026 ; SSE-NEXT: movaps %xmm1, %xmm0
1031 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1043 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1044 ; SSE2-NEXT: movaps %xmm2, %xmm0
1052 ; SSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1053 ; SSE3-NEXT: movaps %xmm2, %xmm0
1061 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1062 ; SSSE3-NEXT: movaps %xmm2, %xmm0
1068 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1074 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1084 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1086 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1088 ; SSE2-NEXT: movaps %xmm1, %xmm0
1093 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1095 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1097 ; SSE3-NEXT: movaps %xmm1, %xmm0
1102 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1104 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1106 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1111 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1116 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1126 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1127 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1129 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1130 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1131 ; SSE2-NEXT: movaps %xmm1, %xmm0
1136 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1137 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1139 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1140 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1141 ; SSE3-NEXT: movaps %xmm1, %xmm0
1146 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1147 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1149 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1150 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1151 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1156 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1161 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1172 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1173 ; SSE2-NEXT: movaps %xmm1, %xmm0
1179 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1180 ; SSE3-NEXT: movaps %xmm1, %xmm0
1186 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1187 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1193 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1199 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1209 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1210 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1216 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1217 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1223 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1224 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1230 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1231 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1237 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1238 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
1248 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1249 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1255 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1256 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1262 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1263 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1269 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1270 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1276 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1277 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
1286 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,x…
1291 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,
1301 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1302 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1308 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1309 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1315 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1316 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1321 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1322 ; SSE41-NEXT: pxor %xmm0, %xmm0
1323 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1328 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1330 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1335 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1337 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1346 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1347 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1348 ; SSE2-NEXT: movaps %xmm1, %xmm0
1353 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1354 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1355 ; SSE3-NEXT: movaps %xmm1, %xmm0
1360 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1365 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1370 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1379 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1380 ; SSE2-NEXT: movapd %xmm1, %xmm0
1385 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1386 ; SSE3-NEXT: movapd %xmm1, %xmm0
1391 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1396 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1401 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1410 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1411 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1412 ; SSE2-NEXT: movaps %xmm1, %xmm0
1417 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1418 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1419 ; SSE3-NEXT: movaps %xmm1, %xmm0
1424 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1429 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1434 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1443 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1444 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1449 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1450 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1455 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1456 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1461 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1462 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1467 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1476 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1481 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1486 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1487 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1492 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1493 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1498 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1507 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1508 ; SSE-NEXT: movdqa %xmm1, %xmm0
1513 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1522 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1523 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1528 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1529 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1534 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1535 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1540 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1541 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1546 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1555 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1560 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1565 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1570 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1575 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1585 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1591 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1597 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1602 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1607 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1616 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1621 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1630 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1635 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1640 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1646 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1652 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1658 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1667 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1672 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1677 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1683 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1689 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1695 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1704 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1709 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1714 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1720 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1726 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1732 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
1741 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1746 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1751 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1757 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1763 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1769 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1778 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1783 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1796 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1797 ; SSE-NEXT: movapd %xmm1, %xmm0
1803 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1816 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1821 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1832 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1834 ; SSE2-NEXT: movaps %xmm1, %xmm0
1839 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1841 ; SSE3-NEXT: movaps %xmm1, %xmm0
1846 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
1848 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1853 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
1858 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
1872 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1873 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1874 ; SSE2-NEXT: movaps %xmm1, %xmm0
1879 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1880 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1881 ; SSE3-NEXT: movaps %xmm1, %xmm0
1886 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1887 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1888 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1893 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1898 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1912 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1913 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1914 ; SSE2-NEXT: movaps %xmm1, %xmm0
1919 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1920 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1921 ; SSE3-NEXT: movaps %xmm1, %xmm0
1926 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
1927 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
1928 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1933 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
1938 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
1943 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1957 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1958 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1963 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1968 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1973 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1978 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1988 ; SSE-NEXT: movd %edi, %xmm0
1993 ; AVX-NEXT: vmovd %edi, %xmm0
2003 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2008 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2020 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2021 ; SSE2-NEXT: movaps %xmm1, %xmm0
2027 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2028 ; SSE3-NEXT: movaps %xmm1, %xmm0
2034 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2035 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2041 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2047 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2057 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2062 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2074 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2080 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2086 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2092 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2098 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2104 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
2115 ; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2120 ; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2125 ; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2131 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2137 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2143 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
2155 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2161 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2173 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2179 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2190 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2191 ; SSE-NEXT: movapd %xmm1, %xmm0
2196 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2207 ; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2212 ; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
2223 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2224 ; SSE-NEXT: movapd %xmm1, %xmm0
2229 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2240 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2245 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2256 ; SSE-NEXT: movaps (%rdi), %xmm0
2257 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2262 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
2272 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2273 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2278 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
2293 ; SSE-NEXT: psllq $32, %xmm0
2298 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2307 ; SSE-NEXT: psrlq $32, %xmm0
2312 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0