Lines Matching refs:SLOW
… %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW
…le=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW
…-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW
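Each of these RUN lines is the slow-horizontal-ops half of a pair: the listing skips the interleaved runs (they do not match "SLOW"), which presumably test the same triples with the fast-hops subtarget feature and the matching -FAST check prefixes. Under that assumption, the skipped counterpart of the first RUN line would look something like:

; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST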
; SSE-SLOW-LABEL: test4_undef:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-SLOW-NEXT: addss %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: test4_undef:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: test5_undef:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movapd %xmm0, %xmm1
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-SLOW-NEXT: addsd %xmm0, %xmm1
; SSE-SLOW-NEXT: movapd %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: test5_undef:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq
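test4_undef and test5_undef exercise the same pattern at two widths: one pair of adjacent lanes is summed into lane 0 and every other output lane is undef, so on slow-hop targets llc prefers a lane shuffle plus a scalar add over haddps/haddpd. The IR behind such a check block has roughly this shape (a sketch with illustrative value names, not the test file's exact body):

define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) {
  %x0 = extractelement <4 x float> %a, i32 0
  %x1 = extractelement <4 x float> %a, i32 1
  %sum = fadd float %x0, %x1
  %res = insertelement <4 x float> undef, float %sum, i32 0
  ret <4 x float> %res
}

With only lane 0 live, a real haddps would compute three dead sums, which is exactly when the shuffle+add expansion pays off.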
; SSE-SLOW-LABEL: test8_undef:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-SLOW-NEXT: addss %xmm0, %xmm1
; SSE-SLOW-NEXT: movaps %xmm0, %xmm2
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-SLOW-NEXT: addss %xmm2, %xmm0
; SSE-SLOW-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-SLOW-NEXT: movaps %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: test8_undef:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: test11_undef:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-SLOW-NEXT: addss %xmm1, %xmm0
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE-SLOW-NEXT: addss %xmm3, %xmm1
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSE-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: test13_v16f32_undef:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: retq

; AVX512-SLOW-LABEL: test13_v16f32_undef:
; AVX512-SLOW: # %bb.0:
; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm3, %xmm2, %xmm2
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX512-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm2, %xmm0, %xmm2
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_pd_003:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0]
; SSE-SLOW-NEXT: addpd %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_pd_003:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq
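The add_pd_* and add_ps_* tests start from a whole-vector fadd whose operand is a shuffle of the input, rather than from extract/insert chains. A minimal sketch of the shape behind add_pd_003, with a shuffle mask assumed to be consistent with the movddup in the output (the actual test may leave one index undef):

define <2 x double> @add_pd_003(<2 x double> %x) {
  %dup = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %add = fadd <2 x double> %dup, %x
  ret <2 x double> %add
}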
; SSE-SLOW-LABEL: add_pd_003_2:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movapd %xmm0, %xmm1
; SSE-SLOW-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE-SLOW-NEXT: addpd %xmm0, %xmm1
; SSE-SLOW-NEXT: movapd %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_pd_003_2:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_pd_010:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0]
; SSE-SLOW-NEXT: addpd %xmm0, %xmm1
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-SLOW-NEXT: movapd %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_pd_010:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_ps_008:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE-SLOW-NEXT: addps %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_ps_008:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_ps_016:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm1, %xmm2
; SSE-SLOW-NEXT: haddps %xmm0, %xmm1
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[0,0]
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
; SSE-SLOW-NEXT: movaps %xmm2, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_ps_016:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,3]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_ps_017:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE-SLOW-NEXT: addps %xmm0, %xmm1
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE-SLOW-NEXT: movaps %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: add_ps_017:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: add_ps_018:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; AVX1-SLOW-NEXT: retq

; SSE-SLOW-LABEL: add_pd_011:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movapd %xmm2, %xmm1
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-SLOW-NEXT: movapd %xmm0, %xmm3
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE-SLOW-NEXT: addpd %xmm0, %xmm3
; SSE-SLOW-NEXT: addpd %xmm2, %xmm1
; SSE-SLOW-NEXT: movapd %xmm3, %xmm0
; SSE-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: add_pd_011:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: v16f32_inputs_v4f32_output_0123:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vhaddps %xmm2, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vzeroupper
; AVX1-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: v16f32_inputs_v8f32_output_4567:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX1-SLOW-NEXT: retq
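Note that add_pd_011 and the two v16f32_inputs tests still emit vhaddpd/vhaddps under the -SLOW prefixes: once the IR demands a genuine pairwise horizontal add across two full sources, a shuffle-plus-add expansion needs at least as many shuffles as the horizontal instruction itself, so the hadd form wins on every target. The SLOW expansions above only pay off when undef output lanes let most of the horizontal work be dropped.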
; SSE-SLOW-LABEL: PR44694:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE-SLOW-NEXT: haddpd %xmm3, %xmm2
; SSE-SLOW-NEXT: addpd %xmm1, %xmm0
; SSE-SLOW-NEXT: movapd %xmm2, %xmm1
; SSE-SLOW-NEXT: retq

; AVX1-SLOW-LABEL: PR44694:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-SLOW-NEXT: vhaddpd %ymm0, %ymm1, %ymm0
; AVX1-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR45747_1:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm0, %xmm1
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,2,2,2]
; SSE-SLOW-NEXT: movaps %xmm1, %xmm0
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR45747_1:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR45747_2:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm1, %xmm0
; SSE-SLOW-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR45747_2:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f32_u123:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddps %xmm1, %xmm0
; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE-SLOW-NEXT: addps %xmm1, %xmm2
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f32_u123:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f32_0u23:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm2, %xmm0
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm1, %xmm2
; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE-SLOW-NEXT: addps %xmm1, %xmm3
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[0,3]
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f32_0u23:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm2
; AVX-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f32_01u3:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddps %xmm0, %xmm0
; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE-SLOW-NEXT: addps %xmm1, %xmm2
; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f32_01u3:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f32_012u:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddps %xmm0, %xmm0
; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE-SLOW-NEXT: addps %xmm1, %xmm2
; SSE-SLOW-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f32_012u:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f64_u123:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddpd %xmm2, %xmm1
; SSE-SLOW-NEXT: movapd %xmm3, %xmm2
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm3[1]
; SSE-SLOW-NEXT: addsd %xmm3, %xmm2
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE-SLOW-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f64_u123:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-SLOW-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-SLOW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f64_0u23:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddpd %xmm2, %xmm0
; SSE-SLOW-NEXT: movapd %xmm3, %xmm2
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm3[1]
; SSE-SLOW-NEXT: addsd %xmm3, %xmm2
; SSE-SLOW-NEXT: movapd %xmm0, %xmm1
; SSE-SLOW-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f64_0u23:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-SLOW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f64_01u3:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddpd %xmm1, %xmm0
; SSE-SLOW-NEXT: movapd %xmm3, %xmm1
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE-SLOW-NEXT: addsd %xmm3, %xmm1
; SSE-SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f64_01u3:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-SLOW-NEXT: vhaddpd %xmm2, %xmm0, %xmm0
; AVX-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-SLOW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-SLOW-NEXT: retq

; SSE-SLOW-LABEL: PR34724_add_v4f64_012u:
; SSE-SLOW: # %bb.0:
; SSE-SLOW-NEXT: haddpd %xmm1, %xmm0
; SSE-SLOW-NEXT: movapd %xmm2, %xmm1
; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-SLOW-NEXT: addsd %xmm2, %xmm1
; SSE-SLOW-NEXT: retq

; AVX-SLOW-LABEL: PR34724_add_v4f64_012u:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-SLOW-NEXT: vhaddpd %xmm2, %xmm0, %xmm0
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-SLOW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-SLOW-NEXT: retq
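Check lines in this style (the {{.*#+}} shuffle annotations and # %bb.0: block markers) are autogenerated by llvm/utils/update_llc_test_checks.py; after a codegen change the test file is meant to be regenerated with that script rather than edited by hand, which is why every block above follows the same LABEL/NEXT shape.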