Lines Matching refs:X64
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64
13 ; X64-LABEL: test_mm_broadcastd_epi32:
14 ; X64: # BB#0:
15 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
16 ; X64-NEXT: retq
38 ; X64-LABEL: test_mm_mask_broadcastd_epi32:
39 ; X64: # BB#0:
40 ; X64-NEXT: andb $15, %dil
41 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
42 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
43 ; X64-NEXT: kmovw %eax, %k1
44 ; X64-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
45 ; X64-NEXT: retq
71 ; X64-LABEL: test_mm_maskz_broadcastd_epi32:
72 ; X64: # BB#0:
73 ; X64-NEXT: andb $15, %dil
74 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
75 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
76 ; X64-NEXT: kmovw %eax, %k1
77 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
78 ; X64-NEXT: retq
94 ; X64-LABEL: test_mm256_broadcastd_epi32:
95 ; X64: # BB#0:
96 ; X64-NEXT: vpbroadcastd %xmm0, %ymm0
97 ; X64-NEXT: retq
112 ; X64-LABEL: test_mm256_mask_broadcastd_epi32:
113 ; X64: # BB#0:
114 ; X64-NEXT: kmovw %edi, %k1
115 ; X64-NEXT: vpbroadcastd %xmm1, %ymm0 {%k1}
116 ; X64-NEXT: retq
134 ; X64-LABEL: test_mm256_maskz_broadcastd_epi32:
135 ; X64: # BB#0:
136 ; X64-NEXT: kmovw %edi, %k1
137 ; X64-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z}
138 ; X64-NEXT: retq
153 ; X64-LABEL: test_mm_broadcastq_epi64:
154 ; X64: # BB#0:
155 ; X64-NEXT: vpbroadcastq %xmm0, %xmm0
156 ; X64-NEXT: retq
176 ; X64-LABEL: test_mm_mask_broadcastq_epi64:
177 ; X64: # BB#0:
178 ; X64-NEXT: andb $3, %dil
179 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
180 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
181 ; X64-NEXT: kmovw %eax, %k1
182 ; X64-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
183 ; X64-NEXT: retq
206 ; X64-LABEL: test_mm_maskz_broadcastq_epi64:
207 ; X64: # BB#0:
208 ; X64-NEXT: andb $3, %dil
209 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
210 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
211 ; X64-NEXT: kmovw %eax, %k1
212 ; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
213 ; X64-NEXT: retq
227 ; X64-LABEL: test_mm256_broadcastq_epi64:
228 ; X64: # BB#0:
229 ; X64-NEXT: vpbroadcastq %xmm0, %ymm0
230 ; X64-NEXT: retq
250 ; X64-LABEL: test_mm256_mask_broadcastq_epi64:
251 ; X64: # BB#0:
252 ; X64-NEXT: andb $15, %dil
253 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
254 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
255 ; X64-NEXT: kmovw %eax, %k1
256 ; X64-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
257 ; X64-NEXT: retq
280 ; X64-LABEL: test_mm256_maskz_broadcastq_epi64:
281 ; X64: # BB#0:
282 ; X64-NEXT: andb $15, %dil
283 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
284 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
285 ; X64-NEXT: kmovw %eax, %k1
286 ; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
287 ; X64-NEXT: retq
301 ; X64-LABEL: test_mm_broadcastsd_pd:
302 ; X64: # BB#0:
303 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
304 ; X64-NEXT: retq
324 ; X64-LABEL: test_mm_mask_broadcastsd_pd:
325 ; X64: # BB#0:
326 ; X64-NEXT: andb $3, %dil
327 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
328 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
329 ; X64-NEXT: kmovw %eax, %k1
330 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
331 ; X64-NEXT: retq
354 ; X64-LABEL: test_mm_maskz_broadcastsd_pd:
355 ; X64: # BB#0:
356 ; X64-NEXT: andb $3, %dil
357 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
358 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
359 ; X64-NEXT: kmovw %eax, %k1
360 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
361 ; X64-NEXT: retq
375 ; X64-LABEL: test_mm256_broadcastsd_pd:
376 ; X64: # BB#0:
377 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
378 ; X64-NEXT: retq
398 ; X64-LABEL: test_mm256_mask_broadcastsd_pd:
399 ; X64: # BB#0:
400 ; X64-NEXT: andb $15, %dil
401 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
402 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
403 ; X64-NEXT: kmovw %eax, %k1
404 ; X64-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
405 ; X64-NEXT: retq
428 ; X64-LABEL: test_mm256_maskz_broadcastsd_pd:
429 ; X64: # BB#0:
430 ; X64-NEXT: andb $15, %dil
431 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
432 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
433 ; X64-NEXT: kmovw %eax, %k1
434 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
435 ; X64-NEXT: retq
449 ; X64-LABEL: test_mm_broadcastss_ps:
450 ; X64: # BB#0:
451 ; X64-NEXT: vbroadcastss %xmm0, %xmm0
452 ; X64-NEXT: retq
472 ; X64-LABEL: test_mm_mask_broadcastss_ps:
473 ; X64: # BB#0:
474 ; X64-NEXT: andb $15, %dil
475 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
476 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
477 ; X64-NEXT: kmovw %eax, %k1
478 ; X64-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
479 ; X64-NEXT: retq
502 ; X64-LABEL: test_mm_maskz_broadcastss_ps:
503 ; X64: # BB#0:
504 ; X64-NEXT: andb $15, %dil
505 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
506 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
507 ; X64-NEXT: kmovw %eax, %k1
508 ; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
509 ; X64-NEXT: retq
523 ; X64-LABEL: test_mm256_broadcastss_ps:
524 ; X64: # BB#0:
525 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
526 ; X64-NEXT: retq
539 ; X64-LABEL: test_mm256_mask_broadcastss_ps:
540 ; X64: # BB#0:
541 ; X64-NEXT: kmovw %edi, %k1
542 ; X64-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
543 ; X64-NEXT: retq
558 ; X64-LABEL: test_mm256_maskz_broadcastss_ps:
559 ; X64: # BB#0:
560 ; X64-NEXT: kmovw %edi, %k1
561 ; X64-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
562 ; X64-NEXT: retq
575 ; X64-LABEL: test_mm_movddup_pd:
576 ; X64: # BB#0:
577 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
578 ; X64-NEXT: retq
598 ; X64-LABEL: test_mm_mask_movddup_pd:
599 ; X64: # BB#0:
600 ; X64-NEXT: andb $3, %dil
601 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
602 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
603 ; X64-NEXT: kmovw %eax, %k1
604 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
605 ; X64-NEXT: retq
628 ; X64-LABEL: test_mm_maskz_movddup_pd:
629 ; X64: # BB#0:
630 ; X64-NEXT: andb $3, %dil
631 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
632 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
633 ; X64-NEXT: kmovw %eax, %k1
634 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
635 ; X64-NEXT: retq
649 ; X64-LABEL: test_mm256_movddup_pd:
650 ; X64: # BB#0:
651 ; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
652 ; X64-NEXT: retq
672 ; X64-LABEL: test_mm256_mask_movddup_pd:
673 ; X64: # BB#0:
674 ; X64-NEXT: andb $15, %dil
675 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
676 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
677 ; X64-NEXT: kmovw %eax, %k1
678 ; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
679 ; X64-NEXT: retq
702 ; X64-LABEL: test_mm256_maskz_movddup_pd:
703 ; X64: # BB#0:
704 ; X64-NEXT: andb $15, %dil
705 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
706 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
707 ; X64-NEXT: kmovw %eax, %k1
708 ; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
709 ; X64-NEXT: retq
723 ; X64-LABEL: test_mm_movehdup_ps:
724 ; X64: # BB#0:
725 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
726 ; X64-NEXT: retq
746 ; X64-LABEL: test_mm_mask_movehdup_ps:
747 ; X64: # BB#0:
748 ; X64-NEXT: andb $15, %dil
749 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
750 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
751 ; X64-NEXT: kmovw %eax, %k1
752 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
753 ; X64-NEXT: retq
776 ; X64-LABEL: test_mm_maskz_movehdup_ps:
777 ; X64: # BB#0:
778 ; X64-NEXT: andb $15, %dil
779 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
780 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
781 ; X64-NEXT: kmovw %eax, %k1
782 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
783 ; X64-NEXT: retq
797 ; X64-LABEL: test_mm256_movehdup_ps:
798 ; X64: # BB#0:
799 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
800 ; X64-NEXT: retq
813 ; X64-LABEL: test_mm256_mask_movehdup_ps:
814 ; X64: # BB#0:
815 ; X64-NEXT: kmovw %edi, %k1
816 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
817 ; X64-NEXT: retq
832 ; X64-LABEL: test_mm256_maskz_movehdup_ps:
833 ; X64: # BB#0:
834 ; X64-NEXT: kmovw %edi, %k1
835 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
836 ; X64-NEXT: retq
849 ; X64-LABEL: test_mm_moveldup_ps:
850 ; X64: # BB#0:
851 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
852 ; X64-NEXT: retq
872 ; X64-LABEL: test_mm_mask_moveldup_ps:
873 ; X64: # BB#0:
874 ; X64-NEXT: andb $15, %dil
875 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
876 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
877 ; X64-NEXT: kmovw %eax, %k1
878 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
879 ; X64-NEXT: retq
902 ; X64-LABEL: test_mm_maskz_moveldup_ps:
903 ; X64: # BB#0:
904 ; X64-NEXT: andb $15, %dil
905 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
906 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
907 ; X64-NEXT: kmovw %eax, %k1
908 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
909 ; X64-NEXT: retq
923 ; X64-LABEL: test_mm256_moveldup_ps:
924 ; X64: # BB#0:
925 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
926 ; X64-NEXT: retq
939 ; X64-LABEL: test_mm256_mask_moveldup_ps:
940 ; X64: # BB#0:
941 ; X64-NEXT: kmovw %edi, %k1
942 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
943 ; X64-NEXT: retq
958 ; X64-LABEL: test_mm256_maskz_moveldup_ps:
959 ; X64: # BB#0:
960 ; X64-NEXT: kmovw %edi, %k1
961 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
962 ; X64-NEXT: retq
975 ; X64-LABEL: test_mm256_permutex_epi64:
976 ; X64: # BB#0:
977 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
978 ; X64-NEXT: retq
998 ; X64-LABEL: test_mm256_mask_permutex_epi64:
999 ; X64: # BB#0:
1000 ; X64-NEXT: andb $15, %dil
1001 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1002 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1003 ; X64-NEXT: kmovw %eax, %k1
1004 ; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1005 ; X64-NEXT: retq
1028 ; X64-LABEL: test_mm256_maskz_permutex_epi64:
1029 ; X64: # BB#0:
1030 ; X64-NEXT: andb $15, %dil
1031 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1032 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1033 ; X64-NEXT: kmovw %eax, %k1
1034 ; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1035 ; X64-NEXT: retq
1049 ; X64-LABEL: test_mm256_permutex_pd:
1050 ; X64: # BB#0:
1051 ; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
1052 ; X64-NEXT: retq
1072 ; X64-LABEL: test_mm256_mask_permutex_pd:
1073 ; X64: # BB#0:
1074 ; X64-NEXT: andb $15, %dil
1075 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1076 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1077 ; X64-NEXT: kmovw %eax, %k1
1078 ; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1079 ; X64-NEXT: retq
1102 ; X64-LABEL: test_mm256_maskz_permutex_pd:
1103 ; X64: # BB#0:
1104 ; X64-NEXT: andb $15, %dil
1105 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1106 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1107 ; X64-NEXT: kmovw %eax, %k1
1108 ; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1109 ; X64-NEXT: retq
1123 ; X64-LABEL: test_mm_shuffle_pd:
1124 ; X64: # BB#0:
1125 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1126 ; X64-NEXT: retq
1146 ; X64-LABEL: test_mm_mask_shuffle_pd:
1147 ; X64: # BB#0:
1148 ; X64-NEXT: andb $3, %dil
1149 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1150 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1151 ; X64-NEXT: kmovw %eax, %k1
1152 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
1153 ; X64-NEXT: retq
1176 ; X64-LABEL: test_mm_maskz_shuffle_pd:
1177 ; X64: # BB#0:
1178 ; X64-NEXT: andb $3, %dil
1179 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1180 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1181 ; X64-NEXT: kmovw %eax, %k1
1182 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
1183 ; X64-NEXT: retq
1197 ; X64-LABEL: test_mm256_shuffle_pd:
1198 ; X64: # BB#0:
1199 ; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1200 ; X64-NEXT: retq
1220 ; X64-LABEL: test_mm256_mask_shuffle_pd:
1221 ; X64: # BB#0:
1222 ; X64-NEXT: andb $15, %dil
1223 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1224 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1225 ; X64-NEXT: kmovw %eax, %k1
1226 ; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
1227 ; X64-NEXT: retq
1250 ; X64-LABEL: test_mm256_maskz_shuffle_pd:
1251 ; X64: # BB#0:
1252 ; X64-NEXT: andb $15, %dil
1253 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1254 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1255 ; X64-NEXT: kmovw %eax, %k1
1256 ; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1257 ; X64-NEXT: retq
1271 ; X64-LABEL: test_mm_shuffle_ps:
1272 ; X64: # BB#0:
1273 ; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1274 ; X64-NEXT: retq
1294 ; X64-LABEL: test_mm_mask_shuffle_ps:
1295 ; X64: # BB#0:
1296 ; X64-NEXT: andb $15, %dil
1297 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1298 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1299 ; X64-NEXT: kmovw %eax, %k1
1300 ; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
1301 ; X64-NEXT: retq
1324 ; X64-LABEL: test_mm_maskz_shuffle_ps:
1325 ; X64: # BB#0:
1326 ; X64-NEXT: andb $15, %dil
1327 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1328 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1329 ; X64-NEXT: kmovw %eax, %k1
1330 ; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
1331 ; X64-NEXT: retq
1345 ; X64-LABEL: test_mm256_shuffle_ps:
1346 ; X64: # BB#0:
1347 ; X64-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1348 ; X64-NEXT: retq
1361 ; X64-LABEL: test_mm256_mask_shuffle_ps:
1362 ; X64: # BB#0:
1363 ; X64-NEXT: kmovw %edi, %k1
1364 ; X64-NEXT: vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
1365 ; X64-NEXT: retq
1380 ; X64-LABEL: test_mm256_maskz_shuffle_ps:
1381 ; X64: # BB#0:
1382 ; X64-NEXT: kmovw %edi, %k1
1383 ; X64-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1384 ; X64-NEXT: retq