Lines Matching refs:X64
3 …riple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
13 ; X64-LABEL: test_mm_add_ps:
14 ; X64: # BB#0:
15 ; X64-NEXT: addps %xmm1, %xmm0
16 ; X64-NEXT: retq
27 ; X64-LABEL: test_mm_add_ss:
28 ; X64: # BB#0:
29 ; X64-NEXT: addss %xmm1, %xmm0
30 ; X64-NEXT: retq
72 ; X64-LABEL: test_mm_and_ps:
73 ; X64: # BB#0:
74 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
75 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
76 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
77 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
78 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
79 ; X64-NEXT: movq %rdx, %rsi
80 ; X64-NEXT: andl %eax, %edx
81 ; X64-NEXT: shrq $32, %rax
82 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
83 ; X64-NEXT: movq %rcx, %rdi
84 ; X64-NEXT: andl %r8d, %ecx
85 ; X64-NEXT: shrq $32, %r8
86 ; X64-NEXT: shrq $32, %rsi
87 ; X64-NEXT: shrq $32, %rdi
88 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
89 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
90 ; X64-NEXT: andl %r8d, %edi
91 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
92 ; X64-NEXT: andl %eax, %esi
93 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
94 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
95 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
96 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
97 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
98 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
99 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
100 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
101 ; X64-NEXT: retq
147 ; X64-LABEL: test_mm_andnot_ps:
148 ; X64: # BB#0:
149 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
150 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
151 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
152 ; X64-NEXT: movq %rcx, %rdx
153 ; X64-NEXT: shrq $32, %rdx
154 ; X64-NEXT: movq %rax, %rsi
155 ; X64-NEXT: shrq $32, %rsi
156 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
157 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
158 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
159 ; X64-NEXT: notl %eax
160 ; X64-NEXT: andl %edi, %eax
161 ; X64-NEXT: shrq $32, %rdi
162 ; X64-NEXT: notl %ecx
163 ; X64-NEXT: andl %r8d, %ecx
164 ; X64-NEXT: shrq $32, %r8
165 ; X64-NEXT: notl %esi
166 ; X64-NEXT: notl %edx
167 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
168 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
169 ; X64-NEXT: andl %r8d, %edx
170 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
171 ; X64-NEXT: andl %edi, %esi
172 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
173 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
174 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
175 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
176 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
177 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
178 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
179 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
180 ; X64-NEXT: retq
195 ; X64-LABEL: test_mm_cmpeq_ps:
196 ; X64: # BB#0:
197 ; X64-NEXT: cmpeqps %xmm1, %xmm0
198 ; X64-NEXT: retq
211 ; X64-LABEL: test_mm_cmpeq_ss:
212 ; X64: # BB#0:
213 ; X64-NEXT: cmpeqss %xmm1, %xmm0
214 ; X64-NEXT: retq
227 ; X64-LABEL: test_mm_cmpge_ps:
228 ; X64: # BB#0:
229 ; X64-NEXT: cmpleps %xmm0, %xmm1
230 ; X64-NEXT: movaps %xmm1, %xmm0
231 ; X64-NEXT: retq
245 ; X64-LABEL: test_mm_cmpge_ss:
246 ; X64: # BB#0:
247 ; X64-NEXT: cmpless %xmm0, %xmm1
248 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
249 ; X64-NEXT: retq
262 ; X64-LABEL: test_mm_cmpgt_ps:
263 ; X64: # BB#0:
264 ; X64-NEXT: cmpltps %xmm0, %xmm1
265 ; X64-NEXT: movaps %xmm1, %xmm0
266 ; X64-NEXT: retq
280 ; X64-LABEL: test_mm_cmpgt_ss:
281 ; X64: # BB#0:
282 ; X64-NEXT: cmpltss %xmm0, %xmm1
283 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
284 ; X64-NEXT: retq
296 ; X64-LABEL: test_mm_cmple_ps:
297 ; X64: # BB#0:
298 ; X64-NEXT: cmpleps %xmm1, %xmm0
299 ; X64-NEXT: retq
312 ; X64-LABEL: test_mm_cmple_ss:
313 ; X64: # BB#0:
314 ; X64-NEXT: cmpless %xmm1, %xmm0
315 ; X64-NEXT: retq
326 ; X64-LABEL: test_mm_cmplt_ps:
327 ; X64: # BB#0:
328 ; X64-NEXT: cmpltps %xmm1, %xmm0
329 ; X64-NEXT: retq
342 ; X64-LABEL: test_mm_cmplt_ss:
343 ; X64: # BB#0:
344 ; X64-NEXT: cmpltss %xmm1, %xmm0
345 ; X64-NEXT: retq
356 ; X64-LABEL: test_mm_cmpneq_ps:
357 ; X64: # BB#0:
358 ; X64-NEXT: cmpneqps %xmm1, %xmm0
359 ; X64-NEXT: retq
372 ; X64-LABEL: test_mm_cmpneq_ss:
373 ; X64: # BB#0:
374 ; X64-NEXT: cmpneqss %xmm1, %xmm0
375 ; X64-NEXT: retq
387 ; X64-LABEL: test_mm_cmpnge_ps:
388 ; X64: # BB#0:
389 ; X64-NEXT: cmpnleps %xmm0, %xmm1
390 ; X64-NEXT: movaps %xmm1, %xmm0
391 ; X64-NEXT: retq
405 ; X64-LABEL: test_mm_cmpnge_ss:
406 ; X64: # BB#0:
407 ; X64-NEXT: cmpnless %xmm0, %xmm1
408 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
409 ; X64-NEXT: retq
422 ; X64-LABEL: test_mm_cmpngt_ps:
423 ; X64: # BB#0:
424 ; X64-NEXT: cmpnltps %xmm0, %xmm1
425 ; X64-NEXT: movaps %xmm1, %xmm0
426 ; X64-NEXT: retq
440 ; X64-LABEL: test_mm_cmpngt_ss:
441 ; X64: # BB#0:
442 ; X64-NEXT: cmpnltss %xmm0, %xmm1
443 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
444 ; X64-NEXT: retq
456 ; X64-LABEL: test_mm_cmpnle_ps:
457 ; X64: # BB#0:
458 ; X64-NEXT: cmpnleps %xmm1, %xmm0
459 ; X64-NEXT: retq
472 ; X64-LABEL: test_mm_cmpnle_ss:
473 ; X64: # BB#0:
474 ; X64-NEXT: cmpnless %xmm1, %xmm0
475 ; X64-NEXT: retq
486 ; X64-LABEL: test_mm_cmpnlt_ps:
487 ; X64: # BB#0:
488 ; X64-NEXT: cmpnltps %xmm1, %xmm0
489 ; X64-NEXT: retq
502 ; X64-LABEL: test_mm_cmpnlt_ss:
503 ; X64: # BB#0:
504 ; X64-NEXT: cmpnltss %xmm1, %xmm0
505 ; X64-NEXT: retq
516 ; X64-LABEL: test_mm_cmpord_ps:
517 ; X64: # BB#0:
518 ; X64-NEXT: cmpordps %xmm1, %xmm0
519 ; X64-NEXT: retq
532 ; X64-LABEL: test_mm_cmpord_ss:
533 ; X64: # BB#0:
534 ; X64-NEXT: cmpordss %xmm1, %xmm0
535 ; X64-NEXT: retq
546 ; X64-LABEL: test_mm_cmpunord_ps:
547 ; X64: # BB#0:
548 ; X64-NEXT: cmpunordps %xmm1, %xmm0
549 ; X64-NEXT: retq
562 ; X64-LABEL: test_mm_cmpunord_ss:
563 ; X64: # BB#0:
564 ; X64-NEXT: cmpunordss %xmm1, %xmm0
565 ; X64-NEXT: retq
580 ; X64-LABEL: test_mm_comieq_ss:
581 ; X64: # BB#0:
582 ; X64-NEXT: comiss %xmm1, %xmm0
583 ; X64-NEXT: setnp %al
584 ; X64-NEXT: sete %cl
585 ; X64-NEXT: andb %al, %cl
586 ; X64-NEXT: movzbl %cl, %eax
587 ; X64-NEXT: retq
601 ; X64-LABEL: test_mm_comige_ss:
602 ; X64: # BB#0:
603 ; X64-NEXT: xorl %eax, %eax
604 ; X64-NEXT: comiss %xmm1, %xmm0
605 ; X64-NEXT: setae %al
606 ; X64-NEXT: retq
620 ; X64-LABEL: test_mm_comigt_ss:
621 ; X64: # BB#0:
622 ; X64-NEXT: xorl %eax, %eax
623 ; X64-NEXT: comiss %xmm1, %xmm0
624 ; X64-NEXT: seta %al
625 ; X64-NEXT: retq
639 ; X64-LABEL: test_mm_comile_ss:
640 ; X64: # BB#0:
641 ; X64-NEXT: xorl %eax, %eax
642 ; X64-NEXT: comiss %xmm0, %xmm1
643 ; X64-NEXT: setae %al
644 ; X64-NEXT: retq
658 ; X64-LABEL: test_mm_comilt_ss:
659 ; X64: # BB#0:
660 ; X64-NEXT: xorl %eax, %eax
661 ; X64-NEXT: comiss %xmm0, %xmm1
662 ; X64-NEXT: seta %al
663 ; X64-NEXT: retq
679 ; X64-LABEL: test_mm_comineq_ss:
680 ; X64: # BB#0:
681 ; X64-NEXT: comiss %xmm1, %xmm0
682 ; X64-NEXT: setp %al
683 ; X64-NEXT: setne %cl
684 ; X64-NEXT: orb %al, %cl
685 ; X64-NEXT: movzbl %cl, %eax
686 ; X64-NEXT: retq
698 ; X64-LABEL: test_mm_cvt_ss2si:
699 ; X64: # BB#0:
700 ; X64-NEXT: cvtss2si %xmm0, %eax
701 ; X64-NEXT: retq
715 ; X64-LABEL: test_mm_cvtsi32_ss:
716 ; X64: # BB#0:
717 ; X64-NEXT: cvtsi2ssl %edi, %xmm1
718 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
719 ; X64-NEXT: retq
734 ; X64-LABEL: test_mm_cvtss_f32:
735 ; X64: # BB#0:
736 ; X64-NEXT: retq
747 ; X64-LABEL: test_mm_cvtss_si32:
748 ; X64: # BB#0:
749 ; X64-NEXT: cvtss2si %xmm0, %eax
750 ; X64-NEXT: retq
761 ; X64-LABEL: test_mm_cvttss_si:
762 ; X64: # BB#0:
763 ; X64-NEXT: cvttss2si %xmm0, %eax
764 ; X64-NEXT: retq
776 ; X64-LABEL: test_mm_cvttss_si32:
777 ; X64: # BB#0:
778 ; X64-NEXT: cvttss2si %xmm0, %eax
779 ; X64-NEXT: retq
791 ; X64-LABEL: test_mm_div_ps:
792 ; X64: # BB#0:
793 ; X64-NEXT: divps %xmm1, %xmm0
794 ; X64-NEXT: retq
805 ; X64-LABEL: test_mm_div_ss:
806 ; X64: # BB#0:
807 ; X64-NEXT: divss %xmm1, %xmm0
808 ; X64-NEXT: retq
827 ; X64-LABEL: test_MM_GET_EXCEPTION_MASK:
828 ; X64: # BB#0:
829 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
830 ; X64-NEXT: stmxcsr (%rax)
831 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
832 ; X64-NEXT: andl $8064, %eax # imm = 0x1F80
833 ; X64-NEXT: retq
854 ; X64-LABEL: test_MM_GET_EXCEPTION_STATE:
855 ; X64: # BB#0:
856 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
857 ; X64-NEXT: stmxcsr (%rax)
858 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
859 ; X64-NEXT: andl $63, %eax
860 ; X64-NEXT: retq
880 ; X64-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
881 ; X64: # BB#0:
882 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
883 ; X64-NEXT: stmxcsr (%rax)
884 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
885 ; X64-NEXT: andl $32768, %eax # imm = 0x8000
886 ; X64-NEXT: retq
906 ; X64-LABEL: test_MM_GET_ROUNDING_MODE:
907 ; X64: # BB#0:
908 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
909 ; X64-NEXT: stmxcsr (%rax)
910 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
911 ; X64-NEXT: andl $24576, %eax # imm = 0x6000
912 ; X64-NEXT: retq
931 ; X64-LABEL: test_mm_getcsr:
932 ; X64: # BB#0:
933 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
934 ; X64-NEXT: stmxcsr (%rax)
935 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
936 ; X64-NEXT: retq
951 ; X64-LABEL: test_mm_load_ps:
952 ; X64: # BB#0:
953 ; X64-NEXT: movaps (%rdi), %xmm0
954 ; X64-NEXT: retq
968 ; X64-LABEL: test_mm_load_ps1:
969 ; X64: # BB#0:
970 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
971 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
972 ; X64-NEXT: retq
988 ; X64-LABEL: test_mm_load_ss:
989 ; X64: # BB#0:
990 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
991 ; X64-NEXT: retq
1008 ; X64-LABEL: test_mm_load1_ps:
1009 ; X64: # BB#0:
1010 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1011 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1012 ; X64-NEXT: retq
1031 ; X64-LABEL: test_mm_loadh_pi:
1032 ; X64: # BB#0:
1033 ; X64-NEXT: movq (%rdi), %rax
1034 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
1035 ; X64-NEXT: shrq $32, %rax
1036 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
1037 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1038 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1039 ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1040 ; X64-NEXT: xorps %xmm2, %xmm2
1041 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1042 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1043 ; X64-NEXT: retq
1062 ; X64-LABEL: test_mm_loadl_pi:
1063 ; X64: # BB#0:
1064 ; X64-NEXT: movq (%rdi), %rax
1065 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
1066 ; X64-NEXT: shrq $32, %rax
1067 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
1068 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1069 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1070 ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1071 ; X64-NEXT: xorps %xmm2, %xmm2
1072 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1073 ; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
1074 ; X64-NEXT: movaps %xmm1, %xmm0
1075 ; X64-NEXT: retq
1091 ; X64-LABEL: test_mm_loadr_ps:
1092 ; X64: # BB#0:
1093 ; X64-NEXT: movaps (%rdi), %xmm0
1094 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1095 ; X64-NEXT: retq
1109 ; X64-LABEL: test_mm_loadu_ps:
1110 ; X64: # BB#0:
1111 ; X64-NEXT: movups (%rdi), %xmm0
1112 ; X64-NEXT: retq
1124 ; X64-LABEL: test_mm_max_ps:
1125 ; X64: # BB#0:
1126 ; X64-NEXT: maxps %xmm1, %xmm0
1127 ; X64-NEXT: retq
1139 ; X64-LABEL: test_mm_max_ss:
1140 ; X64: # BB#0:
1141 ; X64-NEXT: maxss %xmm1, %xmm0
1142 ; X64-NEXT: retq
1154 ; X64-LABEL: test_mm_min_ps:
1155 ; X64: # BB#0:
1156 ; X64-NEXT: minps %xmm1, %xmm0
1157 ; X64-NEXT: retq
1169 ; X64-LABEL: test_mm_min_ss:
1170 ; X64: # BB#0:
1171 ; X64-NEXT: minss %xmm1, %xmm0
1172 ; X64-NEXT: retq
1184 ; X64-LABEL: test_mm_move_ss:
1185 ; X64: # BB#0:
1186 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1187 ; X64-NEXT: retq
1198 ; X64-LABEL: test_mm_movehl_ps:
1199 ; X64: # BB#0:
1200 ; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1201 ; X64-NEXT: retq
1212 ; X64-LABEL: test_mm_movelh_ps:
1213 ; X64: # BB#0:
1214 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1215 ; X64-NEXT: retq
1226 ; X64-LABEL: test_mm_movemask_ps:
1227 ; X64: # BB#0:
1228 ; X64-NEXT: movmskps %xmm0, %eax
1229 ; X64-NEXT: retq
1241 ; X64-LABEL: test_mm_mul_ps:
1242 ; X64: # BB#0:
1243 ; X64-NEXT: mulps %xmm1, %xmm0
1244 ; X64-NEXT: retq
1255 ; X64-LABEL: test_mm_mul_ss:
1256 ; X64: # BB#0:
1257 ; X64-NEXT: mulss %xmm1, %xmm0
1258 ; X64-NEXT: retq
1300 ; X64-LABEL: test_mm_or_ps:
1301 ; X64: # BB#0:
1302 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1303 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
1304 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
1305 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1306 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
1307 ; X64-NEXT: movq %rdx, %rsi
1308 ; X64-NEXT: orl %eax, %edx
1309 ; X64-NEXT: shrq $32, %rax
1310 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1311 ; X64-NEXT: movq %rcx, %rdi
1312 ; X64-NEXT: orl %r8d, %ecx
1313 ; X64-NEXT: shrq $32, %r8
1314 ; X64-NEXT: shrq $32, %rsi
1315 ; X64-NEXT: shrq $32, %rdi
1316 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
1317 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
1318 ; X64-NEXT: orl %r8d, %edi
1319 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
1320 ; X64-NEXT: orl %eax, %esi
1321 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
1322 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1323 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1324 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1325 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1326 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1327 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1328 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1329 ; X64-NEXT: retq
1344 ; X64-LABEL: test_mm_prefetch:
1345 ; X64: # BB#0:
1346 ; X64-NEXT: prefetchnta (%rdi)
1347 ; X64-NEXT: retq
1359 ; X64-LABEL: test_mm_rcp_ps:
1360 ; X64: # BB#0:
1361 ; X64-NEXT: rcpps %xmm0, %xmm0
1362 ; X64-NEXT: retq
1374 ; X64-LABEL: test_mm_rcp_ss:
1375 ; X64: # BB#0:
1376 ; X64-NEXT: rcpss %xmm0, %xmm0
1377 ; X64-NEXT: retq
1397 ; X64-LABEL: test_mm_rsqrt_ps:
1398 ; X64: # BB#0:
1399 ; X64-NEXT: rsqrtps %xmm0, %xmm0
1400 ; X64-NEXT: retq
1412 ; X64-LABEL: test_mm_rsqrt_ss:
1413 ; X64: # BB#0:
1414 ; X64-NEXT: rsqrtss %xmm0, %xmm0
1415 ; X64-NEXT: retq
1444 ; X64-LABEL: test_MM_SET_EXCEPTION_MASK:
1445 ; X64: # BB#0:
1446 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
1447 ; X64-NEXT: stmxcsr (%rax)
1448 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
1449 ; X64-NEXT: andl $-8065, %ecx # imm = 0xE07F
1450 ; X64-NEXT: orl %edi, %ecx
1451 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
1452 ; X64-NEXT: ldmxcsr (%rax)
1453 ; X64-NEXT: retq
1481 ; X64-LABEL: test_MM_SET_EXCEPTION_STATE:
1482 ; X64: # BB#0:
1483 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
1484 ; X64-NEXT: stmxcsr (%rax)
1485 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
1486 ; X64-NEXT: andl $-64, %ecx
1487 ; X64-NEXT: orl %edi, %ecx
1488 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
1489 ; X64-NEXT: ldmxcsr (%rax)
1490 ; X64-NEXT: retq
1517 ; X64-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
1518 ; X64: # BB#0:
1519 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
1520 ; X64-NEXT: stmxcsr (%rax)
1521 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
1522 ; X64-NEXT: andl $-32769, %ecx # imm = 0xFFFF7FFF
1523 ; X64-NEXT: orl %edi, %ecx
1524 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
1525 ; X64-NEXT: ldmxcsr (%rax)
1526 ; X64-NEXT: retq
1550 ; X64-LABEL: test_mm_set_ps:
1551 ; X64: # BB#0:
1552 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1553 ; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1554 ; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1555 ; X64-NEXT: movaps %xmm3, %xmm0
1556 ; X64-NEXT: retq
1571 ; X64-LABEL: test_mm_set_ps1:
1572 ; X64: # BB#0:
1573 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1574 ; X64-NEXT: retq
1597 ; X64-LABEL: test_MM_SET_ROUNDING_MODE:
1598 ; X64: # BB#0:
1599 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
1600 ; X64-NEXT: stmxcsr (%rax)
1601 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
1602 ; X64-NEXT: andl $-24577, %ecx # imm = 0x9FFF
1603 ; X64-NEXT: orl %edi, %ecx
1604 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
1605 ; X64-NEXT: ldmxcsr (%rax)
1606 ; X64-NEXT: retq
1626 ; X64-LABEL: test_mm_set_ss:
1627 ; X64: # BB#0:
1628 ; X64-NEXT: xorps %xmm1, %xmm1
1629 ; X64-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1630 ; X64-NEXT: movaps %xmm1, %xmm0
1631 ; X64-NEXT: retq
1646 ; X64-LABEL: test_mm_set1_ps:
1647 ; X64: # BB#0:
1648 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1649 ; X64-NEXT: retq
1668 ; X64-LABEL: test_mm_setcsr:
1669 ; X64: # BB#0:
1670 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
1671 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
1672 ; X64-NEXT: ldmxcsr (%rax)
1673 ; X64-NEXT: retq
1693 ; X64-LABEL: test_mm_setr_ps:
1694 ; X64: # BB#0:
1695 ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1696 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1697 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1698 ; X64-NEXT: retq
1712 ; X64-LABEL: test_mm_setzero_ps:
1713 ; X64: # BB#0:
1714 ; X64-NEXT: xorps %xmm0, %xmm0
1715 ; X64-NEXT: retq
1725 ; X64-LABEL: test_mm_sfence:
1726 ; X64: # BB#0:
1727 ; X64-NEXT: sfence
1728 ; X64-NEXT: retq
1740 ; X64-LABEL: test_mm_shuffle_ps:
1741 ; X64: # BB#0:
1742 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1743 ; X64-NEXT: retq
1754 ; X64-LABEL: test_mm_sqrt_ps:
1755 ; X64: # BB#0:
1756 ; X64-NEXT: sqrtps %xmm0, %xmm0
1757 ; X64-NEXT: retq
1769 ; X64-LABEL: test_mm_sqrt_ss:
1770 ; X64: # BB#0:
1771 ; X64-NEXT: sqrtss %xmm0, %xmm0
1772 ; X64-NEXT: retq
1793 ; X64-LABEL: test_mm_store_ps:
1794 ; X64: # BB#0:
1795 ; X64-NEXT: movaps %xmm0, (%rdi)
1796 ; X64-NEXT: retq
1810 ; X64-LABEL: test_mm_store_ps1:
1811 ; X64: # BB#0:
1812 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1813 ; X64-NEXT: movaps %xmm0, (%rdi)
1814 ; X64-NEXT: retq
1828 ; X64-LABEL: test_mm_store_ss:
1829 ; X64: # BB#0:
1830 ; X64-NEXT: movss %xmm0, (%rdi)
1831 ; X64-NEXT: retq
1845 ; X64-LABEL: test_mm_store1_ps:
1846 ; X64: # BB#0:
1847 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1848 ; X64-NEXT: movaps %xmm0, (%rdi)
1849 ; X64-NEXT: retq
1873 ; X64-LABEL: test_mm_storeh_ps:
1874 ; X64: # BB#0:
1875 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1876 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
1877 ; X64-NEXT: movq %rax, (%rdi)
1878 ; X64-NEXT: retq
1903 ; X64-LABEL: test_mm_storel_ps:
1904 ; X64: # BB#0:
1905 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1906 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
1907 ; X64-NEXT: movq %rax, (%rdi)
1908 ; X64-NEXT: retq
1924 ; X64-LABEL: test_mm_storer_ps:
1925 ; X64: # BB#0:
1926 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1927 ; X64-NEXT: movaps %xmm0, (%rdi)
1928 ; X64-NEXT: retq
1942 ; X64-LABEL: test_mm_storeu_ps:
1943 ; X64: # BB#0:
1944 ; X64-NEXT: movups %xmm0, (%rdi)
1945 ; X64-NEXT: retq
1958 ; X64-LABEL: test_mm_stream_ps:
1959 ; X64: # BB#0:
1960 ; X64-NEXT: movntps %xmm0, (%rdi)
1961 ; X64-NEXT: retq
1973 ; X64-LABEL: test_mm_sub_ps:
1974 ; X64: # BB#0:
1975 ; X64-NEXT: subps %xmm1, %xmm0
1976 ; X64-NEXT: retq
1987 ; X64-LABEL: test_mm_sub_ss:
1988 ; X64: # BB#0:
1989 ; X64-NEXT: subss %xmm1, %xmm0
1990 ; X64-NEXT: retq
2029 ; X64-LABEL: test_MM_TRANSPOSE4_PS:
2030 ; X64: # BB#0:
2031 ; X64-NEXT: movaps (%rdi), %xmm0
2032 ; X64-NEXT: movaps (%rsi), %xmm1
2033 ; X64-NEXT: movaps (%rdx), %xmm2
2034 ; X64-NEXT: movaps (%rcx), %xmm3
2035 ; X64-NEXT: movaps %xmm0, %xmm4
2036 ; X64-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
2037 ; X64-NEXT: movaps %xmm2, %xmm5
2038 ; X64-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
2039 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2040 ; X64-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
2041 ; X64-NEXT: movaps %xmm4, %xmm1
2042 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
2043 ; X64-NEXT: movhlps {{.*#+}} xmm5 = xmm4[1],xmm5[1]
2044 ; X64-NEXT: movaps %xmm0, %xmm3
2045 ; X64-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm2[0]
2046 ; X64-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
2047 ; X64-NEXT: movaps %xmm1, (%rdi)
2048 ; X64-NEXT: movaps %xmm5, (%rsi)
2049 ; X64-NEXT: movaps %xmm3, (%rdx)
2050 ; X64-NEXT: movaps %xmm2, (%rcx)
2051 ; X64-NEXT: retq
2081 ; X64-LABEL: test_mm_ucomieq_ss:
2082 ; X64: # BB#0:
2083 ; X64-NEXT: ucomiss %xmm1, %xmm0
2084 ; X64-NEXT: setnp %al
2085 ; X64-NEXT: sete %cl
2086 ; X64-NEXT: andb %al, %cl
2087 ; X64-NEXT: movzbl %cl, %eax
2088 ; X64-NEXT: retq
2102 ; X64-LABEL: test_mm_ucomige_ss:
2103 ; X64: # BB#0:
2104 ; X64-NEXT: xorl %eax, %eax
2105 ; X64-NEXT: ucomiss %xmm1, %xmm0
2106 ; X64-NEXT: setae %al
2107 ; X64-NEXT: retq
2121 ; X64-LABEL: test_mm_ucomigt_ss:
2122 ; X64: # BB#0:
2123 ; X64-NEXT: xorl %eax, %eax
2124 ; X64-NEXT: ucomiss %xmm1, %xmm0
2125 ; X64-NEXT: seta %al
2126 ; X64-NEXT: retq
2140 ; X64-LABEL: test_mm_ucomile_ss:
2141 ; X64: # BB#0:
2142 ; X64-NEXT: xorl %eax, %eax
2143 ; X64-NEXT: ucomiss %xmm0, %xmm1
2144 ; X64-NEXT: setae %al
2145 ; X64-NEXT: retq
2159 ; X64-LABEL: test_mm_ucomilt_ss:
2160 ; X64: # BB#0:
2161 ; X64-NEXT: xorl %eax, %eax
2162 ; X64-NEXT: ucomiss %xmm0, %xmm1
2163 ; X64-NEXT: seta %al
2164 ; X64-NEXT: retq
2180 ; X64-LABEL: test_mm_ucomineq_ss:
2181 ; X64: # BB#0:
2182 ; X64-NEXT: ucomiss %xmm1, %xmm0
2183 ; X64-NEXT: setp %al
2184 ; X64-NEXT: setne %cl
2185 ; X64-NEXT: orb %al, %cl
2186 ; X64-NEXT: movzbl %cl, %eax
2187 ; X64-NEXT: retq
2198 ; X64-LABEL: test_mm_undefined_ps:
2199 ; X64: # BB#0:
2200 ; X64-NEXT: retq
2210 ; X64-LABEL: test_mm_unpackhi_ps:
2211 ; X64: # BB#0:
2212 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2213 ; X64-NEXT: retq
2224 ; X64-LABEL: test_mm_unpacklo_ps:
2225 ; X64: # BB#0:
2226 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2227 ; X64-NEXT: retq
2266 ; X64-LABEL: test_mm_xor_ps:
2267 ; X64: # BB#0:
2268 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
2269 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
2270 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
2271 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
2272 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
2273 ; X64-NEXT: movq %rdx, %rsi
2274 ; X64-NEXT: xorl %eax, %edx
2275 ; X64-NEXT: shrq $32, %rax
2276 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
2277 ; X64-NEXT: movq %rcx, %rdi
2278 ; X64-NEXT: xorl %r8d, %ecx
2279 ; X64-NEXT: shrq $32, %r8
2280 ; X64-NEXT: shrq $32, %rsi
2281 ; X64-NEXT: shrq $32, %rdi
2282 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
2283 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
2284 ; X64-NEXT: xorl %r8d, %edi
2285 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
2286 ; X64-NEXT: xorl %eax, %esi
2287 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
2288 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2289 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2290 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2291 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2292 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2293 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2294 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2295 ; X64-NEXT: retq