; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1           | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx              | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=AVX

; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine into 'round' instructions because the behavior is different for out-of-range values.

; x86 truncating float->int conversion intrinsics called by the functions in
; this file: two packed forms (ps -> dq, pd -> dq) and four scalar forms
; (ss/sd source, i32/i64 result). The scalar forms take the whole vector as
; their operand.
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

; Scalar f32 -> i32 -> f32 round trip; the <4 x float> source is loaded from %p.
; The truncation is the x86 intrinsic @llvm.x86.sse.cvttss2si, not a generic
; fptosi. The checks expect a (v)cvttss2si that reads its operand directly from
; (%rdi), followed by (v)cvtsi2ss.
define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %eax
; SSE-NEXT:    cvtsi2ss %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i32 -> f32 round trip; the <4 x float> source is passed in a
; register. The truncation is the x86 intrinsic @llvm.x86.sse.cvttss2si.
; The SSE checks expect %xmm0 to be zeroed with xorps before cvtsi2ss writes it
; (presumably to break the dependency on the old %xmm0 value); the AVX checks
; expect %xmm1 as the pass-through source operand instead.
define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i64 -> f32 round trip; the <4 x float> source is loaded from %p.
; The truncation is the x86 intrinsic @llvm.x86.sse.cvttss2si64. The checks
; expect a (v)cvttss2si into %rax that reads its operand directly from (%rdi),
; followed by (v)cvtsi2ss from %rax.
define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %rax
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i64 -> f32 round trip; the <4 x float> source is passed in a
; register. The truncation is the x86 intrinsic @llvm.x86.sse.cvttss2si64.
; The SSE checks expect %xmm0 to be zeroed with xorps before cvtsi2ss writes it
; (presumably to break the dependency on the old %xmm0 value); the AVX checks
; expect %xmm1 as the pass-through source operand instead.
define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar f64 -> i32 -> f64 round trip; the <2 x double> source is loaded from
; %p. The truncation is the x86 intrinsic @llvm.x86.sse2.cvttsd2si. The checks
; expect a (v)cvttsd2si that reads its operand directly from (%rdi), followed
; by (v)cvtsi2sd.
define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %eax
; SSE-NEXT:    cvtsi2sd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i32 -> f64 round trip; the <2 x double> source is passed in a
; register. The truncation is the x86 intrinsic @llvm.x86.sse2.cvttsd2si.
; The SSE checks expect %xmm0 to be zeroed with xorps before cvtsi2sd writes it
; (presumably to break the dependency on the old %xmm0 value); the AVX checks
; expect %xmm1 as the pass-through source operand instead.
define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i64 -> f64 round trip; the <2 x double> source is loaded from
; %p. The truncation is the x86 intrinsic @llvm.x86.sse2.cvttsd2si64. The
; checks expect a (v)cvttsd2si into %rax that reads its operand directly from
; (%rdi), followed by (v)cvtsi2sd from %rax.
define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %rax
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2sd %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i64 -> f64 round trip; the <2 x double> source is passed in a
; register. The truncation is the x86 intrinsic @llvm.x86.sse2.cvttsd2si64.
; The SSE checks expect %xmm0 to be zeroed with xorps before cvtsi2sd writes it
; (presumably to break the dependency on the old %xmm0 value); the AVX checks
; expect %xmm1 as the pass-through source operand instead.
define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Packed <4 x float> -> <4 x i32> -> <4 x float> round trip; the source vector
; is loaded from %p. The truncation is the x86 intrinsic
; @llvm.x86.sse2.cvttps2dq. The checks expect a (v)cvttps2dq that reads its
; operand directly from (%rdi), followed by (v)cvtdq2ps.
define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed <4 x float> -> <4 x i32> -> <4 x float> round trip; the source vector
; is passed in a register. The truncation is the x86 intrinsic
; @llvm.x86.sse2.cvttps2dq. The checks expect (v)cvttps2dq followed by
; (v)cvtdq2ps, both operating in place on %xmm0.
define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed <2 x double> -> i32 -> <2 x double> round trip; the source vector is
; loaded from %p. The x86 intrinsic @llvm.x86.sse2.cvttpd2dq returns
; <4 x i32>; the shufflevector keeps only elements 0 and 1 (so %concat, despite
; its name, is the low half) before sitofp widens them back to double.
; The checks expect (v)cvttpd2dq reading directly from (%rdi), followed by
; (v)cvtdq2pd; the AVX memory form is printed as vcvttpd2dqx.
define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dqx (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Packed <2 x double> -> i32 -> <2 x double> round trip; the source vector is
; passed in a register. The x86 intrinsic @llvm.x86.sse2.cvttpd2dq returns
; <4 x i32>; the shufflevector keeps only elements 0 and 1 (so %concat, despite
; its name, is the low half) before sitofp widens them back to double.
; The checks expect (v)cvttpd2dq followed by (v)cvtdq2pd, both in place on
; %xmm0.
define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Attribute group referenced as #0 by the function definitions above; it sets
; the "no-signed-zeros-fp-math" string attribute to "true".
attributes #0 = { "no-signed-zeros-fp-math"="true" }