; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1  | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     | FileCheck %s --check-prefixes=AVX1

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    movl %eax, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm1, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    ucomisd %xmm1, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rax
; SSE2-NEXT:    movq %rax, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT:    subpd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    addsd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    cmpltps %xmm2, %xmm1
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm3
; SSE2-NEXT:    subps %xmm2, %xmm0
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    xorps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    andps %xmm1, %xmm3
; SSE2-NEXT:    andnps %xmm0, %xmm1
; SSE2-NEXT:    orps %xmm3, %xmm1
; SSE2-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; SSE2-NEXT:    andps %xmm1, %xmm0
; SSE2-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por {{.*}}(%rip), %xmm1
; SSE2-NEXT:    subps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    addps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm1
; SSE2-NEXT:    subsd %xmm2, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    ucomisd %xmm2, %xmm0
; SSE2-NEXT:    cmovaeq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    movapd %xmm0, %xmm3
; SSE2-NEXT:    subsd %xmm2, %xmm3
; SSE2-NEXT:    cvttsd2si %xmm3, %rax
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    ucomisd %xmm2, %xmm0
; SSE2-NEXT:    cmovaeq %rax, %rcx
; SSE2-NEXT:    movq %rcx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por {{.*}}(%rip), %xmm1
; SSE2-NEXT:    subpd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT:    subsd %xmm3, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm2, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm2
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT:    movapd %xmm2, %xmm4
; SSE2-NEXT:    subsd %xmm3, %xmm4
; SSE2-NEXT:    cvttsd2si %xmm4, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm2, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm2
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    movapd %xmm0, %xmm4
; SSE2-NEXT:    subsd %xmm3, %xmm4
; SSE2-NEXT:    cvttsd2si %xmm4, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    ucomisd %xmm3, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rax
; SSE2-NEXT:    movq %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm2
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm2
; SSE2-NEXT:    addpd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT:    retq
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f32_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0, it is ok to use roundss if it is available.

define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed32_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v4f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v2f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rsi
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rsi, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v4f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT:    retq
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The fold may be guarded to allow existing code to continue
; working based on its assumptions of float->int overflow.

define float @trunc_unsigned_f32_disable_via_attr(float %x) #1 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_attr:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movl %eax, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f32_disable_via_attr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %rax
; AVX1-NEXT:    movl %eax, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_attr(double %x) #1 {
; SSE-LABEL: trunc_signed_f64_disable_via_attr:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_disable_via_attr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "strict-float-cast-overflow"="false" }