; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

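; Truncate <2 x double> loaded from memory to <2 x float>: SSE uses cvtpd2ps
; and stores the low 64 bits with movlpd; AVX uses the vcvtpd2psx load form.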
define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    movlpd %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vmovlpd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

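; Truncate <4 x double> to <4 x float>: SSE converts each 128-bit half with
; cvtpd2ps and recombines them with unpcklpd; AVX converts all 256 bits with
; a single vcvtpd2psy.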
define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

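; Truncate <8 x double> to <8 x float>: SSE repeats the convert-and-unpack
; pattern for all four 128-bit chunks; AVX needs one vcvtpd2psy per 256-bit
; half.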
define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

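; Truncate from memory and zero-extend to <4 x float>. (v)cvtpd2ps already
; zeroes the upper 64 bits of the destination, so the zeroing shuffle folds
; away.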
define <4 x float> @fptrunc_frommem2_zext(<2 x double>* %ld) {
; X32-SSE-LABEL: fptrunc_frommem2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, <2 x double>* %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

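; Same zero-extending truncation with the source in a register; again a
; single (v)cvtpd2ps is enough, with no extra shuffle to clear the top lanes.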
define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X32-SSE-LABEL: fptrunc_fromreg2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromreg2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; FIXME: For exact truncations we should be able to fold this.
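; All four doubles (1.0, -2.0, 4.0, -0.0) are exactly representable as
; floats, so this fptrunc could be folded to a <4 x float> constant load;
; currently the conversions are still emitted.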
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy {{\.LCPI.*}}, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0  = insertelement <4 x double> undef, double 1.0, i32 0
  %1  = insertelement <4 x double> %0, double -2.0, i32 1
  %2  = insertelement <4 x double> %1, double +4.0, i32 2
  %3  = insertelement <4 x double> %2, double -0.0, i32 3
  %4  = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}