; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2      | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1    | FileCheck %s --check-prefixes=SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx       | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2      | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f   | FileCheck %s --check-prefixes=AVX,AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl  | FileCheck %s --check-prefixes=AVX,AVX512VL

9define <4 x float> @sitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
10; SSE-LABEL: sitofp_v4i32_v4f32:
11; SSE:       # %bb.0:
12; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
13; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
14; SSE-NEXT:    retq
15;
16; AVX-LABEL: sitofp_v4i32_v4f32:
17; AVX:       # %bb.0:
18; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
19; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
20; AVX-NEXT:    retq
21  %s0 = sitofp <2 x i32> %x to <2 x float>
22  %s1 = sitofp <2 x i32> %y to <2 x float>
23  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
24  ret <4 x float> %r
25}
26
27define <4 x float> @uitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
28; SSE2-LABEL: uitofp_v4i32_v4f32:
29; SSE2:       # %bb.0:
30; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
31; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
32; SSE2-NEXT:    pand %xmm0, %xmm1
33; SSE2-NEXT:    por {{.*}}(%rip), %xmm1
34; SSE2-NEXT:    psrld $16, %xmm0
35; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
36; SSE2-NEXT:    subps {{.*}}(%rip), %xmm0
37; SSE2-NEXT:    addps %xmm1, %xmm0
38; SSE2-NEXT:    retq
39;
40; SSE4-LABEL: uitofp_v4i32_v4f32:
41; SSE4:       # %bb.0:
42; SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
43; SSE4-NEXT:    movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
44; SSE4-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
45; SSE4-NEXT:    psrld $16, %xmm0
46; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
47; SSE4-NEXT:    subps {{.*}}(%rip), %xmm0
48; SSE4-NEXT:    addps %xmm1, %xmm0
49; SSE4-NEXT:    retq
50;
51; AVX1-LABEL: uitofp_v4i32_v4f32:
52; AVX1:       # %bb.0:
53; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
54; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
55; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
56; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
57; AVX1-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
58; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
59; AVX1-NEXT:    retq
60;
61; AVX2-LABEL: uitofp_v4i32_v4f32:
62; AVX2:       # %bb.0:
63; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
64; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200]
65; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
66; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
67; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928]
68; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
69; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
70; AVX2-NEXT:    vsubps %xmm2, %xmm0, %xmm0
71; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
72; AVX2-NEXT:    retq
73;
74; AVX512F-LABEL: uitofp_v4i32_v4f32:
75; AVX512F:       # %bb.0:
76; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
77; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
78; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
79; AVX512F-NEXT:    vzeroupper
80; AVX512F-NEXT:    retq
81;
82; AVX512VL-LABEL: uitofp_v4i32_v4f32:
83; AVX512VL:       # %bb.0:
84; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
85; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
86; AVX512VL-NEXT:    retq
87  %s0 = uitofp <2 x i32> %x to <2 x float>
88  %s1 = uitofp <2 x i32> %y to <2 x float>
89  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
90  ret <4 x float> %r
91}
92
93define <4 x i32> @fptosi_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
94; SSE-LABEL: fptosi_v4f32_v4i32:
95; SSE:       # %bb.0:
96; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
97; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
98; SSE-NEXT:    retq
99;
100; AVX-LABEL: fptosi_v4f32_v4i32:
101; AVX:       # %bb.0:
102; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
104; AVX-NEXT:    retq
105  %s0 = fptosi <2 x float> %x to <2 x i32>
106  %s1 = fptosi <2 x float> %y to <2 x i32>
107  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
108  ret <4 x i32> %r
109}
110
111define <4 x i32> @fptoui_v4f32_v4i32(<2 x float> %x, <2 x float> %y) {
112; SSE2-LABEL: fptoui_v4f32_v4i32:
113; SSE2:       # %bb.0:
114; SSE2-NEXT:    movaps {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
115; SSE2-NEXT:    movaps %xmm0, %xmm2
116; SSE2-NEXT:    cmpltps %xmm3, %xmm2
117; SSE2-NEXT:    cvttps2dq %xmm0, %xmm4
118; SSE2-NEXT:    subps %xmm3, %xmm0
119; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
120; SSE2-NEXT:    movaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
121; SSE2-NEXT:    xorps %xmm5, %xmm0
122; SSE2-NEXT:    andps %xmm2, %xmm4
123; SSE2-NEXT:    andnps %xmm0, %xmm2
124; SSE2-NEXT:    orps %xmm4, %xmm2
125; SSE2-NEXT:    movaps %xmm1, %xmm0
126; SSE2-NEXT:    cmpltps %xmm3, %xmm0
127; SSE2-NEXT:    cvttps2dq %xmm1, %xmm4
128; SSE2-NEXT:    subps %xmm3, %xmm1
129; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
130; SSE2-NEXT:    xorps %xmm5, %xmm1
131; SSE2-NEXT:    andps %xmm0, %xmm4
132; SSE2-NEXT:    andnps %xmm1, %xmm0
133; SSE2-NEXT:    orps %xmm4, %xmm0
134; SSE2-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
135; SSE2-NEXT:    movaps %xmm2, %xmm0
136; SSE2-NEXT:    retq
137;
138; SSE4-LABEL: fptoui_v4f32_v4i32:
139; SSE4:       # %bb.0:
140; SSE4-NEXT:    movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
141; SSE4-NEXT:    movaps %xmm0, %xmm2
142; SSE4-NEXT:    cmpltps %xmm4, %xmm2
143; SSE4-NEXT:    cvttps2dq %xmm0, %xmm5
144; SSE4-NEXT:    subps %xmm4, %xmm0
145; SSE4-NEXT:    cvttps2dq %xmm0, %xmm3
146; SSE4-NEXT:    movaps {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
147; SSE4-NEXT:    xorps %xmm6, %xmm3
148; SSE4-NEXT:    movaps %xmm2, %xmm0
149; SSE4-NEXT:    blendvps %xmm0, %xmm5, %xmm3
150; SSE4-NEXT:    movaps %xmm1, %xmm0
151; SSE4-NEXT:    cmpltps %xmm4, %xmm0
152; SSE4-NEXT:    cvttps2dq %xmm1, %xmm2
153; SSE4-NEXT:    subps %xmm4, %xmm1
154; SSE4-NEXT:    cvttps2dq %xmm1, %xmm1
155; SSE4-NEXT:    xorps %xmm6, %xmm1
156; SSE4-NEXT:    blendvps %xmm0, %xmm2, %xmm1
157; SSE4-NEXT:    movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0]
158; SSE4-NEXT:    movaps %xmm3, %xmm0
159; SSE4-NEXT:    retq
160;
161; AVX1-LABEL: fptoui_v4f32_v4i32:
162; AVX1:       # %bb.0:
163; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
164; AVX1-NEXT:    vcmpltps %xmm2, %xmm0, %xmm3
165; AVX1-NEXT:    vsubps %xmm2, %xmm0, %xmm4
166; AVX1-NEXT:    vcvttps2dq %xmm4, %xmm4
167; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
168; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
169; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
170; AVX1-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm0
171; AVX1-NEXT:    vcmpltps %xmm2, %xmm1, %xmm3
172; AVX1-NEXT:    vsubps %xmm2, %xmm1, %xmm2
173; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
174; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm2
175; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
176; AVX1-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm1
177; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
178; AVX1-NEXT:    retq
179;
180; AVX2-LABEL: fptoui_v4f32_v4i32:
181; AVX2:       # %bb.0:
182; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
183; AVX2-NEXT:    vcmpltps %xmm2, %xmm0, %xmm3
184; AVX2-NEXT:    vsubps %xmm2, %xmm0, %xmm4
185; AVX2-NEXT:    vcvttps2dq %xmm4, %xmm4
186; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
187; AVX2-NEXT:    vxorps %xmm5, %xmm4, %xmm4
188; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
189; AVX2-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm0
190; AVX2-NEXT:    vcmpltps %xmm2, %xmm1, %xmm3
191; AVX2-NEXT:    vsubps %xmm2, %xmm1, %xmm2
192; AVX2-NEXT:    vcvttps2dq %xmm2, %xmm2
193; AVX2-NEXT:    vxorps %xmm5, %xmm2, %xmm2
194; AVX2-NEXT:    vcvttps2dq %xmm1, %xmm1
195; AVX2-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm1
196; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
197; AVX2-NEXT:    retq
198;
199; AVX512F-LABEL: fptoui_v4f32_v4i32:
200; AVX512F:       # %bb.0:
201; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
202; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
203; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
204; AVX512F-NEXT:    vzeroupper
205; AVX512F-NEXT:    retq
206;
207; AVX512VL-LABEL: fptoui_v4f32_v4i32:
208; AVX512VL:       # %bb.0:
209; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
210; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
211; AVX512VL-NEXT:    retq
212  %s0 = fptoui <2 x float> %x to <2 x i32>
213  %s1 = fptoui <2 x float> %y to <2 x i32>
214  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
215  ret <4 x i32> %r
216}
217
218define <4 x double> @sitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
219; SSE-LABEL: sitofp_v4i32_v4f64:
220; SSE:       # %bb.0:
221; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
222; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
223; SSE-NEXT:    retq
224;
225; AVX-LABEL: sitofp_v4i32_v4f64:
226; AVX:       # %bb.0:
227; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
228; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
229; AVX-NEXT:    retq
230  %s0 = sitofp <2 x i32> %x to <2 x double>
231  %s1 = sitofp <2 x i32> %y to <2 x double>
232  %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
233  ret <4 x double> %r
234}
235
236define <4 x double> @uitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) {
237; SSE2-LABEL: uitofp_v4i32_v4f64:
238; SSE2:       # %bb.0:
239; SSE2-NEXT:    xorpd %xmm2, %xmm2
240; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
241; SSE2-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
242; SSE2-NEXT:    orpd %xmm3, %xmm0
243; SSE2-NEXT:    subpd %xmm3, %xmm0
244; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
245; SSE2-NEXT:    orpd %xmm3, %xmm1
246; SSE2-NEXT:    subpd %xmm3, %xmm1
247; SSE2-NEXT:    retq
248;
249; SSE4-LABEL: uitofp_v4i32_v4f64:
250; SSE4:       # %bb.0:
251; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
252; SSE4-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
253; SSE4-NEXT:    por %xmm2, %xmm0
254; SSE4-NEXT:    subpd %xmm2, %xmm0
255; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
256; SSE4-NEXT:    por %xmm2, %xmm1
257; SSE4-NEXT:    subpd %xmm2, %xmm1
258; SSE4-NEXT:    retq
259;
260; AVX1-LABEL: uitofp_v4i32_v4f64:
261; AVX1:       # %bb.0:
262; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
263; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
264; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
265; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
266; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
267; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
268; AVX1-NEXT:    retq
269;
270; AVX2-LABEL: uitofp_v4i32_v4f64:
271; AVX2:       # %bb.0:
272; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
273; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
274; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
275; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
276; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
277; AVX2-NEXT:    retq
278;
279; AVX512F-LABEL: uitofp_v4i32_v4f64:
280; AVX512F:       # %bb.0:
281; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
282; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
283; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
284; AVX512F-NEXT:    retq
285;
286; AVX512VL-LABEL: uitofp_v4i32_v4f64:
287; AVX512VL:       # %bb.0:
288; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
289; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
290; AVX512VL-NEXT:    retq
291  %s0 = uitofp <2 x i32> %x to <2 x double>
292  %s1 = uitofp <2 x i32> %y to <2 x double>
293  %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
294  ret <4 x double> %r
295}
296
297define <4 x i32> @fptosi_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
298; SSE-LABEL: fptosi_v4f64_v4i32:
299; SSE:       # %bb.0:
300; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
301; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
302; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
303; SSE-NEXT:    retq
304;
305; AVX-LABEL: fptosi_v4f64_v4i32:
306; AVX:       # %bb.0:
307; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
308; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
309; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
310; AVX-NEXT:    vzeroupper
311; AVX-NEXT:    retq
312  %s0 = fptosi <2 x double> %x to <2 x i32>
313  %s1 = fptosi <2 x double> %y to <2 x i32>
314  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
315  ret <4 x i32> %r
316}
317
318define <4 x i32> @fptoui_v4f64_v4i32(<2 x double> %x, <2 x double> %y) {
319; SSE2-LABEL: fptoui_v4f64_v4i32:
320; SSE2:       # %bb.0:
321; SSE2-NEXT:    cvttsd2si %xmm0, %rax
322; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
323; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
324; SSE2-NEXT:    cvttsd2si %xmm1, %rdx
325; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
326; SSE2-NEXT:    cvttsd2si %xmm1, %rsi
327; SSE2-NEXT:    movd %edx, %xmm1
328; SSE2-NEXT:    movd %esi, %xmm0
329; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
330; SSE2-NEXT:    movd %eax, %xmm0
331; SSE2-NEXT:    movd %ecx, %xmm2
332; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
333; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
334; SSE2-NEXT:    retq
335;
336; SSE4-LABEL: fptoui_v4f64_v4i32:
337; SSE4:       # %bb.0:
338; SSE4-NEXT:    cvttsd2si %xmm0, %rax
339; SSE4-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
340; SSE4-NEXT:    cvttsd2si %xmm0, %rcx
341; SSE4-NEXT:    cvttsd2si %xmm1, %rdx
342; SSE4-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
343; SSE4-NEXT:    cvttsd2si %xmm1, %rsi
344; SSE4-NEXT:    movd %eax, %xmm0
345; SSE4-NEXT:    pinsrd $1, %ecx, %xmm0
346; SSE4-NEXT:    pinsrd $2, %edx, %xmm0
347; SSE4-NEXT:    pinsrd $3, %esi, %xmm0
348; SSE4-NEXT:    retq
349;
350; AVX1-LABEL: fptoui_v4f64_v4i32:
351; AVX1:       # %bb.0:
352; AVX1-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
353; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
354; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
355; AVX1-NEXT:    vcmpltpd %ymm2, %ymm0, %ymm3
356; AVX1-NEXT:    vpackssdw %xmm3, %xmm3, %xmm3
357; AVX1-NEXT:    vsubpd %ymm2, %ymm0, %ymm4
358; AVX1-NEXT:    vcvttpd2dq %ymm4, %xmm4
359; AVX1-NEXT:    vmovapd {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
360; AVX1-NEXT:    vxorpd %xmm5, %xmm4, %xmm4
361; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
362; AVX1-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm0
363; AVX1-NEXT:    vcmpltpd %ymm2, %ymm1, %ymm3
364; AVX1-NEXT:    vpackssdw %xmm3, %xmm3, %xmm3
365; AVX1-NEXT:    vsubpd %ymm2, %ymm1, %ymm2
366; AVX1-NEXT:    vcvttpd2dq %ymm2, %xmm2
367; AVX1-NEXT:    vxorpd %xmm5, %xmm2, %xmm2
368; AVX1-NEXT:    vcvttpd2dq %ymm1, %xmm1
369; AVX1-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm1
370; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
371; AVX1-NEXT:    vzeroupper
372; AVX1-NEXT:    retq
373;
374; AVX2-LABEL: fptoui_v4f64_v4i32:
375; AVX2:       # %bb.0:
376; AVX2-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
377; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
378; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
379; AVX2-NEXT:    vcmpltpd %ymm2, %ymm0, %ymm3
380; AVX2-NEXT:    vpackssdw %xmm3, %xmm3, %xmm3
381; AVX2-NEXT:    vsubpd %ymm2, %ymm0, %ymm4
382; AVX2-NEXT:    vcvttpd2dq %ymm4, %xmm4
383; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
384; AVX2-NEXT:    vxorpd %xmm5, %xmm4, %xmm4
385; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
386; AVX2-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm0
387; AVX2-NEXT:    vcmpltpd %ymm2, %ymm1, %ymm3
388; AVX2-NEXT:    vpackssdw %xmm3, %xmm3, %xmm3
389; AVX2-NEXT:    vsubpd %ymm2, %ymm1, %ymm2
390; AVX2-NEXT:    vcvttpd2dq %ymm2, %xmm2
391; AVX2-NEXT:    vxorpd %xmm5, %xmm2, %xmm2
392; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
393; AVX2-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm1
394; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
395; AVX2-NEXT:    vzeroupper
396; AVX2-NEXT:    retq
397;
398; AVX512F-LABEL: fptoui_v4f64_v4i32:
399; AVX512F:       # %bb.0:
400; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
401; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
402; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
403; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
404; AVX512F-NEXT:    vzeroupper
405; AVX512F-NEXT:    retq
406;
407; AVX512VL-LABEL: fptoui_v4f64_v4i32:
408; AVX512VL:       # %bb.0:
409; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
410; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
411; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
412; AVX512VL-NEXT:    vzeroupper
413; AVX512VL-NEXT:    retq
414  %s0 = fptoui <2 x double> %x to <2 x i32>
415  %s1 = fptoui <2 x double> %y to <2 x i32>
416  %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
417  ret <4 x i32> %r
418}
419
420; Negative test
421
422define <4 x float> @mismatch_tofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) {
423; SSE2-LABEL: mismatch_tofp_v4i32_v4f32:
424; SSE2:       # %bb.0:
425; SSE2-NEXT:    xorpd %xmm2, %xmm2
426; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
427; SSE2-NEXT:    movapd {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
428; SSE2-NEXT:    orpd %xmm2, %xmm0
429; SSE2-NEXT:    subpd %xmm2, %xmm0
430; SSE2-NEXT:    cvtpd2ps %xmm0, %xmm0
431; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
432; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
433; SSE2-NEXT:    retq
434;
435; SSE4-LABEL: mismatch_tofp_v4i32_v4f32:
436; SSE4:       # %bb.0:
437; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
438; SSE4-NEXT:    movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
439; SSE4-NEXT:    por %xmm2, %xmm0
440; SSE4-NEXT:    subpd %xmm2, %xmm0
441; SSE4-NEXT:    cvtpd2ps %xmm0, %xmm0
442; SSE4-NEXT:    cvtdq2ps %xmm1, %xmm1
443; SSE4-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
444; SSE4-NEXT:    retq
445;
446; AVX1-LABEL: mismatch_tofp_v4i32_v4f32:
447; AVX1:       # %bb.0:
448; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
449; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
450; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
451; AVX1-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
452; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
453; AVX1-NEXT:    vcvtdq2ps %xmm1, %xmm1
454; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
455; AVX1-NEXT:    retq
456;
457; AVX2-LABEL: mismatch_tofp_v4i32_v4f32:
458; AVX2:       # %bb.0:
459; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
460; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
461; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
462; AVX2-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
463; AVX2-NEXT:    vcvtpd2ps %xmm0, %xmm0
464; AVX2-NEXT:    vcvtdq2ps %xmm1, %xmm1
465; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
466; AVX2-NEXT:    retq
467;
468; AVX512F-LABEL: mismatch_tofp_v4i32_v4f32:
469; AVX512F:       # %bb.0:
470; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
471; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
472; AVX512F-NEXT:    vcvtdq2ps %xmm1, %xmm1
473; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
474; AVX512F-NEXT:    vzeroupper
475; AVX512F-NEXT:    retq
476;
477; AVX512VL-LABEL: mismatch_tofp_v4i32_v4f32:
478; AVX512VL:       # %bb.0:
479; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
480; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
481; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
482; AVX512VL-NEXT:    retq
483  %s0 = uitofp <2 x i32> %x to <2 x float>
484  %s1 = sitofp <2 x i32> %y to <2 x float>
485  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
486  ret <4 x float> %r
487}
488
489; Negative test
490
491define <4 x float> @sitofp_v4i32_v4f32_extra_use(<2 x i32> %x, <2 x i32> %y, <2 x float>* %p) {
492; SSE-LABEL: sitofp_v4i32_v4f32_extra_use:
493; SSE:       # %bb.0:
494; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
495; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
496; SSE-NEXT:    movlps %xmm1, (%rdi)
497; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
498; SSE-NEXT:    retq
499;
500; AVX-LABEL: sitofp_v4i32_v4f32_extra_use:
501; AVX:       # %bb.0:
502; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
503; AVX-NEXT:    vcvtdq2ps %xmm1, %xmm1
504; AVX-NEXT:    vmovlps %xmm1, (%rdi)
505; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
506; AVX-NEXT:    retq
507  %s0 = sitofp <2 x i32> %x to <2 x float>
508  %s1 = sitofp <2 x i32> %y to <2 x float>
509  store <2 x float> %s1, <2 x float>* %p
510  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
511  ret <4 x float> %r
512}
513
514define <4 x float> @PR45794(<2 x i64> %x, <2 x i64> %y) {
515; SSE-LABEL: PR45794:
516; SSE:       # %bb.0:
517; SSE-NEXT:    psrad $16, %xmm0
518; SSE-NEXT:    psrad $16, %xmm1
519; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
520; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
521; SSE-NEXT:    retq
522;
523; AVX1-LABEL: PR45794:
524; AVX1:       # %bb.0:
525; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
526; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
527; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
528; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
529; AVX1-NEXT:    retq
530;
531; AVX2-LABEL: PR45794:
532; AVX2:       # %bb.0:
533; AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
534; AVX2-NEXT:    vpsrad $16, %xmm1, %xmm1
535; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
536; AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
537; AVX2-NEXT:    retq
538;
539; AVX512F-LABEL: PR45794:
540; AVX512F:       # %bb.0:
541; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
542; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
543; AVX512F-NEXT:    vpsraq $48, %zmm0, %zmm0
544; AVX512F-NEXT:    vpsraq $48, %zmm1, %zmm1
545; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
546; AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm0
547; AVX512F-NEXT:    vzeroupper
548; AVX512F-NEXT:    retq
549;
550; AVX512VL-LABEL: PR45794:
551; AVX512VL:       # %bb.0:
552; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
553; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
554; AVX512VL-NEXT:    vpsraq $48, %ymm0, %ymm0
555; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
556; AVX512VL-NEXT:    vcvtdq2ps %xmm0, %xmm0
557; AVX512VL-NEXT:    vzeroupper
558; AVX512VL-NEXT:    retq
559  %a0 = ashr <2 x i64> %x, <i64 48, i64 48>
560  %s0 = sitofp <2 x i64> %a0 to <2 x float>
561  %a1 = ashr <2 x i64> %y, <i64 48, i64 48>
562  %s1 = sitofp <2 x i64> %a1 to <2 x float>
563  %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
564  ret <4 x float> %r
565}
566