; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

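; Both v2i64 lanes are sign extensions of i32 values, so (per the checks below)
; the v2i64 sitofp can be narrowed to a single v2i32 -> v2f64 vcvtdq2pd.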
define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X32:       # %bb.0:
; X32-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

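; All four i64 lanes hold sign-extended i8/i16/i32 values, so the v4i64 sitofp
; is expected to end in a v4i32 -> v4f32 vcvtdq2ps once the lanes are repacked.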
define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X32:       # %bb.0:
; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    movslq %esi, %rsi
; X64-NEXT:    movslq %edx, %rdx
; X64-NEXT:    movslq %ecx, %rcx
; X64-NEXT:    vmovq %rcx, %xmm0
; X64-NEXT:    vmovq %rdx, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vmovq %rsi, %xmm1
; X64-NEXT:    vmovq %rax, %xmm2
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

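; An arithmetic shift right by 32 guarantees the extracted i64 fits in 32 bits,
; so the checks expect a 32-bit vcvtsi2ssl instead of a 64-bit conversion.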
define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_0:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vextractps $1, %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

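; Same as above but with non-uniform shift amounts (32 and 63); the non-splat
; ashr is lowered via vpsrlq plus the xor/sub sign-extension trick, and the
; extracted lane still converts with a 32-bit vcvtsi2ssl.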
define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_1:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $63, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $63, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $63, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483648,1]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

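; The ashr by 61/60 leaves only a few low bits per lane, so the following shl
; by 20 still keeps the extracted element within 32 bits and vcvtsi2ssl suffices.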
define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_shl_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsllq $20, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

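; A scalar ashr by 30 followed by a vector ashr by 3 leaves lane 0 with far
; more than 32 sign bits, so the extract again converts with a 32-bit vcvtsi2ssl.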
define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $30, %ecx, %eax
; X32-NEXT:    sarl $30, %ecx
; X32-NEXT:    vmovd %eax, %xmm0
; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rsi, %xmm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vpsrad $3, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

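; The shuffle only reorders sign-extended lanes, so the v4i64 sitofp can be
; performed as a v4i32 -> v4f64 vcvtdq2pd on the repacked low halves.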
define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_sext_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpmovsxdq %xmm0, %xmm1
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxdq %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64>%a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

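; Only the lanes derived from %a0 survive the final shuffle, so the whole
; sequence is expected to collapse onto xmm0: two 16-bit shifts followed by a
; v2i32 -> v2f64 vcvtdq2pd.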
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $16, %xmm0, %xmm1
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $16, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

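; Lane 0 of the 'and' combines an ashr-by-61 value with a sign-extended i32
; (which the shl/ashr-by-20 pair keeps within i32 range), so the extract+sitofp
; still lowers to a 32-bit vcvtsi2ssl.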
define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
; X32-NEXT:    sarl $31, %eax
; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT:    vpsllq $20, %xmm1, %xmm1
; X32-NEXT:    vpsrad $20, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm1
; X64-NEXT:    vpsllq $20, %xmm1, %xmm1
; X64-NEXT:    vpsrad $20, %xmm1, %xmm2
; X64-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

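; Every operand of the and/or/xor chain has more than 32 sign bits (ashr by
; 61/60 on one side, sign-extended i32s on the other), so the bitwise ops keep
; the result within i32 range and the extract converts with a 32-bit vcvtsi2ssl.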
define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpmovsxdq %xmm1, %xmm1
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm1, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

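; Both select inputs have at least 33 sign bits (ashr by 33/63 versus sext from
; i32), so the shuffled v4i64 sitofp is expected to narrow to a v4i32 -> v4f32
; vcvtdq2ps.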
define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovdqa {{.*#+}} xmm3 = [33,0,63,0]
; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq %xmm3, %xmm4, %xmm5
; X32-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
; X32-NEXT:    vpsrlq %xmm6, %xmm4, %xmm4
; X32-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; X32-NEXT:    vextractf128 $1, %ymm2, %xmm5
; X32-NEXT:    vpsrlq %xmm6, %xmm5, %xmm7
; X32-NEXT:    vpsrlq %xmm3, %xmm5, %xmm5
; X32-NEXT:    vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; X32-NEXT:    vpsrlq %xmm6, %xmm2, %xmm6
; X32-NEXT:    vpsrlq %xmm3, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X32-NEXT:    vpmovsxdq 16(%ebp), %xmm3
; X32-NEXT:    vpxor %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpsubq %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpmovsxdq 8(%ebp), %xmm4
; X32-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X32-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X32-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X32-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X32-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm2, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm4, %xmm5
; X64-NEXT:    vpsrlq $33, %xmm4, %xmm4
; X64-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm5 = [1073741824,1]
; X64-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsubq %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm2, %xmm6
; X64-NEXT:    vpsrlq $33, %xmm2, %xmm2
; X64-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X64-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; X64-NEXT:    vpsubq %xmm5, %xmm2, %xmm2
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; X64-NEXT:    vpmovsxdq %xmm3, %xmm4
; X64-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm3, %xmm3
; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X64-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X64-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}