; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
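
; Masking element 0 with 15 clears its sign bit, so the sext folds away and only a scalar mask remains.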
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # %bb.0:
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    andl $15, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}
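
; The mask proves element 0 fits in 16 bits, so the expensive u64->f32 expansion simplifies to a zero-extend plus a signed vcvtdq2ps.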
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}
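
; Both inserted elements are zero-extended i16 values, so every lane the shuffle uses is non-negative and the uitofp lowers to a signed vcvtdq2ps.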
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32>  %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
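
; The high four lanes are masked with 15, so the sext of the shuffled upper half becomes a zero-extension (an unpack with zero).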
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}
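
; As above, but the known bits must survive a chain of two shuffles.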
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}
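
; With an undef element inside the used part of the shuffle mask, the fold to a zero-unpack does not happen and a real vpmovsxwd is emitted.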
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}
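
; Lanes 2 and 3 are masked to small positive values, so the shuffled result is known non-negative and the uitofp lowers to vcvtdq2ps.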
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
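
; The or forces lanes 2 and 3 to exactly 65535, so the whole computation constant-folds to a splat of 65535.0.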
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
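
; The xor cannot set bits above the masked widths, so lanes 2 and 3 remain non-negative and the conversion lowers to vcvtdq2ps.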
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vxorps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
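
; Lanes 0 and 3 keep only bits 16-31; shifting left by 17 clears them completely, so the result is known zero.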
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}
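
; Lanes 0 and 3 are at most 17 bits wide, so after the ashr by 15 only two bits remain and the lshr by 30 is known zero.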
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}
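
; One multiplicand has its low 16 bits known zero in lanes 0 and 3, so the product does too and the shl by 22 shifts out every bit that could be set.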
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
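
; The i64 mask clears the low 16 bits of lanes 0 and 3, truncation preserves that, so the shl by 22 yields zero.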
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
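
; Both addends fit in 15 bits in lanes 0 and 3, so the sum fits in 16 bits and the lshr by 17 is known zero.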
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}
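
; 255 minus a 4-bit value still fits in 8 bits, so the lshr by 22 is known zero.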
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
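
; An unsigned quotient never exceeds its dividend, so lanes 0 and 3 stay within 15 bits and the lshr by 22 is known zero.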
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
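
; urem by 16 leaves at most 4 bits, so the lshr by 22 is known zero.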
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}
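
; An unsigned remainder never exceeds its dividend, so lanes 0 and 3 stay within 15 bits and the lshr by 22 is known zero.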
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}
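
; Lanes 0 and 3 are multiples of 32768 and hence of 16, so the srem in those lanes is exactly zero.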
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
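
; bswap moves the known-zero high bytes of lanes 0 and 3 into the low half, so the shl by 22 is known zero.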
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
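
; The masks leave at most 17 significant bits in every lane the shuffle selects, so the <8 x i32> uitofp lowers to a single vcvtdq2ps.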
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}
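
; The lshr by 1 clears bit 63, so the odd i32 lanes selected after the bitcast are non-negative and convert with vcvtdq2ps.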
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
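
; Known bits do not currently propagate through the signed min/max intrinsics, so the full unsigned-to-float expansion remains.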
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vsubps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
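
; The umin against the constant bounds every selected lane below 2^18, so the conversion lowers to vcvtdq2ps.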
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
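
; umax with -1 forces lanes 1 and 2 to all-ones, so the ashr by 31 folds to a constant all-ones vector.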
define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
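
; The mask bounds the lanes before the umax sees them, so the result stays non-negative and the uitofp lowers to vcvtdq2ps.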
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
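
; Bit 0 is masked off, so after bitreverse the sign bit is known zero and the ashr by 31 folds to zero.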
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN, we can't combine to sitofp.
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vsubps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
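
; Setting bit 0 rules out INT_MIN, so the absolute value is known non-negative and the uitofp becomes a signed conversion.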
define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_or_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
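
; Both select arms are masked down in the lanes the shuffle keeps, so the result is known non-negative and converts with vcvtdq2ps.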
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
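
; The lshr and the mask each clear the sign bit of the lanes the shuffle keeps, so the select result converts with vcvtdq2ps.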
define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vpsrld $5, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
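
; The shifts clear the sign bits of lanes 0 and 1, so the subvector converts to double with vcvtdq2pd.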
define <2 x double> @knownbits_lshr_subvector_uitofp(<4 x i32> %x)  {
; X32-LABEL: knownbits_lshr_subvector_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrld $2, %xmm0, %xmm1
; X32-NEXT:    vpsrld $1, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_subvector_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %xmm0, %xmm1
; X64-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 0, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = uitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}