; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; AVX128 tests:

; Constant-mask select <1,0,1,0> of v4f32: SSE4.1/AVX emit a single blendps;
; pre-SSE4.1 falls back to a two-shufps sequence.
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

; Constant-mask select <1,0,0,0> of v4f32: keep lane 0 of %v1; SSE2/SSSE3 use
; movss, SSE4.1/AVX a single blendps.
define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float2:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float2:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

; Constant-mask select <1,1,0,1> of v4i8 (promoted in-register): SSE4.1/AVX1
; use pblendw, AVX2 the dword-granular vpblendd.
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_4xi8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_4xi8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

; Constant-mask select <1,0,1,1> of v4i16 (promoted in-register): SSE4.1/AVX1
; use pblendw, AVX2 a single vpblendd.
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_4xi16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_4xi16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

; Constant-mask select <1,0,1,0> of v4i32: SSE2 uses pshufd+punpckldq,
; SSE4.1/AVX1 pblendw, AVX2 vpblendd with an immediate mask.
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

; Constant-mask select <1,0> of v2f64: movsd pre-SSE4.1, blendpd after.
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

; Constant-mask select <1,0> of v2i64: movsd pre-SSE4.1, pblendw on SSE4.1/AVX1,
; vpblendd on AVX2.
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

; Constant-mask select <1,0,0,0,1,0,0,0> of v8i16: and/andn/or sequence
; pre-SSE4.1, a single pblendw on SSE4.1/AVX.
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm1
; SSSE3-NEXT:    andnps %xmm0, %xmm2
; SSSE3-NEXT:    orps %xmm1, %xmm2
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_8xi16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

; Constant-mask byte select of v16i8: and/andn/or on SSE2, two pshufb+por on
; SSSE3, pblendvb with a constant mask register on SSE4.1/AVX.
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE41-NEXT:    pblendvb %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:

; Constant-mask select <1,0,0,0,1,0,0,0> of v8f32: per-half movss/blendps on
; SSE targets; a single 256-bit vblendps on AVX.
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

; Constant-mask select <1,0,0,0,1,0,0,0> of v8i32: per-half lowering on SSE;
; AVX1 uses the FP-domain vblendps, AVX2 the integer vpblendd.
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i328:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_i328:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_i328:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

; Constant-mask select of v8f64 across four xmm / two ymm register pairs:
; lanes 0 and 4 come from %v1, everything else from %v2.
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT:    movapd %xmm4, %xmm0
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    movapd %xmm6, %xmm2
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT:    movapd %xmm4, %xmm0
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    movapd %xmm6, %xmm2
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
; AVX-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

; Same mask as vsel_double8 but on v8i64: AVX1 stays in the FP domain
; (vblendpd), AVX2 uses vpblendd.
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT:    movapd %xmm4, %xmm0
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    movapd %xmm6, %xmm2
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT:    movapd %xmm4, %xmm0
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    movapd %xmm6, %xmm2
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i648:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_i648:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_i648:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX2-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

; Constant-mask select <1,0,1,0> of v4f64: two movsd/blendpd on SSE targets,
; one 256-bit vblendpd on AVX.
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    movapd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    movapd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double4:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double4:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

; Variable-mask select driven by fcmp oge (max pattern): cmplepd + and/andn/or
; pre-SSE4.1, blendvpd on SSE4.1/AVX.
define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmplepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testa:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testa:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmplepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testa:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

; Variable-mask select driven by fcmp ult (min pattern): cmpnlepd + and/andn/or
; pre-SSE4.1, blendvpd on SSE4.1/AVX.
define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testb:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testb:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testb:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movapd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movapd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX-NEXT:    retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

; Constant-mask select <0,0,0,1,0,0,0,1> of v8f32 should become an
; immediate-mask blendps/vblendps rather than a variable blendv.
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT:    retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

; Constant byte-granular mask on v32i8: no byte-immediate blend exists, so
; SSE2/AVX1 use and/andn/or, SSSE3 pshufb+por, SSE4.1/AVX2 (v)pblendvb with a
; constant mask register.
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT:    movaps %xmm4, %xmm5
; SSE2-NEXT:    andnps %xmm0, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm3
; SSE2-NEXT:    andnps %xmm1, %xmm4
; SSE2-NEXT:    orps %xmm3, %xmm4
; SSE2-NEXT:    movaps %xmm5, %xmm0
; SSE2-NEXT:    movaps %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT:    pshufb %xmm5, %xmm2
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
; SSE41-NEXT:    pblendvb %xmm4, %xmm2
; SSE41-NEXT:    pblendvb %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

; NOTE(review): these blendv intrinsic declarations are not referenced by any
; visible test in this chunk — confirm they are still needed before removing.
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
; Shufflevector <0,5,2,7> (pure lane blend of v4f32) should lower exactly like
; the equivalent constant-mask select: blendps on SSE4.1/AVX.
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

; Shufflevector blend of v8f32 (lanes 0 and 6 from %a): per-half lowering on
; SSE, a single 256-bit vblendps on AVX.
define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

; Shufflevector <0,5,2,3> of v4f64: only lane 1 comes from %b, so the upper
; half needs no code on SSE; AVX uses one vblendpd.
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

; Shufflevector <4,1,6,7> of v4i64 (only lane 1 from %a): movsd/pblendw per
; half on SSE, vblendpd on AVX1, vpblendd on AVX2.
define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    movaps %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_shufflevector_4xi64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_shufflevector_4xi64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

; Manual ashr-31 sign-mask + and/andn/or blend of v4i32: should be recognized
; and lowered to psrad + pblendvb on SSE4.1/AVX.
define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm1
; SSSE3-NEXT:    pandn %xmm2, %xmm0
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

; blend_logic_v8i32 -- 256-bit widening of blend_logic_v4i32: per-lane select
; of %a (where %b < 0) vs. %c (where %b >= 0) via sign-mask and/andn/or logic.
; The CHECK lines were autogenerated by update_llc_test_checks.py; regenerate
; with that script rather than hand-editing.
define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm3
; SSSE3-NEXT:    pandn %xmm5, %xmm1
; SSSE3-NEXT:    pand %xmm0, %xmm2
; SSSE3-NEXT:    pandn %xmm4, %xmm0
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm5, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> ; all-ones mask in lanes where %b < 0
  %sub = sub nsw <8 x i32> zeroinitializer, %a ; dead value: computed but never used below
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> ; ~mask
  %1 = and <8 x i32> %c, %0 ; keep %c in lanes where %b >= 0
  %2 = and <8 x i32> %a, %b.lobit ; keep %a in lanes where %b < 0
  %cond = or <8 x i32> %1, %2 ; merge the two disjoint halves of the blend
  ret <8 x i32> %cond
}
905
; blend_neg_logic_v4i32 -- conditional negation: lanes where %b is negative
; yield -%a, all other lanes keep %a.  The CHECK lines (autogenerated by
; update_llc_test_checks.py) show the backend folding the and/andn/or blend
; into the classic xor-then-subtract conditional-negate sequence.
define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: blend_neg_logic_v4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    psubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_neg_logic_v4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    psubd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> ; all-ones mask in lanes where %b < 0
  %sub = sub nsw <4 x i32> zeroinitializer, %a ; -%a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1> ; ~mask
  %1 = and <4 x i32> %a, %0 ; %a in lanes where %b >= 0
  %2 = and <4 x i32> %b.lobit, %sub ; -%a in lanes where %b < 0
  %cond = or <4 x i32> %1, %2 ; merged result: %b < 0 ? -%a : %a
  ret <4 x i32> %cond
}
943
; blend_neg_logic_v8i32 -- 256-bit widening of blend_neg_logic_v4i32: lanes
; where %b is negative yield -%a, other lanes keep %a.  The autogenerated
; CHECK lines (update_llc_test_checks.py) show the xor+sub conditional-negate
; fold on SSE/AVX2; AVX1 splits the ymm ops into xmm halves for the integer
; parts and keeps the blend as andn/and/or.
define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: blend_neg_logic_v8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm3, %xmm1
; SSE2-NEXT:    psubd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_neg_logic_v8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm3
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSSE3-NEXT:    psubd %xmm2, %xmm0
; SSSE3-NEXT:    pxor %xmm3, %xmm1
; SSSE3-NEXT:    psubd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_neg_logic_v8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrad $31, %xmm3
; SSE41-NEXT:    psrad $31, %xmm2
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    psubd %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm1
; SSE41-NEXT:    psubd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> ; all-ones mask in lanes where %b < 0
  %sub = sub nsw <8 x i32> zeroinitializer, %a ; -%a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> ; ~mask
  %1 = and <8 x i32> %a, %0 ; %a in lanes where %b >= 0
  %2 = and <8 x i32> %b.lobit, %sub ; -%a in lanes where %b < 0
  %cond = or <8 x i32> %1, %2 ; merged result: %b < 0 ? -%a : %a
  ret <8 x i32> %cond
}
1006
; blend_neg_logic_v4i32_2 -- same conditional-negate pattern expressed with an
; explicit i1 select: the sign bit of %c (materialized via ashr 31 + trunc)
; picks %v where %c < 0 and -%v elsewhere.  CHECK lines are autogenerated by
; update_llc_test_checks.py; regenerate rather than hand-editing.
define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-LABEL: blend_neg_logic_v4i32_2:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrld $31, %xmm1
; SSSE3-NEXT:    pslld $31, %xmm1
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    psubd %xmm0, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $31, %xmm1
; SSE41-NEXT:    pslld $31, %xmm1
; SSE41-NEXT:    pxor %xmm3, %xmm3
; SSE41-NEXT:    psubd %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm2
; AVX-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> ; 0 or -1 per lane (sign of %c)
  %1 = trunc <4 x i32> %0 to <4 x i1> ; low bit of each lane == sign bit of %c
  %2 = sub nsw <4 x i32> zeroinitializer, %v ; -%v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2 ; %c < 0 ? %v : -%v
  ret <4 x i32> %3
}
1055