• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; AVX128 tests:

; Constant-mask select <1,0,1,0> of floats: SSE2/SSSE3 lower to two shufps,
; SSE4.1/AVX to a single blendps with an immediate mask.
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

; Constant-mask select <1,0,0,0>: only lane 0 comes from %v1, so SSE2/SSSE3
; use movss and SSE4.1/AVX a one-lane blendps.
define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

; Constant-mask select <1,1,0,1> on <4 x i8>: lowered with shufps pairs on
; SSE2/SSSE3 and blendps on SSE4.1/AVX.
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_4xi8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

; Constant-mask select <1,0,1,1> on <4 x i16>: shufps sequence on SSE2/SSSE3,
; blendps immediate on SSE4.1/AVX.
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_4xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

; Constant-mask select <1,0,1,0> on <4 x i32>: pshufd+punpckldq on SSE2/SSSE3,
; blendps immediate on SSE4.1/AVX.
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

; Constant-mask select <1,0> of doubles: movsd on SSE2/SSSE3, blendps on
; SSE4.1/AVX.
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

; Constant-mask select <1,0> on <2 x i64>: same lowering as the double case
; (movsd / blendps).
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

; Constant-mask select on <8 x i16>: and/andn/or with a constant mask on
; SSE2/SSSE3, pblendw immediate on SSE4.1/AVX.
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm1
; SSSE3-NEXT:    andnps %xmm0, %xmm2
; SSSE3-NEXT:    orps %xmm1, %xmm2
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_8xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

; Constant-mask select on <16 x i8>: and/andn/or on SSE2, pshufb pair on
; SSSE3, pblendvb with a constant mask register on SSE4.1/AVX.
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:

; 256-bit constant-mask select of floats: split into two 128-bit movss/blendps
; on SSE, a single ymm vblendps on AVX.
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

; 256-bit constant-mask select on <8 x i32>: identical lowering to the float8
; case (movss/blendps pairs on SSE, one ymm vblendps on AVX).
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i328:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i328:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

; 512-bit constant-mask select of doubles, split across four xmm pairs on SSE
; and two ymm vblendps on AVX.
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT:    movapd %xmm4, %xmm0
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    movapd %xmm6, %xmm2
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT:    movapd %xmm4, %xmm0
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    movapd %xmm6, %xmm2
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

; 512-bit constant-mask select on <8 x i64>: same lowering as vsel_double8.
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT:    movapd %xmm4, %xmm0
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    movapd %xmm6, %xmm2
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT:    movapd %xmm4, %xmm0
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    movapd %xmm6, %xmm2
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i648:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i648:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

; 256-bit constant-mask select <1,0,1,0> of doubles: movsd per half on
; SSE2/SSSE3, blendps per half on SSE4.1, one ymm vblendps on AVX.
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    movapd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    movapd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double4:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double4:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

; max(x, y) via fcmp oge + select: cmplepd/and/andn/or on SSE2/SSSE3,
; cmplepd + blendvpd on SSE4.1, vcmplepd + vblendvpd on AVX.
define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmplepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testa:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testa:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmplepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testa:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

; min(x, y) via fcmp ult + select: same shape as testa but with the cmpnlepd
; predicate.
define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testb:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testb:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testb:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask.
; Constant mask <0,0,1,0>: the variable-blend should fold to movsd/blendps
; on SSE and an immediate-mask vblendps on AVX.
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movapd %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movapd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

; Constant mask <0,0,0,1,0,0,0,1>: folds to shufps pairs on SSE2/SSSE3,
; per-half blendps on SSE4.1, an immediate-mask ymm vblendps on AVX.
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT:    retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

; Constant byte mask on <32 x i8>: and/andn/or on SSE2, pshufb pairs on SSSE3,
; pblendvb per half on SSE4.1; AVX1 uses 256-bit logic ops, AVX2 vpblendvb.
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT:    movaps %xmm4, %xmm5
; SSE2-NEXT:    andnps %xmm0, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm3
; SSE2-NEXT:    andnps %xmm1, %xmm4
; SSE2-NEXT:    orps %xmm3, %xmm4
; SSE2-NEXT:    movaps %xmm5, %xmm0
; SSE2-NEXT:    movaps %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT:    pshufb %xmm5, %xmm2
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
; shufflevector <0,5,2,7> is a blend: two shufps on SSE2/SSSE3, blendps
; immediate on SSE4.1/AVX.
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

; 256-bit shufflevector blend: movss/shufps per half on SSE2/SSSE3,
; per-half blendps on SSE4.1, one ymm vblendps on AVX.
define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

; shufflevector <0,5,2,3> of doubles: only the low half blends, so the high
; xmm passes through untouched on SSE; AVX emits one ymm vblendps.
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

; shufflevector <4,1,6,7> on <4 x i64>: high half comes entirely from %b, so
; SSE only blends the low half and copies xmm3; AVX uses one ymm vblendps.
define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT:    movaps %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xi64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

; Sign-bit select written as ashr/xor/and/or: lowers to psrad $31 plus
; and/andn/or on SSE2/SSSE3 and psrad + pblendvb on SSE4.1/AVX.
define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm1
; SSSE3-NEXT:    pandn %xmm2, %xmm0
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

; 256-bit version of blend_logic_v4i32: per-half psrad + logic/pblendvb on
; SSE; AVX1 widens with extract/insert + 256-bit logic, AVX2 uses vpblendvb.
define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm3
; SSSE3-NEXT:    pandn %xmm5, %xmm1
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    pand %xmm0, %xmm2
; SSSE3-NEXT:    pandn %xmm4, %xmm0
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm5, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %c, %0
  %2 = and <8 x i32> %a, %b.lobit
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

; Conditional negate on <4 x i32>: %cond = (b < 0) ? -a : a, written as
; (a & ~b.lobit) | (b.lobit & -a). Expect the fused sign-flip idiom
; (psrad/pxor/psubd) rather than a blend, on both SSE and AVX.
define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}
892
; Conditional negate on <8 x i32>: %cond = (b < 0) ? -a : a. Expect the
; sign-flip idiom (psrad/pxor/psubd) per 128-bit half on SSE and as a
; single 256-bit sequence on AVX2; AVX1 (no 256-bit integer ops) falls
; back to extract/insert plus float-domain logic ops.
define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm1
; SSE-NEXT:    psubd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}
935
; Same conditional negate expressed through an explicit select: the mask is
; ashr+trunc to <4 x i1>, %3 = select mask, %v, -%v. SSE2/SSSE3 still fold
; to the xor/psub sign-flip idiom; SSE41/AVX keep a variable blend
; (blendvps/vblendvps) of %v against its negation.
define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-LABEL: blend_neg_logic_v4i32_2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    psubd %xmm0, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pxor %xmm3, %xmm3
; SSE41-NEXT:    psubd %xmm0, %xmm3
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm2
; AVX-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}
976