; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=AVX                       --check-prefix=AVX512 --check-prefix=AVX512VL

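; (If the IR or RUN lines below are changed, the CHECK assertions are intended to be
;  regenerated with the script named above rather than edited by hand, e.g. by running
;  utils/update_llc_test_checks.py on this file with an llc binary available; the exact
;  invocation depends on the local build setup.)
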
; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.

; Test 128-bit vectors for all legal element types.

define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; AVX-LABEL: signbit_sel_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %tr = icmp slt <16 x i8> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %z
}

; Sorry 16-bit, you're not important enough to support?

define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX-LABEL: signbit_sel_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %tr = icmp slt <8 x i16> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %z
}

define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %z
}

define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %z
}

define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
  ret <4 x float> %z
}

define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
  ret <2 x double> %z
}

; Test 256-bit vectors to see differences between AVX1 and AVX2.

define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %tr = icmp slt <32 x i8> %mask, zeroinitializer
  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
  ret <32 x i8> %z
}

; Sorry 16-bit, you'll never be important enough to support?

define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %tr = icmp slt <16 x i16> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %z
}

define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <8 x i32> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %z
}

define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %z
}

define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a condition with a different type than the select operands.

define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a 512-bit vector to make sure AVX-512 is handled as expected.

define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %tr = icmp slt <8 x i64> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
  ret <8 x double> %z
}

; If we have a floating-point compare:
; (1) Don't die.
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.

define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
; AVX12-LABEL: signbit_sel_v4f32_fcmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %cmp = fcmp olt <4 x float> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
  ret <4 x float> %sel
}

attributes #0 = { "no-nans-fp-math"="true" }