; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL

; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.

; Test 128-bit vectors for all legal element types.

define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; AVX-LABEL: signbit_sel_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %tr = icmp slt <16 x i8> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %z
}

; Sorry 16-bit, you're not important enough to support?

define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX-LABEL: signbit_sel_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %tr = icmp slt <8 x i16> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %z
}

define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %z
}

define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %z
}

define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
  ret <4 x float> %z
}

define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
  ret <2 x double> %z
}

; Test 256-bit vectors to see differences between AVX1 and AVX2.
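; (A note on what the checks below illustrate, not additional test coverage:
; AVX1 has no 256-bit integer compare, so the byte/word masks get split into two
; 128-bit vpcmpgt halves and the select is lowered with vandps/vandnps/vorps,
; while AVX2 and AVX-512 can feed the 256-bit mask directly to vpblendvb.)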

define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %tr = icmp slt <32 x i8> %mask, zeroinitializer
  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
  ret <32 x i8> %z
}

; Sorry 16-bit, you'll never be important enough to support?

define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %tr = icmp slt <16 x i16> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %z
}

define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <8 x i32> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %z
}

define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %z
}

define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a condition with a different type than the select operands.

define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a 512-bit vector to make sure AVX-512 is handled as expected.
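; (As the checks below show, targets without AVX-512 split the 512-bit select
; into two 256-bit vblendvpd ops, while AVX-512 compares the mask into a
; k-register and emits a single masked vblendmpd.)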

define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %tr = icmp slt <8 x i64> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
  ret <8 x double> %z
}

; If we have a floating-point compare:
; (1) Don't die.
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.

define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
; AVX12-LABEL: signbit_sel_v4f32_fcmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
  %cmp = fcmp olt <4 x float> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
  ret <4 x float> %sel
}

attributes #0 = { "no-nans-fp-math"="true" }