; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=X64

; Tests for AVX-512 (KNL) lowering of select: scalar-condition vector
; selects, mask-register (<8 x i1>/<16 x i1>) selects, and scalar FP selects.

; Scalar compare selecting between zero and the <16 x i32> argument, then
; xor'ed with the argument; checks branch-based lowering of the select.
define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
; X86-LABEL: select00:
; X86:       # %bb.0:
; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    je .LBB0_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
; X86-NEXT:  .LBB0_2:
; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: select00:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    cmpl $255, %edi
; X64-NEXT:    je .LBB0_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
; X64-NEXT:  .LBB0_2:
; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; X64-NEXT:    retq
  %cmpres = icmp eq i32 %a, 255
  %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
  %res = xor <16 x i32> %b, %selres
  ret <16 x i32> %res
}

; Same pattern as select00 but with <8 x i64> elements.
define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
; X86-LABEL: select01:
; X86:       # %bb.0:
; X86-NEXT:    cmpl $255, {{[0-9]+}}(%esp)
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    je .LBB1_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm1
; X86-NEXT:  .LBB1_2:
; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: select01:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    cmpl $255, %edi
; X64-NEXT:    je .LBB1_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm1
; X64-NEXT:  .LBB1_2:
; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; X64-NEXT:    retq
  %cmpres = icmp eq i32 %a, 255
  %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
  %res = xor <8 x i64> %b, %selres
  ret <8 x i64> %res
}

; fcmp oge feeding a scalar float select; on x86-64 this lowers to a masked
; vmovss via a compare-into-k-register instead of a branch.
define float @select02(float %a, float %b, float %c, float %eps) {
; X86-LABEL: select02:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vucomiss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovael %eax, %ecx
; X86-NEXT:    flds (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: select02:
; X64:       # %bb.0:
; X64-NEXT:    vcmpless %xmm0, %xmm3, %k1
; X64-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
  %cmp = fcmp oge float %a, %eps
  %cond = select i1 %cmp, float %c, float %b
  ret float %cond
}

; Double-precision variant of select02 (masked vmovsd on x86-64).
define double @select03(double %a, double %b, double %c, double %eps) {
; X86-LABEL: select03:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vucomisd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovael %eax, %ecx
; X86-NEXT:    fldl (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: select03:
; X64:       # %bb.0:
; X64-NEXT:    vcmplesd %xmm0, %xmm3, %k1
; X64-NEXT:    vmovsd %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
  %cmp = fcmp oge double %a, %eps
  %cond = select i1 %cmp, double %c, double %b
  ret double %cond
}

; All-constant mask (first half true, second half false): the select should
; fold to plain register/memory moves, no mask ops.
define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
; X86-LABEL: select04:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-64, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    vmovaps 8(%ebp), %zmm1
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: select04:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps %zmm3, %zmm1
; X64-NEXT:    retq
  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
  ret <16 x double> %sel
}

; select(mask, all-ones, a) on <8 x i1> should fold to a scalar OR of the
; two i8 mask values.
define i8 @select05(i8 %a.0, i8 %m) {
; X86-LABEL: select05:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    orb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
;
; X64-LABEL: select05:
; X64:       # %bb.0:
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %mask = bitcast i8 %m to <8 x i1>
  %a = bitcast i8 %a.0 to <8 x i1>
  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
  %res = bitcast <8 x i1> %r to i8
  ret i8 %res;
}

; Same OR fold as select05 but with masks loaded from memory; here the
; operands go through k-registers (korw).
define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; X86-LABEL: select05_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%ecx), %ecx
; X86-NEXT:    kmovw %ecx, %k0
; X86-NEXT:    movzbl (%eax), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    korw %k1, %k0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: select05_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    kmovw %eax, %k0
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    korw %k1, %k0, %k0
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %mask = load <8 x i1> , <8 x i1>* %m
  %a = load <8 x i1> , <8 x i1>* %a.0
  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
  %res = bitcast <8 x i1> %r to i8
  ret i8 %res;
}

; select(mask, a, zero) on <8 x i1> should fold to a scalar AND.
define i8 @select06(i8 %a.0, i8 %m) {
; X86-LABEL: select06:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
;
; X64-LABEL: select06:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %mask = bitcast i8 %m to <8 x i1>
  %a = bitcast i8 %a.0 to <8 x i1>
  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
  %res = bitcast <8 x i1> %r to i8
  ret i8 %res;
}

; Memory-operand variant of select06 (AND fold via kandw).
define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; X86-LABEL: select06_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%ecx), %ecx
; X86-NEXT:    kmovw %ecx, %k0
; X86-NEXT:    movzbl (%eax), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    kandw %k1, %k0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: select06_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    kmovw %eax, %k0
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    kandw %k1, %k0, %k0
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %mask = load <8 x i1> , <8 x i1>* %m
  %a = load <8 x i1> , <8 x i1>* %a.0
  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
  %res = bitcast <8 x i1> %r to i8
  ret i8 %res;
}
; General <8 x i1> blend: (mask & a) | (~mask & b), via kandw/kandnw/korw.
define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
; X86-LABEL: select07:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k2
; X86-NEXT:    kandnw %k2, %k0, %k2
; X86-NEXT:    kandw %k0, %k1, %k0
; X86-NEXT:    korw %k2, %k0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: select07:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k0
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    kmovw %esi, %k2
; X64-NEXT:    kandnw %k2, %k0, %k2
; X64-NEXT:    kandw %k0, %k1, %k0
; X64-NEXT:    korw %k2, %k0, %k0
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %mask = bitcast i8 %m to <8 x i1>
  %a = bitcast i8 %a.0 to <8 x i1>
  %b = bitcast i8 %b.0 to <8 x i1>
  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> %b
  %res = bitcast <8 x i1> %r to i8
  ret i8 %res;
}

; Regression test: select on an undef condition must not crash; either arm
; is a legal result (here it picks 2).
define i64 @pr30249() {
; X86-LABEL: pr30249:
; X86:       # %bb.0:
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: pr30249:
; X64:       # %bb.0:
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    retq
  %v = select i1 undef , i64 1, i64 2
  ret i64 %v
}

; Regression test: scalar double select on an i1 argument lowers to a
; k-register masked vmovsd on x86-64.
define double @pr30561_f64(double %b, double %a, i1 %c) {
; X86-LABEL: pr30561_f64:
; X86:       # %bb.0:
; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovnel %eax, %ecx
; X86-NEXT:    fldl (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: pr30561_f64:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = select i1 %c, double %a, double %b
  ret double %cond
}

; Single-precision variant of pr30561_f64 (masked vmovss).
define float @pr30561_f32(float %b, float %a, i1 %c) {
; X86-LABEL: pr30561_f32:
; X86:       # %bb.0:
; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovnel %eax, %ecx
; X86-NEXT:    flds (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: pr30561_f32:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = select i1 %c, float %a, float %b
  ret float %cond
}

; Regression test: ANDed <16 x i1> masks feeding a zero-vs-%c select of
; <16 x i16>; checks the mask math stays in k-registers and the final
; select becomes a vpandn.
define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
; X86-LABEL: pr31515:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxbd %xmm1, %zmm1
; X86-NEXT:    vpslld $31, %zmm1, %zmm1
; X86-NEXT:    vpmovsxbd %xmm0, %zmm0
; X86-NEXT:    vpslld $31, %zmm0, %zmm0
; X86-NEXT:    vptestmd %zmm0, %zmm0, %k1
; X86-NEXT:    vptestmd %zmm1, %zmm1, %k1 {%k1}
; X86-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    vpmovdw %zmm0, %ymm0
; X86-NEXT:    vpandn %ymm2, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: pr31515:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbd %xmm1, %zmm1
; X64-NEXT:    vpslld $31, %zmm1, %zmm1
; X64-NEXT:    vpmovsxbd %xmm0, %zmm0
; X64-NEXT:    vpslld $31, %zmm0, %zmm0
; X64-NEXT:    vptestmd %zmm0, %zmm0, %k1
; X64-NEXT:    vptestmd %zmm1, %zmm1, %k1 {%k1}
; X64-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    vpmovdw %zmm0, %ymm0
; X64-NEXT:    vpandn %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %mask = and <16 x i1> %a, %b
  %res = select <16 x i1> %mask, <16 x i16> zeroinitializer, <16 x i16> %c
  ret <16 x i16> %res
}