; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86

; Codegen tests for operations on <N x i1> mask vectors. Each function is
; compiled once per feature set listed in the llc invocations above, and the
; assertion lines inside each function are the expected assembly.

; Invert a 16-bit mask - bitcast i16 to <16 x i1>, xor with all-ones, bitcast back.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notl %edi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}

; Same inversion as mask16, with the i16 result zero-extended to i32.
define i32 @mask16_zext(i16 %x) {
; CHECK-LABEL: mask16_zext:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notl %edi
; CHECK-NEXT:    movzwl %di, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16_zext:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl $65535, %eax ## imm = 0xFFFF
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}

; Invert an 8-bit mask - bitcast i8 to <8 x i1>, xor with all-ones, bitcast back.
define i8 @mask8(i8 %x) {
; CHECK-LABEL: mask8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notb %dil
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask8:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}

; Same inversion as mask8, with the i8 result zero-extended to i32.
define i32 @mask8_zext(i8 %x) {
; CHECK-LABEL: mask8_zext:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notb %dil
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask8_zext:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}

; Load an i16, invert it as a <16 x i1> mask, and store it back through the same pointer.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw (%eax), %k0
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kmovw %k0, (%eax)
; X86-NEXT:    retl
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}

; Load an i8, invert it as an <8 x i1> mask, and store it back through the same pointer.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    notb (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    notb (%rdi)
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mask8_mem:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    knotb %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mask8_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb (%eax), %k0
; X86-NEXT:    knotb %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}

; Compute (x & y) | (x ^ y) on the <16 x i1> views of two i16 arguments.
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    andl %esi, %edi
; CHECK-NEXT:    orl %eax, %edi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mand16:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; Same and/xor/or combination as mand16, with both masks loaded as <16 x i1> from memory.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; KNL-LABEL: mand16_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw (%rdi), %k0
; KNL-NEXT:    kmovw (%rsi), %k1
; KNL-NEXT:    kandw %k1, %k0, %k2
; KNL-NEXT:    kxorw %k1, %k0, %k0
; KNL-NEXT:    korw %k0, %k2, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: mand16_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovw (%rdi), %k0
; SKX-NEXT:    kmovw (%rsi), %k1
; SKX-NEXT:    kandw %k1, %k0, %k2
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    korw %k0, %k2, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mand16_mem:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovw (%rdi), %k0
; AVX512BW-NEXT:    kmovw (%rsi), %k1
; AVX512BW-NEXT:    kandw %k1, %k0, %k2
; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
; AVX512BW-NEXT:    korw %k0, %k2, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mand16_mem:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw (%rsi), %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k2
; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
; AVX512DQ-NEXT:    korw %k0, %k2, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mand16_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw (%ecx), %k0
; X86-NEXT:    kmovw (%eax), %k1
; X86-NEXT:    kandw %k1, %k0, %k2
; X86-NEXT:    kxorw %k1, %k0, %k0
; X86-NEXT:    korw %k0, %k2, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; Extract elements 8..15 of the <16 x i1> view of %v with a shufflevector and return them as i8.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftrw $8, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: shuf_test1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kshiftrw $8, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: shuf_test1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kshiftrw $8, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuf_test1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    kshiftrw $8, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: shuf_test1:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
   %v1 = bitcast i16 %v to <16 x i1>
   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %mask1 = bitcast <8 x i1> %mask to i8
   ret i8 %mask1
}

; Zero-extend element 5 of an unsigned-greater-than <16 x i32> compare to i32.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andl $1, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andl $1, %eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test1:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; Same as zext_test1, zero-extending the extracted bit to i16.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test2:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test2:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test2:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andl $1, %eax
; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test2:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andl $1, %eax
; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test2:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; Same as zext_test1, zero-extending the extracted bit to i8.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test3:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andb $1, %al
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test3:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $1, %al
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test3:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andb $1, %al
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test3:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andb $1, %al
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test3:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}

; Store an all-true <8 x i1> to %R, then round-trip a constant <8 x i1> through
; an alloca and return it bitcast to i8.
define i8 @conv1(<8 x i1>* %R) {
; CHECK-LABEL: conv1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movb $-1, (%rdi)
; CHECK-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movb $-2, %al
; CHECK-NEXT:    retq
;
; X86-LABEL: conv1:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $-1, (%eax)
; X86-NEXT:    movb $-2, (%esp)
; X86-NEXT:    movb $-2, %al
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}

; Signed-greater-than compare of two <4 x i1> compare results, sign-extended to <4 x i32>.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test4:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test4:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test4:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
; X86-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; X86-NEXT:    vpmovm2d %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; Signed-less-than compare of two <2 x i1> compare results, sign-extended to <2 x i64>.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; KNL-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test5:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test5:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512DQ-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test5:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    vpmovm2q %k0, %xmm0
; X86-NEXT:    retl
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; And a <16 x i1> argument with an alternating constant mask and branch on the
; i16 result being zero; both successors simply return. This function carries
; no assertion lines - regenerate with utils/update_llc_test_checks.py if
; coverage is wanted.
define void @test6(<16 x i1> %mask) {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; Or an <8 x i1> argument with an alternating constant mask and branch on the
; i8 result being zero; both successors simply return.
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL:       ## %bb.0: ## %allocas
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    orb $85, %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## %bb.0: ## %allocas
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    orb $85, %al
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test7:
; AVX512BW:       ## %bb.0: ## %allocas
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    orb $85, %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test7:
; AVX512DQ:       ## %bb.0: ## %allocas
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    orb $85, %al
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test7:
; X86:       ## %bb.0: ## %allocas
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    orb $85, %al
; X86-NEXT:    retl
allocas:
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; Select, on a scalar condition, between two <16 x i1> compare results and
; sign-extend the chosen mask to <16 x i8>. The second compare is an unsigned
; less-than against zero.
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB17_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    kxorw %k0, %k0, %k1
; KNL-NEXT:    jmp LBB17_3
; KNL-NEXT:  LBB17_1:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT:  LBB17_3:
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB17_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    kxorw %k0, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB17_1:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test8:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB17_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    kxorw %k0, %k0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB17_1:
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test8:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB17_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    kxorw %k0, %k0, %k0
; AVX512DQ-NEXT:    jmp LBB17_3
; AVX512DQ-NEXT:  LBB17_1:
; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:  LBB17_3:
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test8:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB17_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    kxorw %k0, %k0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB17_1:
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}

; Select, on a scalar signed compare, between two <16 x i1> arguments.
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB18_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
; KNL-NEXT:    jmp LBB18_3
; KNL-NEXT:  LBB18_1:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:  LBB18_3:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB18_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB18_3
; SKX-NEXT:  LBB18_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:  LBB18_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test9:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB18_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
; AVX512BW-NEXT:    jmp LBB18_3
; AVX512BW-NEXT:  LBB18_1:
; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT:  LBB18_3:
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test9:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB18_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
; AVX512DQ-NEXT:    jmp LBB18_3
; AVX512DQ-NEXT:  LBB18_1:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT:  LBB18_3:
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test9:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB18_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    vpsllw $7, %xmm1, %xmm0
; X86-NEXT:    jmp LBB18_3
; X86-NEXT:  LBB18_1:
; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
; X86-NEXT:  LBB18_3:
; X86-NEXT:    vpmovb2m %xmm0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; Select, on a scalar signed compare, between two <8 x i1> arguments. This
; function carries no assertion lines - regenerate with
; utils/update_llc_test_checks.py if coverage is wanted.
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; Select, on a scalar signed compare, between two <4 x i1> arguments.
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB20_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    vpslld $31, %xmm1, %xmm0
; KNL-NEXT:    jmp LBB20_3
; KNL-NEXT:  LBB20_1:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:  LBB20_3:
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB20_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB20_3
; SKX-NEXT:  LBB20_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:  LBB20_3:
; SKX-NEXT:    vpmovd2m %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test11:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB20_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    vpslld $31, %xmm1, %xmm0
; AVX512BW-NEXT:    jmp LBB20_3
; AVX512BW-NEXT:  LBB20_1:
; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT:  LBB20_3:
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test11:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB20_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm0
; AVX512DQ-NEXT:    jmp LBB20_3
; AVX512DQ-NEXT:  LBB20_1:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:  LBB20_3:
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test11:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB20_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    vpslld $31, %xmm1, %xmm0
; X86-NEXT:    jmp LBB20_3
; X86-NEXT:  LBB20_1:
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:  LBB20_3:
; X86-NEXT:    vpmovd2m %xmm0, %k0
; X86-NEXT:    vpmovm2d %k0, %xmm0
; X86-NEXT:    retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; Select between %x and %y on element 0 of the constant mask 21845; the
; assertions expect %x to be returned.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test12:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Select between %x and %y on element 3 of the constant mask 21845; the
; 64-bit assertions expect %y (passed in %esi) to be returned.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test13:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Make sure we don't crash on a large vector.
; Select between %x and %y on element 3 of a constant i128 viewed as
; <128 x i1>; the 64-bit assertions expect %x (passed in %edi) to be returned.
define i32 @test13_crash(i32 %x, i32 %y) {
; CHECK-LABEL: test13_crash:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test13_crash:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
  %b = extractelement <128 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Insert element 2 of the constant mask 21845 into position 1 of the constant
; <4 x i1> <true, false, false, true>; the assertions expect [1,1,0,1].
define <4 x i1> @test14() {
; CHECK-LABEL: test14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; CHECK-NEXT:    retq
;
; X86-LABEL: test14:
; X86:       ## %bb.0:
; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}

; Select, on a scalar signed compare, between the constant masks 21845 and 1
; viewed as <16 x i1>.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    movl $21845, %eax ## imm = 0x5555
; KNL-NEXT:    movl $1, %ecx
; KNL-NEXT:    cmovgl %eax, %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    movl $21845, %eax ## imm = 0x5555
; SKX-NEXT:    movl $1, %ecx
; SKX-NEXT:    cmovgl %eax, %ecx
; SKX-NEXT:    kmovd %ecx, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test15:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    movl $21845, %eax ## imm = 0x5555
; AVX512BW-NEXT:    movl $1, %ecx
; AVX512BW-NEXT:    cmovgl %eax, %ecx
; AVX512BW-NEXT:    kmovd %ecx, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test15:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    movl $21845, %eax ## imm = 0x5555
; AVX512DQ-NEXT:    movl $1, %ecx
; AVX512DQ-NEXT:    cmovgl %eax, %ecx
; AVX512DQ-NEXT:    kmovw %ecx, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test15:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $21845, %eax ## imm = 0x5555
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    kmovd %ecx, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    movl %edi, %ecx
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    shrq $48, %rax
; KNL-NEXT:    shrl $16, %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    movb $1, %al
; KNL-NEXT:    kmovw %eax, %k4
; KNL-NEXT:    kshiftrw $5, %k0, %k5
; KNL-NEXT:    kxorw %k4, %k5, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $10, %k4, %k4
; KNL-NEXT:    kxorw %k4, %k0, %k4
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    movb $1, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftrq $5, %k0, %k2
; SKX-NEXT:    kxorq %k1, %k2, %k1
; SKX-NEXT:    kshiftlq $63, %k1, %k1
; SKX-NEXT:    kshiftrq $58, %k1, %k1
; SKX-NEXT:    kxorq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test16:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovq %rdi, %k0
; AVX512BW-NEXT:    movb $1, %al
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    kshiftrq $5, %k0, %k2
; AVX512BW-NEXT:    kxorq %k1, %k2, %k1
; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
; AVX512BW-NEXT:    kxorq %k1, %k0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test16:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    movq %rdi, %rax
; AVX512DQ-NEXT:    movl %edi, %ecx
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    shrq $32, %rdi
; AVX512DQ-NEXT:    shrq $48, %rax
; AVX512DQ-NEXT:    shrl $16, %ecx
; AVX512DQ-NEXT:    kmovw %ecx, %k1
; AVX512DQ-NEXT:    kmovw %eax, %k2
; AVX512DQ-NEXT:    kmovw %edi, %k3
; AVX512DQ-NEXT:    movb $1, %al
; AVX512DQ-NEXT:    kmovw %eax, %k4
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k5
; AVX512DQ-NEXT:    kxorw %k4, %k5, %k4
; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT:    kxorw %k4, %k0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:
vpmovm2d %k1, %zmm2 1140; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 1141; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1142; AVX512DQ-NEXT: retq 1143; 1144; X86-LABEL: test16: 1145; X86: ## %bb.0: 1146; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 1147; X86-NEXT: movb $1, %al 1148; X86-NEXT: kmovd %eax, %k1 1149; X86-NEXT: kshiftrq $5, %k0, %k2 1150; X86-NEXT: kxorq %k1, %k2, %k1 1151; X86-NEXT: kshiftlq $63, %k1, %k1 1152; X86-NEXT: kshiftrq $58, %k1, %k1 1153; X86-NEXT: kxorq %k1, %k0, %k0 1154; X86-NEXT: vpmovm2b %k0, %zmm0 1155; X86-NEXT: retl 1156 %a = bitcast i64 %x to <64 x i1> 1157 %b = insertelement <64 x i1>%a, i1 true, i32 5 1158 %c = sext <64 x i1>%b to <64 x i8> 1159 ret <64 x i8>%c 1160} 1161 1162define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { 1163; 1164; KNL-LABEL: test17: 1165; KNL: ## %bb.0: 1166; KNL-NEXT: movq %rdi, %rax 1167; KNL-NEXT: movl %edi, %ecx 1168; KNL-NEXT: kmovw %edi, %k0 1169; KNL-NEXT: shrq $32, %rdi 1170; KNL-NEXT: shrq $48, %rax 1171; KNL-NEXT: shrl $16, %ecx 1172; KNL-NEXT: kmovw %ecx, %k1 1173; KNL-NEXT: kmovw %eax, %k2 1174; KNL-NEXT: kmovw %edi, %k3 1175; KNL-NEXT: cmpl %edx, %esi 1176; KNL-NEXT: setg %al 1177; KNL-NEXT: kshiftrw $5, %k0, %k4 1178; KNL-NEXT: kmovw %eax, %k5 1179; KNL-NEXT: kxorw %k5, %k4, %k4 1180; KNL-NEXT: kshiftlw $15, %k4, %k4 1181; KNL-NEXT: kshiftrw $10, %k4, %k4 1182; KNL-NEXT: kxorw %k4, %k0, %k4 1183; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} 1184; KNL-NEXT: vpmovdb %zmm0, %xmm0 1185; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 1186; KNL-NEXT: vpmovdb %zmm1, %xmm1 1187; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 1188; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} 1189; KNL-NEXT: vpmovdb %zmm0, %xmm0 1190; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} 1191; KNL-NEXT: vpmovdb %zmm2, %xmm2 1192; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1193; KNL-NEXT: retq 1194; 1195; SKX-LABEL: test17: 1196; SKX: ## %bb.0: 1197; SKX-NEXT: kmovq %rdi, %k0 1198; 
SKX-NEXT: cmpl %edx, %esi 1199; SKX-NEXT: setg %al 1200; SKX-NEXT: kmovd %eax, %k1 1201; SKX-NEXT: kshiftrq $5, %k0, %k2 1202; SKX-NEXT: kxorq %k1, %k2, %k1 1203; SKX-NEXT: kshiftlq $63, %k1, %k1 1204; SKX-NEXT: kshiftrq $58, %k1, %k1 1205; SKX-NEXT: kxorq %k1, %k0, %k0 1206; SKX-NEXT: vpmovm2b %k0, %zmm0 1207; SKX-NEXT: retq 1208; 1209; AVX512BW-LABEL: test17: 1210; AVX512BW: ## %bb.0: 1211; AVX512BW-NEXT: kmovq %rdi, %k0 1212; AVX512BW-NEXT: cmpl %edx, %esi 1213; AVX512BW-NEXT: setg %al 1214; AVX512BW-NEXT: kmovd %eax, %k1 1215; AVX512BW-NEXT: kshiftrq $5, %k0, %k2 1216; AVX512BW-NEXT: kxorq %k1, %k2, %k1 1217; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 1218; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 1219; AVX512BW-NEXT: kxorq %k1, %k0, %k0 1220; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1221; AVX512BW-NEXT: retq 1222; 1223; AVX512DQ-LABEL: test17: 1224; AVX512DQ: ## %bb.0: 1225; AVX512DQ-NEXT: movq %rdi, %rax 1226; AVX512DQ-NEXT: movl %edi, %ecx 1227; AVX512DQ-NEXT: kmovw %edi, %k0 1228; AVX512DQ-NEXT: shrq $32, %rdi 1229; AVX512DQ-NEXT: shrq $48, %rax 1230; AVX512DQ-NEXT: shrl $16, %ecx 1231; AVX512DQ-NEXT: kmovw %ecx, %k1 1232; AVX512DQ-NEXT: kmovw %eax, %k2 1233; AVX512DQ-NEXT: kmovw %edi, %k3 1234; AVX512DQ-NEXT: cmpl %edx, %esi 1235; AVX512DQ-NEXT: setg %al 1236; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4 1237; AVX512DQ-NEXT: kmovw %eax, %k5 1238; AVX512DQ-NEXT: kxorw %k5, %k4, %k4 1239; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 1240; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 1241; AVX512DQ-NEXT: kxorw %k4, %k0, %k0 1242; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 1243; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1244; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 1245; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 1246; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 1247; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1248; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1249; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 1250; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 1251; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1252; AVX512DQ-NEXT: retq 1253; 
1254; X86-LABEL: test17: 1255; X86: ## %bb.0: 1256; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1257; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 1258; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1259; X86-NEXT: setg %al 1260; X86-NEXT: kmovd %eax, %k1 1261; X86-NEXT: kshiftrq $5, %k0, %k2 1262; X86-NEXT: kxorq %k1, %k2, %k1 1263; X86-NEXT: kshiftlq $63, %k1, %k1 1264; X86-NEXT: kshiftrq $58, %k1, %k1 1265; X86-NEXT: kxorq %k1, %k0, %k0 1266; X86-NEXT: vpmovm2b %k0, %zmm0 1267; X86-NEXT: retl 1268 %a = bitcast i64 %x to <64 x i1> 1269 %b = icmp sgt i32 %y, %z 1270 %c = insertelement <64 x i1>%a, i1 %b, i32 5 1271 %d = sext <64 x i1>%c to <64 x i8> 1272 ret <64 x i8>%d 1273} 1274 1275define <8 x i1> @test18(i8 %a, i16 %y) { 1276; KNL-LABEL: test18: 1277; KNL: ## %bb.0: 1278; KNL-NEXT: kmovw %edi, %k1 1279; KNL-NEXT: kmovw %esi, %k2 1280; KNL-NEXT: kshiftrw $8, %k2, %k0 1281; KNL-NEXT: kshiftrw $9, %k2, %k2 1282; KNL-NEXT: kshiftrw $6, %k1, %k3 1283; KNL-NEXT: kxorw %k2, %k3, %k2 1284; KNL-NEXT: kshiftlw $15, %k2, %k2 1285; KNL-NEXT: kshiftrw $9, %k2, %k2 1286; KNL-NEXT: kxorw %k2, %k1, %k1 1287; KNL-NEXT: kshiftlw $9, %k1, %k1 1288; KNL-NEXT: kshiftrw $9, %k1, %k1 1289; KNL-NEXT: kshiftlw $7, %k0, %k0 1290; KNL-NEXT: korw %k0, %k1, %k1 1291; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1292; KNL-NEXT: vpmovdw %zmm0, %ymm0 1293; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 1294; KNL-NEXT: vzeroupper 1295; KNL-NEXT: retq 1296; 1297; SKX-LABEL: test18: 1298; SKX: ## %bb.0: 1299; SKX-NEXT: kmovd %edi, %k1 1300; SKX-NEXT: kmovd %esi, %k2 1301; SKX-NEXT: kshiftrw $8, %k2, %k0 1302; SKX-NEXT: kshiftrw $9, %k2, %k2 1303; SKX-NEXT: kshiftrb $6, %k1, %k3 1304; SKX-NEXT: kxorb %k2, %k3, %k2 1305; SKX-NEXT: kshiftlb $7, %k2, %k2 1306; SKX-NEXT: kshiftrb $1, %k2, %k2 1307; SKX-NEXT: kxorb %k2, %k1, %k1 1308; SKX-NEXT: kshiftlb $1, %k1, %k1 1309; SKX-NEXT: kshiftrb $1, %k1, %k1 1310; SKX-NEXT: kshiftlb $7, %k0, %k0 1311; SKX-NEXT: korb %k0, %k1, %k0 1312; SKX-NEXT: vpmovm2w 
%k0, %xmm0 1313; SKX-NEXT: retq 1314; 1315; AVX512BW-LABEL: test18: 1316; AVX512BW: ## %bb.0: 1317; AVX512BW-NEXT: kmovd %edi, %k1 1318; AVX512BW-NEXT: kmovd %esi, %k2 1319; AVX512BW-NEXT: kshiftrw $8, %k2, %k0 1320; AVX512BW-NEXT: kshiftrw $9, %k2, %k2 1321; AVX512BW-NEXT: kshiftrw $6, %k1, %k3 1322; AVX512BW-NEXT: kxorw %k2, %k3, %k2 1323; AVX512BW-NEXT: kshiftlw $15, %k2, %k2 1324; AVX512BW-NEXT: kshiftrw $9, %k2, %k2 1325; AVX512BW-NEXT: kxorw %k2, %k1, %k1 1326; AVX512BW-NEXT: kshiftlw $9, %k1, %k1 1327; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 1328; AVX512BW-NEXT: kshiftlw $7, %k0, %k0 1329; AVX512BW-NEXT: korw %k0, %k1, %k0 1330; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 1331; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 1332; AVX512BW-NEXT: vzeroupper 1333; AVX512BW-NEXT: retq 1334; 1335; AVX512DQ-LABEL: test18: 1336; AVX512DQ: ## %bb.0: 1337; AVX512DQ-NEXT: kmovw %edi, %k1 1338; AVX512DQ-NEXT: kmovw %esi, %k2 1339; AVX512DQ-NEXT: kshiftrw $8, %k2, %k0 1340; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2 1341; AVX512DQ-NEXT: kshiftrb $6, %k1, %k3 1342; AVX512DQ-NEXT: kxorb %k2, %k3, %k2 1343; AVX512DQ-NEXT: kshiftlb $7, %k2, %k2 1344; AVX512DQ-NEXT: kshiftrb $1, %k2, %k2 1345; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 1346; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1 1347; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 1348; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 1349; AVX512DQ-NEXT: korb %k0, %k1, %k0 1350; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1351; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 1352; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 1353; AVX512DQ-NEXT: vzeroupper 1354; AVX512DQ-NEXT: retq 1355; 1356; X86-LABEL: test18: 1357; X86: ## %bb.0: 1358; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 1359; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1360; X86-NEXT: kshiftrw $9, %k1, %k2 1361; X86-NEXT: kshiftrw $8, %k1, %k1 1362; X86-NEXT: kshiftlb $7, %k1, %k1 1363; X86-NEXT: kshiftrb $6, %k0, %k3 1364; X86-NEXT: kxorb %k2, %k3, %k2 1365; X86-NEXT: kshiftlb $7, %k2, %k2 1366; X86-NEXT: kshiftrb 
$1, %k2, %k2 1367; X86-NEXT: kxorb %k2, %k0, %k0 1368; X86-NEXT: kshiftlb $1, %k0, %k0 1369; X86-NEXT: kshiftrb $1, %k0, %k0 1370; X86-NEXT: korb %k1, %k0, %k0 1371; X86-NEXT: vpmovm2w %k0, %xmm0 1372; X86-NEXT: retl 1373 %b = bitcast i8 %a to <8 x i1> 1374 %b1 = bitcast i16 %y to <16 x i1> 1375 %el1 = extractelement <16 x i1>%b1, i32 8 1376 %el2 = extractelement <16 x i1>%b1, i32 9 1377 %c = insertelement <8 x i1>%b, i1 %el1, i32 7 1378 %d = insertelement <8 x i1>%c, i1 %el2, i32 6 1379 ret <8 x i1>%d 1380} 1381define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { 1382; KNL-LABEL: test21: 1383; KNL: ## %bb.0: 1384; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 1385; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero 1386; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 1387; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 1388; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 1389; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0 1390; KNL-NEXT: vpsllw $15, %ymm3, %ymm2 1391; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 1392; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 1393; KNL-NEXT: retq 1394; 1395; SKX-LABEL: test21: 1396; SKX: ## %bb.0: 1397; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 1398; SKX-NEXT: vpmovb2m %ymm1, %k1 1399; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1400; SKX-NEXT: retq 1401; 1402; AVX512BW-LABEL: test21: 1403; AVX512BW: ## %bb.0: 1404; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1 1405; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 1406; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1407; AVX512BW-NEXT: retq 1408; 1409; AVX512DQ-LABEL: test21: 1410; AVX512DQ: ## %bb.0: 1411; AVX512DQ-NEXT: vextracti128 $1, 
%ymm2, %xmm3 1412; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero 1413; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 1414; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2 1415; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 1416; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0 1417; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2 1418; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 1419; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 1420; AVX512DQ-NEXT: retq 1421; 1422; X86-LABEL: test21: 1423; X86: ## %bb.0: 1424; X86-NEXT: vpsllw $7, %ymm1, %ymm1 1425; X86-NEXT: vpmovb2m %ymm1, %k1 1426; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1427; X86-NEXT: retl 1428 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 1429 ret <32 x i16> %ret 1430} 1431 1432define void @test22(<4 x i1> %a, <4 x i1>* %addr) { 1433; KNL-LABEL: test22: 1434; KNL: ## %bb.0: 1435; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1436; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1437; KNL-NEXT: kmovw %k0, %eax 1438; KNL-NEXT: movb %al, (%rdi) 1439; KNL-NEXT: vzeroupper 1440; KNL-NEXT: retq 1441; 1442; SKX-LABEL: test22: 1443; SKX: ## %bb.0: 1444; SKX-NEXT: vpslld $31, %xmm0, %xmm0 1445; SKX-NEXT: vpmovd2m %xmm0, %k0 1446; SKX-NEXT: kmovb %k0, (%rdi) 1447; SKX-NEXT: retq 1448; 1449; AVX512BW-LABEL: test22: 1450; AVX512BW: ## %bb.0: 1451; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 1452; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 1453; AVX512BW-NEXT: kmovd %k0, %eax 1454; AVX512BW-NEXT: movb %al, (%rdi) 1455; AVX512BW-NEXT: vzeroupper 1456; AVX512BW-NEXT: retq 1457; 1458; AVX512DQ-LABEL: test22: 1459; AVX512DQ: ## %bb.0: 1460; 
AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1461; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1462; AVX512DQ-NEXT: kmovb %k0, (%rdi) 1463; AVX512DQ-NEXT: vzeroupper 1464; AVX512DQ-NEXT: retq 1465; 1466; X86-LABEL: test22: 1467; X86: ## %bb.0: 1468; X86-NEXT: vpslld $31, %xmm0, %xmm0 1469; X86-NEXT: vpmovd2m %xmm0, %k0 1470; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1471; X86-NEXT: kmovb %k0, (%eax) 1472; X86-NEXT: retl 1473 store <4 x i1> %a, <4 x i1>* %addr 1474 ret void 1475} 1476 

; test23: stores a <2 x i1> argument through %addr. Expected lowering on every
; run line: shift each element left by 63 (vpsllq $63), turn the vector into a
; mask register (vptestmq on KNL/AVX512BW, vpmovq2m on SKX/AVX512DQ/X86), then
; write one byte (kmovb straight to memory, or kmovw/kmovd to a GPR followed
; by movb where kmovb is not used).
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test23:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vpmovq2m %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test23:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test23:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test23:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpmovq2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}

1522define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { 1523; KNL-LABEL: store_v1i1: 1524; KNL: ## %bb.0: 1525; KNL-NEXT: kmovw %edi, %k0 1526; KNL-NEXT: kxnorw %k0, %k0, %k1 1527; KNL-NEXT: kxorw %k1, %k0, %k0 
1528; KNL-NEXT: kmovw %k0, %eax 1529; KNL-NEXT: movb %al, (%rsi) 1530; KNL-NEXT: retq 1531; 1532; SKX-LABEL: store_v1i1: 1533; SKX: ## %bb.0: 1534; SKX-NEXT: kmovd %edi, %k0 1535; SKX-NEXT: kxnorw %k0, %k0, %k1 1536; SKX-NEXT: kxorw %k1, %k0, %k0 1537; SKX-NEXT: kmovb %k0, (%rsi) 1538; SKX-NEXT: retq 1539; 1540; AVX512BW-LABEL: store_v1i1: 1541; AVX512BW: ## %bb.0: 1542; AVX512BW-NEXT: kmovd %edi, %k0 1543; AVX512BW-NEXT: kxnorw %k0, %k0, %k1 1544; AVX512BW-NEXT: kxorw %k1, %k0, %k0 1545; AVX512BW-NEXT: kmovd %k0, %eax 1546; AVX512BW-NEXT: movb %al, (%rsi) 1547; AVX512BW-NEXT: retq 1548; 1549; AVX512DQ-LABEL: store_v1i1: 1550; AVX512DQ: ## %bb.0: 1551; AVX512DQ-NEXT: kmovw %edi, %k0 1552; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1 1553; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 1554; AVX512DQ-NEXT: kmovb %k0, (%rsi) 1555; AVX512DQ-NEXT: retq 1556; 1557; X86-LABEL: store_v1i1: 1558; X86: ## %bb.0: 1559; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1560; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1561; X86-NEXT: kxnorw %k0, %k0, %k1 1562; X86-NEXT: kxorw %k1, %k0, %k0 1563; X86-NEXT: kmovb %k0, (%eax) 1564; X86-NEXT: retl 1565 %x = xor <1 x i1> %c, <i1 1> 1566 store <1 x i1> %x, <1 x i1>* %ptr, align 4 1567 ret void 1568} 1569 1570define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { 1571; KNL-LABEL: store_v2i1: 1572; KNL: ## %bb.0: 1573; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1574; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 1575; KNL-NEXT: kmovw %k0, %eax 1576; KNL-NEXT: movb %al, (%rdi) 1577; KNL-NEXT: vzeroupper 1578; KNL-NEXT: retq 1579; 1580; SKX-LABEL: store_v2i1: 1581; SKX: ## %bb.0: 1582; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 1583; SKX-NEXT: vpmovq2m %xmm0, %k0 1584; SKX-NEXT: knotw %k0, %k0 1585; SKX-NEXT: kmovb %k0, (%rdi) 1586; SKX-NEXT: retq 1587; 1588; AVX512BW-LABEL: store_v2i1: 1589; AVX512BW: ## %bb.0: 1590; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 1591; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 1592; AVX512BW-NEXT: kmovd %k0, %eax 1593; AVX512BW-NEXT: movb %al, (%rdi) 1594; 
AVX512BW-NEXT: vzeroupper 1595; AVX512BW-NEXT: retq 1596; 1597; AVX512DQ-LABEL: store_v2i1: 1598; AVX512DQ: ## %bb.0: 1599; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1600; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 1601; AVX512DQ-NEXT: knotw %k0, %k0 1602; AVX512DQ-NEXT: kmovb %k0, (%rdi) 1603; AVX512DQ-NEXT: vzeroupper 1604; AVX512DQ-NEXT: retq 1605; 1606; X86-LABEL: store_v2i1: 1607; X86: ## %bb.0: 1608; X86-NEXT: vpsllq $63, %xmm0, %xmm0 1609; X86-NEXT: vpmovq2m %xmm0, %k0 1610; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1611; X86-NEXT: knotw %k0, %k0 1612; X86-NEXT: kmovb %k0, (%eax) 1613; X86-NEXT: retl 1614 %x = xor <2 x i1> %c, <i1 1, i1 1> 1615 store <2 x i1> %x, <2 x i1>* %ptr, align 4 1616 ret void 1617} 1618 1619define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { 1620; KNL-LABEL: store_v4i1: 1621; KNL: ## %bb.0: 1622; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1623; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 1624; KNL-NEXT: kmovw %k0, %eax 1625; KNL-NEXT: movb %al, (%rdi) 1626; KNL-NEXT: vzeroupper 1627; KNL-NEXT: retq 1628; 1629; SKX-LABEL: store_v4i1: 1630; SKX: ## %bb.0: 1631; SKX-NEXT: vpslld $31, %xmm0, %xmm0 1632; SKX-NEXT: vpmovd2m %xmm0, %k0 1633; SKX-NEXT: knotw %k0, %k0 1634; SKX-NEXT: kmovb %k0, (%rdi) 1635; SKX-NEXT: retq 1636; 1637; AVX512BW-LABEL: store_v4i1: 1638; AVX512BW: ## %bb.0: 1639; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 1640; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 1641; AVX512BW-NEXT: kmovd %k0, %eax 1642; AVX512BW-NEXT: movb %al, (%rdi) 1643; AVX512BW-NEXT: vzeroupper 1644; AVX512BW-NEXT: retq 1645; 1646; AVX512DQ-LABEL: store_v4i1: 1647; AVX512DQ: ## %bb.0: 1648; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1649; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1650; AVX512DQ-NEXT: knotw %k0, %k0 1651; AVX512DQ-NEXT: kmovb %k0, (%rdi) 1652; AVX512DQ-NEXT: vzeroupper 1653; AVX512DQ-NEXT: retq 1654; 1655; X86-LABEL: store_v4i1: 1656; X86: ## %bb.0: 1657; X86-NEXT: vpslld $31, %xmm0, %xmm0 1658; X86-NEXT: vpmovd2m %xmm0, %k0 1659; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 
1660; X86-NEXT: knotw %k0, %k0 1661; X86-NEXT: kmovb %k0, (%eax) 1662; X86-NEXT: retl 1663 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1> 1664 store <4 x i1> %x, <4 x i1>* %ptr, align 4 1665 ret void 1666} 1667 

; store_v8i1: inverts an <8 x i1> argument (xor with all-ones) and stores the
; result through %ptr. In the expected output the inversion appears as
; vptestnmq on KNL and as an explicit knotb/knotw on the other run lines; the
; store is a single byte (kmovb to memory, or kmovw/kmovd + movb).
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v8i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v8i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    knotw %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v8i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    knotb %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v8i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotb %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %x, <8 x i1>* %ptr, align 4
  ret void
}

1720define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { 1721; KNL-LABEL: store_v16i1: 1722; KNL: ## %bb.0: 1723; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1724; 
KNL-NEXT: vpslld $31, %zmm0, %zmm0 1725; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 1726; KNL-NEXT: kmovw %k0, (%rdi) 1727; KNL-NEXT: vzeroupper 1728; KNL-NEXT: retq 1729; 1730; SKX-LABEL: store_v16i1: 1731; SKX: ## %bb.0: 1732; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1733; SKX-NEXT: vpmovb2m %xmm0, %k0 1734; SKX-NEXT: knotw %k0, %k0 1735; SKX-NEXT: kmovw %k0, (%rdi) 1736; SKX-NEXT: retq 1737; 1738; AVX512BW-LABEL: store_v16i1: 1739; AVX512BW: ## %bb.0: 1740; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 1741; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 1742; AVX512BW-NEXT: knotw %k0, %k0 1743; AVX512BW-NEXT: kmovw %k0, (%rdi) 1744; AVX512BW-NEXT: vzeroupper 1745; AVX512BW-NEXT: retq 1746; 1747; AVX512DQ-LABEL: store_v16i1: 1748; AVX512DQ: ## %bb.0: 1749; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 1750; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 1751; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1752; AVX512DQ-NEXT: knotw %k0, %k0 1753; AVX512DQ-NEXT: kmovw %k0, (%rdi) 1754; AVX512DQ-NEXT: vzeroupper 1755; AVX512DQ-NEXT: retq 1756; 1757; X86-LABEL: store_v16i1: 1758; X86: ## %bb.0: 1759; X86-NEXT: vpsllw $7, %xmm0, %xmm0 1760; X86-NEXT: vpmovb2m %xmm0, %k0 1761; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1762; X86-NEXT: knotw %k0, %k0 1763; X86-NEXT: kmovw %k0, (%eax) 1764; X86-NEXT: retl 1765 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 1766 store <16 x i1> %x, <16 x i1>* %ptr, align 4 1767 ret void 1768} 1769 1770;void f2(int); 1771;void f1(int c) 1772;{ 1773; static int v = 0; 1774; if (v == 0) 1775; v = 1; 1776; else 1777; v = 0; 1778; f2(v); 1779;} 1780 1781@f1.v = internal unnamed_addr global i1 false, align 4 1782 1783define void @f1(i32 %c) { 1784; CHECK-LABEL: f1: 1785; CHECK: ## %bb.0: ## %entry 1786; CHECK-NEXT: movzbl {{.*}}(%rip), %edi 1787; CHECK-NEXT: xorl $1, %edi 1788; CHECK-NEXT: movb %dil, {{.*}}(%rip) 1789; CHECK-NEXT: jmp _f2 ## TAILCALL 1790; 1791; X86-LABEL: f1: 1792; X86: ## %bb.0: ## %entry 1793; X86-NEXT: subl $12, 
%esp 1794; X86-NEXT: .cfi_def_cfa_offset 16 1795; X86-NEXT: movzbl _f1.v, %eax 1796; X86-NEXT: xorl $1, %eax 1797; X86-NEXT: movb %al, _f1.v 1798; X86-NEXT: movl %eax, (%esp) 1799; X86-NEXT: calll _f2 1800; X86-NEXT: addl $12, %esp 1801; X86-NEXT: retl 1802entry: 1803 %.b1 = load i1, i1* @f1.v, align 4 1804 %not..b1 = xor i1 %.b1, true 1805 store i1 %not..b1, i1* @f1.v, align 4 1806 %0 = zext i1 %not..b1 to i32 1807 tail call void @f2(i32 %0) #2 1808 ret void 1809} 1810 

; External callee used by @f1.
declare void @f2(i32) #1

; store_i16_i1: truncates an i16 argument to i1 and stores it through %y.
; The checks expect the truncation to become an AND with 1 followed by a
; one-byte store.
define void @store_i16_i1(i16 %x, i1 *%y) {
; CHECK-LABEL: store_i16_i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_i16_i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $1, %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
  %c = trunc i16 %x to i1
  store i1 %c, i1* %y
  ret void
}

; store_i8_i1: same as store_i16_i1 for an i8 argument. The i686 run line
; expects the masking to be done on the byte register (andb $1, %cl).
define void @store_i8_i1(i8 %x, i1 *%y) {
; CHECK-LABEL: store_i8_i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_i8_i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $1, %cl
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
  %c = trunc i8 %x to i1
  store i1 %c, i1* %y
  ret void
}

1851define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { 1852; KNL-LABEL: test_build_vec_v32i1: 1853; KNL: ## %bb.0: 1854; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1855; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 1856; KNL-NEXT: retq 1857; 1858; SKX-LABEL: test_build_vec_v32i1: 1859; SKX: ## %bb.0: 1860; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495 1861; SKX-NEXT: kmovd %eax, %k1 1862; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1863; SKX-NEXT: retq 1864; 1865; 
AVX512BW-LABEL: test_build_vec_v32i1: 1866; AVX512BW: ## %bb.0: 1867; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495 1868; AVX512BW-NEXT: kmovd %eax, %k1 1869; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1870; AVX512BW-NEXT: retq 1871; 1872; AVX512DQ-LABEL: test_build_vec_v32i1: 1873; AVX512DQ: ## %bb.0: 1874; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1875; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 1876; AVX512DQ-NEXT: retq 1877; 1878; X86-LABEL: test_build_vec_v32i1: 1879; X86: ## %bb.0: 1880; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495 1881; X86-NEXT: kmovd %eax, %k1 1882; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1883; X86-NEXT: retl 1884 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer 1885 ret <32 x i16> %ret 1886} 1887 1888define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { 1889; KNL-LABEL: test_build_vec_v64i1: 1890; KNL: ## %bb.0: 1891; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1892; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 1893; KNL-NEXT: retq 1894; 1895; SKX-LABEL: test_build_vec_v64i1: 1896; SKX: ## %bb.0: 1897; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero 1898; SKX-NEXT: retq 1899; 1900; AVX512BW-LABEL: test_build_vec_v64i1: 1901; AVX512BW: ## %bb.0: 1902; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = 
zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero 1903; AVX512BW-NEXT: retq 1904; 1905; AVX512DQ-LABEL: test_build_vec_v64i1: 1906; AVX512DQ: ## %bb.0: 1907; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1908; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 1909; AVX512DQ-NEXT: retq 1910; 1911; X86-LABEL: test_build_vec_v64i1: 1912; X86: ## %bb.0: 1913; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero 1914; X86-NEXT: retl 1915 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer 1916 ret <64 x i8> %ret 1917} 1918 1919define void @ktest_1(<8 x double> %in, double * %base) { 1920; KNL-LABEL: ktest_1: 1921; 
KNL: ## %bb.0: 1922; KNL-NEXT: vmovupd (%rdi), %zmm1 1923; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1924; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 1925; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 1926; KNL-NEXT: kmovw %k0, %eax 1927; KNL-NEXT: testb %al, %al 1928; KNL-NEXT: je LBB42_2 1929; KNL-NEXT: ## %bb.1: ## %L1 1930; KNL-NEXT: vmovapd %zmm0, (%rdi) 1931; KNL-NEXT: vzeroupper 1932; KNL-NEXT: retq 1933; KNL-NEXT: LBB42_2: ## %L2 1934; KNL-NEXT: vmovapd %zmm0, 8(%rdi) 1935; KNL-NEXT: vzeroupper 1936; KNL-NEXT: retq 1937; 1938; SKX-LABEL: ktest_1: 1939; SKX: ## %bb.0: 1940; SKX-NEXT: vmovupd (%rdi), %zmm1 1941; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1942; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 1943; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 1944; SKX-NEXT: kortestb %k0, %k0 1945; SKX-NEXT: je LBB42_2 1946; SKX-NEXT: ## %bb.1: ## %L1 1947; SKX-NEXT: vmovapd %zmm0, (%rdi) 1948; SKX-NEXT: vzeroupper 1949; SKX-NEXT: retq 1950; SKX-NEXT: LBB42_2: ## %L2 1951; SKX-NEXT: vmovapd %zmm0, 8(%rdi) 1952; SKX-NEXT: vzeroupper 1953; SKX-NEXT: retq 1954; 1955; AVX512BW-LABEL: ktest_1: 1956; AVX512BW: ## %bb.0: 1957; AVX512BW-NEXT: vmovupd (%rdi), %zmm1 1958; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1959; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 1960; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 1961; AVX512BW-NEXT: kmovd %k0, %eax 1962; AVX512BW-NEXT: testb %al, %al 1963; AVX512BW-NEXT: je LBB42_2 1964; AVX512BW-NEXT: ## %bb.1: ## %L1 1965; AVX512BW-NEXT: vmovapd %zmm0, (%rdi) 1966; AVX512BW-NEXT: vzeroupper 1967; AVX512BW-NEXT: retq 1968; AVX512BW-NEXT: LBB42_2: ## %L2 1969; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi) 1970; AVX512BW-NEXT: vzeroupper 1971; AVX512BW-NEXT: retq 1972; 1973; AVX512DQ-LABEL: ktest_1: 1974; AVX512DQ: ## %bb.0: 1975; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1 1976; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1977; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 1978; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 1979; AVX512DQ-NEXT: kortestb %k0, %k0 
1980; AVX512DQ-NEXT: je LBB42_2 1981; AVX512DQ-NEXT: ## %bb.1: ## %L1 1982; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi) 1983; AVX512DQ-NEXT: vzeroupper 1984; AVX512DQ-NEXT: retq 1985; AVX512DQ-NEXT: LBB42_2: ## %L2 1986; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi) 1987; AVX512DQ-NEXT: vzeroupper 1988; AVX512DQ-NEXT: retq 1989; 1990; X86-LABEL: ktest_1: 1991; X86: ## %bb.0: 1992; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1993; X86-NEXT: vmovupd (%eax), %zmm1 1994; X86-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1995; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z} 1996; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 1997; X86-NEXT: kortestb %k0, %k0 1998; X86-NEXT: je LBB42_2 1999; X86-NEXT: ## %bb.1: ## %L1 2000; X86-NEXT: vmovapd %zmm0, (%eax) 2001; X86-NEXT: vzeroupper 2002; X86-NEXT: retl 2003; X86-NEXT: LBB42_2: ## %L2 2004; X86-NEXT: vmovapd %zmm0, 8(%eax) 2005; X86-NEXT: vzeroupper 2006; X86-NEXT: retl 2007 %addr1 = getelementptr double, double * %base, i64 0 2008 %addr2 = getelementptr double, double * %base, i64 1 2009 2010 %vaddr1 = bitcast double* %addr1 to <8 x double>* 2011 %vaddr2 = bitcast double* %addr2 to <8 x double>* 2012 2013 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 2014 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 2015 2016 %sel1 = fcmp ogt <8 x double>%in, %val1 2017 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer 2018 %sel2 = fcmp olt <8 x double> %in, %val3 2019 %sel3 = and <8 x i1> %sel1, %sel2 2020 2021 %int_sel3 = bitcast <8 x i1> %sel3 to i8 2022 %res = icmp eq i8 %int_sel3, zeroinitializer 2023 br i1 %res, label %L2, label %L1 2024L1: 2025 store <8 x double> %in, <8 x double>* %vaddr1 2026 br label %End 2027L2: 2028 store <8 x double> %in, <8 x double>* %vaddr2 2029 br label %End 2030End: 2031 ret void 2032} 2033 ; ktest_2 loads two align-1 <32 x float> vectors (at %base and one float past it), ORs the (%in ogt %val1) mask with the (%in olt %val3) mask where %val3 is %val2 selected by the first mask (zero elsewhere), bitcasts the <32 x i1> result to i32 and branches on it being zero, storing %in through %vaddr2 when zero and through %vaddr1 otherwise. 2034define void @ktest_2(<32 x float> %in, float * %base) { 2035; 2036; KNL-LABEL: ktest_2: 2037; KNL: ## %bb.0: 2038; KNL-NEXT: vmovups (%rdi), %zmm2 2039; KNL-NEXT: vmovups 64(%rdi), %zmm3 2040; 
KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 2041; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 2042; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} 2043; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} 2044; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0 2045; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3 2046; KNL-NEXT: korw %k3, %k2, %k2 2047; KNL-NEXT: kmovw %k2, %eax 2048; KNL-NEXT: korw %k0, %k1, %k0 2049; KNL-NEXT: kmovw %k0, %ecx 2050; KNL-NEXT: shll $16, %ecx 2051; KNL-NEXT: orl %eax, %ecx 2052; KNL-NEXT: je LBB43_2 2053; KNL-NEXT: ## %bb.1: ## %L1 2054; KNL-NEXT: vmovaps %zmm0, (%rdi) 2055; KNL-NEXT: vmovaps %zmm1, 64(%rdi) 2056; KNL-NEXT: vzeroupper 2057; KNL-NEXT: retq 2058; KNL-NEXT: LBB43_2: ## %L2 2059; KNL-NEXT: vmovaps %zmm0, 4(%rdi) 2060; KNL-NEXT: vmovaps %zmm1, 68(%rdi) 2061; KNL-NEXT: vzeroupper 2062; KNL-NEXT: retq 2063; 2064; SKX-LABEL: ktest_2: 2065; SKX: ## %bb.0: 2066; SKX-NEXT: vmovups (%rdi), %zmm2 2067; SKX-NEXT: vmovups 64(%rdi), %zmm3 2068; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 2069; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 2070; SKX-NEXT: kunpckwd %k1, %k2, %k0 2071; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} 2072; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} 2073; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 2074; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 2075; SKX-NEXT: kunpckwd %k1, %k2, %k1 2076; SKX-NEXT: kortestd %k1, %k0 2077; SKX-NEXT: je LBB43_2 2078; SKX-NEXT: ## %bb.1: ## %L1 2079; SKX-NEXT: vmovaps %zmm0, (%rdi) 2080; SKX-NEXT: vmovaps %zmm1, 64(%rdi) 2081; SKX-NEXT: vzeroupper 2082; SKX-NEXT: retq 2083; SKX-NEXT: LBB43_2: ## %L2 2084; SKX-NEXT: vmovaps %zmm0, 4(%rdi) 2085; SKX-NEXT: vmovaps %zmm1, 68(%rdi) 2086; SKX-NEXT: vzeroupper 2087; SKX-NEXT: retq 2088; 2089; AVX512BW-LABEL: ktest_2: 2090; AVX512BW: ## %bb.0: 2091; AVX512BW-NEXT: vmovups (%rdi), %zmm2 2092; AVX512BW-NEXT: vmovups 64(%rdi), %zmm3 2093; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k1 2094; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k2 2095; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0 2096; AVX512BW-NEXT: vmovups 68(%rdi), 
%zmm2 {%k2} {z} 2097; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} 2098; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1 2099; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2 2100; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1 2101; AVX512BW-NEXT: kortestd %k1, %k0 2102; AVX512BW-NEXT: je LBB43_2 2103; AVX512BW-NEXT: ## %bb.1: ## %L1 2104; AVX512BW-NEXT: vmovaps %zmm0, (%rdi) 2105; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi) 2106; AVX512BW-NEXT: vzeroupper 2107; AVX512BW-NEXT: retq 2108; AVX512BW-NEXT: LBB43_2: ## %L2 2109; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi) 2110; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi) 2111; AVX512BW-NEXT: vzeroupper 2112; AVX512BW-NEXT: retq 2113; 2114; AVX512DQ-LABEL: ktest_2: 2115; AVX512DQ: ## %bb.0: 2116; AVX512DQ-NEXT: vmovups (%rdi), %zmm2 2117; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3 2118; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1 2119; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2 2120; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} 2121; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} 2122; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0 2123; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3 2124; AVX512DQ-NEXT: korw %k3, %k2, %k2 2125; AVX512DQ-NEXT: kmovw %k2, %eax 2126; AVX512DQ-NEXT: korw %k0, %k1, %k0 2127; AVX512DQ-NEXT: kmovw %k0, %ecx 2128; AVX512DQ-NEXT: shll $16, %ecx 2129; AVX512DQ-NEXT: orl %eax, %ecx 2130; AVX512DQ-NEXT: je LBB43_2 2131; AVX512DQ-NEXT: ## %bb.1: ## %L1 2132; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi) 2133; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi) 2134; AVX512DQ-NEXT: vzeroupper 2135; AVX512DQ-NEXT: retq 2136; AVX512DQ-NEXT: LBB43_2: ## %L2 2137; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi) 2138; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi) 2139; AVX512DQ-NEXT: vzeroupper 2140; AVX512DQ-NEXT: retq 2141; 2142; X86-LABEL: ktest_2: 2143; X86: ## %bb.0: 2144; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2145; X86-NEXT: vmovups (%eax), %zmm2 2146; X86-NEXT: vmovups 64(%eax), %zmm3 2147; X86-NEXT: vcmpltps %zmm0, %zmm2, %k1 2148; X86-NEXT: vcmpltps %zmm1, %zmm3, %k2 2149; 
X86-NEXT: kunpckwd %k1, %k2, %k0 2150; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z} 2151; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z} 2152; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1 2153; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2 2154; X86-NEXT: kunpckwd %k1, %k2, %k1 2155; X86-NEXT: kortestd %k1, %k0 2156; X86-NEXT: je LBB43_2 2157; X86-NEXT: ## %bb.1: ## %L1 2158; X86-NEXT: vmovaps %zmm0, (%eax) 2159; X86-NEXT: vmovaps %zmm1, 64(%eax) 2160; X86-NEXT: vzeroupper 2161; X86-NEXT: retl 2162; X86-NEXT: LBB43_2: ## %L2 2163; X86-NEXT: vmovaps %zmm0, 4(%eax) 2164; X86-NEXT: vmovaps %zmm1, 68(%eax) 2165; X86-NEXT: vzeroupper 2166; X86-NEXT: retl 2167 %addr1 = getelementptr float, float * %base, i64 0 2168 %addr2 = getelementptr float, float * %base, i64 1 2169 2170 %vaddr1 = bitcast float* %addr1 to <32 x float>* 2171 %vaddr2 = bitcast float* %addr2 to <32 x float>* 2172 2173 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 2174 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 2175 2176 %sel1 = fcmp ogt <32 x float>%in, %val1 2177 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer 2178 %sel2 = fcmp olt <32 x float> %in, %val3 2179 %sel3 = or <32 x i1> %sel1, %sel2 2180 2181 %int_sel3 = bitcast <32 x i1> %sel3 to i32 2182 %res = icmp eq i32 %int_sel3, zeroinitializer 2183 br i1 %res, label %L2, label %L1 2184L1: 2185 store <32 x float> %in, <32 x float>* %vaddr1 2186 br label %End 2187L2: 2188 store <32 x float> %in, <32 x float>* %vaddr2 2189 br label %End 2190End: 2191 ret void 2192} 2193 ; load_8i1 loads an <8 x i1> mask from %a and returns it sign-extended to <8 x i64>. 2194define <8 x i64> @load_8i1(<8 x i1>* %a) { 2195; KNL-LABEL: load_8i1: 2196; KNL: ## %bb.0: 2197; KNL-NEXT: movzbl (%rdi), %eax 2198; KNL-NEXT: kmovw %eax, %k1 2199; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2200; KNL-NEXT: retq 2201; 2202; SKX-LABEL: load_8i1: 2203; SKX: ## %bb.0: 2204; SKX-NEXT: kmovb (%rdi), %k0 2205; SKX-NEXT: vpmovm2q %k0, %zmm0 2206; SKX-NEXT: retq 2207; 2208; AVX512BW-LABEL: load_8i1: 2209; AVX512BW: ## 
%bb.0: 2210; AVX512BW-NEXT: movzbl (%rdi), %eax 2211; AVX512BW-NEXT: kmovd %eax, %k1 2212; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2213; AVX512BW-NEXT: retq 2214; 2215; AVX512DQ-LABEL: load_8i1: 2216; AVX512DQ: ## %bb.0: 2217; AVX512DQ-NEXT: kmovb (%rdi), %k0 2218; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2219; AVX512DQ-NEXT: retq 2220; 2221; X86-LABEL: load_8i1: 2222; X86: ## %bb.0: 2223; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2224; X86-NEXT: kmovb (%eax), %k0 2225; X86-NEXT: vpmovm2q %k0, %zmm0 2226; X86-NEXT: retl 2227 %b = load <8 x i1>, <8 x i1>* %a 2228 %c = sext <8 x i1> %b to <8 x i64> 2229 ret <8 x i64> %c 2230} 2231 ; load_16i1 loads a <16 x i1> mask from %a and returns it sign-extended to <16 x i32>. 2232define <16 x i32> @load_16i1(<16 x i1>* %a) { 2233; KNL-LABEL: load_16i1: 2234; KNL: ## %bb.0: 2235; KNL-NEXT: kmovw (%rdi), %k1 2236; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2237; KNL-NEXT: retq 2238; 2239; SKX-LABEL: load_16i1: 2240; SKX: ## %bb.0: 2241; SKX-NEXT: kmovw (%rdi), %k0 2242; SKX-NEXT: vpmovm2d %k0, %zmm0 2243; SKX-NEXT: retq 2244; 2245; AVX512BW-LABEL: load_16i1: 2246; AVX512BW: ## %bb.0: 2247; AVX512BW-NEXT: kmovw (%rdi), %k1 2248; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2249; AVX512BW-NEXT: retq 2250; 2251; AVX512DQ-LABEL: load_16i1: 2252; AVX512DQ: ## %bb.0: 2253; AVX512DQ-NEXT: kmovw (%rdi), %k0 2254; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 2255; AVX512DQ-NEXT: retq 2256; 2257; X86-LABEL: load_16i1: 2258; X86: ## %bb.0: 2259; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2260; X86-NEXT: kmovw (%eax), %k0 2261; X86-NEXT: vpmovm2d %k0, %zmm0 2262; X86-NEXT: retl 2263 %b = load <16 x i1>, <16 x i1>* %a 2264 %c = sext <16 x i1> %b to <16 x i32> 2265 ret <16 x i32> %c 2266} 2267 ; load_2i1 loads a <2 x i1> mask from %a and returns it sign-extended to <2 x i16>. 2268define <2 x i16> @load_2i1(<2 x i1>* %a) { 2269; KNL-LABEL: load_2i1: 2270; KNL: ## %bb.0: 2271; KNL-NEXT: movzbl (%rdi), %eax 2272; KNL-NEXT: kmovw %eax, %k1 2273; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2274; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2275; KNL-NEXT: 
vzeroupper 2276; KNL-NEXT: retq 2277; 2278; SKX-LABEL: load_2i1: 2279; SKX: ## %bb.0: 2280; SKX-NEXT: kmovb (%rdi), %k0 2281; SKX-NEXT: vpmovm2q %k0, %xmm0 2282; SKX-NEXT: retq 2283; 2284; AVX512BW-LABEL: load_2i1: 2285; AVX512BW: ## %bb.0: 2286; AVX512BW-NEXT: movzbl (%rdi), %eax 2287; AVX512BW-NEXT: kmovd %eax, %k1 2288; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2289; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2290; AVX512BW-NEXT: vzeroupper 2291; AVX512BW-NEXT: retq 2292; 2293; AVX512DQ-LABEL: load_2i1: 2294; AVX512DQ: ## %bb.0: 2295; AVX512DQ-NEXT: kmovb (%rdi), %k0 2296; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2297; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2298; AVX512DQ-NEXT: vzeroupper 2299; AVX512DQ-NEXT: retq 2300; 2301; X86-LABEL: load_2i1: 2302; X86: ## %bb.0: 2303; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2304; X86-NEXT: kmovb (%eax), %k0 2305; X86-NEXT: vpmovm2q %k0, %xmm0 2306; X86-NEXT: retl 2307 %b = load <2 x i1>, <2 x i1>* %a 2308 %c = sext <2 x i1> %b to <2 x i16> 2309 ret <2 x i16> %c 2310} 2311 ; load_4i1 loads a <4 x i1> mask from %a and returns it sign-extended to <4 x i16>. 2312define <4 x i16> @load_4i1(<4 x i1>* %a) { 2313; KNL-LABEL: load_4i1: 2314; KNL: ## %bb.0: 2315; KNL-NEXT: movzbl (%rdi), %eax 2316; KNL-NEXT: kmovw %eax, %k1 2317; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2318; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2319; KNL-NEXT: vzeroupper 2320; KNL-NEXT: retq 2321; 2322; SKX-LABEL: load_4i1: 2323; SKX: ## %bb.0: 2324; SKX-NEXT: kmovb (%rdi), %k0 2325; SKX-NEXT: vpmovm2d %k0, %xmm0 2326; SKX-NEXT: retq 2327; 2328; AVX512BW-LABEL: load_4i1: 2329; AVX512BW: ## %bb.0: 2330; AVX512BW-NEXT: movzbl (%rdi), %eax 2331; AVX512BW-NEXT: kmovd %eax, %k1 2332; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2333; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2334; AVX512BW-NEXT: vzeroupper 2335; AVX512BW-NEXT: retq 2336; 2337; AVX512DQ-LABEL: load_4i1: 2338; AVX512DQ: ## %bb.0: 2339; AVX512DQ-NEXT: kmovb (%rdi), %k0 
2340; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 2341; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2342; AVX512DQ-NEXT: vzeroupper 2343; AVX512DQ-NEXT: retq 2344; 2345; X86-LABEL: load_4i1: 2346; X86: ## %bb.0: 2347; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2348; X86-NEXT: kmovb (%eax), %k0 2349; X86-NEXT: vpmovm2d %k0, %xmm0 2350; X86-NEXT: retl 2351 %b = load <4 x i1>, <4 x i1>* %a 2352 %c = sext <4 x i1> %b to <4 x i16> 2353 ret <4 x i16> %c 2354} 2355 ; load_32i1 loads a <32 x i1> mask from %a and returns it sign-extended to <32 x i16>. 2356define <32 x i16> @load_32i1(<32 x i1>* %a) { 2357; KNL-LABEL: load_32i1: 2358; KNL: ## %bb.0: 2359; KNL-NEXT: kmovw (%rdi), %k1 2360; KNL-NEXT: kmovw 2(%rdi), %k2 2361; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2362; KNL-NEXT: vpmovdw %zmm0, %ymm0 2363; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 2364; KNL-NEXT: vpmovdw %zmm1, %ymm1 2365; KNL-NEXT: retq 2366; 2367; SKX-LABEL: load_32i1: 2368; SKX: ## %bb.0: 2369; SKX-NEXT: kmovd (%rdi), %k0 2370; SKX-NEXT: vpmovm2w %k0, %zmm0 2371; SKX-NEXT: retq 2372; 2373; AVX512BW-LABEL: load_32i1: 2374; AVX512BW: ## %bb.0: 2375; AVX512BW-NEXT: kmovd (%rdi), %k0 2376; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2377; AVX512BW-NEXT: retq 2378; 2379; AVX512DQ-LABEL: load_32i1: 2380; AVX512DQ: ## %bb.0: 2381; AVX512DQ-NEXT: kmovw (%rdi), %k0 2382; AVX512DQ-NEXT: kmovw 2(%rdi), %k1 2383; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 2384; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 2385; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1 2386; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1 2387; AVX512DQ-NEXT: retq 2388; 2389; X86-LABEL: load_32i1: 2390; X86: ## %bb.0: 2391; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2392; X86-NEXT: kmovd (%eax), %k0 2393; X86-NEXT: vpmovm2w %k0, %zmm0 2394; X86-NEXT: retl 2395 %b = load <32 x i1>, <32 x i1>* %a 2396 %c = sext <32 x i1> %b to <32 x i16> 2397 ret <32 x i16> %c 2398} 2399 ; load_64i1 loads a <64 x i1> mask from %a and returns it sign-extended to <64 x i8>. 2400define <64 x i8> @load_64i1(<64 x i1>* %a) { 2401; KNL-LABEL: load_64i1: 2402; KNL: ## %bb.0: 2403; KNL-NEXT: kmovw (%rdi), %k1 2404; KNL-NEXT: kmovw 2(%rdi), %k2 2405; KNL-NEXT: 
kmovw 4(%rdi), %k3 2406; KNL-NEXT: kmovw 6(%rdi), %k4 2407; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2408; KNL-NEXT: vpmovdb %zmm0, %xmm0 2409; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 2410; KNL-NEXT: vpmovdb %zmm1, %xmm1 2411; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2412; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z} 2413; KNL-NEXT: vpmovdb %zmm1, %xmm1 2414; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z} 2415; KNL-NEXT: vpmovdb %zmm2, %xmm2 2416; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2417; KNL-NEXT: retq 2418; 2419; SKX-LABEL: load_64i1: 2420; SKX: ## %bb.0: 2421; SKX-NEXT: kmovq (%rdi), %k0 2422; SKX-NEXT: vpmovm2b %k0, %zmm0 2423; SKX-NEXT: retq 2424; 2425; AVX512BW-LABEL: load_64i1: 2426; AVX512BW: ## %bb.0: 2427; AVX512BW-NEXT: kmovq (%rdi), %k0 2428; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2429; AVX512BW-NEXT: retq 2430; 2431; AVX512DQ-LABEL: load_64i1: 2432; AVX512DQ: ## %bb.0: 2433; AVX512DQ-NEXT: kmovw (%rdi), %k0 2434; AVX512DQ-NEXT: kmovw 2(%rdi), %k1 2435; AVX512DQ-NEXT: kmovw 4(%rdi), %k2 2436; AVX512DQ-NEXT: kmovw 6(%rdi), %k3 2437; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 2438; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2439; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1 2440; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 2441; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2442; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 2443; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 2444; AVX512DQ-NEXT: vpmovm2d %k3, %zmm2 2445; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 2446; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2447; AVX512DQ-NEXT: retq 2448; 2449; X86-LABEL: load_64i1: 2450; X86: ## %bb.0: 2451; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2452; X86-NEXT: kmovq (%eax), %k0 2453; X86-NEXT: vpmovm2b %k0, %zmm0 2454; X86-NEXT: retl 2455 %b = load <64 x i1>, <64 x i1>* %a 2456 %c = sext <64 x i1> %b to <64 x i8> 2457 ret <64 x i8> %c 2458} 2459 ; store_8i1 stores the <8 x i1> argument %v to %a without any other operation. 2460define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { 2461; KNL-LABEL: store_8i1: 2462; KNL: ## 
%bb.0: 2463; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 2464; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 2465; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 2466; KNL-NEXT: kmovw %k0, %eax 2467; KNL-NEXT: movb %al, (%rdi) 2468; KNL-NEXT: vzeroupper 2469; KNL-NEXT: retq 2470; 2471; SKX-LABEL: store_8i1: 2472; SKX: ## %bb.0: 2473; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 2474; SKX-NEXT: vpmovw2m %xmm0, %k0 2475; SKX-NEXT: kmovb %k0, (%rdi) 2476; SKX-NEXT: retq 2477; 2478; AVX512BW-LABEL: store_8i1: 2479; AVX512BW: ## %bb.0: 2480; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 2481; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 2482; AVX512BW-NEXT: kmovd %k0, %eax 2483; AVX512BW-NEXT: movb %al, (%rdi) 2484; AVX512BW-NEXT: vzeroupper 2485; AVX512BW-NEXT: retq 2486; 2487; AVX512DQ-LABEL: store_8i1: 2488; AVX512DQ: ## %bb.0: 2489; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 2490; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 2491; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 2492; AVX512DQ-NEXT: kmovb %k0, (%rdi) 2493; AVX512DQ-NEXT: vzeroupper 2494; AVX512DQ-NEXT: retq 2495; 2496; X86-LABEL: store_8i1: 2497; X86: ## %bb.0: 2498; X86-NEXT: vpsllw $15, %xmm0, %xmm0 2499; X86-NEXT: vpmovw2m %xmm0, %k0 2500; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2501; X86-NEXT: kmovb %k0, (%eax) 2502; X86-NEXT: retl 2503 store <8 x i1> %v, <8 x i1>* %a 2504 ret void 2505} 2506 ; store_8i1_1 truncates the <8 x i16> argument %v to <8 x i1> and stores the result to %a. 2507define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { 2508; KNL-LABEL: store_8i1_1: 2509; KNL: ## %bb.0: 2510; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 2511; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 2512; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 2513; KNL-NEXT: kmovw %k0, %eax 2514; KNL-NEXT: movb %al, (%rdi) 2515; KNL-NEXT: vzeroupper 2516; KNL-NEXT: retq 2517; 2518; SKX-LABEL: store_8i1_1: 2519; SKX: ## %bb.0: 2520; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 2521; SKX-NEXT: vpmovw2m %xmm0, %k0 2522; SKX-NEXT: kmovb %k0, (%rdi) 2523; SKX-NEXT: retq 2524; 2525; AVX512BW-LABEL: store_8i1_1: 2526; AVX512BW: ## %bb.0: 2527; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 2528; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 2529; 
AVX512BW-NEXT: kmovd %k0, %eax 2530; AVX512BW-NEXT: movb %al, (%rdi) 2531; AVX512BW-NEXT: vzeroupper 2532; AVX512BW-NEXT: retq 2533; 2534; AVX512DQ-LABEL: store_8i1_1: 2535; AVX512DQ: ## %bb.0: 2536; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 2537; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 2538; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 2539; AVX512DQ-NEXT: kmovb %k0, (%rdi) 2540; AVX512DQ-NEXT: vzeroupper 2541; AVX512DQ-NEXT: retq 2542; 2543; X86-LABEL: store_8i1_1: 2544; X86: ## %bb.0: 2545; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2546; X86-NEXT: vpsllw $15, %xmm0, %xmm0 2547; X86-NEXT: vpmovw2m %xmm0, %k0 2548; X86-NEXT: kmovb %k0, (%eax) 2549; X86-NEXT: retl 2550 %v1 = trunc <8 x i16> %v to <8 x i1> 2551 store <8 x i1> %v1, <8 x i1>* %a 2552 ret void 2553} 2554 ; store_16i1 stores the <16 x i1> argument %v to %a without any other operation. 2555define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { 2556; KNL-LABEL: store_16i1: 2557; KNL: ## %bb.0: 2558; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 2559; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2560; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 2561; KNL-NEXT: kmovw %k0, (%rdi) 2562; KNL-NEXT: vzeroupper 2563; KNL-NEXT: retq 2564; 2565; SKX-LABEL: store_16i1: 2566; SKX: ## %bb.0: 2567; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 2568; SKX-NEXT: vpmovb2m %xmm0, %k0 2569; SKX-NEXT: kmovw %k0, (%rdi) 2570; SKX-NEXT: retq 2571; 2572; AVX512BW-LABEL: store_16i1: 2573; AVX512BW: ## %bb.0: 2574; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 2575; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 2576; AVX512BW-NEXT: kmovw %k0, (%rdi) 2577; AVX512BW-NEXT: vzeroupper 2578; AVX512BW-NEXT: retq 2579; 2580; AVX512DQ-LABEL: store_16i1: 2581; AVX512DQ: ## %bb.0: 2582; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 2583; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2584; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 2585; AVX512DQ-NEXT: kmovw %k0, (%rdi) 2586; AVX512DQ-NEXT: vzeroupper 2587; AVX512DQ-NEXT: retq 2588; 2589; X86-LABEL: store_16i1: 2590; X86: ## %bb.0: 2591; X86-NEXT: vpsllw $7, %xmm0, %xmm0 2592; X86-NEXT: vpmovb2m %xmm0, %k0 2593; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2594; X86-NEXT: 
kmovw %k0, (%eax) 2595; X86-NEXT: retl 2596 store <16 x i1> %v, <16 x i1>* %a 2597 ret void 2598} 2599 ; store_32i1 stores the <32 x i1> argument %v to %a without any other operation. 2600define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { 2601; KNL-LABEL: store_32i1: 2602; KNL: ## %bb.0: 2603; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 2604; KNL-NEXT: vpslld $31, %zmm1, %zmm1 2605; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 2606; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 2607; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 2608; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2609; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 2610; KNL-NEXT: kmovw %k1, 2(%rdi) 2611; KNL-NEXT: kmovw %k0, (%rdi) 2612; KNL-NEXT: vzeroupper 2613; KNL-NEXT: retq 2614; 2615; SKX-LABEL: store_32i1: 2616; SKX: ## %bb.0: 2617; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 2618; SKX-NEXT: vpmovb2m %ymm0, %k0 2619; SKX-NEXT: kmovd %k0, (%rdi) 2620; SKX-NEXT: vzeroupper 2621; SKX-NEXT: retq 2622; 2623; AVX512BW-LABEL: store_32i1: 2624; AVX512BW: ## %bb.0: 2625; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 2626; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 2627; AVX512BW-NEXT: kmovd %k0, (%rdi) 2628; AVX512BW-NEXT: vzeroupper 2629; AVX512BW-NEXT: retq 2630; 2631; AVX512DQ-LABEL: store_32i1: 2632; AVX512DQ: ## %bb.0: 2633; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 2634; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 2635; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0 2636; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 2637; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 2638; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2639; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 2640; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 2641; AVX512DQ-NEXT: kmovw %k0, (%rdi) 2642; AVX512DQ-NEXT: vzeroupper 2643; AVX512DQ-NEXT: retq 2644; 2645; X86-LABEL: store_32i1: 2646; X86: ## %bb.0: 2647; X86-NEXT: vpsllw $7, %ymm0, %ymm0 2648; X86-NEXT: vpmovb2m %ymm0, %k0 2649; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2650; X86-NEXT: kmovd %k0, (%eax) 2651; X86-NEXT: vzeroupper 2652; X86-NEXT: retl 2653 store <32 x i1> %v, <32 x i1>* %a 2654 ret void 2655} 2656 ; store_32i1_1 truncates the <32 x i16> argument %v to <32 x i1> and stores the result to %a. 2657define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { 2658; 
KNL-LABEL: store_32i1_1: 2659; KNL: ## %bb.0: 2660; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 2661; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2662; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 2663; KNL-NEXT: vpmovsxwd %ymm1, %zmm0 2664; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2665; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 2666; KNL-NEXT: kmovw %k1, 2(%rdi) 2667; KNL-NEXT: kmovw %k0, (%rdi) 2668; KNL-NEXT: vzeroupper 2669; KNL-NEXT: retq 2670; 2671; SKX-LABEL: store_32i1_1: 2672; SKX: ## %bb.0: 2673; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 2674; SKX-NEXT: vpmovw2m %zmm0, %k0 2675; SKX-NEXT: kmovd %k0, (%rdi) 2676; SKX-NEXT: vzeroupper 2677; SKX-NEXT: retq 2678; 2679; AVX512BW-LABEL: store_32i1_1: 2680; AVX512BW: ## %bb.0: 2681; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0 2682; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 2683; AVX512BW-NEXT: kmovd %k0, (%rdi) 2684; AVX512BW-NEXT: vzeroupper 2685; AVX512BW-NEXT: retq 2686; 2687; AVX512DQ-LABEL: store_32i1_1: 2688; AVX512DQ: ## %bb.0: 2689; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 2690; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2691; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 2692; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 2693; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2694; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 2695; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 2696; AVX512DQ-NEXT: kmovw %k0, (%rdi) 2697; AVX512DQ-NEXT: vzeroupper 2698; AVX512DQ-NEXT: retq 2699; 2700; X86-LABEL: store_32i1_1: 2701; X86: ## %bb.0: 2702; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2703; X86-NEXT: vpsllw $15, %zmm0, %zmm0 2704; X86-NEXT: vpmovw2m %zmm0, %k0 2705; X86-NEXT: kmovd %k0, (%eax) 2706; X86-NEXT: vzeroupper 2707; X86-NEXT: retl 2708 %v1 = trunc <32 x i16> %v to <32 x i1> 2709 store <32 x i1> %v1, <32 x i1>* %a 2710 ret void 2711} 2712 2713 ; store_64i1 stores the <64 x i1> argument %v to %a without any other operation. 2714define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { 2715; 2716; KNL-LABEL: store_64i1: 2717; KNL: ## %bb.0: 2718; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 2719; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2720; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 2721; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 
2722; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2723; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 2724; KNL-NEXT: vpmovsxbd %xmm2, %zmm0 2725; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2726; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2 2727; KNL-NEXT: vpmovsxbd %xmm3, %zmm0 2728; KNL-NEXT: vpslld $31, %zmm0, %zmm0 2729; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3 2730; KNL-NEXT: kmovw %k3, 6(%rdi) 2731; KNL-NEXT: kmovw %k2, 4(%rdi) 2732; KNL-NEXT: kmovw %k1, 2(%rdi) 2733; KNL-NEXT: kmovw %k0, (%rdi) 2734; KNL-NEXT: vzeroupper 2735; KNL-NEXT: retq 2736; 2737; SKX-LABEL: store_64i1: 2738; SKX: ## %bb.0: 2739; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 2740; SKX-NEXT: vpmovb2m %zmm0, %k0 2741; SKX-NEXT: kmovq %k0, (%rdi) 2742; SKX-NEXT: vzeroupper 2743; SKX-NEXT: retq 2744; 2745; AVX512BW-LABEL: store_64i1: 2746; AVX512BW: ## %bb.0: 2747; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 2748; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 2749; AVX512BW-NEXT: kmovq %k0, (%rdi) 2750; AVX512BW-NEXT: vzeroupper 2751; AVX512BW-NEXT: retq 2752; 2753; AVX512DQ-LABEL: store_64i1: 2754; AVX512DQ: ## %bb.0: 2755; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 2756; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2757; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 2758; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 2759; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2760; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 2761; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 2762; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2763; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2 2764; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0 2765; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 2766; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 2767; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) 2768; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) 2769; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 2770; AVX512DQ-NEXT: kmovw %k0, (%rdi) 2771; AVX512DQ-NEXT: vzeroupper 2772; AVX512DQ-NEXT: retq 2773; 2774; X86-LABEL: store_64i1: 2775; X86: ## %bb.0: 2776; X86-NEXT: vpsllw $7, %zmm0, %zmm0 2777; X86-NEXT: vpmovb2m %zmm0, %k0 2778; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 2779; X86-NEXT: kmovq %k0, (%eax) 
2780; X86-NEXT: vzeroupper 2781; X86-NEXT: retl 2782 store <64 x i1> %v, <64 x i1>* %a 2783 ret void 2784} 2785 ; test_bitcast_v8i1_zext compares <16 x i32> %a with zero, keeps lanes 0-7 of the resulting mask via shufflevector, bitcasts them to i8, zero-extends to i32 and returns that value added to itself. 2786define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { 2787; KNL-LABEL: test_bitcast_v8i1_zext: 2788; KNL: ## %bb.0: 2789; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 2790; KNL-NEXT: kmovw %k0, %eax 2791; KNL-NEXT: movzbl %al, %eax 2792; KNL-NEXT: addl %eax, %eax 2793; KNL-NEXT: vzeroupper 2794; KNL-NEXT: retq 2795; 2796; SKX-LABEL: test_bitcast_v8i1_zext: 2797; SKX: ## %bb.0: 2798; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 2799; SKX-NEXT: kmovb %k0, %eax 2800; SKX-NEXT: addl %eax, %eax 2801; SKX-NEXT: vzeroupper 2802; SKX-NEXT: retq 2803; 2804; AVX512BW-LABEL: test_bitcast_v8i1_zext: 2805; AVX512BW: ## %bb.0: 2806; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 2807; AVX512BW-NEXT: kmovd %k0, %eax 2808; AVX512BW-NEXT: movzbl %al, %eax 2809; AVX512BW-NEXT: addl %eax, %eax 2810; AVX512BW-NEXT: vzeroupper 2811; AVX512BW-NEXT: retq 2812; 2813; AVX512DQ-LABEL: test_bitcast_v8i1_zext: 2814; AVX512DQ: ## %bb.0: 2815; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 2816; AVX512DQ-NEXT: kmovb %k0, %eax 2817; AVX512DQ-NEXT: addl %eax, %eax 2818; AVX512DQ-NEXT: vzeroupper 2819; AVX512DQ-NEXT: retq 2820; 2821; X86-LABEL: test_bitcast_v8i1_zext: 2822; X86: ## %bb.0: 2823; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 2824; X86-NEXT: kmovb %k0, %eax 2825; X86-NEXT: addl %eax, %eax 2826; X86-NEXT: vzeroupper 2827; X86-NEXT: retl 2828 %v1 = icmp eq <16 x i32> %a, zeroinitializer 2829 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2830 %mask1 = bitcast <8 x i1> %mask to i8 2831 %val = zext i8 %mask1 to i32 2832 %val1 = add i32 %val, %val 2833 ret i32 %val1 2834} 2835 ; test_bitcast_v16i1_zext compares <16 x i32> %a with zero, bitcasts the <16 x i1> mask to i16, zero-extends to i32 and returns that value added to itself. 2836define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { 2837; CHECK-LABEL: test_bitcast_v16i1_zext: 2838; CHECK: ## %bb.0: 2839; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 2840; CHECK-NEXT: kmovw %k0, %eax 2841; CHECK-NEXT: addl %eax, %eax 2842; CHECK-NEXT: 
vzeroupper 2843; CHECK-NEXT: retq 2844; 2845; X86-LABEL: test_bitcast_v16i1_zext: 2846; X86: ## %bb.0: 2847; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 2848; X86-NEXT: kmovw %k0, %eax 2849; X86-NEXT: addl %eax, %eax 2850; X86-NEXT: vzeroupper 2851; X86-NEXT: retl 2852 %v1 = icmp eq <16 x i32> %a, zeroinitializer 2853 %mask1 = bitcast <16 x i1> %v1 to i16 2854 %val = zext i16 %mask1 to i32 2855 %val1 = add i32 %val, %val 2856 ret i32 %val1 2857} 2858 ; test_v16i1_add bitcasts both i16 arguments to <16 x i1>, adds the two vectors and returns the sum bitcast back to i16. 2859define i16 @test_v16i1_add(i16 %x, i16 %y) { 2860; KNL-LABEL: test_v16i1_add: 2861; KNL: ## %bb.0: 2862; KNL-NEXT: kmovw %edi, %k0 2863; KNL-NEXT: kmovw %esi, %k1 2864; KNL-NEXT: kxorw %k1, %k0, %k0 2865; KNL-NEXT: kmovw %k0, %eax 2866; KNL-NEXT: ## kill: def $ax killed $ax killed $eax 2867; KNL-NEXT: retq 2868; 2869; SKX-LABEL: test_v16i1_add: 2870; SKX: ## %bb.0: 2871; SKX-NEXT: kmovd %edi, %k0 2872; SKX-NEXT: kmovd %esi, %k1 2873; SKX-NEXT: kxorw %k1, %k0, %k0 2874; SKX-NEXT: kmovd %k0, %eax 2875; SKX-NEXT: ## kill: def $ax killed $ax killed $eax 2876; SKX-NEXT: retq 2877; 2878; AVX512BW-LABEL: test_v16i1_add: 2879; AVX512BW: ## %bb.0: 2880; AVX512BW-NEXT: kmovd %edi, %k0 2881; AVX512BW-NEXT: kmovd %esi, %k1 2882; AVX512BW-NEXT: kxorw %k1, %k0, %k0 2883; AVX512BW-NEXT: kmovd %k0, %eax 2884; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax 2885; AVX512BW-NEXT: retq 2886; 2887; AVX512DQ-LABEL: test_v16i1_add: 2888; AVX512DQ: ## %bb.0: 2889; AVX512DQ-NEXT: kmovw %edi, %k0 2890; AVX512DQ-NEXT: kmovw %esi, %k1 2891; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 2892; AVX512DQ-NEXT: kmovw %k0, %eax 2893; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax 2894; AVX512DQ-NEXT: retq 2895; 2896; X86-LABEL: test_v16i1_add: 2897; X86: ## %bb.0: 2898; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 2899; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 2900; X86-NEXT: kxorw %k1, %k0, %k0 2901; X86-NEXT: kmovd %k0, %eax 2902; X86-NEXT: ## kill: def $ax killed $ax killed $eax 2903; X86-NEXT: retl 2904 %m0 = bitcast i16 %x to <16 x i1> 2905 %m1 = bitcast 
i16 %y to <16 x i1> 2906 %m2 = add <16 x i1> %m0, %m1 2907 %ret = bitcast <16 x i1> %m2 to i16 2908 ret i16 %ret 2909} 2910 ; test_v16i1_sub bitcasts both i16 arguments to <16 x i1>, subtracts %m1 from %m0 and returns the difference bitcast back to i16. 2911define i16 @test_v16i1_sub(i16 %x, i16 %y) { 2912; KNL-LABEL: test_v16i1_sub: 2913; KNL: ## %bb.0: 2914; KNL-NEXT: kmovw %edi, %k0 2915; KNL-NEXT: kmovw %esi, %k1 2916; KNL-NEXT: kxorw %k1, %k0, %k0 2917; KNL-NEXT: kmovw %k0, %eax 2918; KNL-NEXT: ## kill: def $ax killed $ax killed $eax 2919; KNL-NEXT: retq 2920; 2921; SKX-LABEL: test_v16i1_sub: 2922; SKX: ## %bb.0: 2923; SKX-NEXT: kmovd %edi, %k0 2924; SKX-NEXT: kmovd %esi, %k1 2925; SKX-NEXT: kxorw %k1, %k0, %k0 2926; SKX-NEXT: kmovd %k0, %eax 2927; SKX-NEXT: ## kill: def $ax killed $ax killed $eax 2928; SKX-NEXT: retq 2929; 2930; AVX512BW-LABEL: test_v16i1_sub: 2931; AVX512BW: ## %bb.0: 2932; AVX512BW-NEXT: kmovd %edi, %k0 2933; AVX512BW-NEXT: kmovd %esi, %k1 2934; AVX512BW-NEXT: kxorw %k1, %k0, %k0 2935; AVX512BW-NEXT: kmovd %k0, %eax 2936; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax 2937; AVX512BW-NEXT: retq 2938; 2939; AVX512DQ-LABEL: test_v16i1_sub: 2940; AVX512DQ: ## %bb.0: 2941; AVX512DQ-NEXT: kmovw %edi, %k0 2942; AVX512DQ-NEXT: kmovw %esi, %k1 2943; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 2944; AVX512DQ-NEXT: kmovw %k0, %eax 2945; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax 2946; AVX512DQ-NEXT: retq 2947; 2948; X86-LABEL: test_v16i1_sub: 2949; X86: ## %bb.0: 2950; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 2951; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 2952; X86-NEXT: kxorw %k1, %k0, %k0 2953; X86-NEXT: kmovd %k0, %eax 2954; X86-NEXT: ## kill: def $ax killed $ax killed $eax 2955; X86-NEXT: retl 2956 %m0 = bitcast i16 %x to <16 x i1> 2957 %m1 = bitcast i16 %y to <16 x i1> 2958 %m2 = sub <16 x i1> %m0, %m1 2959 %ret = bitcast <16 x i1> %m2 to i16 2960 ret i16 %ret 2961} 2962 ; test_v16i1_mul bitcasts both i16 arguments to <16 x i1>, multiplies the two vectors and returns the product bitcast back to i16. 2963define i16 @test_v16i1_mul(i16 %x, i16 %y) { 2964; KNL-LABEL: test_v16i1_mul: 2965; KNL: ## %bb.0: 2966; KNL-NEXT: kmovw %edi, %k0 2967; KNL-NEXT: kmovw %esi, %k1 2968; KNL-NEXT: kandw %k1, %k0, 
%k0 2969; KNL-NEXT: kmovw %k0, %eax 2970; KNL-NEXT: ## kill: def $ax killed $ax killed $eax 2971; KNL-NEXT: retq 2972; 2973; SKX-LABEL: test_v16i1_mul: 2974; SKX: ## %bb.0: 2975; SKX-NEXT: kmovd %edi, %k0 2976; SKX-NEXT: kmovd %esi, %k1 2977; SKX-NEXT: kandw %k1, %k0, %k0 2978; SKX-NEXT: kmovd %k0, %eax 2979; SKX-NEXT: ## kill: def $ax killed $ax killed $eax 2980; SKX-NEXT: retq 2981; 2982; AVX512BW-LABEL: test_v16i1_mul: 2983; AVX512BW: ## %bb.0: 2984; AVX512BW-NEXT: kmovd %edi, %k0 2985; AVX512BW-NEXT: kmovd %esi, %k1 2986; AVX512BW-NEXT: kandw %k1, %k0, %k0 2987; AVX512BW-NEXT: kmovd %k0, %eax 2988; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax 2989; AVX512BW-NEXT: retq 2990; 2991; AVX512DQ-LABEL: test_v16i1_mul: 2992; AVX512DQ: ## %bb.0: 2993; AVX512DQ-NEXT: kmovw %edi, %k0 2994; AVX512DQ-NEXT: kmovw %esi, %k1 2995; AVX512DQ-NEXT: kandw %k1, %k0, %k0 2996; AVX512DQ-NEXT: kmovw %k0, %eax 2997; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax 2998; AVX512DQ-NEXT: retq 2999; 3000; X86-LABEL: test_v16i1_mul: 3001; X86: ## %bb.0: 3002; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 3003; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 3004; X86-NEXT: kandw %k1, %k0, %k0 3005; X86-NEXT: kmovd %k0, %eax 3006; X86-NEXT: ## kill: def $ax killed $ax killed $eax 3007; X86-NEXT: retl 3008 %m0 = bitcast i16 %x to <16 x i1> 3009 %m1 = bitcast i16 %y to <16 x i1> 3010 %m2 = mul <16 x i1> %m0, %m1 3011 %ret = bitcast <16 x i1> %m2 to i16 3012 ret i16 %ret 3013} 3014 ; test_v8i1_add bitcasts both i8 arguments to <8 x i1>, adds the two vectors and returns the sum bitcast back to i8. 3015define i8 @test_v8i1_add(i8 %x, i8 %y) { 3016; KNL-LABEL: test_v8i1_add: 3017; KNL: ## %bb.0: 3018; KNL-NEXT: kmovw %edi, %k0 3019; KNL-NEXT: kmovw %esi, %k1 3020; KNL-NEXT: kxorw %k1, %k0, %k0 3021; KNL-NEXT: kmovw %k0, %eax 3022; KNL-NEXT: ## kill: def $al killed $al killed $eax 3023; KNL-NEXT: retq 3024; 3025; SKX-LABEL: test_v8i1_add: 3026; SKX: ## %bb.0: 3027; SKX-NEXT: kmovd %edi, %k0 3028; SKX-NEXT: kmovd %esi, %k1 3029; SKX-NEXT: kxorb %k1, %k0, %k0 3030; SKX-NEXT: kmovd %k0, %eax 3031; SKX-NEXT: ## kill: 
def $al killed $al killed $eax 3032; SKX-NEXT: retq 3033; 3034; AVX512BW-LABEL: test_v8i1_add: 3035; AVX512BW: ## %bb.0: 3036; AVX512BW-NEXT: kmovd %edi, %k0 3037; AVX512BW-NEXT: kmovd %esi, %k1 3038; AVX512BW-NEXT: kxorw %k1, %k0, %k0 3039; AVX512BW-NEXT: kmovd %k0, %eax 3040; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax 3041; AVX512BW-NEXT: retq 3042; 3043; AVX512DQ-LABEL: test_v8i1_add: 3044; AVX512DQ: ## %bb.0: 3045; AVX512DQ-NEXT: kmovw %edi, %k0 3046; AVX512DQ-NEXT: kmovw %esi, %k1 3047; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 3048; AVX512DQ-NEXT: kmovw %k0, %eax 3049; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax 3050; AVX512DQ-NEXT: retq 3051; 3052; X86-LABEL: test_v8i1_add: 3053; X86: ## %bb.0: 3054; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 3055; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 3056; X86-NEXT: kxorb %k1, %k0, %k0 3057; X86-NEXT: kmovd %k0, %eax 3058; X86-NEXT: ## kill: def $al killed $al killed $eax 3059; X86-NEXT: retl 3060 %m0 = bitcast i8 %x to <8 x i1> 3061 %m1 = bitcast i8 %y to <8 x i1> 3062 %m2 = add <8 x i1> %m0, %m1 3063 %ret = bitcast <8 x i1> %m2 to i8 3064 ret i8 %ret 3065} 3066 3067define i8 @test_v8i1_sub(i8 %x, i8 %y) { 3068; KNL-LABEL: test_v8i1_sub: 3069; KNL: ## %bb.0: 3070; KNL-NEXT: kmovw %edi, %k0 3071; KNL-NEXT: kmovw %esi, %k1 3072; KNL-NEXT: kxorw %k1, %k0, %k0 3073; KNL-NEXT: kmovw %k0, %eax 3074; KNL-NEXT: ## kill: def $al killed $al killed $eax 3075; KNL-NEXT: retq 3076; 3077; SKX-LABEL: test_v8i1_sub: 3078; SKX: ## %bb.0: 3079; SKX-NEXT: kmovd %edi, %k0 3080; SKX-NEXT: kmovd %esi, %k1 3081; SKX-NEXT: kxorb %k1, %k0, %k0 3082; SKX-NEXT: kmovd %k0, %eax 3083; SKX-NEXT: ## kill: def $al killed $al killed $eax 3084; SKX-NEXT: retq 3085; 3086; AVX512BW-LABEL: test_v8i1_sub: 3087; AVX512BW: ## %bb.0: 3088; AVX512BW-NEXT: kmovd %edi, %k0 3089; AVX512BW-NEXT: kmovd %esi, %k1 3090; AVX512BW-NEXT: kxorw %k1, %k0, %k0 3091; AVX512BW-NEXT: kmovd %k0, %eax 3092; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax 3093; 
AVX512BW-NEXT: retq 3094; 3095; AVX512DQ-LABEL: test_v8i1_sub: 3096; AVX512DQ: ## %bb.0: 3097; AVX512DQ-NEXT: kmovw %edi, %k0 3098; AVX512DQ-NEXT: kmovw %esi, %k1 3099; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 3100; AVX512DQ-NEXT: kmovw %k0, %eax 3101; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax 3102; AVX512DQ-NEXT: retq 3103; 3104; X86-LABEL: test_v8i1_sub: 3105; X86: ## %bb.0: 3106; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 3107; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 3108; X86-NEXT: kxorb %k1, %k0, %k0 3109; X86-NEXT: kmovd %k0, %eax 3110; X86-NEXT: ## kill: def $al killed $al killed $eax 3111; X86-NEXT: retl 3112 %m0 = bitcast i8 %x to <8 x i1> 3113 %m1 = bitcast i8 %y to <8 x i1> 3114 %m2 = sub <8 x i1> %m0, %m1 3115 %ret = bitcast <8 x i1> %m2 to i8 3116 ret i8 %ret 3117} 3118 3119define i8 @test_v8i1_mul(i8 %x, i8 %y) { 3120; KNL-LABEL: test_v8i1_mul: 3121; KNL: ## %bb.0: 3122; KNL-NEXT: kmovw %edi, %k0 3123; KNL-NEXT: kmovw %esi, %k1 3124; KNL-NEXT: kandw %k1, %k0, %k0 3125; KNL-NEXT: kmovw %k0, %eax 3126; KNL-NEXT: ## kill: def $al killed $al killed $eax 3127; KNL-NEXT: retq 3128; 3129; SKX-LABEL: test_v8i1_mul: 3130; SKX: ## %bb.0: 3131; SKX-NEXT: kmovd %edi, %k0 3132; SKX-NEXT: kmovd %esi, %k1 3133; SKX-NEXT: kandb %k1, %k0, %k0 3134; SKX-NEXT: kmovd %k0, %eax 3135; SKX-NEXT: ## kill: def $al killed $al killed $eax 3136; SKX-NEXT: retq 3137; 3138; AVX512BW-LABEL: test_v8i1_mul: 3139; AVX512BW: ## %bb.0: 3140; AVX512BW-NEXT: kmovd %edi, %k0 3141; AVX512BW-NEXT: kmovd %esi, %k1 3142; AVX512BW-NEXT: kandw %k1, %k0, %k0 3143; AVX512BW-NEXT: kmovd %k0, %eax 3144; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax 3145; AVX512BW-NEXT: retq 3146; 3147; AVX512DQ-LABEL: test_v8i1_mul: 3148; AVX512DQ: ## %bb.0: 3149; AVX512DQ-NEXT: kmovw %edi, %k0 3150; AVX512DQ-NEXT: kmovw %esi, %k1 3151; AVX512DQ-NEXT: kandb %k1, %k0, %k0 3152; AVX512DQ-NEXT: kmovw %k0, %eax 3153; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax 3154; AVX512DQ-NEXT: retq 3155; 3156; 
X86-LABEL: test_v8i1_mul: 3157; X86: ## %bb.0: 3158; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 3159; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 3160; X86-NEXT: kandb %k1, %k0, %k0 3161; X86-NEXT: kmovd %k0, %eax 3162; X86-NEXT: ## kill: def $al killed $al killed $eax 3163; X86-NEXT: retl 3164 %m0 = bitcast i8 %x to <8 x i1> 3165 %m1 = bitcast i8 %y to <8 x i1> 3166 %m2 = mul <8 x i1> %m0, %m1 3167 %ret = bitcast <8 x i1> %m2 to i8 3168 ret i8 %ret 3169} 3170 3171; Make sure we don't emit a ktest for signed comparisons. 3172define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) { 3173; KNL-LABEL: ktest_signed: 3174; KNL: ## %bb.0: 3175; KNL-NEXT: pushq %rax 3176; KNL-NEXT: .cfi_def_cfa_offset 16 3177; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 3178; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 3179; KNL-NEXT: kmovw %k0, %eax 3180; KNL-NEXT: testw %ax, %ax 3181; KNL-NEXT: jle LBB64_1 3182; KNL-NEXT: ## %bb.2: ## %bb.2 3183; KNL-NEXT: popq %rax 3184; KNL-NEXT: vzeroupper 3185; KNL-NEXT: retq 3186; KNL-NEXT: LBB64_1: ## %bb.1 3187; KNL-NEXT: vzeroupper 3188; KNL-NEXT: callq _foo 3189; KNL-NEXT: popq %rax 3190; KNL-NEXT: retq 3191; 3192; SKX-LABEL: ktest_signed: 3193; SKX: ## %bb.0: 3194; SKX-NEXT: pushq %rax 3195; SKX-NEXT: .cfi_def_cfa_offset 16 3196; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 3197; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 3198; SKX-NEXT: kmovd %k0, %eax 3199; SKX-NEXT: testw %ax, %ax 3200; SKX-NEXT: jle LBB64_1 3201; SKX-NEXT: ## %bb.2: ## %bb.2 3202; SKX-NEXT: popq %rax 3203; SKX-NEXT: vzeroupper 3204; SKX-NEXT: retq 3205; SKX-NEXT: LBB64_1: ## %bb.1 3206; SKX-NEXT: vzeroupper 3207; SKX-NEXT: callq _foo 3208; SKX-NEXT: popq %rax 3209; SKX-NEXT: retq 3210; 3211; AVX512BW-LABEL: ktest_signed: 3212; AVX512BW: ## %bb.0: 3213; AVX512BW-NEXT: pushq %rax 3214; AVX512BW-NEXT: .cfi_def_cfa_offset 16 3215; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 3216; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 3217; AVX512BW-NEXT: kmovd %k0, %eax 3218; AVX512BW-NEXT: testw %ax, %ax 3219; AVX512BW-NEXT: jle 
LBB64_1 3220; AVX512BW-NEXT: ## %bb.2: ## %bb.2 3221; AVX512BW-NEXT: popq %rax 3222; AVX512BW-NEXT: vzeroupper 3223; AVX512BW-NEXT: retq 3224; AVX512BW-NEXT: LBB64_1: ## %bb.1 3225; AVX512BW-NEXT: vzeroupper 3226; AVX512BW-NEXT: callq _foo 3227; AVX512BW-NEXT: popq %rax 3228; AVX512BW-NEXT: retq 3229; 3230; AVX512DQ-LABEL: ktest_signed: 3231; AVX512DQ: ## %bb.0: 3232; AVX512DQ-NEXT: pushq %rax 3233; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 3234; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 3235; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 3236; AVX512DQ-NEXT: kmovw %k0, %eax 3237; AVX512DQ-NEXT: testw %ax, %ax 3238; AVX512DQ-NEXT: jle LBB64_1 3239; AVX512DQ-NEXT: ## %bb.2: ## %bb.2 3240; AVX512DQ-NEXT: popq %rax 3241; AVX512DQ-NEXT: vzeroupper 3242; AVX512DQ-NEXT: retq 3243; AVX512DQ-NEXT: LBB64_1: ## %bb.1 3244; AVX512DQ-NEXT: vzeroupper 3245; AVX512DQ-NEXT: callq _foo 3246; AVX512DQ-NEXT: popq %rax 3247; AVX512DQ-NEXT: retq 3248; 3249; X86-LABEL: ktest_signed: 3250; X86: ## %bb.0: 3251; X86-NEXT: subl $12, %esp 3252; X86-NEXT: .cfi_def_cfa_offset 16 3253; X86-NEXT: vporq %zmm1, %zmm0, %zmm0 3254; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 3255; X86-NEXT: kmovd %k0, %eax 3256; X86-NEXT: testw %ax, %ax 3257; X86-NEXT: jle LBB64_1 3258; X86-NEXT: ## %bb.2: ## %bb.2 3259; X86-NEXT: addl $12, %esp 3260; X86-NEXT: vzeroupper 3261; X86-NEXT: retl 3262; X86-NEXT: LBB64_1: ## %bb.1 3263; X86-NEXT: vzeroupper 3264; X86-NEXT: calll _foo 3265; X86-NEXT: addl $12, %esp 3266; X86-NEXT: retl 3267 %a = icmp eq <16 x i32> %x, zeroinitializer 3268 %b = icmp eq <16 x i32> %y, zeroinitializer 3269 %c = and <16 x i1> %a, %b 3270 %d = bitcast <16 x i1> %c to i16 3271 %e = icmp sgt i16 %d, 0 3272 br i1 %e, label %bb.2, label %bb.1 3273bb.1: 3274 call void @foo() 3275 br label %bb.2 3276bb.2: 3277 ret void 3278} 3279declare void @foo() 3280 3281; Make sure we can use the C flag from kortest to check for all ones. 
; The IR and-combines two lane-wise "== 0" masks, bitcasts the 16-bit mask to
; i16 and compares it with -1, so the branch to %bb.2 is taken only when all
; 16 mask bits are set. The expected code performs that test with kortestw
; followed by jb; no kmov of the mask into a general purpose register is
; expected (contrast with ktest_signed, where the assertions contain one).
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; CHECK-LABEL: ktest_allones:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: jb LBB65_2
; CHECK-NEXT: ## %bb.1: ## %bb.1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _foo
; CHECK-NEXT: LBB65_2: ## %bb.2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
; X86-LABEL: ktest_allones:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: kortestw %k0, %k0
; X86-NEXT: jb LBB65_2
; X86-NEXT: ## %bb.1: ## %bb.1
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: LBB65_2: ## %bb.2
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  ; Per-lane zero tests of both inputs, combined into one mask. The expected
  ; code folds the pair into a single vporq + vptestnmd.
  %a = icmp eq <16 x i32> %x, zeroinitializer
  %b = icmp eq <16 x i32> %y, zeroinitializer
  %c = and <16 x i1> %a, %b
  ; Reinterpret the mask as a scalar; -1 as i16 means every lane is true.
  %d = bitcast <16 x i1> %c to i16
  %e = icmp eq i16 %d, -1
  ; All lanes true: skip the call. Otherwise fall into bb.1 and call @foo.
  br i1 %e, label %bb.2, label %bb.1
bb.1:
  call void @foo()
  br label %bb.2
bb.2:
  ret void
}

; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
; KNL-LABEL: mask_widening:
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: mask_widening:
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask_widening:
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask_widening:
; AVX512DQ: ## %bb.0: ## %entry
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; X86-LABEL: mask_widening:
; X86: ## %bb.0: ## %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-64, %esp
; X86-NEXT: subl $64, %esp
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  ; %c and %d are never referenced in the body.
  ; Expected code differs by feature set: the three runs without +avx512vl
  ; compare on zmm registers and then clear mask bits 4..15 with a
  ; kshiftlw/kshiftrw pair by 12, while the runs with +avx512vl compare
  ; directly on xmm registers and need no shifts.
  %0 = bitcast <2 x i64> %a to <4 x i32>
  %1 = bitcast <2 x i64> %b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Widen 4 -> 8 lanes: lanes 0..3 come from the compare, lanes 4..7 from the
  ; zeroinitializer operand.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i64> %f to <16 x i32>
  %5 = bitcast <8 x i64> %e to <16 x i32>
  ; Widen 8 -> 16 lanes: lanes 0..7 come from %3; lanes 8..15 all use index 8,
  ; i.e. element 0 of the constant operand, which is false (the undef element
  ; of that constant is never selected).
  %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ; Blend: lanes whose mask bit is set take %f, the others take %e.
  %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
  %8 = bitcast <16 x i32> %7 to <8 x i64>
  ret <8 x i64> %8
}

define void @store_v64i1_constant(<64 x i1>* %R) {
; CHECK-LABEL: store_v64i1_constant:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
;
; X86-LABEL: store_v64i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
; X86-NEXT: retl
entry:
  ; Vector lane i corresponds to bit i of the stored integer. The false lanes
  ; are 1, 12, 34, 39 and 61, which are exactly the cleared bits of the
  ; expected immediate 0xDFFFFF7BFFFFEFFD. The 64-bit runs expect one 8-byte
  ; store, the i686 run expects two 4-byte stores.
  store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
  ret void
}

define void @store_v2i1_constant(<2 x i1>* %R) {
; CHECK-LABEL: store_v2i1_constant:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movb $1, (%rdi)
; CHECK-NEXT: retq
;
; X86-LABEL: store_v2i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $1, (%eax)
; X86-NEXT: retl
entry:
  ; Only lane 0 is true, so the expected code stores the byte value 1.
  store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
  ret void
}

define void @store_v4i1_constant(<4 x i1>* %R) {
; CHECK-LABEL: store_v4i1_constant:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movb $5, (%rdi)
; CHECK-NEXT: retq
;
; X86-LABEL: store_v4i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $5, (%eax)
; X86-NEXT: retl
entry:
  ; Lanes 0 and 2 are true, so the expected code stores the byte value 5
  ; (0b0101).
  store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
  ret void
}

; Make sure we bring the -1 constant into the mask domain.
define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
; CHECK-LABEL: mask_not_cast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
; X86-LABEL: mask_not_cast:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  ; The arguments are unnamed, so they are %0..%4 and the first instruction
  ; result is %6.
  ; First mask (%9): lanes where (%2 & %1), viewed as 16 x i32, is non-zero.
  %6 = and <8 x i64> %2, %1
  %7 = bitcast <8 x i64> %6 to <16 x i32>
  %8 = icmp ne <16 x i32> %7, zeroinitializer
  %9 = bitcast <16 x i1> %8 to i16
  ; Second mask (%13): lanes where %3 <= %4 (unsigned), then inverted by the
  ; scalar xor with -1. The expected code has no scalar not/xor: the inversion
  ; shows up as the vpcmpnleud predicate.
  %10 = bitcast <8 x i64> %3 to <16 x i32>
  %11 = bitcast <8 x i64> %4 to <16 x i32>
  %12 = icmp ule <16 x i32> %10, %11
  %13 = bitcast <16 x i1> %12 to i16
  %14 = xor i16 %13, -1
  ; Combine both masks as scalars; the expected code does this by running
  ; vptestmd under the {%k1} write mask produced by the compare.
  %15 = and i16 %14, %9
  %16 = bitcast <8 x i64> %1 to <16 x i32>
  %17 = bitcast i8* %0 to <16 x i32>*
  %18 = bitcast i16 %15 to <16 x i1>
  ; Masked store of %1 (as 16 x i32) through the pointer argument, with an
  ; alignment operand of 1.
  ; NOTE(review): no definition of attribute group #2 is visible in this part
  ; of the file - confirm that is intentional.
  tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
  ret void
}
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)