1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL 3; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX 4 5define i16 @mask16(i16 %x) { 6; CHECK-LABEL: mask16: 7; CHECK: ## BB#0: 8; CHECK-NEXT: kmovw %edi, %k0 9; CHECK-NEXT: knotw %k0, %k0 10; CHECK-NEXT: kmovw %k0, %eax 11; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 12; CHECK-NEXT: retq 13 %m0 = bitcast i16 %x to <16 x i1> 14 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 15 %ret = bitcast <16 x i1> %m1 to i16 16 ret i16 %ret 17} 18 19define i32 @mask16_zext(i16 %x) { 20; CHECK-LABEL: mask16_zext: 21; CHECK: ## BB#0: 22; CHECK-NEXT: kmovw %edi, %k0 23; CHECK-NEXT: knotw %k0, %k0 24; CHECK-NEXT: kmovw %k0, %eax 25; CHECK-NEXT: retq 26 %m0 = bitcast i16 %x to <16 x i1> 27 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 28 %m2 = bitcast <16 x i1> %m1 to i16 29 %ret = zext i16 %m2 to i32 30 ret i32 %ret 31} 32 33define i8 @mask8(i8 %x) { 34; KNL-LABEL: mask8: 35; KNL: ## BB#0: 36; KNL-NEXT: kmovw %edi, %k0 37; KNL-NEXT: knotw %k0, %k0 38; KNL-NEXT: kmovw %k0, %eax 39; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 40; KNL-NEXT: retq 41; 42; SKX-LABEL: mask8: 43; SKX: ## BB#0: 44; SKX-NEXT: kmovb %edi, %k0 45; SKX-NEXT: knotb %k0, %k0 46; SKX-NEXT: kmovb %k0, %eax 47; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 48; SKX-NEXT: retq 49 %m0 = bitcast i8 %x to <8 x i1> 50 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 51 %ret = bitcast <8 x i1> %m1 to i8 52 ret i8 %ret 53} 54 55define i32 @mask8_zext(i8 %x) { 56; KNL-LABEL: 
mask8_zext: 57; KNL: ## BB#0: 58; KNL-NEXT: kmovw %edi, %k0 59; KNL-NEXT: knotw %k0, %k0 60; KNL-NEXT: kmovw %k0, %eax 61; KNL-NEXT: retq 62; 63; SKX-LABEL: mask8_zext: 64; SKX: ## BB#0: 65; SKX-NEXT: kmovb %edi, %k0 66; SKX-NEXT: knotb %k0, %k0 67; SKX-NEXT: kmovb %k0, %eax 68; SKX-NEXT: retq 69 %m0 = bitcast i8 %x to <8 x i1> 70 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 71 %m2 = bitcast <8 x i1> %m1 to i8 72 %ret = zext i8 %m2 to i32 73 ret i32 %ret 74} 75 76define void @mask16_mem(i16* %ptr) { 77; CHECK-LABEL: mask16_mem: 78; CHECK: ## BB#0: 79; CHECK-NEXT: kmovw (%rdi), %k0 80; CHECK-NEXT: knotw %k0, %k0 81; CHECK-NEXT: kmovw %k0, (%rdi) 82; CHECK-NEXT: retq 83 %x = load i16, i16* %ptr, align 4 84 %m0 = bitcast i16 %x to <16 x i1> 85 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 86 %ret = bitcast <16 x i1> %m1 to i16 87 store i16 %ret, i16* %ptr, align 4 88 ret void 89} 90 91define void @mask8_mem(i8* %ptr) { 92; KNL-LABEL: mask8_mem: 93; KNL: ## BB#0: 94; KNL-NEXT: movzbl (%rdi), %eax 95; KNL-NEXT: kmovw %eax, %k0 96; KNL-NEXT: knotw %k0, %k0 97; KNL-NEXT: kmovw %k0, %eax 98; KNL-NEXT: movb %al, (%rdi) 99; KNL-NEXT: retq 100; 101; SKX-LABEL: mask8_mem: 102; SKX: ## BB#0: 103; SKX-NEXT: kmovb (%rdi), %k0 104; SKX-NEXT: knotb %k0, %k0 105; SKX-NEXT: kmovb %k0, (%rdi) 106; SKX-NEXT: retq 107 %x = load i8, i8* %ptr, align 4 108 %m0 = bitcast i8 %x to <8 x i1> 109 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 110 %ret = bitcast <8 x i1> %m1 to i8 111 store i8 %ret, i8* %ptr, align 4 112 ret void 113} 114 115define i16 @mand16(i16 %x, i16 %y) { 116; CHECK-LABEL: mand16: 117; CHECK: ## BB#0: 118; CHECK-NEXT: movl %edi, %eax 119; CHECK-NEXT: xorl %esi, %eax 120; CHECK-NEXT: andl %esi, %edi 121; CHECK-NEXT: orl %eax, %edi 122; CHECK-NEXT: movl %edi, %eax 123; CHECK-NEXT: retq 124 %ma = bitcast i16 %x to 
<16 x i1> 125 %mb = bitcast i16 %y to <16 x i1> 126 %mc = and <16 x i1> %ma, %mb 127 %md = xor <16 x i1> %ma, %mb 128 %me = or <16 x i1> %mc, %md 129 %ret = bitcast <16 x i1> %me to i16 130 ret i16 %ret 131} 132 133define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { 134; CHECK-LABEL: mand16_mem: 135; CHECK: ## BB#0: 136; CHECK-NEXT: kmovw (%rdi), %k0 137; CHECK-NEXT: kmovw (%rsi), %k1 138; CHECK-NEXT: kandw %k1, %k0, %k2 139; CHECK-NEXT: kxorw %k1, %k0, %k0 140; CHECK-NEXT: korw %k0, %k2, %k0 141; CHECK-NEXT: kmovw %k0, %eax 142; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 143; CHECK-NEXT: retq 144 %ma = load <16 x i1>, <16 x i1>* %x 145 %mb = load <16 x i1>, <16 x i1>* %y 146 %mc = and <16 x i1> %ma, %mb 147 %md = xor <16 x i1> %ma, %mb 148 %me = or <16 x i1> %mc, %md 149 %ret = bitcast <16 x i1> %me to i16 150 ret i16 %ret 151} 152 153define i8 @shuf_test1(i16 %v) nounwind { 154; KNL-LABEL: shuf_test1: 155; KNL: ## BB#0: 156; KNL-NEXT: kmovw %edi, %k0 157; KNL-NEXT: kshiftrw $8, %k0, %k0 158; KNL-NEXT: kmovw %k0, %eax 159; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 160; KNL-NEXT: retq 161; 162; SKX-LABEL: shuf_test1: 163; SKX: ## BB#0: 164; SKX-NEXT: kmovw %edi, %k0 165; SKX-NEXT: kshiftrw $8, %k0, %k0 166; SKX-NEXT: kmovb %k0, %eax 167; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 168; SKX-NEXT: retq 169 %v1 = bitcast i16 %v to <16 x i1> 170 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 171 %mask1 = bitcast <8 x i1> %mask to i8 172 ret i8 %mask1 173} 174 175define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { 176; CHECK-LABEL: zext_test1: 177; CHECK: ## BB#0: 178; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 179; CHECK-NEXT: kshiftlw $10, %k0, %k0 180; CHECK-NEXT: kshiftrw $15, %k0, %k0 181; CHECK-NEXT: kmovw %k0, %eax 182; CHECK-NEXT: retq 183 %cmp_res = icmp ugt <16 x i32> %a, %b 184 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 185 %res = zext i1 
%cmp_res.i1 to i32 186 ret i32 %res 187} 188 189define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { 190; CHECK-LABEL: zext_test2: 191; CHECK: ## BB#0: 192; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 193; CHECK-NEXT: kshiftlw $10, %k0, %k0 194; CHECK-NEXT: kshiftrw $15, %k0, %k0 195; CHECK-NEXT: kmovw %k0, %eax 196; CHECK-NEXT: retq 197 %cmp_res = icmp ugt <16 x i32> %a, %b 198 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 199 %res = zext i1 %cmp_res.i1 to i16 200 ret i16 %res 201} 202 203define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { 204; CHECK-LABEL: zext_test3: 205; CHECK: ## BB#0: 206; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 207; CHECK-NEXT: kshiftlw $10, %k0, %k0 208; CHECK-NEXT: kshiftrw $15, %k0, %k0 209; CHECK-NEXT: kmovw %k0, %eax 210; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill> 211; CHECK-NEXT: retq 212 %cmp_res = icmp ugt <16 x i32> %a, %b 213 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 214 %res = zext i1 %cmp_res.i1 to i8 215 ret i8 %res 216} 217 218define i8 @conv1(<8 x i1>* %R) { 219; KNL-LABEL: conv1: 220; KNL: ## BB#0: ## %entry 221; KNL-NEXT: kxnorw %k0, %k0, %k0 222; KNL-NEXT: kmovw %k0, %eax 223; KNL-NEXT: movb %al, (%rdi) 224; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp) 225; KNL-NEXT: movb $-2, %al 226; KNL-NEXT: retq 227; 228; SKX-LABEL: conv1: 229; SKX: ## BB#0: ## %entry 230; SKX-NEXT: kxnorw %k0, %k0, %k0 231; SKX-NEXT: kmovb %k0, (%rdi) 232; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) 233; SKX-NEXT: movb $-2, %al 234; SKX-NEXT: retq 235entry: 236 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R 237 238 %maskPtr = alloca <8 x i1> 239 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr 240 %mask = load <8 x i1>, <8 x i1>* %maskPtr 241 %mask_convert = bitcast <8 x i1> %mask to i8 242 ret i8 %mask_convert 243} 244 245define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { 246; KNL-LABEL: test4: 247; KNL: ## BB#0: 248; KNL-NEXT: 
vpcmpgtq %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT:    vpmovqd %zmm1, %ymm1
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; test5: signed less-than compare between two <2 x i1> compare results
; (an slt and an sgt on <2 x i64> operands), sign-extended to <2 x i64>.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm1
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; test6: AND a <16 x i1> argument with an alternating constant mask, bitcast
; the result to i16 and branch on it being zero; both successors return void.
; The 'define' used to share a line with the closing brace of test5 and the
; function carried no assertions. Only a hand-written label assertion is added
; here; regenerate the full set with utils/update_llc_test_checks.py.
define void @test6(<16 x i1> %mask) {
; CHECK-LABEL: test6:
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL:       ## BB#0: ## %allocas
; 
KNL-NEXT: vpmovsxwq %xmm0, %zmm0 310; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 311; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 312; KNL-NEXT: movb $85, %al 313; KNL-NEXT: kmovw %eax, %k1 314; KNL-NEXT: korw %k1, %k0, %k0 315; KNL-NEXT: kmovw %k0, %eax 316; KNL-NEXT: testb %al, %al 317; KNL-NEXT: retq 318; 319; SKX-LABEL: test7: 320; SKX: ## BB#0: ## %allocas 321; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 322; SKX-NEXT: vpmovw2m %xmm0, %k0 323; SKX-NEXT: movb $85, %al 324; SKX-NEXT: kmovb %eax, %k1 325; SKX-NEXT: korb %k1, %k0, %k0 326; SKX-NEXT: ktestb %k0, %k0 327; SKX-NEXT: retq 328allocas: 329 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 330 %b = bitcast <8 x i1> %a to i8 331 %c = icmp eq i8 %b, 0 332 br i1 %c, label %true, label %false 333 334true: 335 ret void 336 337false: 338 ret void 339} 340define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { 341; KNL-LABEL: test8: 342; KNL: ## BB#0: 343; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2 344; KNL-NEXT: cmpl %esi, %edi 345; KNL-NEXT: jg LBB17_1 346; KNL-NEXT: ## BB#2: 347; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1 348; KNL-NEXT: jmp LBB17_3 349; KNL-NEXT: LBB17_1: 350; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 351; KNL-NEXT: LBB17_3: 352; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 353; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 354; KNL-NEXT: vpmovdb %zmm0, %xmm0 355; KNL-NEXT: retq 356; 357; SKX-LABEL: test8: 358; SKX: ## BB#0: 359; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2 360; SKX-NEXT: cmpl %esi, %edi 361; SKX-NEXT: jg LBB17_1 362; SKX-NEXT: ## BB#2: 363; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 364; SKX-NEXT: vpmovm2b %k0, %xmm0 365; SKX-NEXT: retq 366; SKX-NEXT: LBB17_1: 367; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 368; SKX-NEXT: vpmovm2b %k0, %xmm0 369; SKX-NEXT: retq 370 %cond = icmp sgt i32 %a1, %b1 371 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer 372 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer 373 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2 374 %res = 
sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}

; test9: select between two <16 x i1> arguments on a scalar signed
; greater-than compare of %a1 and %b1.
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB18_1
; KNL-NEXT:  ## BB#2:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
; KNL-NEXT:    jmp LBB18_3
; KNL-NEXT:  LBB18_1:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:  LBB18_3:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB18_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB18_3
; SKX-NEXT:  LBB18_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:  LBB18_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; test10: same scalar-controlled select as test9, on <8 x i1> operands.
; The 'define' used to share a line with the closing brace of test9 and the
; function carried no assertions. Only a hand-written label assertion is added
; here; regenerate the full set with utils/update_llc_test_checks.py.
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; CHECK-LABEL: test10:
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; test11: same scalar-controlled select, on <4 x i1> operands.
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB20_2
; KNL-NEXT:  ## BB#1:
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:  LBB20_2:
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB20_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB20_3
; SKX-NEXT:  LBB20_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:  LBB20_3:
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; test12: element 0 of the constant mask 21845 (0x5555) selects between
; %x and %y.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; test13: as test12, but using element 3 of the same constant mask.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; test14: insert element 2 of the constant mask 21845 into lane 1 of a
; constant <4 x i1> vector.
; The 'define' used to share a line with the closing brace of test13 and the
; function carried no assertions. Only a hand-written label assertion is added
; here; regenerate the full set with utils/update_llc_test_checks.py.
define <4 x i1> @test14() {
; CHECK-LABEL: test14:
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}

; test15: select between two constant <16 x i1> masks (21845 and 1) on a
; scalar signed greater-than compare of %x and %y.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    movw $21845, %ax ## imm = 0x5555
; KNL-NEXT:    movw $1, %cx
; KNL-NEXT:    cmovgw %ax, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    movw $21845, %ax ## imm = 0x5555
; SKX-NEXT:    movw $1, %cx
; SKX-NEXT:    cmovgw %ax, %cx
; SKX-NEXT:    kmovw %ecx, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Ltmp0:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Ltmp1:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq 
%rsp, %rbp 511; KNL-NEXT: Ltmp2: 512; KNL-NEXT: .cfi_def_cfa_register %rbp 513; KNL-NEXT: andq $-32, %rsp 514; KNL-NEXT: subq $64, %rsp 515; KNL-NEXT: movl %edi, (%rsp) 516; KNL-NEXT: shrq $32, %rdi 517; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) 518; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 519; KNL-NEXT: kmovw (%rsp), %k1 520; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} 521; KNL-NEXT: vpmovdb %zmm1, %xmm1 522; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 523; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} 524; KNL-NEXT: vpmovdb %zmm2, %xmm2 525; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2 526; KNL-NEXT: movl $1, %eax 527; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 528; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7] 529; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 530; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} 531; KNL-NEXT: vpmovdb %zmm1, %xmm1 532; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 533; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 534; KNL-NEXT: vpmovdb %zmm0, %xmm0 535; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 536; KNL-NEXT: vpsllw $7, %ymm2, %ymm0 537; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 538; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 539; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 540; KNL-NEXT: movq %rbp, %rsp 541; KNL-NEXT: popq %rbp 542; KNL-NEXT: retq 543; 544; SKX-LABEL: test16: 545; SKX: ## BB#0: 546; SKX-NEXT: kmovq %rdi, %k0 547; SKX-NEXT: kxnorw %k0, %k0, %k1 548; SKX-NEXT: kshiftrw $15, %k1, %k1 549; SKX-NEXT: kshiftlq $5, %k1, %k1 550; SKX-NEXT: korq %k1, %k0, %k0 551; SKX-NEXT: vpmovm2b %k0, %zmm0 552; SKX-NEXT: retq 553 %a = bitcast i64 %x to <64 x i1> 554 %b = insertelement <64 x i1>%a, i1 true, i32 5 555 %c = sext <64 x i1>%b to <64 x i8> 556 ret <64 x i8>%c 557} 558 559define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { 560; 561; KNL-LABEL: test17: 562; KNL: ## BB#0: 563; KNL-NEXT: pushq %rbp 564; KNL-NEXT: Ltmp3: 565; KNL-NEXT: .cfi_def_cfa_offset 16 566; KNL-NEXT: Ltmp4: 567; KNL-NEXT: .cfi_offset %rbp, -16 568; KNL-NEXT: movq %rsp, %rbp 
569; KNL-NEXT: Ltmp5: 570; KNL-NEXT: .cfi_def_cfa_register %rbp 571; KNL-NEXT: andq $-32, %rsp 572; KNL-NEXT: subq $64, %rsp 573; KNL-NEXT: movl %edi, (%rsp) 574; KNL-NEXT: shrq $32, %rdi 575; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) 576; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 577; KNL-NEXT: kmovw (%rsp), %k1 578; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 579; KNL-NEXT: vpmovdb %zmm0, %xmm0 580; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 581; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} 582; KNL-NEXT: vpmovdb %zmm2, %xmm2 583; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 584; KNL-NEXT: xorl %eax, %eax 585; KNL-NEXT: cmpl %edx, %esi 586; KNL-NEXT: setg %al 587; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 588; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] 589; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 590; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 591; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 592; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 593; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 594; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} 595; KNL-NEXT: vpmovdb %zmm2, %xmm2 596; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 597; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 598; KNL-NEXT: vpmovdb %zmm1, %xmm1 599; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 600; KNL-NEXT: movq %rbp, %rsp 601; KNL-NEXT: popq %rbp 602; KNL-NEXT: retq 603; 604; SKX-LABEL: test17: 605; SKX: ## BB#0: 606; SKX-NEXT: kmovq %rdi, %k0 607; SKX-NEXT: cmpl %edx, %esi 608; SKX-NEXT: setg %al 609; SKX-NEXT: kmovw %eax, %k1 610; SKX-NEXT: kshiftlq $5, %k1, %k1 611; SKX-NEXT: korq %k1, %k0, %k0 612; SKX-NEXT: vpmovm2b %k0, %zmm0 613; SKX-NEXT: retq 614 %a = bitcast i64 %x to <64 x i1> 615 %b = icmp sgt i32 %y, %z 616 %c = insertelement <64 x i1>%a, i1 %b, i32 5 617 %d = sext <64 x i1>%c to <64 x i8> 618 ret <64 x i8>%d 619} 620 621define <8 x i1> @test18(i8 %a, i16 %y) { 622; KNL-LABEL: test18: 623; KNL: ## BB#0: 624; KNL-NEXT: kmovw %edi, %k0 625; KNL-NEXT: kmovw %esi, %k1 626; KNL-NEXT: kshiftlw $7, %k1, %k2 627; 
KNL-NEXT: kshiftrw $15, %k2, %k2 628; KNL-NEXT: kshiftlw $6, %k1, %k1 629; KNL-NEXT: kshiftrw $15, %k1, %k1 630; KNL-NEXT: kshiftlw $6, %k1, %k1 631; KNL-NEXT: korw %k1, %k0, %k0 632; KNL-NEXT: kshiftlw $7, %k2, %k1 633; KNL-NEXT: korw %k1, %k0, %k1 634; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 635; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 636; KNL-NEXT: vpmovqw %zmm0, %xmm0 637; KNL-NEXT: retq 638; 639; SKX-LABEL: test18: 640; SKX: ## BB#0: 641; SKX-NEXT: kmovb %edi, %k0 642; SKX-NEXT: kmovw %esi, %k1 643; SKX-NEXT: kshiftlw $6, %k1, %k2 644; SKX-NEXT: kshiftrw $15, %k2, %k2 645; SKX-NEXT: kshiftlw $7, %k1, %k1 646; SKX-NEXT: kshiftrw $15, %k1, %k1 647; SKX-NEXT: kshiftlb $7, %k1, %k1 648; SKX-NEXT: kshiftlb $6, %k2, %k2 649; SKX-NEXT: korb %k2, %k0, %k0 650; SKX-NEXT: korb %k1, %k0, %k0 651; SKX-NEXT: vpmovm2w %k0, %xmm0 652; SKX-NEXT: retq 653 %b = bitcast i8 %a to <8 x i1> 654 %b1 = bitcast i16 %y to <16 x i1> 655 %el1 = extractelement <16 x i1>%b1, i32 8 656 %el2 = extractelement <16 x i1>%b1, i32 9 657 %c = insertelement <8 x i1>%b, i1 %el1, i32 7 658 %d = insertelement <8 x i1>%c, i1 %el2, i32 6 659 ret <8 x i1>%d 660} 661define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { 662; KNL-LABEL: test21: 663; KNL: ## BB#0: 664; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 665; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 666; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 667; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 668; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 669; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 670; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 671; 
KNL-NEXT: vpsraw $15, %ymm2, %ymm2 672; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 673; KNL-NEXT: retq 674; 675; SKX-LABEL: test21: 676; SKX: ## BB#0: 677; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 678; SKX-NEXT: vpmovb2m %ymm1, %k1 679; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 680; SKX-NEXT: retq 681 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 682 ret <32 x i16> %ret 683} 684 685define void @test22(<4 x i1> %a, <4 x i1>* %addr) { 686; KNL-LABEL: test22: 687; KNL: ## BB#0: 688; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def> 689; KNL-NEXT: vpslld $31, %ymm0, %ymm0 690; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 691; KNL-NEXT: kmovw %k0, %eax 692; KNL-NEXT: movb %al, (%rdi) 693; KNL-NEXT: retq 694; 695; SKX-LABEL: test22: 696; SKX: ## BB#0: 697; SKX-NEXT: vpslld $31, %xmm0, %xmm0 698; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 699; SKX-NEXT: kmovb %k0, (%rdi) 700; SKX-NEXT: retq 701 store <4 x i1> %a, <4 x i1>* %addr 702 ret void 703} 704 705define void @test23(<2 x i1> %a, <2 x i1>* %addr) { 706; KNL-LABEL: test23: 707; KNL: ## BB#0: 708; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 709; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 710; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 711; KNL-NEXT: kmovw %k0, %eax 712; KNL-NEXT: movb %al, (%rdi) 713; KNL-NEXT: retq 714; 715; SKX-LABEL: test23: 716; SKX: ## BB#0: 717; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 718; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 719; SKX-NEXT: kmovb %k0, (%rdi) 720; SKX-NEXT: retq 721 store <2 x i1> %a, <2 x i1>* %addr 722 ret void 723} 724 725define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { 726; KNL-LABEL: store_v1i1: 727; KNL: ## BB#0: 728; KNL-NEXT: andl $1, %edi 729; KNL-NEXT: kmovw %edi, %k0 730; KNL-NEXT: kxnorw %k0, %k0, %k1 731; KNL-NEXT: kshiftrw $15, %k1, %k1 732; KNL-NEXT: kxorw %k1, %k0, %k0 733; KNL-NEXT: kmovw %k0, %eax 734; KNL-NEXT: movb %al, (%rsi) 735; KNL-NEXT: retq 736; 737; SKX-LABEL: store_v1i1: 738; SKX: ## BB#0: 739; SKX-NEXT: andl $1, %edi 740; SKX-NEXT: kmovw %edi, %k0 
741; SKX-NEXT: kxnorw %k0, %k0, %k1 742; SKX-NEXT: kshiftrw $15, %k1, %k1 743; SKX-NEXT: kxorw %k1, %k0, %k0 744; SKX-NEXT: kmovb %k0, (%rsi) 745; SKX-NEXT: retq 746 %x = xor <1 x i1> %c, <i1 1> 747 store <1 x i1> %x, <1 x i1>* %ptr, align 4 748 ret void 749} 750 751define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { 752; KNL-LABEL: store_v2i1: 753; KNL: ## BB#0: 754; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 755; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 756; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 757; KNL-NEXT: kmovw %k0, %eax 758; KNL-NEXT: movb %al, (%rdi) 759; KNL-NEXT: retq 760; 761; SKX-LABEL: store_v2i1: 762; SKX: ## BB#0: 763; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 764; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 765; SKX-NEXT: knotw %k0, %k0 766; SKX-NEXT: kmovb %k0, (%rdi) 767; SKX-NEXT: retq 768 %x = xor <2 x i1> %c, <i1 1, i1 1> 769 store <2 x i1> %x, <2 x i1>* %ptr, align 4 770 ret void 771} 772 773define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { 774; KNL-LABEL: store_v4i1: 775; KNL: ## BB#0: 776; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 777; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 778; KNL-NEXT: vpslld $31, %ymm0, %ymm0 779; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 780; KNL-NEXT: kmovw %k0, %eax 781; KNL-NEXT: movb %al, (%rdi) 782; KNL-NEXT: retq 783; 784; SKX-LABEL: store_v4i1: 785; SKX: ## BB#0: 786; SKX-NEXT: vpslld $31, %xmm0, %xmm0 787; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 788; SKX-NEXT: knotw %k0, %k0 789; SKX-NEXT: kmovb %k0, (%rdi) 790; SKX-NEXT: retq 791 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1> 792 store <4 x i1> %x, <4 x i1>* %ptr, align 4 793 ret void 794} 795 796define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { 797; KNL-LABEL: store_v8i1: 798; KNL: ## BB#0: 799; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 800; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 801; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 802; KNL-NEXT: knotw %k0, %k0 803; KNL-NEXT: kmovw %k0, %eax 804; KNL-NEXT: movb %al, (%rdi) 805; KNL-NEXT: retq 806; 807; SKX-LABEL: store_v8i1: 808; SKX: ## BB#0: 
809; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 810; SKX-NEXT: vpmovw2m %xmm0, %k0 811; SKX-NEXT: knotb %k0, %k0 812; SKX-NEXT: kmovb %k0, (%rdi) 813; SKX-NEXT: retq 814 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 815 store <8 x i1> %x, <8 x i1>* %ptr, align 4 816 ret void 817} 818 819define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { 820; KNL-LABEL: store_v16i1: 821; KNL: ## BB#0: 822; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 823; KNL-NEXT: vpslld $31, %zmm0, %zmm0 824; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 825; KNL-NEXT: knotw %k0, %k0 826; KNL-NEXT: kmovw %k0, (%rdi) 827; KNL-NEXT: retq 828; 829; SKX-LABEL: store_v16i1: 830; SKX: ## BB#0: 831; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 832; SKX-NEXT: vpmovb2m %xmm0, %k0 833; SKX-NEXT: knotw %k0, %k0 834; SKX-NEXT: kmovw %k0, (%rdi) 835; SKX-NEXT: retq 836 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 837 store <16 x i1> %x, <16 x i1>* %ptr, align 4 838 ret void 839} 840 841;void f2(int); 842;void f1(int c) 843;{ 844; static int v = 0; 845; if (v == 0) 846; v = 1; 847; else 848; v = 0; 849; f2(v); 850;} 851 852@f1.v = internal unnamed_addr global i1 false, align 4 853 854define void @f1(i32 %c) { 855; KNL-LABEL: f1: 856; KNL: ## BB#0: ## %entry 857; KNL-NEXT: movzbl {{.*}}(%rip), %edi 858; KNL-NEXT: movl %edi, %eax 859; KNL-NEXT: andl $1, %eax 860; KNL-NEXT: kmovw %eax, %k0 861; KNL-NEXT: kxnorw %k0, %k0, %k1 862; KNL-NEXT: kshiftrw $15, %k1, %k1 863; KNL-NEXT: kxorw %k1, %k0, %k0 864; KNL-NEXT: kmovw %k0, %eax 865; KNL-NEXT: movb %al, {{.*}}(%rip) 866; KNL-NEXT: xorl $1, %edi 867; KNL-NEXT: jmp _f2 ## TAILCALL 868; 869; SKX-LABEL: f1: 870; SKX: ## BB#0: ## %entry 871; SKX-NEXT: movzbl {{.*}}(%rip), %edi 872; SKX-NEXT: movl %edi, %eax 873; SKX-NEXT: andl $1, %eax 874; SKX-NEXT: kmovw %eax, %k0 875; SKX-NEXT: kxnorw %k0, %k0, %k1 876; SKX-NEXT: kshiftrw $15, %k1, %k1 877; SKX-NEXT: kxorw %k1, %k0, %k0 878; SKX-NEXT: kmovb %k0, 
{{.*}}(%rip) 879; SKX-NEXT: xorl $1, %edi 880; SKX-NEXT: jmp _f2 ## TAILCALL 881entry: 882 %.b1 = load i1, i1* @f1.v, align 4 883 %not..b1 = xor i1 %.b1, true 884 store i1 %not..b1, i1* @f1.v, align 4 885 %0 = zext i1 %not..b1 to i32 886 tail call void @f2(i32 %0) #2 887 ret void 888} 889 890declare void @f2(i32) #1 891 892define void @store_i16_i1(i16 %x, i1 *%y) { 893; CHECK-LABEL: store_i16_i1: 894; CHECK: ## BB#0: 895; CHECK-NEXT: andl $1, %edi 896; CHECK-NEXT: movb %dil, (%rsi) 897; CHECK-NEXT: retq 898 %c = trunc i16 %x to i1 899 store i1 %c, i1* %y 900 ret void 901} 902 903define void @store_i8_i1(i8 %x, i1 *%y) { 904; CHECK-LABEL: store_i8_i1: 905; CHECK: ## BB#0: 906; CHECK-NEXT: andl $1, %edi 907; CHECK-NEXT: movb %dil, (%rsi) 908; CHECK-NEXT: retq 909 %c = trunc i8 %x to i1 910 store i1 %c, i1* %y 911 ret void 912} 913 914define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { 915; KNL-LABEL: test_build_vec_v32i1: 916; KNL: ## BB#0: 917; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 918; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 919; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 920; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0 921; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 922; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 923; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 924; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 925; KNL-NEXT: retq 926; 927; SKX-LABEL: test_build_vec_v32i1: 928; SKX: ## BB#0: 929; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495 930; SKX-NEXT: kmovd %eax, %k1 931; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 932; SKX-NEXT: retq 933 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 
false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer 934 ret <32 x i16> %ret 935} 936 937define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { 938; KNL-LABEL: test_build_vec_v64i1: 939; KNL: ## BB#0: 940; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 941; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 942; KNL-NEXT: retq 943; 944; SKX-LABEL: test_build_vec_v64i1: 945; SKX: ## BB#0: 946; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544 947; SKX-NEXT: kmovq %rax, %k1 948; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} 949; SKX-NEXT: retq 950 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer 951 ret <64 x i8> %ret 952} 953 954define void @ktest_1(<8 x double> %in, double * %base) { 955; KNL-LABEL: ktest_1: 956; KNL: ## BB#0: 957; KNL-NEXT: vmovupd (%rdi), %zmm1 958; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1 959; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 960; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 961; KNL-NEXT: kmovw %k0, %eax 962; KNL-NEXT: testb %al, %al 963; KNL-NEXT: je LBB41_2 964; KNL-NEXT: ## BB#1: ## %L1 965; KNL-NEXT: vmovapd %zmm0, (%rdi) 966; KNL-NEXT: retq 967; KNL-NEXT: LBB41_2: ## 
%L2 968; KNL-NEXT: vmovapd %zmm0, 8(%rdi) 969; KNL-NEXT: retq 970; 971; SKX-LABEL: ktest_1: 972; SKX: ## BB#0: 973; SKX-NEXT: vmovupd (%rdi), %zmm1 974; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 975; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} 976; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} 977; SKX-NEXT: ktestb %k0, %k0 978; SKX-NEXT: je LBB41_2 979; SKX-NEXT: ## BB#1: ## %L1 980; SKX-NEXT: vmovapd %zmm0, (%rdi) 981; SKX-NEXT: retq 982; SKX-NEXT: LBB41_2: ## %L2 983; SKX-NEXT: vmovapd %zmm0, 8(%rdi) 984; SKX-NEXT: retq 985 %addr1 = getelementptr double, double * %base, i64 0 986 %addr2 = getelementptr double, double * %base, i64 1 987 988 %vaddr1 = bitcast double* %addr1 to <8 x double>* 989 %vaddr2 = bitcast double* %addr2 to <8 x double>* 990 991 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 992 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 993 994 %sel1 = fcmp ogt <8 x double>%in, %val1 995 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer 996 %sel2 = fcmp olt <8 x double> %in, %val3 997 %sel3 = and <8 x i1> %sel1, %sel2 998 999 %int_sel3 = bitcast <8 x i1> %sel3 to i8 1000 %res = icmp eq i8 %int_sel3, zeroinitializer 1001 br i1 %res, label %L2, label %L1 1002L1: 1003 store <8 x double> %in, <8 x double>* %vaddr1 1004 br label %End 1005L2: 1006 store <8 x double> %in, <8 x double>* %vaddr2 1007 br label %End 1008End: 1009 ret void 1010} 1011; ktest_2: fcmp ogt, masked select and fcmp olt on <32 x float>; the two <32 x i1> masks are OR-ed, bitcast to i32 and the branch tests the result against zero. With -mcpu=skx the expected code is kunpckwd + kord + ktestd; with -mcpu=knl each mask is rebuilt bit by bit (kshiftlw/kshiftrw/kmovw/vpinsrb) and the test goes through a stack slot (cmpl). 1012define void @ktest_2(<32 x float> %in, float * %base) { 1013; 1014; KNL-LABEL: ktest_2: 1015; KNL: ## BB#0: 1016; KNL-NEXT: pushq %rbp 1017; KNL-NEXT: Ltmp6: 1018; KNL-NEXT: .cfi_def_cfa_offset 16 1019; KNL-NEXT: Ltmp7: 1020; KNL-NEXT: .cfi_offset %rbp, -16 1021; KNL-NEXT: movq %rsp, %rbp 1022; KNL-NEXT: Ltmp8: 1023; KNL-NEXT: .cfi_def_cfa_register %rbp 1024; KNL-NEXT: andq $-32, %rsp 1025; KNL-NEXT: subq $32, %rsp 1026; KNL-NEXT: vmovups (%rdi), %zmm2 1027; KNL-NEXT: vmovups 64(%rdi), %zmm3 1028; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 1029; KNL-NEXT: kshiftlw $14, %k1, %k0 
1030; KNL-NEXT: kshiftrw $15, %k0, %k0 1031; KNL-NEXT: kmovw %k0, %eax 1032; KNL-NEXT: kshiftlw $15, %k1, %k0 1033; KNL-NEXT: kshiftrw $15, %k0, %k0 1034; KNL-NEXT: kmovw %k0, %ecx 1035; KNL-NEXT: vmovd %ecx, %xmm3 1036; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 1037; KNL-NEXT: kshiftlw $13, %k1, %k0 1038; KNL-NEXT: kshiftrw $15, %k0, %k0 1039; KNL-NEXT: kmovw %k0, %eax 1040; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 1041; KNL-NEXT: kshiftlw $12, %k1, %k0 1042; KNL-NEXT: kshiftrw $15, %k0, %k0 1043; KNL-NEXT: kmovw %k0, %eax 1044; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 1045; KNL-NEXT: kshiftlw $11, %k1, %k0 1046; KNL-NEXT: kshiftrw $15, %k0, %k0 1047; KNL-NEXT: kmovw %k0, %eax 1048; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 1049; KNL-NEXT: kshiftlw $10, %k1, %k0 1050; KNL-NEXT: kshiftrw $15, %k0, %k0 1051; KNL-NEXT: kmovw %k0, %eax 1052; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 1053; KNL-NEXT: kshiftlw $9, %k1, %k0 1054; KNL-NEXT: kshiftrw $15, %k0, %k0 1055; KNL-NEXT: kmovw %k0, %eax 1056; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 1057; KNL-NEXT: kshiftlw $8, %k1, %k0 1058; KNL-NEXT: kshiftrw $15, %k0, %k0 1059; KNL-NEXT: kmovw %k0, %eax 1060; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 1061; KNL-NEXT: kshiftlw $7, %k1, %k0 1062; KNL-NEXT: kshiftrw $15, %k0, %k0 1063; KNL-NEXT: kmovw %k0, %eax 1064; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 1065; KNL-NEXT: kshiftlw $6, %k1, %k0 1066; KNL-NEXT: kshiftrw $15, %k0, %k0 1067; KNL-NEXT: kmovw %k0, %eax 1068; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 1069; KNL-NEXT: kshiftlw $5, %k1, %k0 1070; KNL-NEXT: kshiftrw $15, %k0, %k0 1071; KNL-NEXT: kmovw %k0, %eax 1072; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 1073; KNL-NEXT: kshiftlw $4, %k1, %k0 1074; KNL-NEXT: kshiftrw $15, %k0, %k0 1075; KNL-NEXT: kmovw %k0, %eax 1076; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 1077; KNL-NEXT: kshiftlw $3, %k1, %k0 1078; KNL-NEXT: kshiftrw $15, %k0, %k0 1079; KNL-NEXT: kmovw %k0, %eax 1080; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 1081; 
KNL-NEXT: kshiftlw $2, %k1, %k0 1082; KNL-NEXT: kshiftrw $15, %k0, %k0 1083; KNL-NEXT: kmovw %k0, %eax 1084; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 1085; KNL-NEXT: kshiftlw $1, %k1, %k0 1086; KNL-NEXT: kshiftrw $15, %k0, %k0 1087; KNL-NEXT: kmovw %k0, %eax 1088; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1089; KNL-NEXT: kshiftlw $0, %k1, %k0 1090; KNL-NEXT: kshiftrw $15, %k0, %k0 1091; KNL-NEXT: kmovw %k0, %eax 1092; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 1093; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 1094; KNL-NEXT: kshiftlw $14, %k2, %k0 1095; KNL-NEXT: kshiftrw $15, %k0, %k0 1096; KNL-NEXT: kmovw %k0, %eax 1097; KNL-NEXT: kshiftlw $15, %k2, %k0 1098; KNL-NEXT: kshiftrw $15, %k0, %k0 1099; KNL-NEXT: kmovw %k0, %ecx 1100; KNL-NEXT: vmovd %ecx, %xmm2 1101; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 1102; KNL-NEXT: kshiftlw $13, %k2, %k0 1103; KNL-NEXT: kshiftrw $15, %k0, %k0 1104; KNL-NEXT: kmovw %k0, %eax 1105; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 1106; KNL-NEXT: kshiftlw $12, %k2, %k0 1107; KNL-NEXT: kshiftrw $15, %k0, %k0 1108; KNL-NEXT: kmovw %k0, %eax 1109; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 1110; KNL-NEXT: kshiftlw $11, %k2, %k0 1111; KNL-NEXT: kshiftrw $15, %k0, %k0 1112; KNL-NEXT: kmovw %k0, %eax 1113; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 1114; KNL-NEXT: kshiftlw $10, %k2, %k0 1115; KNL-NEXT: kshiftrw $15, %k0, %k0 1116; KNL-NEXT: kmovw %k0, %eax 1117; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 1118; KNL-NEXT: kshiftlw $9, %k2, %k0 1119; KNL-NEXT: kshiftrw $15, %k0, %k0 1120; KNL-NEXT: kmovw %k0, %eax 1121; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 1122; KNL-NEXT: kshiftlw $8, %k2, %k0 1123; KNL-NEXT: kshiftrw $15, %k0, %k0 1124; KNL-NEXT: kmovw %k0, %eax 1125; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 1126; KNL-NEXT: kshiftlw $7, %k2, %k0 1127; KNL-NEXT: kshiftrw $15, %k0, %k0 1128; KNL-NEXT: kmovw %k0, %eax 1129; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 1130; KNL-NEXT: kshiftlw $6, %k2, %k0 1131; KNL-NEXT: kshiftrw $15, %k0, %k0 1132; KNL-NEXT: 
kmovw %k0, %eax 1133; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 1134; KNL-NEXT: kshiftlw $5, %k2, %k0 1135; KNL-NEXT: kshiftrw $15, %k0, %k0 1136; KNL-NEXT: kmovw %k0, %eax 1137; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 1138; KNL-NEXT: kshiftlw $4, %k2, %k0 1139; KNL-NEXT: kshiftrw $15, %k0, %k0 1140; KNL-NEXT: kmovw %k0, %eax 1141; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 1142; KNL-NEXT: kshiftlw $3, %k2, %k0 1143; KNL-NEXT: kshiftrw $15, %k0, %k0 1144; KNL-NEXT: kmovw %k0, %eax 1145; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 1146; KNL-NEXT: kshiftlw $2, %k2, %k0 1147; KNL-NEXT: kshiftrw $15, %k0, %k0 1148; KNL-NEXT: kmovw %k0, %eax 1149; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 1150; KNL-NEXT: kshiftlw $1, %k2, %k0 1151; KNL-NEXT: kshiftrw $15, %k0, %k0 1152; KNL-NEXT: kmovw %k0, %eax 1153; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 1154; KNL-NEXT: kshiftlw $0, %k2, %k0 1155; KNL-NEXT: kshiftrw $15, %k0, %k0 1156; KNL-NEXT: kmovw %k0, %eax 1157; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 1158; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 1159; KNL-NEXT: vpsllw $7, %ymm2, %ymm2 1160; KNL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 1161; KNL-NEXT: vpxor %ymm3, %ymm3, %ymm3 1162; KNL-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2 1163; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} 1164; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} 1165; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 1166; KNL-NEXT: kshiftlw $14, %k0, %k1 1167; KNL-NEXT: kshiftrw $15, %k1, %k1 1168; KNL-NEXT: kmovw %k1, %eax 1169; KNL-NEXT: kshiftlw $15, %k0, %k1 1170; KNL-NEXT: kshiftrw $15, %k1, %k1 1171; KNL-NEXT: kmovw %k1, %ecx 1172; KNL-NEXT: vmovd %ecx, %xmm4 1173; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 1174; KNL-NEXT: kshiftlw $13, %k0, %k1 1175; KNL-NEXT: kshiftrw $15, %k1, %k1 1176; KNL-NEXT: kmovw %k1, %eax 1177; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 1178; KNL-NEXT: kshiftlw $12, %k0, %k1 1179; KNL-NEXT: kshiftrw $15, %k1, %k1 1180; KNL-NEXT: kmovw %k1, %eax 1181; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 1182; 
KNL-NEXT: kshiftlw $11, %k0, %k1 1183; KNL-NEXT: kshiftrw $15, %k1, %k1 1184; KNL-NEXT: kmovw %k1, %eax 1185; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 1186; KNL-NEXT: kshiftlw $10, %k0, %k1 1187; KNL-NEXT: kshiftrw $15, %k1, %k1 1188; KNL-NEXT: kmovw %k1, %eax 1189; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 1190; KNL-NEXT: kshiftlw $9, %k0, %k1 1191; KNL-NEXT: kshiftrw $15, %k1, %k1 1192; KNL-NEXT: kmovw %k1, %eax 1193; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 1194; KNL-NEXT: kshiftlw $8, %k0, %k1 1195; KNL-NEXT: kshiftrw $15, %k1, %k1 1196; KNL-NEXT: kmovw %k1, %eax 1197; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 1198; KNL-NEXT: kshiftlw $7, %k0, %k1 1199; KNL-NEXT: kshiftrw $15, %k1, %k1 1200; KNL-NEXT: kmovw %k1, %eax 1201; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 1202; KNL-NEXT: kshiftlw $6, %k0, %k1 1203; KNL-NEXT: kshiftrw $15, %k1, %k1 1204; KNL-NEXT: kmovw %k1, %eax 1205; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 1206; KNL-NEXT: kshiftlw $5, %k0, %k1 1207; KNL-NEXT: kshiftrw $15, %k1, %k1 1208; KNL-NEXT: kmovw %k1, %eax 1209; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 1210; KNL-NEXT: kshiftlw $4, %k0, %k1 1211; KNL-NEXT: kshiftrw $15, %k1, %k1 1212; KNL-NEXT: kmovw %k1, %eax 1213; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 1214; KNL-NEXT: kshiftlw $3, %k0, %k1 1215; KNL-NEXT: kshiftrw $15, %k1, %k1 1216; KNL-NEXT: kmovw %k1, %eax 1217; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 1218; KNL-NEXT: kshiftlw $2, %k0, %k1 1219; KNL-NEXT: kshiftrw $15, %k1, %k1 1220; KNL-NEXT: kmovw %k1, %eax 1221; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 1222; KNL-NEXT: kshiftlw $1, %k0, %k1 1223; KNL-NEXT: kshiftrw $15, %k1, %k1 1224; KNL-NEXT: kmovw %k1, %eax 1225; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 1226; KNL-NEXT: kshiftlw $0, %k0, %k0 1227; KNL-NEXT: kshiftrw $15, %k0, %k0 1228; KNL-NEXT: kmovw %k0, %eax 1229; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 1230; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 1231; KNL-NEXT: kshiftlw $14, %k0, %k1 1232; KNL-NEXT: kshiftrw $15, %k1, %k1 
1233; KNL-NEXT: kmovw %k1, %eax 1234; KNL-NEXT: kshiftlw $15, %k0, %k1 1235; KNL-NEXT: kshiftrw $15, %k1, %k1 1236; KNL-NEXT: kmovw %k1, %ecx 1237; KNL-NEXT: vmovd %ecx, %xmm3 1238; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 1239; KNL-NEXT: kshiftlw $13, %k0, %k1 1240; KNL-NEXT: kshiftrw $15, %k1, %k1 1241; KNL-NEXT: kmovw %k1, %eax 1242; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 1243; KNL-NEXT: kshiftlw $12, %k0, %k1 1244; KNL-NEXT: kshiftrw $15, %k1, %k1 1245; KNL-NEXT: kmovw %k1, %eax 1246; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 1247; KNL-NEXT: kshiftlw $11, %k0, %k1 1248; KNL-NEXT: kshiftrw $15, %k1, %k1 1249; KNL-NEXT: kmovw %k1, %eax 1250; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 1251; KNL-NEXT: kshiftlw $10, %k0, %k1 1252; KNL-NEXT: kshiftrw $15, %k1, %k1 1253; KNL-NEXT: kmovw %k1, %eax 1254; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 1255; KNL-NEXT: kshiftlw $9, %k0, %k1 1256; KNL-NEXT: kshiftrw $15, %k1, %k1 1257; KNL-NEXT: kmovw %k1, %eax 1258; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 1259; KNL-NEXT: kshiftlw $8, %k0, %k1 1260; KNL-NEXT: kshiftrw $15, %k1, %k1 1261; KNL-NEXT: kmovw %k1, %eax 1262; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 1263; KNL-NEXT: kshiftlw $7, %k0, %k1 1264; KNL-NEXT: kshiftrw $15, %k1, %k1 1265; KNL-NEXT: kmovw %k1, %eax 1266; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 1267; KNL-NEXT: kshiftlw $6, %k0, %k1 1268; KNL-NEXT: kshiftrw $15, %k1, %k1 1269; KNL-NEXT: kmovw %k1, %eax 1270; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 1271; KNL-NEXT: kshiftlw $5, %k0, %k1 1272; KNL-NEXT: kshiftrw $15, %k1, %k1 1273; KNL-NEXT: kmovw %k1, %eax 1274; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 1275; KNL-NEXT: kshiftlw $4, %k0, %k1 1276; KNL-NEXT: kshiftrw $15, %k1, %k1 1277; KNL-NEXT: kmovw %k1, %eax 1278; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 1279; KNL-NEXT: kshiftlw $3, %k0, %k1 1280; KNL-NEXT: kshiftrw $15, %k1, %k1 1281; KNL-NEXT: kmovw %k1, %eax 1282; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 1283; KNL-NEXT: kshiftlw $2, %k0, %k1 1284; KNL-NEXT: 
kshiftrw $15, %k1, %k1 1285; KNL-NEXT: kmovw %k1, %eax 1286; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 1287; KNL-NEXT: kshiftlw $1, %k0, %k1 1288; KNL-NEXT: kshiftrw $15, %k1, %k1 1289; KNL-NEXT: kmovw %k1, %eax 1290; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1291; KNL-NEXT: kshiftlw $0, %k0, %k0 1292; KNL-NEXT: kshiftrw $15, %k0, %k0 1293; KNL-NEXT: kmovw %k0, %eax 1294; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 1295; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 1296; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2 1297; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 1298; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 1299; KNL-NEXT: vpslld $31, %zmm3, %zmm3 1300; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 1301; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) 1302; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 1303; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1304; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 1305; KNL-NEXT: kmovw %k0, (%rsp) 1306; KNL-NEXT: cmpl $0, (%rsp) 1307; KNL-NEXT: je LBB42_2 1308; KNL-NEXT: ## BB#1: ## %L1 1309; KNL-NEXT: vmovaps %zmm0, (%rdi) 1310; KNL-NEXT: vmovaps %zmm1, 64(%rdi) 1311; KNL-NEXT: jmp LBB42_3 1312; KNL-NEXT: LBB42_2: ## %L2 1313; KNL-NEXT: vmovaps %zmm0, 4(%rdi) 1314; KNL-NEXT: vmovaps %zmm1, 68(%rdi) 1315; KNL-NEXT: LBB42_3: ## %End 1316; KNL-NEXT: movq %rbp, %rsp 1317; KNL-NEXT: popq %rbp 1318; KNL-NEXT: retq 1319; 1320; SKX-LABEL: ktest_2: 1321; SKX: ## BB#0: 1322; SKX-NEXT: vmovups 64(%rdi), %zmm2 1323; SKX-NEXT: vmovups (%rdi), %zmm3 1324; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1 1325; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2 1326; SKX-NEXT: kunpckwd %k1, %k2, %k0 1327; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} 1328; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} 1329; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 1330; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 1331; SKX-NEXT: kunpckwd %k1, %k2, %k1 1332; SKX-NEXT: kord %k1, %k0, %k0 1333; SKX-NEXT: ktestd %k0, %k0 1334; SKX-NEXT: je LBB42_2 1335; SKX-NEXT: ## BB#1: ## %L1 1336; SKX-NEXT: vmovaps %zmm0, (%rdi) 1337; SKX-NEXT: vmovaps %zmm1, 64(%rdi) 1338; SKX-NEXT: 
retq 1339; SKX-NEXT: LBB42_2: ## %L2 1340; SKX-NEXT: vmovaps %zmm0, 4(%rdi) 1341; SKX-NEXT: vmovaps %zmm1, 68(%rdi) 1342; SKX-NEXT: retq 1343 %addr1 = getelementptr float, float * %base, i64 0 1344 %addr2 = getelementptr float, float * %base, i64 1 1345 1346 %vaddr1 = bitcast float* %addr1 to <32 x float>* 1347 %vaddr2 = bitcast float* %addr2 to <32 x float>* 1348 1349 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 1350 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 1351 1352 %sel1 = fcmp ogt <32 x float>%in, %val1 1353 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer 1354 %sel2 = fcmp olt <32 x float> %in, %val3 1355 %sel3 = or <32 x i1> %sel1, %sel2 1356 1357 %int_sel3 = bitcast <32 x i1> %sel3 to i32 1358 %res = icmp eq i32 %int_sel3, zeroinitializer 1359 br i1 %res, label %L2, label %L1 1360L1: 1361 store <32 x float> %in, <32 x float>* %vaddr1 1362 br label %End 1363L2: 1364 store <32 x float> %in, <32 x float>* %vaddr2 1365 br label %End 1366End: 1367 ret void 1368} 1369; load_8i1: loads an <8 x i1> vector from memory and sign-extends every element to i64. 1370define <8 x i64> @load_8i1(<8 x i1>* %a) { 1371; KNL-LABEL: load_8i1: 1372; KNL: ## BB#0: 1373; KNL-NEXT: movzbl (%rdi), %eax 1374; KNL-NEXT: kmovw %eax, %k1 1375; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1376; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1377; KNL-NEXT: retq 1378; 1379; SKX-LABEL: load_8i1: 1380; SKX: ## BB#0: 1381; SKX-NEXT: kmovb (%rdi), %k0 1382; SKX-NEXT: vpmovm2q %k0, %zmm0 1383; SKX-NEXT: retq 1384 %b = load <8 x i1>, <8 x i1>* %a 1385 %c = sext <8 x i1> %b to <8 x i64> 1386 ret <8 x i64> %c 1387} 1388; load_16i1: loads a <16 x i1> vector from memory and sign-extends every element to i32. 1389define <16 x i32> @load_16i1(<16 x i1>* %a) { 1390; KNL-LABEL: load_16i1: 1391; KNL: ## BB#0: 1392; KNL-NEXT: kmovw (%rdi), %k1 1393; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1394; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 1395; KNL-NEXT: retq 1396; 1397; SKX-LABEL: load_16i1: 1398; SKX: ## BB#0: 1399; SKX-NEXT: kmovw (%rdi), %k0 1400; SKX-NEXT: vpmovm2d %k0, %zmm0 1401; SKX-NEXT: retq 1402 %b = 
load <16 x i1>, <16 x i1>* %a 1403 %c = sext <16 x i1> %b to <16 x i32> 1404 ret <16 x i32> %c 1405} 1406; load_2i1: loads a <2 x i1> vector from memory and sign-extends every element to i16. 1407define <2 x i16> @load_2i1(<2 x i1>* %a) { 1408; KNL-LABEL: load_2i1: 1409; KNL: ## BB#0: 1410; KNL-NEXT: movzbl (%rdi), %eax 1411; KNL-NEXT: kmovw %eax, %k1 1412; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1413; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1414; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 1415; KNL-NEXT: retq 1416; 1417; SKX-LABEL: load_2i1: 1418; SKX: ## BB#0: 1419; SKX-NEXT: kmovb (%rdi), %k0 1420; SKX-NEXT: vpmovm2q %k0, %xmm0 1421; SKX-NEXT: retq 1422 %b = load <2 x i1>, <2 x i1>* %a 1423 %c = sext <2 x i1> %b to <2 x i16> 1424 ret <2 x i16> %c 1425} 1426; load_4i1: loads a <4 x i1> vector from memory and sign-extends every element to i16. 1427define <4 x i16> @load_4i1(<4 x i1>* %a) { 1428; KNL-LABEL: load_4i1: 1429; KNL: ## BB#0: 1430; KNL-NEXT: movzbl (%rdi), %eax 1431; KNL-NEXT: kmovw %eax, %k1 1432; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1433; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1434; KNL-NEXT: vpmovqd %zmm0, %ymm0 1435; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1436; KNL-NEXT: retq 1437; 1438; SKX-LABEL: load_4i1: 1439; SKX: ## BB#0: 1440; SKX-NEXT: kmovb (%rdi), %k0 1441; SKX-NEXT: vpmovm2d %k0, %xmm0 1442; SKX-NEXT: retq 1443 %b = load <4 x i1>, <4 x i1>* %a 1444 %c = sext <4 x i1> %b to <4 x i16> 1445 ret <4 x i16> %c 1446} 1447; load_32i1: loads a <32 x i1> vector and sign-extends every element to i16; with -mcpu=knl the expected code reads two kmovw halves, at (%rdi) and 2(%rdi), while -mcpu=skx expects one kmovd. 1448define <32 x i16> @load_32i1(<32 x i1>* %a) { 1449; KNL-LABEL: load_32i1: 1450; KNL: ## BB#0: 1451; KNL-NEXT: kmovw (%rdi), %k1 1452; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 1453; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 1454; KNL-NEXT: vpmovdw %zmm0, %ymm0 1455; KNL-NEXT: kmovw 2(%rdi), %k1 1456; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 1457; KNL-NEXT: vpmovdw %zmm1, %ymm1 1458; KNL-NEXT: retq 1459; 1460; SKX-LABEL: load_32i1: 1461; SKX: ## BB#0: 1462; SKX-NEXT: kmovd (%rdi), %k0 1463; SKX-NEXT: vpmovm2w %k0, %zmm0 1464; SKX-NEXT: retq 1465 %b = load <32 x i1>, <32 x i1>* %a 1466 %c = sext <32 x i1> %b to <32 x i16> 1467 ret <32 x 
i16> %c 1468} 1469; load_64i1: loads a <64 x i1> vector and sign-extends every element to i8; with -mcpu=knl the expected code issues four kmovw loads (at (%rdi), 2(%rdi), 4(%rdi), 6(%rdi)), while -mcpu=skx expects one kmovq. 1470define <64 x i8> @load_64i1(<64 x i1>* %a) { 1471; KNL-LABEL: load_64i1: 1472; KNL: ## BB#0: 1473; KNL-NEXT: kmovw (%rdi), %k1 1474; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 1475; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 1476; KNL-NEXT: vpmovdb %zmm0, %xmm0 1477; KNL-NEXT: kmovw 2(%rdi), %k1 1478; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} 1479; KNL-NEXT: vpmovdb %zmm2, %xmm2 1480; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1481; KNL-NEXT: kmovw 4(%rdi), %k1 1482; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} 1483; KNL-NEXT: vpmovdb %zmm2, %xmm2 1484; KNL-NEXT: kmovw 6(%rdi), %k1 1485; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 1486; KNL-NEXT: vpmovdb %zmm1, %xmm1 1487; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1488; KNL-NEXT: retq 1489; 1490; SKX-LABEL: load_64i1: 1491; SKX: ## BB#0: 1492; SKX-NEXT: kmovq (%rdi), %k0 1493; SKX-NEXT: vpmovm2b %k0, %zmm0 1494; SKX-NEXT: retq 1495 %b = load <64 x i1>, <64 x i1>* %a 1496 %c = sext <64 x i1> %b to <64 x i8> 1497 ret <64 x i8> %c 1498} 1499; store_8i1: stores an <8 x i1> argument to memory; both expected sequences end in a single byte-sized store to (%rdi). 1500define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { 1501; KNL-LABEL: store_8i1: 1502; KNL: ## BB#0: 1503; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1504; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1505; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 1506; KNL-NEXT: kmovw %k0, %eax 1507; KNL-NEXT: movb %al, (%rdi) 1508; KNL-NEXT: retq 1509; 1510; SKX-LABEL: store_8i1: 1511; SKX: ## BB#0: 1512; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1513; SKX-NEXT: vpmovw2m %xmm0, %k0 1514; SKX-NEXT: kmovb %k0, (%rdi) 1515; SKX-NEXT: retq 1516 store <8 x i1> %v, <8 x i1>* %a 1517 ret void 1518} 1519; store_8i1_1: truncates an <8 x i16> argument to <8 x i1> and stores it; the expected instructions are the same as for store_8i1. 1520define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { 1521; KNL-LABEL: store_8i1_1: 1522; KNL: ## BB#0: 1523; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1524; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1525; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 1526; KNL-NEXT: kmovw %k0, %eax 1527; KNL-NEXT: movb %al, (%rdi) 1528; KNL-NEXT: retq 1529; 1530; SKX-LABEL: store_8i1_1: 1531; SKX: ## BB#0: 1532; SKX-NEXT: vpsllw $15, 
%xmm0, %xmm0 1533; SKX-NEXT: vpmovw2m %xmm0, %k0 1534; SKX-NEXT: kmovb %k0, (%rdi) 1535; SKX-NEXT: retq 1536 %v1 = trunc <8 x i16> %v to <8 x i1> 1537 store <8 x i1> %v1, <8 x i1>* %a 1538 ret void 1539} 1540; store_16i1: stores a <16 x i1> argument to memory; both expected sequences finish with one kmovw to (%rdi). 1541define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { 1542; KNL-LABEL: store_16i1: 1543; KNL: ## BB#0: 1544; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1545; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1546; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1547; KNL-NEXT: kmovw %k0, (%rdi) 1548; KNL-NEXT: retq 1549; 1550; SKX-LABEL: store_16i1: 1551; SKX: ## BB#0: 1552; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1553; SKX-NEXT: vpmovb2m %xmm0, %k0 1554; SKX-NEXT: kmovw %k0, (%rdi) 1555; SKX-NEXT: retq 1556 store <16 x i1> %v, <16 x i1>* %a 1557 ret void 1558} 1559; store_32i1: stores a <32 x i1> argument to memory; with -mcpu=knl the expected code writes two kmovw halves, to 2(%rdi) and (%rdi), while -mcpu=skx expects one kmovd. 1560define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { 1561; KNL-LABEL: store_32i1: 1562; KNL: ## BB#0: 1563; KNL-NEXT: vextractf128 $1, %ymm0, %xmm1 1564; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1565; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1566; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 1567; KNL-NEXT: kmovw %k0, 2(%rdi) 1568; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1569; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1570; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1571; KNL-NEXT: kmovw %k0, (%rdi) 1572; KNL-NEXT: retq 1573; 1574; SKX-LABEL: store_32i1: 1575; SKX: ## BB#0: 1576; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 1577; SKX-NEXT: vpmovb2m %ymm0, %k0 1578; SKX-NEXT: kmovd %k0, (%rdi) 1579; SKX-NEXT: retq 1580 store <32 x i1> %v, <32 x i1>* %a 1581 ret void 1582} 1583; store_32i1_1: truncates a <32 x i16> argument to <32 x i1> and stores it; with -mcpu=knl the expected code again writes two kmovw halves, while -mcpu=skx expects vpmovw2m followed by one kmovd. 1584define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { 1585; KNL-LABEL: store_32i1_1: 1586; KNL: ## BB#0: 1587; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 1588; KNL-NEXT: vpmovdb %zmm0, %xmm0 1589; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 1590; KNL-NEXT: vpmovdb %zmm1, %xmm1 1591; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1592; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1593; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 1594; KNL-NEXT: kmovw %k0, 2(%rdi) 1595; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1596; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1597; 
KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1598; KNL-NEXT: kmovw %k0, (%rdi) 1599; KNL-NEXT: retq 1600; 1601; SKX-LABEL: store_32i1_1: 1602; SKX: ## BB#0: 1603; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 1604; SKX-NEXT: vpmovw2m %zmm0, %k0 1605; SKX-NEXT: kmovd %k0, (%rdi) 1606; SKX-NEXT: retq 1607 %v1 = trunc <32 x i16> %v to <32 x i1> 1608 store <32 x i1> %v1, <32 x i1>* %a 1609 ret void 1610} 1611; store_64i1: stores a <64 x i1> argument to memory; with -mcpu=skx the expected code is vpsllw + vpmovb2m + kmovq, while with -mcpu=knl every mask bit is moved through a general-purpose register (kshiftlw/kshiftrw/kmovw/vpinsrb) and four kmovw stores are issued, to 6(%rdi), 4(%rdi), 2(%rdi) and (%rdi). 1612 1613define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { 1614; 1615; KNL-LABEL: store_64i1: 1616; KNL: ## BB#0: 1617; KNL-NEXT: pushq %rbp 1618; KNL-NEXT: Ltmp9: 1619; KNL-NEXT: .cfi_def_cfa_offset 16 1620; KNL-NEXT: pushq %r15 1621; KNL-NEXT: Ltmp10: 1622; KNL-NEXT: .cfi_def_cfa_offset 24 1623; KNL-NEXT: pushq %r14 1624; KNL-NEXT: Ltmp11: 1625; KNL-NEXT: .cfi_def_cfa_offset 32 1626; KNL-NEXT: pushq %r13 1627; KNL-NEXT: Ltmp12: 1628; KNL-NEXT: .cfi_def_cfa_offset 40 1629; KNL-NEXT: pushq %r12 1630; KNL-NEXT: Ltmp13: 1631; KNL-NEXT: .cfi_def_cfa_offset 48 1632; KNL-NEXT: pushq %rbx 1633; KNL-NEXT: Ltmp14: 1634; KNL-NEXT: .cfi_def_cfa_offset 56 1635; KNL-NEXT: Ltmp15: 1636; KNL-NEXT: .cfi_offset %rbx, -56 1637; KNL-NEXT: Ltmp16: 1638; KNL-NEXT: .cfi_offset %r12, -48 1639; KNL-NEXT: Ltmp17: 1640; KNL-NEXT: .cfi_offset %r13, -40 1641; KNL-NEXT: Ltmp18: 1642; KNL-NEXT: .cfi_offset %r14, -32 1643; KNL-NEXT: Ltmp19: 1644; KNL-NEXT: .cfi_offset %r15, -24 1645; KNL-NEXT: Ltmp20: 1646; KNL-NEXT: .cfi_offset %rbp, -16 1647; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1648; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1649; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1650; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1651; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 1652; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1653; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 1654; KNL-NEXT: vpslld $31, %zmm3, %zmm3 1655; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 1656; KNL-NEXT: kshiftlw $14, %k0, %k1 1657; KNL-NEXT: kshiftrw $15, %k1, %k1 1658; KNL-NEXT: kmovw %k1, %r8d 1659; KNL-NEXT: kshiftlw $15, %k0, %k1 1660; KNL-NEXT: kshiftrw $15, %k1, %k1 1661; KNL-NEXT: kmovw %k1, 
%r9d 1662; KNL-NEXT: kshiftlw $13, %k0, %k1 1663; KNL-NEXT: kshiftrw $15, %k1, %k1 1664; KNL-NEXT: kmovw %k1, %r10d 1665; KNL-NEXT: kshiftlw $12, %k0, %k1 1666; KNL-NEXT: kshiftrw $15, %k1, %k1 1667; KNL-NEXT: kmovw %k1, %r11d 1668; KNL-NEXT: kshiftlw $11, %k0, %k1 1669; KNL-NEXT: kshiftrw $15, %k1, %k1 1670; KNL-NEXT: kmovw %k1, %r14d 1671; KNL-NEXT: kshiftlw $10, %k0, %k1 1672; KNL-NEXT: kshiftrw $15, %k1, %k1 1673; KNL-NEXT: kmovw %k1, %r15d 1674; KNL-NEXT: kshiftlw $9, %k0, %k1 1675; KNL-NEXT: kshiftrw $15, %k1, %k1 1676; KNL-NEXT: kmovw %k1, %r12d 1677; KNL-NEXT: kshiftlw $8, %k0, %k1 1678; KNL-NEXT: kshiftrw $15, %k1, %k1 1679; KNL-NEXT: kmovw %k1, %r13d 1680; KNL-NEXT: kshiftlw $7, %k0, %k1 1681; KNL-NEXT: kshiftrw $15, %k1, %k1 1682; KNL-NEXT: kmovw %k1, %ebx 1683; KNL-NEXT: kshiftlw $6, %k0, %k1 1684; KNL-NEXT: kshiftrw $15, %k1, %k1 1685; KNL-NEXT: kmovw %k1, %ebp 1686; KNL-NEXT: kshiftlw $5, %k0, %k1 1687; KNL-NEXT: kshiftrw $15, %k1, %k1 1688; KNL-NEXT: kmovw %k1, %eax 1689; KNL-NEXT: kshiftlw $4, %k0, %k1 1690; KNL-NEXT: kshiftrw $15, %k1, %k1 1691; KNL-NEXT: kmovw %k1, %ecx 1692; KNL-NEXT: kshiftlw $3, %k0, %k1 1693; KNL-NEXT: kshiftrw $15, %k1, %k1 1694; KNL-NEXT: kmovw %k1, %edx 1695; KNL-NEXT: kshiftlw $2, %k0, %k1 1696; KNL-NEXT: kshiftrw $15, %k1, %k1 1697; KNL-NEXT: kmovw %k1, %esi 1698; KNL-NEXT: kshiftlw $1, %k0, %k1 1699; KNL-NEXT: kshiftrw $15, %k1, %k1 1700; KNL-NEXT: vmovd %r9d, %xmm3 1701; KNL-NEXT: kmovw %k1, %r9d 1702; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 1703; KNL-NEXT: kshiftlw $0, %k0, %k0 1704; KNL-NEXT: kshiftrw $15, %k0, %k0 1705; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 1706; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 1707; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 1708; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 1709; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 1710; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 1711; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 1712; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 1713; KNL-NEXT: vpinsrb $9, 
%ebp, %xmm2, %xmm2 1714; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 1715; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 1716; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 1717; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 1718; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 1719; KNL-NEXT: kmovw %k0, %eax 1720; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 1721; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 1722; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1723; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 1724; KNL-NEXT: kmovw %k0, 6(%rdi) 1725; KNL-NEXT: kshiftlw $14, %k2, %k0 1726; KNL-NEXT: kshiftrw $15, %k0, %k0 1727; KNL-NEXT: kmovw %k0, %r8d 1728; KNL-NEXT: kshiftlw $15, %k2, %k0 1729; KNL-NEXT: kshiftrw $15, %k0, %k0 1730; KNL-NEXT: kmovw %k0, %r10d 1731; KNL-NEXT: kshiftlw $13, %k2, %k0 1732; KNL-NEXT: kshiftrw $15, %k0, %k0 1733; KNL-NEXT: kmovw %k0, %r9d 1734; KNL-NEXT: kshiftlw $12, %k2, %k0 1735; KNL-NEXT: kshiftrw $15, %k0, %k0 1736; KNL-NEXT: kmovw %k0, %r11d 1737; KNL-NEXT: kshiftlw $11, %k2, %k0 1738; KNL-NEXT: kshiftrw $15, %k0, %k0 1739; KNL-NEXT: kmovw %k0, %r14d 1740; KNL-NEXT: kshiftlw $10, %k2, %k0 1741; KNL-NEXT: kshiftrw $15, %k0, %k0 1742; KNL-NEXT: kmovw %k0, %r15d 1743; KNL-NEXT: kshiftlw $9, %k2, %k0 1744; KNL-NEXT: kshiftrw $15, %k0, %k0 1745; KNL-NEXT: kmovw %k0, %r12d 1746; KNL-NEXT: kshiftlw $8, %k2, %k0 1747; KNL-NEXT: kshiftrw $15, %k0, %k0 1748; KNL-NEXT: kmovw %k0, %r13d 1749; KNL-NEXT: kshiftlw $7, %k2, %k0 1750; KNL-NEXT: kshiftrw $15, %k0, %k0 1751; KNL-NEXT: kmovw %k0, %edx 1752; KNL-NEXT: kshiftlw $6, %k2, %k0 1753; KNL-NEXT: kshiftrw $15, %k0, %k0 1754; KNL-NEXT: kmovw %k0, %esi 1755; KNL-NEXT: kshiftlw $5, %k2, %k0 1756; KNL-NEXT: kshiftrw $15, %k0, %k0 1757; KNL-NEXT: kmovw %k0, %ebp 1758; KNL-NEXT: kshiftlw $4, %k2, %k0 1759; KNL-NEXT: kshiftrw $15, %k0, %k0 1760; KNL-NEXT: kmovw %k0, %ebx 1761; KNL-NEXT: kshiftlw $3, %k2, %k0 1762; KNL-NEXT: kshiftrw $15, %k0, %k0 1763; KNL-NEXT: kmovw %k0, %eax 1764; KNL-NEXT: kshiftlw $2, %k2, %k0 1765; KNL-NEXT: kshiftrw $15, %k0, 
%k0 1766; KNL-NEXT: kmovw %k0, %ecx 1767; KNL-NEXT: kshiftlw $1, %k2, %k0 1768; KNL-NEXT: kshiftrw $15, %k0, %k0 1769; KNL-NEXT: vmovd %r10d, %xmm2 1770; KNL-NEXT: kmovw %k0, %r10d 1771; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1772; KNL-NEXT: kshiftlw $0, %k2, %k0 1773; KNL-NEXT: kshiftrw $15, %k0, %k0 1774; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 1775; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 1776; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 1777; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 1778; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 1779; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 1780; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 1781; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1 1782; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 1783; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 1784; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 1785; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 1786; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1787; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 1788; KNL-NEXT: kmovw %k0, %eax 1789; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1790; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1791; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1792; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 1793; KNL-NEXT: kmovw %k0, 4(%rdi) 1794; KNL-NEXT: kshiftlw $14, %k1, %k0 1795; KNL-NEXT: kshiftrw $15, %k0, %k0 1796; KNL-NEXT: kmovw %k0, %r8d 1797; KNL-NEXT: kshiftlw $15, %k1, %k0 1798; KNL-NEXT: kshiftrw $15, %k0, %k0 1799; KNL-NEXT: kmovw %k0, %r10d 1800; KNL-NEXT: kshiftlw $13, %k1, %k0 1801; KNL-NEXT: kshiftrw $15, %k0, %k0 1802; KNL-NEXT: kmovw %k0, %r9d 1803; KNL-NEXT: kshiftlw $12, %k1, %k0 1804; KNL-NEXT: kshiftrw $15, %k0, %k0 1805; KNL-NEXT: kmovw %k0, %r11d 1806; KNL-NEXT: kshiftlw $11, %k1, %k0 1807; KNL-NEXT: kshiftrw $15, %k0, %k0 1808; KNL-NEXT: kmovw %k0, %r14d 1809; KNL-NEXT: kshiftlw $10, %k1, %k0 1810; KNL-NEXT: kshiftrw $15, %k0, %k0 1811; KNL-NEXT: kmovw %k0, %r15d 1812; KNL-NEXT: kshiftlw $9, %k1, %k0 1813; KNL-NEXT: kshiftrw $15, %k0, %k0 1814; KNL-NEXT: kmovw %k0, %r12d 1815; KNL-NEXT: 
kshiftlw $8, %k1, %k0 1816; KNL-NEXT: kshiftrw $15, %k0, %k0 1817; KNL-NEXT: kmovw %k0, %r13d 1818; KNL-NEXT: kshiftlw $7, %k1, %k0 1819; KNL-NEXT: kshiftrw $15, %k0, %k0 1820; KNL-NEXT: kmovw %k0, %edx 1821; KNL-NEXT: kshiftlw $6, %k1, %k0 1822; KNL-NEXT: kshiftrw $15, %k0, %k0 1823; KNL-NEXT: kmovw %k0, %esi 1824; KNL-NEXT: kshiftlw $5, %k1, %k0 1825; KNL-NEXT: kshiftrw $15, %k0, %k0 1826; KNL-NEXT: kmovw %k0, %ebp 1827; KNL-NEXT: kshiftlw $4, %k1, %k0 1828; KNL-NEXT: kshiftrw $15, %k0, %k0 1829; KNL-NEXT: kmovw %k0, %ebx 1830; KNL-NEXT: kshiftlw $3, %k1, %k0 1831; KNL-NEXT: kshiftrw $15, %k0, %k0 1832; KNL-NEXT: kmovw %k0, %eax 1833; KNL-NEXT: kshiftlw $2, %k1, %k0 1834; KNL-NEXT: kshiftrw $15, %k0, %k0 1835; KNL-NEXT: kmovw %k0, %ecx 1836; KNL-NEXT: kshiftlw $1, %k1, %k0 1837; KNL-NEXT: kshiftrw $15, %k0, %k0 1838; KNL-NEXT: vmovd %r10d, %xmm1 1839; KNL-NEXT: kmovw %k0, %r10d 1840; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1841; KNL-NEXT: kshiftlw $0, %k1, %k1 1842; KNL-NEXT: kshiftrw $15, %k1, %k1 1843; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 1844; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 1845; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 1846; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 1847; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 1848; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 1849; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 1850; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 1851; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 1852; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 1853; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 1854; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1855; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1856; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 1857; KNL-NEXT: kmovw %k1, %eax 1858; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1859; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1860; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1861; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1862; KNL-NEXT: kmovw %k1, 2(%rdi) 1863; KNL-NEXT: kshiftlw $14, %k0, %k1 1864; KNL-NEXT: kshiftrw $15, %k1, %k1 
1865; KNL-NEXT: kmovw %k1, %r8d 1866; KNL-NEXT: kshiftlw $15, %k0, %k1 1867; KNL-NEXT: kshiftrw $15, %k1, %k1 1868; KNL-NEXT: kmovw %k1, %r9d 1869; KNL-NEXT: kshiftlw $13, %k0, %k1 1870; KNL-NEXT: kshiftrw $15, %k1, %k1 1871; KNL-NEXT: kmovw %k1, %r10d 1872; KNL-NEXT: kshiftlw $12, %k0, %k1 1873; KNL-NEXT: kshiftrw $15, %k1, %k1 1874; KNL-NEXT: kmovw %k1, %r11d 1875; KNL-NEXT: kshiftlw $11, %k0, %k1 1876; KNL-NEXT: kshiftrw $15, %k1, %k1 1877; KNL-NEXT: kmovw %k1, %r14d 1878; KNL-NEXT: kshiftlw $10, %k0, %k1 1879; KNL-NEXT: kshiftrw $15, %k1, %k1 1880; KNL-NEXT: kmovw %k1, %r15d 1881; KNL-NEXT: kshiftlw $9, %k0, %k1 1882; KNL-NEXT: kshiftrw $15, %k1, %k1 1883; KNL-NEXT: kmovw %k1, %r12d 1884; KNL-NEXT: kshiftlw $8, %k0, %k1 1885; KNL-NEXT: kshiftrw $15, %k1, %k1 1886; KNL-NEXT: kmovw %k1, %r13d 1887; KNL-NEXT: kshiftlw $7, %k0, %k1 1888; KNL-NEXT: kshiftrw $15, %k1, %k1 1889; KNL-NEXT: kmovw %k1, %edx 1890; KNL-NEXT: kshiftlw $6, %k0, %k1 1891; KNL-NEXT: kshiftrw $15, %k1, %k1 1892; KNL-NEXT: kmovw %k1, %esi 1893; KNL-NEXT: kshiftlw $5, %k0, %k1 1894; KNL-NEXT: kshiftrw $15, %k1, %k1 1895; KNL-NEXT: kmovw %k1, %ebp 1896; KNL-NEXT: kshiftlw $4, %k0, %k1 1897; KNL-NEXT: kshiftrw $15, %k1, %k1 1898; KNL-NEXT: kmovw %k1, %ebx 1899; KNL-NEXT: kshiftlw $3, %k0, %k1 1900; KNL-NEXT: kshiftrw $15, %k1, %k1 1901; KNL-NEXT: kmovw %k1, %eax 1902; KNL-NEXT: kshiftlw $2, %k0, %k1 1903; KNL-NEXT: kshiftrw $15, %k1, %k1 1904; KNL-NEXT: kmovw %k1, %ecx 1905; KNL-NEXT: kshiftlw $1, %k0, %k1 1906; KNL-NEXT: kshiftrw $15, %k1, %k1 1907; KNL-NEXT: vmovd %r9d, %xmm0 1908; KNL-NEXT: kmovw %k1, %r9d 1909; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 1910; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 1911; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 1912; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 1913; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 1914; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 1915; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 1916; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 1917; 
KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 1918; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 1919; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 1920; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1921; KNL-NEXT: kshiftlw $0, %k0, %k0 1922; KNL-NEXT: kshiftrw $15, %k0, %k0 1923; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1924; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 1925; KNL-NEXT: kmovw %k0, %eax 1926; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1927; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1928; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1929; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1930; KNL-NEXT: kmovw %k0, (%rdi) 1931; KNL-NEXT: popq %rbx 1932; KNL-NEXT: popq %r12 1933; KNL-NEXT: popq %r13 1934; KNL-NEXT: popq %r14 1935; KNL-NEXT: popq %r15 1936; KNL-NEXT: popq %rbp 1937; KNL-NEXT: retq 1938; 1939; SKX-LABEL: store_64i1: 1940; SKX: ## BB#0: 1941; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 1942; SKX-NEXT: vpmovb2m %zmm0, %k0 1943; SKX-NEXT: kmovq %k0, (%rdi) 1944; SKX-NEXT: retq 1945 store <64 x i1> %v, <64 x i1>* %a 1946 ret void 1947} 1948