; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK

; Code-generation tests for <N x i1> mask values, compiled once with
; -mcpu=knl and once with -mcpu=skx (see the two llc invocations above).
; Expectations written with the shared prefix are verified for both
; invocations; the cpu-specific prefixes are verified only for the matching one.
; NOTE: keep explanatory comments free of "<prefix>:" spellings so that they
; are never picked up as directives.

; Inverts an i16 viewed as <16 x i1> (xor with all-ones) and returns it as i16.
; CHECK-LABEL: mask16
; CHECK: kmovw
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw
define i16 @mask16(i16 %x) {
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}

; Same inversion for an i8 viewed as <8 x i1>. The knl expectations use the
; word-sized mask instructions, the skx expectations the byte-sized ones.
; CHECK-LABEL: mask8
; KNL: kmovw
; KNL-NEXT: knotw
; KNL-NEXT: kmovw
; SKX: kmovb
; SKX-NEXT: knotb
; SKX-NEXT: kmovb

define i8 @mask8(i8 %x) {
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}

; Loads an i16 from %ptr, inverts it as a 16-lane mask and stores the result
; back through the same pointer. ARG1 captures the pointer register.
; CHECK-LABEL: mask16_mem
; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret

define void @mask16_mem(i16* %ptr) {
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}

; 8-bit variant of the load/invert/store test. The ARG1 variable used below is
; the one captured by the mask16_mem expectations above; it is not redefined.
; CHECK-LABEL: mask8_mem
; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
; KNL-NEXT: knotw
; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
; SKX-NEXT: knotb
; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])

define void @mask8_mem(i8* %ptr) {
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}

; Computes (x & y) | (x ^ y) on two 16-lane masks.
; CHECK-LABEL: mand16
; CHECK: kandw
; CHECK: kxorw
; CHECK: korw
define i16 @mand16(i16 %x, i16 %y) {
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; Extracts elements 8..15 of a 16-lane mask as an 8-lane mask.
; CHECK-LABEL: shuf_test1
; CHECK: kshiftrw $8
define i8 @shuf_test1(i16 %v) nounwind {
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}

; Zero-extends element 5 of a 16-lane unsigned-greater-than compare to i32.
; CHECK-LABEL: zext_test1
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw

define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; Same as zext_test1 but zero-extends to i16.
; CHECK-LABEL: zext_test2
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw

define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; Same as zext_test1 but zero-extends to i8.
; CHECK-LABEL: zext_test3
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw

define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}

; Stores a constant <8 x i1> to %R, then round-trips a second constant mask
; through a stack slot and returns it as i8.
; CHECK-LABEL: conv1
; KNL: kmovw %k0, %eax
; KNL: movb %al, (%rdi)
; SKX: kmovb %k0, (%rdi)
define i8 @conv1(<8 x i1>* %R) {
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}

; Signed greater-than between two <4 x i1> compare results, sign-extended to
; <4 x i32>. Only the skx invocation has expectations for this function.
; SKX-LABEL: test4
; SKX: vpcmpgt
; SKX: knot
; SKX: vpcmpgt
; SKX: vpmovm2d
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; Signed less-than between two <2 x i1> compare results, sign-extended to
; <2 x i64>. Note that %x_gt_y is actually produced by an slt compare here.
; SKX-LABEL: test5
; SKX: vpcmpgt
; SKX: knot
; SKX: vpcmpgt
; SKX: vpmovm2q
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; ANDs a <16 x i1> argument with an alternating constant mask and branches on
; whether the result, viewed as i16, is zero.
; KNL-LABEL: test6
; KNL: vpmovsxbd
; KNL: vpandd
; KNL: kmovw %eax, %k1
; KNL: vptestmd {{.*}}, %k0 {%k1}

; SKX-LABEL: test6
; SKX: vpmovb2m
; SKX: kmovw %eax, %k1
; SKX: kandw
define void @test6(<16 x i1> %mask) {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; ORs a <8 x i1> argument with an alternating constant mask and branches on
; whether the result, viewed as i8, is zero.
; KNL-LABEL: test7
; KNL: vpmovsxwq
; KNL: vpandq
; KNL: vptestmq {{.*}}, %k0
; KNL: korw

; SKX-LABEL: test7
; SKX: vpmovw2m
; SKX: kmovb %eax, %k1
; SKX: korb

define void @test7(<8 x i1> %mask) {
allocas:
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; Picks one of two 16-lane compare results based on a scalar signed compare
; and sign-extends the chosen mask to <16 x i8>.
; KNL-LABEL: test8
; KNL: vpxord %zmm2, %zmm2, %zmm2
; KNL: jg
; KNL: vpcmpltud %zmm2, %zmm1, %k1
; KNL: jmp
; KNL: vpcmpgtd %zmm2, %zmm0, %k1

; SKX-LABEL: test8
; SKX: jg
; SKX: vpcmpltud {{.*}}, %k0
; SKX: vpmovm2b
; SKX: vpcmpgtd {{.*}}, %k0
; SKX: vpmovm2b

define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}

; Scalar-controlled select between two <16 x i1> arguments.
; KNL-LABEL: test9
; KNL: jg
; KNL: vpmovsxbd %xmm1, %zmm0
; KNL: jmp
; KNL: vpmovsxbd %xmm0, %zmm0

; SKX-LABEL: test9
; SKX: vpmovb2m %xmm1, %k0
; SKX: vpmovm2b %k0, %xmm0
; SKX: retq
; SKX: vpmovb2m %xmm0, %k0
; SKX: vpmovm2b %k0, %xmm0

define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; Scalar-controlled select between two <8 x i1> arguments.
; KNL-LABEL: test10
; KNL: jg
; KNL: vpmovsxwq %xmm1, %zmm0
; KNL: jmp
; KNL: vpmovsxwq %xmm0, %zmm0

; SKX-LABEL: test10
; SKX: jg
; SKX: vpmovw2m %xmm1, %k0
; SKX: vpmovm2w %k0, %xmm0
; SKX: retq
; SKX: vpmovw2m %xmm0, %k0
; SKX: vpmovm2w %k0, %xmm0
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; Scalar-controlled select between two <4 x i1> arguments (skx only).
; SKX-LABEL: test11
; SKX: jg
; SKX: vpmovd2m %xmm1, %k0
; SKX: vpmovm2d %k0, %xmm0
; SKX: retq
; SKX: vpmovd2m %xmm0, %k0
; SKX: vpmovm2d %k0, %xmm0
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; Selects on element 0 of the constant mask 21845 (0x5555); the expectation is
; a single register move, i.e. the select is resolved at compile time.
; KNL-LABEL: test12
; KNL: movl %edi, %eax
define i32 @test12(i32 %x, i32 %y) {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Same as test12 but selects on element 3 of the constant mask, and expects
; the move to come from %esi instead of %edi.
; KNL-LABEL: test13
; KNL: movl %esi, %eax
define i32 @test13(i32 %x, i32 %y) {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Inserts element 2 of the constant mask 21845 into a constant <4 x i1> at
; index 1; the whole result is expected to be materialised as immediate 11.
; SKX-LABEL: test14
; SKX: movb $11, %al
; SKX: kmovb %eax, %k0
; SKX: vpmovm2d %k0, %xmm0

define <4 x i1> @test14() {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}

; Scalar-controlled select between two constant 16-lane masks; the knl
; expectation is a 16-bit conditional move.
; KNL-LABEL: test15
; KNL: cmovgw
define <16 x i1> @test15(i32 %x, i32 %y) {
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

; Sets element 5 of a 64-lane mask to true and sign-extends to <64 x i8>.
; SKX-LABEL: test16
; SKX: kxnorw %k1, %k1, %k1
; SKX: kshiftrw $15, %k1, %k1
; SKX: kshiftlq $5, %k1, %k1
; SKX: korq %k1, %k0, %k0
; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test16(i64 %x) {
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1>%a, i1 true, i32 5
  %c = sext <64 x i1>%b to <64 x i8>
  ret <64 x i8>%c
}

; Inserts the result of a scalar signed compare at element 5 of a 64-lane mask
; and sign-extends to <64 x i8>.
; SKX-LABEL: test17
; SKX: setg %al
; SKX: andl $1, %eax
; SKX: kmovw %eax, %k1
; SKX: kshiftlq $5, %k1, %k1
; SKX: korq %k1, %k0, %k0
; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
  %a = bitcast i64 %x to <64 x i1>
  %b = icmp sgt i32 %y, %z
  %c = insertelement <64 x i1>%a, i1 %b, i32 5
  %d = sext <64 x i1>%c to <64 x i8>
  ret <64 x i8>%d
}

; Copies elements 8 and 9 of a 16-lane mask into elements 7 and 6 of an 8-lane
; mask. Only the function label is verified (knl invocation); there are no
; instruction-level expectations for this function.
; KNL-LABEL: test18
define <8 x i1> @test18(i8 %a, i16 %y) {
  %b = bitcast i8 %a to <8 x i1>
  %b1 = bitcast i16 %y to <16 x i1>
  %el1 = extractelement <16 x i1>%b1, i32 8
  %el2 = extractelement <16 x i1>%b1, i32 9
  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
  ret <8 x i1>%d
}

; Keeps the lanes of a <32 x i16> vector selected by a <32 x i1> mask and
; zeroes the others.
; KNL-LABEL: test21
; KNL: vpand %ymm
; KNL: vextracti128 $1, %ymm2
; KNL: vpand %ymm

; SKX-LABEL: test21
; SKX: vpmovb2m
; SKX: vmovdqu16 {{.*}}%k1

define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Stores a <4 x i1> value to memory (skx only).
; SKX-LABEL: test22
; SKX: kmovb
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}

; Stores a <2 x i1> value to memory (skx only).
; SKX-LABEL: test23
; SKX: kmovb
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}