; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s

; Codegen tests for <N x i1> mask values when targeting -mcpu=knl.
; Each function is anchored with a label directive so that the instruction
; patterns below it can only match inside that function's own output.

; Flip every bit of a 16-bit mask (xor with an all-ones vector).
; Expected to be selected as a single knotw.
define i16 @mask16(i16 %x) {
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
; CHECK-LABEL: mask16:
; CHECK: knotw
; CHECK: ret
}

; Same inversion on an 8-bit mask; the test expects the word-sized knotw
; here as well.
define i8 @mask8(i8 %x) {
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
; CHECK-LABEL: mask8:
; CHECK: knotw
; CHECK: ret
}

; Combine two 16-bit masks as (a & b) | (a ^ b).
; The and / xor / or are expected to appear as kandw, kxorw, korw in that order.
define i16 @mand16(i16 %x, i16 %y) {
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
; CHECK-LABEL: mand16:
; CHECK: kandw
; CHECK: kxorw
; CHECK: korw
  ret i16 %ret
}

; Take elements 8..15 of a 16-element mask and return them as an i8.
; Expected to be a right shift of the mask register by 8.
; CHECK-LABEL: shuf_test1:
; CHECK: kshiftrw $8
; CHECK: ret
define i8 @shuf_test1(i16 %v) nounwind {
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}

; Extract element 5 of a 16-lane unsigned-greater-than compare and
; zero-extend it to i32. Expected sequence: kshiftlw, kshiftrw, kmovw.
; CHECK-LABEL: zext_test1:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; Same as zext_test1, but the extracted bit is zero-extended to i16.
; CHECK-LABEL: zext_test2:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; Expected output for zext_test3 (extracted bit zero-extended to i8).
; CHECK-LABEL: zext_test3:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
; Compare two <16 x i32> vectors with unsigned greater-than, extract
; element 5 of the <16 x i1> result and zero-extend that bit to i8.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}