; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
; RUN: llc < %s -mcpu=knl | FileCheck %s --check-prefixes=CHECK,CHECK-KNL

; Every function below feeds an <N x i1> condition into a vector select. The
; file is compiled twice, once with -mcpu=skx and once with -mcpu=knl; where the
; expected assembly differs between the two, each has its own set of assertions.

target triple = "x86_64-unknown-unknown"

; test1: the select condition is %m truncated from <8 x i64> to <8 x i1>.
; Both expected sequences first shift %zmm0 left by 63 and then set %k1 from it
; (vpmovq2m with -mcpu=skx, vptestmq with -mcpu=knl), followed by a single
; masked vpblendmq.
define <8 x i64> @test1(<8 x i64> %m, <8 x i64> %a, <8 x i64> %b) {
; CHECK-SKX-LABEL: test1:
; CHECK-SKX: # %bb.0: # %entry
; CHECK-SKX-NEXT: vpsllq $63, %zmm0, %zmm0
; CHECK-SKX-NEXT: vpmovq2m %zmm0, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test1:
; CHECK-KNL: # %bb.0: # %entry
; CHECK-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; CHECK-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-KNL-NEXT: retq
entry:
  %m.trunc = trunc <8 x i64> %m to <8 x i1>
  %ret = select <8 x i1> %m.trunc, <8 x i64> %a, <8 x i64> %b
  ret <8 x i64> %ret
}

; This is a very contrived test case to trick the legalizer into splitting the
; v16i1 masks in the select during type legalization, and in so doing extend them
; into two v8i64 types. This lets us ensure that the lowering code can handle
; both formulations of vselect. All of this trickery is because we can't
; directly form an SDAG input to the lowering.
;
; The condition is the OR of two <16 x float> compares against zero. Both
; configurations share one expected sequence: the two compare masks are merged
; with korw, and the <16 x double> select is emitted as two vblendmpd, the
; second one using %k1 shifted right by 8.
define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
; CHECK-NEXT: vcmpltps %zmm0, %zmm6, %k0
; CHECK-NEXT: vcmpltps %zmm6, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k1
; CHECK-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
; CHECK-NEXT: kshiftrw $8, %k1, %k1
; CHECK-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
; CHECK-NEXT: retq
entry:
  %gt.m = fcmp ogt <16 x float> %x, zeroinitializer
  %lt.m = fcmp olt <16 x float> %y, zeroinitializer
  %m.or = or <16 x i1> %gt.m, %lt.m
  %ret = select <16 x i1> %m.or, <16 x i64> %a, <16 x i64> %b
  ret <16 x double> %ret
}

; test3: <16 x i64> select whose mask is an equality compare of <16 x i8> %x
; against zero. Expected output is two vpblendmq, the second using %k1 shifted
; right by 8. With -mcpu=skx the mask comes straight from vptestnmb; with
; -mcpu=knl the compare is done by vpcmpeqb and then widened through vpmovsxbd
; and vptestmd before it reaches %k1.
define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test3:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test3:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
; CHECK-KNL-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm0
; CHECK-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k1
; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-KNL-NEXT: retq
  %c = icmp eq <16 x i8> %x, zeroinitializer
  %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
  ret <16 x i64> %ret
}

; test4: same shape as test3, but the mask source is <16 x i16>. With -mcpu=skx
; the mask comes from vptestnmw; with -mcpu=knl it goes through vpcmpeqw,
; vpmovsxwd and vptestmd.
define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test4:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test4:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
; CHECK-KNL-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0
; CHECK-KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k1
; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-KNL-NEXT: retq
  %c = icmp eq <16 x i16> %x, zeroinitializer
  %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
  ret <16 x i64> %ret
}

; test5: same shape as test3, but the mask source is <16 x i32>. Both
; configurations share one expected sequence, with the mask produced directly
; by vptestnmd.
define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-NEXT: kshiftrw $8, %k1, %k1
; CHECK-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <16 x i32> %x, zeroinitializer
  %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
  ret <16 x i64> %ret
}

; test6: <32 x i32> select whose mask is an equality compare of <32 x i8> %x
; against zero. With -mcpu=skx a single vptestnmb fills %k1 and the second
; vpblendmd uses %k1 shifted right by 16. With -mcpu=knl the compare result is
; split with vextracti128 and two separate masks (%k1, %k2) are built, one per
; vpblendmd.
define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test6:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test6:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
; CHECK-KNL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0
; CHECK-KNL-NEXT: vextracti128 $1, %ymm0, %xmm5
; CHECK-KNL-NEXT: vpmovsxbd %xmm5, %zmm5
; CHECK-KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; CHECK-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
; CHECK-KNL-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
; CHECK-KNL-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-KNL-NEXT: retq
  %c = icmp eq <32 x i8> %x, zeroinitializer
  %ret = select <32 x i1> %c, <32 x i32> %a, <32 x i32> %b
  ret <32 x i32> %ret
}

; test7: same shape as test6, but the mask source is <32 x i16>. With -mcpu=skx
; the mask comes from one vptestnmw on %zmm0. With -mcpu=knl the upper 256 bits
; are pulled out with vextracti64x4 and each half is compared and turned into
; its own mask (%k1, %k2).
define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test7:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test7:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; CHECK-KNL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; CHECK-KNL-NEXT: vpcmpeqw %ymm6, %ymm5, %ymm5
; CHECK-KNL-NEXT: vpmovsxwd %ymm5, %zmm5
; CHECK-KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; CHECK-KNL-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
; CHECK-KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
; CHECK-KNL-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
; CHECK-KNL-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-KNL-NEXT: retq
  %c = icmp eq <32 x i16> %x, zeroinitializer
  %ret = select <32 x i1> %c, <32 x i32> %a, <32 x i32> %b
  ret <32 x i32> %ret
}

; test8: <64 x i16> select whose mask is an equality compare of <64 x i8> %x
; against zero. With -mcpu=skx one vptestnmb fills %k1 and the second vpblendmw
; uses %k1 shifted right by 32. The -mcpu=knl expectation uses no mask
; registers: the byte compare results are sign-extended to words with vpmovsxbw,
; reassembled with vinserti64x4, and combined with the operands by vpternlogq.
define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) {
; CHECK-SKX-LABEL: test8:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1
; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k1}
; CHECK-SKX-NEXT: kshiftrq $32, %k1, %k1
; CHECK-SKX-NEXT: vpblendmw %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-KNL-LABEL: test8:
; CHECK-KNL: # %bb.0:
; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; CHECK-KNL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; CHECK-KNL-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm5
; CHECK-KNL-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm0, %ymm6
; CHECK-KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm6, %zmm0
; CHECK-KNL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; CHECK-KNL-NEXT: vpmovsxbw %xmm5, %ymm1
; CHECK-KNL-NEXT: vextracti128 $1, %ymm5, %xmm3
; CHECK-KNL-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; CHECK-KNL-NEXT: vpternlogq $202, %zmm4, %zmm2, %zmm1
; CHECK-KNL-NEXT: retq
  %c = icmp eq <64 x i8> %x, zeroinitializer
  %ret = select <64 x i1> %c, <64 x i16> %a, <64 x i16> %b
  ret <64 x i16> %ret
}