; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=ALL,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=ALL,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,avx512vl,avx512f | FileCheck %s --check-prefixes=ALL,AVX512

; PR37427 - https://bugs.llvm.org/show_bug.cgi?id=37427

define <8 x i32> @eq_zero(<8 x i8>* %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: eq_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $24, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpslld $24, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    vptestnmw %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp eq <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

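; Same pattern with icmp ne. AVX1/AVX2 have no direct not-equal vector compare,
; so the eq mask is inverted by xor with all-ones; AVX512 folds the whole
; compare into a vptestmd mask test.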
define <4 x i64> @ne_zero(<4 x i16>* %p, <4 x i64> %x, <4 x i64> %y) {
; AVX1-LABEL: ne_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512-NEXT:    vptestmd %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %p
  %cmp = icmp ne <4 x i16> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %sel
}

define <16 x i16> @sgt_zero(<16 x i8>* %p, <16 x i16> %x, <16 x i16> %y) {
; AVX1-LABEL: sgt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpltb (%rdi), %xmm2, %k1
; AVX512-NEXT:    vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <16 x i8>, <16 x i8>* %p
  %cmp = icmp sgt <16 x i8> %load, zeroinitializer
  %sel = select <16 x i1> %cmp, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %sel
}

define <8 x i32> @slt_zero(<8 x i8>* %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: slt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm2
; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbw (%rdi), %xmm2
; AVX512-NEXT:    vpmovw2m %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp slt <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

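; The remaining tests select between FP vectors, so the blends use the FP
; domain (vblendvpd/vblendvps) and the AVX512 masked blends become
; vblendmpd/vblendmps.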
define <4 x double> @eq_zero_fp_select(<4 x i8>* %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: eq_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, <4 x i8>* %p
  %cmp = icmp eq <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

define <8 x float> @ne_zero_fp_select(<8 x i8>* %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: ne_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $24, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpslld $24, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    vptestmw %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, <8 x i8>* %p
  %cmp = icmp ne <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}

define <4 x double> @sgt_zero_fp_select(<4 x i8>* %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: sgt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm2
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbq (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd (%rdi), %xmm2
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtd %xmm3, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, <4 x i8>* %p
  %cmp = icmp sgt <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

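; For slt-zero the sign-extended load already carries the compare result in
; each lane's sign bit, and vblendvps tests only the sign bit, so AVX1/AVX2
; can skip the compare entirely (as in @slt_zero above); AVX512 still needs a
; compare to produce a k-mask.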
define <8 x float> @slt_zero_fp_select(<8 x i16>* %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: slt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm2
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtw (%rdi), %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i16>, <8 x i16>* %p
  %cmp = icmp slt <8 x i16> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}