; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2

declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)

; Use widest possible vector for movmsk comparisons (PR37087)

define i1 @movmskps_noneof_bitcast_v4f64(<4 x double> %a0) {
; CHECK-LABEL: movmskps_noneof_bitcast_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    testl %eax, %eax
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x double> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <8 x float>
  %4 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
; CHECK-LABEL: movmskps_allof_bitcast_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    cmpl $15, %eax
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x double> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <8 x float>
  %4 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %3)
  %5 = icmp eq i32 %4, 255
  ret i1 %5
}

;
; TODO - Avoid sign extension ops when just extracting the sign bits.
;

define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
; AVX1-LABEL: movmskpd_cmpgt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskpd_cmpgt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = icmp sgt <4 x i64> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <4 x double>
  %4 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %3)
  ret i32 %4
}

define i32 @movmskps_ashr_v8i32(<8 x i32> %a0) {
; AVX1-LABEL: movmskps_ashr_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_ashr_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %2 = bitcast <8 x i32> %1 to <8 x float>
  %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
  ret i32 %3
}

define i32 @movmskps_sext_v4i64(<4 x i32> %a0) {
; AVX1-LABEL: movmskps_sext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_sext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = bitcast <4 x i64> %1 to <4 x double>
  %3 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %2)
  ret i32 %3
}

define i32 @movmskps_sext_v8i32(<8 x i16> %a0) {
; AVX1-LABEL: movmskps_sext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_sext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = sext <8 x i16> %a0 to <8 x i32>
  %2 = bitcast <8 x i32> %1 to <8 x float>
  %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
  ret i32 %3
}