; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vpopcntdq --show-mc-encoding | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq --show-mc-encoding | FileCheck %s --check-prefix=X64

; Only the X86 and X64 prefixes are passed to FileCheck: every function below
; has target-specific assertions and none uses a shared prefix, and FileCheck
; can reject a run that names a prefix no directive uses.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following tests check that patterns combining the ctpop        ;;
;; intrinsic with a select on a bitcast scalar mask are translated    ;;
;; to a masked vpopcntd/vpopcntq instruction:                         ;;
;;   * select against another vector   -> merge-masking  {%k1}       ;;
;;   * select against zeroinitializer  -> zero-masking   {%k1} {z}   ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Merge-masked 32-bit element popcount: lanes whose mask bit is set take
; ctpop(%b); the remaining lanes keep the corresponding element of %a.
define <16 x i32> @test_mask_vpopcnt_d(<16 x i32> %a, i16 %mask, <16 x i32> %b) {
; X86-LABEL: test_mask_vpopcnt_d:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vpopcnt_d:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x55,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %a
  ret <16 x i32> %3
}

; Zero-masked 32-bit element popcount: lanes whose mask bit is set take
; ctpop(%a); the remaining lanes are zero.
define <16 x i32> @test_maskz_vpopcnt_d(i16 %mask, <16 x i32> %a) {
; X86-LABEL: test_maskz_vpopcnt_d:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpopcntd %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x55,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_vpopcnt_d:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpopcntd %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x55,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Merge-masked 64-bit element popcount: lanes whose mask bit is set take
; ctpop(%a); the remaining lanes keep the corresponding element of %b. The
; expected code writes the result into %b's register and then copies it to the
; return register.
define <8 x i64> @test_mask_vpopcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_vpopcnt_q:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpopcntq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x55,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vpopcnt_q:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpopcntq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x55,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b
  ret <8 x i64> %3
}

; Zero-masked 64-bit element popcount: lanes whose mask bit is set take
; ctpop(%a); the remaining lanes are zero.
define <8 x i64> @test_maskz_vpopcnt_q(<8 x i64> %a, i8 %mask) {
; X86-LABEL: test_maskz_vpopcnt_q:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpopcntq %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x55,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_vpopcnt_q:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpopcntq %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x55,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

; Population-count intrinsics exercised by the tests above.
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)