; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen test for 512-bit byte-element AVX512-VBMI intrinsics. The two llc
; invocations above compile this file for a 32-bit (i686) and a 64-bit (x86_64)
; triple with +avx512vbmi and print the machine-code encoding next to each
; instruction. The expected assembly for each triple is listed inside every
; function under its own FileCheck prefix.
;
; In the expected assembly below, the i64 mask argument reaches a %k register
; from the stack on the 32-bit triple and from %rdi on the 64-bit triple.

declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)

; permvar.qi.512 called three times on (%x0, %x1). The first result is selected
; against %x2 under the mask bitcast from %x3, the second is selected against
; zeroinitializer under the same mask, and the third is used unselected. The
; three values are added together and returned. The expected output is three
; vpermb instructions, one with {%k1}, one with {%k1} {z} and one unmasked.
define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
; X86:       # %bb.0:
; X86-NEXT:    vpermb %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x8d,0xd8]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x8d,0xd0]
; X86-NEXT:    vpermb %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x8d,0xc0]
; X86-NEXT:    vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
; X86-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
; X64:       # %bb.0:
; X64-NEXT:    vpermb %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x8d,0xd8]
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x8d,0xd0]
; X64-NEXT:    vpermb %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x8d,0xc0]
; X64-NEXT:    vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
; X64-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i64 %x3 to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
  %4 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
  %5 = bitcast i64 %x3 to <64 x i1>
  %6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
  %7 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
  %res3 = add <64 x i8> %3, %6
  %res4 = add <64 x i8> %res3, %7
  ret <64 x i8> %res4
}

declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; mask.pmultishift.qb.512 called three times on (%x0, %x1). The calls pass
; (%x2, %x3), (zeroinitializer, %x3) and (%x2, -1) as the last two arguments.
; The three results are added together and returned. The expected output is
; three vpmultishiftqb instructions, one with {%k1}, one with {%k1} {z} and one
; unmasked. On the 32-bit triple the expected output builds the mask from two
; kmovd stack loads joined by kunpckdq.
define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x83,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x83,0xd1]
; X86-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x83,0xc1]
; X86-NEXT:    vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
; X86-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x83,0xd1]
; X64-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x83,0xd9]
; X64-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x83,0xc1]
; X64-NEXT:    vpaddb %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfc,0xc0]
; X64-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
  %res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %res3 = add <64 x i8> %res, %res1
  %res4 = add <64 x i8> %res3, %res2
  ret <64 x i8> %res4
}

declare <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>)

; vpermi2var.qi.512 called three times. The first call takes (%x0, %x1, %x2) and
; its result is selected against %x1 under the mask bitcast from %x3. The second
; call takes (%x0, zeroinitializer, %x2) and its result is selected against
; zeroinitializer under the same mask. The third call takes (%x0, %x1, %x2) and
; is used unselected. The three values are added together and returned. The
; expected output uses vpermi2b for the two masked forms and vpermt2b for the
; unmasked one.
define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpermt2b %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x7d,0xda]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x75,0xe2]
; X86-NEXT:    vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpermt2b %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x7d,0xda]
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x75,0xe2]
; X64-NEXT:    vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2)
  %2 = bitcast i64 %x3 to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x1
  %4 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2)
  %5 = bitcast i64 %x3 to <64 x i1>
  %6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
  %7 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2)
  %res3 = add <64 x i8> %3, %6
  %res4 = add <64 x i8> %res3, %7
  ret <64 x i8> %res4
}

; Same vpermi2var.qi.512 intrinsic with %x1 passed as the first operand and %x0
; as the second. The first result is selected against %x1 under the mask bitcast
; from %x3. The second call passes zeroinitializer as the first operand and its
; result is selected against zeroinitializer. The third call is used unselected.
; The three values are added together and returned. The expected output is three
; vpermt2b instructions, one with {%k1}, one with {%k1} {z} and one unmasked.
define <64 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT:    vpermt2b %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x7d,0xda]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7d,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7d,0xe2]
; X86-NEXT:    vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT:    vpermt2b %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x7d,0xda]
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7d,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7d,0xe2]
; X64-NEXT:    vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
  %2 = bitcast i64 %x3 to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x1
  %4 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> zeroinitializer, <64 x i8> %x0, <64 x i8> %x2)
  %5 = bitcast i64 %x3 to <64 x i1>
  %6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
  %7 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
  %res3 = add <64 x i8> %3, %6
  %res4 = add <64 x i8> %res3, %7
  ret <64 x i8> %res4
}

; A single vpermi2var.qi.512 call on (%x1, %x0, %x2) whose result is selected
; against zeroinitializer under the mask bitcast from %x3 and returned directly.
; The expected output is one vpermi2b with {%k1} {z} writing %zmm0.
define <64 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
  %2 = bitcast i64 %x3 to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}