; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi-builtins.c

define <2 x i64> @test_mm_mask2_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, i16 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %1
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_mask2_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, i32 %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %1
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}
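
; Note on the codegen above vs. below: vpermi2var selects bytes from two
; source tables using a third vector of indices. The mask2 intrinsics keep
; the index vector as the masked-off passthrough, so the backend emits
; vpermi2b (which overwrites the index register, xmm1/ymm1) plus a vmovdqa
; into the return register. The mask/maskz variants that follow keep the
; first table as the passthrough instead, so vpermt2b can update xmm0/ymm0
; in place with no extra move.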

define <2 x i64> @test_mm_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_mask_permutex2var_epi8(<2 x i64> %__A, i16 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %0
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_permutex2var_epi8(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_mask_permutex2var_epi8(<4 x i64> %__A, i32 %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %0
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_permutex2var_epi8(i32 %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}
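
; Note: vpmultishiftqb, tested below, extracts for each result byte an
; unaligned 8-bit field from the corresponding 64-bit lane of the data
; operand, starting at the bit offset named by the matching control byte
; (the offset wraps within the 64-bit lane). In these intrinsics the first
; operand supplies the control bytes and the second the data.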

define <2 x i64> @test_mm_mask_multishift_epi64_epi8(<2 x i64> %__W, i16 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast <2 x i64> %__W to <16 x i8>
  %4 = bitcast i16 %__M to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_multishift_epi64_epi8(i16 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast i16 %__M to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer
  %5 = bitcast <16 x i8> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_multishift_epi64_epi8(<2 x i64> %__X, <2 x i64> %__Y) {
; CHECK-LABEL: test_mm_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
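
; Note: throughout this file {%k1} merge-masks (unselected byte lanes keep
; the destination's previous value) and {%k1} {z} zero-masks (unselected
; lanes are cleared). On i386 the mask argument is passed on the stack,
; hence the kmovw (i16 mask) / kmovd (i32 mask) loads from %esp; on x86-64
; it is passed in %edi.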

define <4 x i64> @test_mm256_mask_multishift_epi64_epi8(<4 x i64> %__W, i32 %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast <4 x i64> %__W to <32 x i8>
  %4 = bitcast i32 %__M to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_multishift_epi64_epi8(i32 %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast i32 %__M to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer
  %5 = bitcast <32 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_multishift_epi64_epi8(<4 x i64> %__X, <4 x i64> %__Y) {
; CHECK-LABEL: test_mm256_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8>, <32 x i8>)