; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c

define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast <2 x i64> %__S to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast <2 x i64> %__S to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define void @test_mm_mask_compressstoreu_epi16(i8* %__P, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2)
  ret void
}

define void @test_mm_mask_compressstoreu_epi8(i8* %__P, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1)
  ret void
}

define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast <2 x i64> %__S to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast <2 x i64> %__S to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <16 x i8>
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast i16 %__U to <16 x i1>
  %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast <4 x i64> %__S to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast <4 x i64> %__S to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define void @test_mm256_mask_compressstoreu_epi16(i8* %__P, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2)
  ret void
}

define void @test_mm256_mask_compressstoreu_epi8(i8* %__P, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1)
  ret void
}

define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast <4 x i64> %__S to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast <4 x i64> %__S to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <32 x i8>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)

define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) #3

define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
  %3 = bitcast <4 x i64> %__S to <8 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)

define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
  %5 = bitcast <8 x i32> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldd $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
  %3 = bitcast <8 x i32> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
  %3 = bitcast <2 x i64> %__S to <4 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)

define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldd $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
  %3 = bitcast <4 x i64> %__S to <16 x i16>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)

define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldw $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
  %3 = bitcast <2 x i64> %__S to <8 x i16>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)

define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
  %5 = bitcast <8 x i16> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldw $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)

define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)

define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
  %3 = bitcast <4 x i64> %__S to <8 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)

define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
  %5 = bitcast <8 x i32> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdd $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
  %3 = bitcast <8 x i32> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
  %3 = bitcast <2 x i64> %__S to <4 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)

define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdd $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
  %3 = bitcast <4 x i64> %__S to <16 x i16>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)

define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdw $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
  %3 = bitcast <2 x i64> %__S to <8 x i16>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)

define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
  %5 = bitcast <8 x i16> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdw $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi64:
;
CHECK: # %bb.0: # %entry 1653; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 1654; CHECK-NEXT: ret{{[l|q]}} 1655entry: 1656 %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1) 1657 ret <4 x i64> %0 1658} 1659 1660define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1661; X86-LABEL: test_mm_mask_shrdv_epi64: 1662; X86: # %bb.0: # %entry 1663; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1664; X86-NEXT: kmovd %eax, %k1 1665; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1666; X86-NEXT: retl 1667; 1668; X64-LABEL: test_mm_mask_shrdv_epi64: 1669; X64: # %bb.0: # %entry 1670; X64-NEXT: kmovd %edi, %k1 1671; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1672; X64-NEXT: retq 1673entry: 1674 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U) 1675 ret <2 x i64> %0 1676} 1677 1678define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1679; X86-LABEL: test_mm_maskz_shrdv_epi64: 1680; X86: # %bb.0: # %entry 1681; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1682; X86-NEXT: kmovd %eax, %k1 1683; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1684; X86-NEXT: retl 1685; 1686; X64-LABEL: test_mm_maskz_shrdv_epi64: 1687; X64: # %bb.0: # %entry 1688; X64-NEXT: kmovd %edi, %k1 1689; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1690; X64-NEXT: retq 1691entry: 1692 %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U) 1693 ret <2 x i64> %0 1694} 1695 1696define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1697; CHECK-LABEL: test_mm_shrdv_epi64: 1698; CHECK: # %bb.0: # %entry 1699; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 1700; CHECK-NEXT: ret{{[l|q]}} 1701entry: 1702 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1) 1703 ret <2 x i64> %0 1704} 1705 1706define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1707; X86-LABEL: test_mm256_mask_shrdv_epi32: 1708; X86: # %bb.0: # %entry 1709; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1710; X86-NEXT: kmovd %eax, %k1 1711; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1712; X86-NEXT: retl 1713; 1714; X64-LABEL: test_mm256_mask_shrdv_epi32: 1715; X64: # %bb.0: # %entry 1716; X64-NEXT: kmovd %edi, %k1 1717; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1718; X64-NEXT: retq 1719entry: 1720 %0 = bitcast <4 x i64> %__S to <8 x i32> 1721 %1 = bitcast <4 x i64> %__A to <8 x i32> 1722 %2 = bitcast <4 x i64> %__B to <8 x i32> 1723 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U) 1724 %4 = bitcast <8 x i32> %3 to <4 x i64> 1725 ret <4 x i64> %4 1726} 1727 1728define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1729; X86-LABEL: test_mm256_maskz_shrdv_epi32: 1730; X86: # %bb.0: # %entry 1731; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1732; X86-NEXT: kmovd %eax, %k1 1733; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1734; X86-NEXT: retl 1735; 1736; X64-LABEL: test_mm256_maskz_shrdv_epi32: 1737; X64: # %bb.0: # %entry 1738; X64-NEXT: kmovd %edi, %k1 1739; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1740; X64-NEXT: retq 1741entry: 1742 %0 = bitcast <4 x i64> %__S to <8 x i32> 1743 %1 = bitcast <4 x i64> %__A to <8 x i32> 
1744 %2 = bitcast <4 x i64> %__B to <8 x i32> 1745 %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U) 1746 %4 = bitcast <8 x i32> %3 to <4 x i64> 1747 ret <4 x i64> %4 1748} 1749 1750define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1751; CHECK-LABEL: test_mm256_shrdv_epi32: 1752; CHECK: # %bb.0: # %entry 1753; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 1754; CHECK-NEXT: ret{{[l|q]}} 1755entry: 1756 %0 = bitcast <4 x i64> %__S to <8 x i32> 1757 %1 = bitcast <4 x i64> %__A to <8 x i32> 1758 %2 = bitcast <4 x i64> %__B to <8 x i32> 1759 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1) 1760 %4 = bitcast <8 x i32> %3 to <4 x i64> 1761 ret <4 x i64> %4 1762} 1763 1764define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1765; X86-LABEL: test_mm_mask_shrdv_epi32: 1766; X86: # %bb.0: # %entry 1767; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1768; X86-NEXT: kmovd %eax, %k1 1769; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1770; X86-NEXT: retl 1771; 1772; X64-LABEL: test_mm_mask_shrdv_epi32: 1773; X64: # %bb.0: # %entry 1774; X64-NEXT: kmovd %edi, %k1 1775; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1776; X64-NEXT: retq 1777entry: 1778 %0 = bitcast <2 x i64> %__S to <4 x i32> 1779 %1 = bitcast <2 x i64> %__A to <4 x i32> 1780 %2 = bitcast <2 x i64> %__B to <4 x i32> 1781 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U) 1782 %4 = bitcast <4 x i32> %3 to <2 x i64> 1783 ret <2 x i64> %4 1784} 1785 1786define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1787; X86-LABEL: test_mm_maskz_shrdv_epi32: 1788; X86: # %bb.0: # %entry 1789; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1790; X86-NEXT: kmovd %eax, %k1 1791; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1792; X86-NEXT: retl 1793; 1794; X64-LABEL: test_mm_maskz_shrdv_epi32: 1795; X64: # %bb.0: # %entry 1796; X64-NEXT: kmovd %edi, %k1 1797; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1798; X64-NEXT: retq 1799entry: 1800 %0 = bitcast <2 x i64> %__S to <4 x i32> 1801 %1 = bitcast <2 x i64> %__A to <4 x i32> 1802 %2 = bitcast <2 x i64> %__B to <4 x i32> 1803 %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U) 1804 %4 = bitcast <4 x i32> %3 to <2 x i64> 1805 ret <2 x i64> %4 1806} 1807 1808define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1809; CHECK-LABEL: test_mm_shrdv_epi32: 1810; CHECK: # %bb.0: # %entry 1811; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 1812; CHECK-NEXT: ret{{[l|q]}} 1813entry: 1814 %0 = bitcast <2 x i64> %__S to <4 x i32> 1815 %1 = bitcast <2 x i64> %__A to <4 x i32> 1816 %2 = bitcast <2 x i64> %__B to <4 x i32> 1817 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1) 1818 %4 = bitcast <4 x i32> %3 to <2 x i64> 1819 ret <2 x i64> %4 1820} 1821 1822define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1823; X86-LABEL: test_mm256_mask_shrdv_epi16: 1824; X86: # %bb.0: # %entry 1825; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1826; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1827; X86-NEXT: retl 1828; 1829; X64-LABEL: test_mm256_mask_shrdv_epi16: 1830; X64: # %bb.0: # %entry 1831; X64-NEXT: kmovd 
%edi, %k1 1832; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1833; X64-NEXT: retq 1834entry: 1835 %0 = bitcast <4 x i64> %__S to <16 x i16> 1836 %1 = bitcast <4 x i64> %__A to <16 x i16> 1837 %2 = bitcast <4 x i64> %__B to <16 x i16> 1838 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U) 1839 %4 = bitcast <16 x i16> %3 to <4 x i64> 1840 ret <4 x i64> %4 1841} 1842 1843define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1844; X86-LABEL: test_mm256_maskz_shrdv_epi16: 1845; X86: # %bb.0: # %entry 1846; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1847; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1848; X86-NEXT: retl 1849; 1850; X64-LABEL: test_mm256_maskz_shrdv_epi16: 1851; X64: # %bb.0: # %entry 1852; X64-NEXT: kmovd %edi, %k1 1853; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1854; X64-NEXT: retq 1855entry: 1856 %0 = bitcast <4 x i64> %__S to <16 x i16> 1857 %1 = bitcast <4 x i64> %__A to <16 x i16> 1858 %2 = bitcast <4 x i64> %__B to <16 x i16> 1859 %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U) 1860 %4 = bitcast <16 x i16> %3 to <4 x i64> 1861 ret <4 x i64> %4 1862} 1863 1864define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1865; CHECK-LABEL: test_mm256_shrdv_epi16: 1866; CHECK: # %bb.0: # %entry 1867; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 1868; CHECK-NEXT: ret{{[l|q]}} 1869entry: 1870 %0 = bitcast <4 x i64> %__S to <16 x i16> 1871 %1 = bitcast <4 x i64> %__A to <16 x i16> 1872 %2 = bitcast <4 x i64> %__B to <16 x i16> 1873 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1) 1874 %4 = bitcast <16 x i16> %3 to <4 x i64> 1875 ret <4 x i64> %4 1876} 1877 1878define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1879; X86-LABEL: test_mm_mask_shrdv_epi16: 1880; X86: # %bb.0: # %entry 1881; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1882; X86-NEXT: kmovd %eax, %k1 1883; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1884; X86-NEXT: retl 1885; 1886; X64-LABEL: test_mm_mask_shrdv_epi16: 1887; X64: # %bb.0: # %entry 1888; X64-NEXT: kmovd %edi, %k1 1889; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1890; X64-NEXT: retq 1891entry: 1892 %0 = bitcast <2 x i64> %__S to <8 x i16> 1893 %1 = bitcast <2 x i64> %__A to <8 x i16> 1894 %2 = bitcast <2 x i64> %__B to <8 x i16> 1895 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U) 1896 %4 = bitcast <8 x i16> %3 to <2 x i64> 1897 ret <2 x i64> %4 1898} 1899 1900define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1901; X86-LABEL: test_mm_maskz_shrdv_epi16: 1902; X86: # %bb.0: # %entry 1903; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1904; X86-NEXT: kmovd %eax, %k1 1905; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1906; X86-NEXT: retl 1907; 1908; X64-LABEL: test_mm_maskz_shrdv_epi16: 1909; X64: # %bb.0: # %entry 1910; X64-NEXT: kmovd %edi, %k1 1911; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1912; X64-NEXT: retq 1913entry: 1914 %0 = bitcast <2 x i64> %__S to <8 x i16> 1915 %1 = bitcast <2 x i64> %__A to <8 x i16> 1916 %2 = bitcast <2 x i64> %__B to <8 x i16> 1917 %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U) 1918 %4 = 
bitcast <8 x i16> %3 to <2 x i64> 1919 ret <2 x i64> %4 1920} 1921 1922define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1923; CHECK-LABEL: test_mm_shrdv_epi16: 1924; CHECK: # %bb.0: # %entry 1925; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 1926; CHECK-NEXT: ret{{[l|q]}} 1927entry: 1928 %0 = bitcast <2 x i64> %__S to <8 x i16> 1929 %1 = bitcast <2 x i64> %__A to <8 x i16> 1930 %2 = bitcast <2 x i64> %__B to <8 x i16> 1931 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1) 1932 %4 = bitcast <8 x i16> %3 to <2 x i64> 1933 ret <2 x i64> %4 1934} 1935 1936declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8) 1937declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16) 1938declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>) 1939declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) 1940declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8) 1941declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16) 1942declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>) 1943declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>) 1944declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16) 1945declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32) 1946declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>) 1947declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) 1948declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16) 1949declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32) 1950declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>) 1951declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>) 1952declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1953declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1954declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1955declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1956declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1957declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1958declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1959declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1960declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1961declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1962declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1963declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1964declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1965declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1966declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1967declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1968declare <8 x i32> 
@llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1969declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1970declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1971declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1972declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1973declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1974declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1975declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1976