1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64 4 5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c 6 7define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) { 8; X86-LABEL: test_mm_mask_compress_epi16: 9; X86: # %bb.0: # %entry 10; X86-NEXT: movb {{[0-9]+}}(%esp), %al 11; X86-NEXT: kmovd %eax, %k1 12; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1} 13; X86-NEXT: retl 14; 15; X64-LABEL: test_mm_mask_compress_epi16: 16; X64: # %bb.0: # %entry 17; X64-NEXT: kmovd %edi, %k1 18; X64-NEXT: vpcompressw %xmm1, %xmm0 {%k1} 19; X64-NEXT: retq 20entry: 21 %0 = bitcast <2 x i64> %__D to <8 x i16> 22 %1 = bitcast <2 x i64> %__S to <8 x i16> 23 %2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U) 24 %3 = bitcast <8 x i16> %2 to <2 x i64> 25 ret <2 x i64> %3 26} 27 28define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) { 29; X86-LABEL: test_mm_maskz_compress_epi16: 30; X86: # %bb.0: # %entry 31; X86-NEXT: movb {{[0-9]+}}(%esp), %al 32; X86-NEXT: kmovd %eax, %k1 33; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} 34; X86-NEXT: retl 35; 36; X64-LABEL: test_mm_maskz_compress_epi16: 37; X64: # %bb.0: # %entry 38; X64-NEXT: kmovd %edi, %k1 39; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} 40; X64-NEXT: retq 41entry: 42 %0 = bitcast <2 x i64> %__D to <8 x i16> 43 %1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U) 44 %2 = bitcast <8 x i16> %1 to <2 x i64> 45 ret <2 x i64> %2 46} 47 48define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) { 49; X86-LABEL: test_mm_mask_compress_epi8: 50; X86: # %bb.0: # %entry 51; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 52; X86-NEXT: vpcompressb %xmm1, %xmm0 {%k1} 53; X86-NEXT: retl 54; 55; X64-LABEL: test_mm_mask_compress_epi8: 56; X64: # %bb.0: # %entry 57; X64-NEXT: kmovd %edi, %k1 58; X64-NEXT: vpcompressb %xmm1, %xmm0 {%k1} 59; X64-NEXT: retq 60entry: 61 %0 = bitcast <2 x i64> %__D to <16 x i8> 62 %1 = bitcast <2 x i64> %__S to <16 x i8> 63 %2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U) 64 %3 = bitcast <16 x i8> %2 to <2 x i64> 65 ret <2 x i64> %3 66} 67 68define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) { 69; X86-LABEL: test_mm_maskz_compress_epi8: 70; X86: # %bb.0: # %entry 71; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 72; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} 73; X86-NEXT: retl 74; 75; X64-LABEL: test_mm_maskz_compress_epi8: 76; X64: # %bb.0: # %entry 77; X64-NEXT: kmovd %edi, %k1 78; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} 79; X64-NEXT: retq 80entry: 81 %0 = bitcast <2 x i64> %__D to <16 x i8> 82 %1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U) 83 %2 = bitcast <16 x i8> %1 to <2 x i64> 84 ret <2 x i64> %2 85} 86 87define void @test_mm_mask_compressstoreu_epi16(i8* %__P, i8 zeroext %__U, <2 x i64> %__D) { 88; X86-LABEL: test_mm_mask_compressstoreu_epi16: 89; X86: # %bb.0: # %entry 90; X86-NEXT: movb {{[0-9]+}}(%esp), %al 91; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 92; X86-NEXT: kmovd %eax, %k1 93; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1} 94; X86-NEXT: retl 95; 96; X64-LABEL: test_mm_mask_compressstoreu_epi16: 97; X64: # %bb.0: # %entry 98; X64-NEXT: kmovd %esi, %k1 99; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} 100; X64-NEXT: retq 101entry: 102 %0 = bitcast <2 x i64> %__D to <8 x i16> 103 %1 = bitcast i8* %__P to i16* 104 %2 = bitcast i8 %__U to <8 x i1> 105 tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2) 106 ret void 107} 108 109define void @test_mm_mask_compressstoreu_epi8(i8* %__P, i16 zeroext %__U, <2 x i64> %__D) { 110; X86-LABEL: test_mm_mask_compressstoreu_epi8: 111; X86: # %bb.0: # %entry 112; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 113; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 114; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} 115; X86-NEXT: retl 116; 117; X64-LABEL: test_mm_mask_compressstoreu_epi8: 118; X64: # %bb.0: # %entry 119; X64-NEXT: kmovd %esi, %k1 120; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} 121; X64-NEXT: retq 122entry: 123 %0 = bitcast <2 x i64> %__D to <16 x i8> 124 %1 = bitcast i16 %__U to <16 x i1> 125 tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1) 126 ret void 127} 128 129define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) { 130; X86-LABEL: test_mm_mask_expand_epi16: 131; X86: # %bb.0: # %entry 132; X86-NEXT: movb {{[0-9]+}}(%esp), %al 133; X86-NEXT: kmovd %eax, %k1 134; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1} 135; X86-NEXT: retl 136; 137; X64-LABEL: test_mm_mask_expand_epi16: 138; X64: # %bb.0: # %entry 139; X64-NEXT: kmovd %edi, %k1 140; X64-NEXT: vpexpandw %xmm1, %xmm0 {%k1} 141; X64-NEXT: retq 142entry: 143 %0 = bitcast <2 x i64> %__D to <8 x i16> 144 %1 = bitcast <2 x i64> %__S to <8 x i16> 145 %2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U) 146 %3 = bitcast <8 x i16> %2 to <2 x i64> 147 ret <2 x i64> %3 148} 149 150define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) { 151; X86-LABEL: test_mm_maskz_expand_epi16: 152; X86: # %bb.0: # %entry 153; X86-NEXT: movb {{[0-9]+}}(%esp), %al 154; X86-NEXT: kmovd %eax, %k1 155; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} 156; X86-NEXT: retl 157; 158; X64-LABEL: test_mm_maskz_expand_epi16: 159; X64: # %bb.0: # %entry 160; X64-NEXT: kmovd %edi, %k1 161; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} 162; X64-NEXT: retq 163entry: 164 %0 = bitcast <2 x i64> %__D to <8 x i16> 165 %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U) 166 %2 = bitcast <8 x i16> %1 to <2 x i64> 167 ret <2 x i64> %2 168} 169 170define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) { 171; X86-LABEL: test_mm_mask_expand_epi8: 172; X86: # %bb.0: # %entry 173; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 174; X86-NEXT: vpexpandb %xmm1, %xmm0 {%k1} 175; X86-NEXT: retl 176; 177; X64-LABEL: test_mm_mask_expand_epi8: 178; X64: # %bb.0: # %entry 179; X64-NEXT: kmovd %edi, %k1 180; X64-NEXT: vpexpandb %xmm1, %xmm0 {%k1} 181; X64-NEXT: retq 182entry: 183 %0 = bitcast <2 x i64> %__D to <16 x i8> 184 %1 = bitcast <2 x i64> %__S to <16 x i8> 185 %2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U) 186 %3 = bitcast <16 x i8> %2 to <2 x i64> 187 ret <2 x i64> %3 188} 189 190define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) { 191; X86-LABEL: test_mm_maskz_expand_epi8: 192; X86: # %bb.0: # %entry 193; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 194; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} 195; X86-NEXT: retl 196; 197; X64-LABEL: test_mm_maskz_expand_epi8: 198; X64: # %bb.0: # %entry 199; X64-NEXT: kmovd %edi, %k1 200; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} 201; X64-NEXT: retq 202entry: 203 %0 = bitcast <2 x i64> %__D to <16 x i8> 204 %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U) 205 %2 = bitcast <16 x i8> %1 to <2 x i64> 206 ret <2 x i64> %2 207} 208 209define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, i8* readonly %__P) { 210; X86-LABEL: test_mm_mask_expandloadu_epi16: 211; X86: # %bb.0: # %entry 212; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 213; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 214; X86-NEXT: kmovd %ecx, %k1 215; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} 216; X86-NEXT: retl 217; 218; X64-LABEL: test_mm_mask_expandloadu_epi16: 219; X64: # %bb.0: # %entry 220; X64-NEXT: kmovd %edi, %k1 221; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1} 222; X64-NEXT: retq 223entry: 224 %0 = bitcast <2 x i64> %__S to <8 x i16> 225 %1 = bitcast i8* %__P to i16* 226 %2 = bitcast i8 %__U to <8 x i1> 227 %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0) 228 %4 = bitcast <8 x i16> %3 to <2 x i64> 229 ret <2 x i64> %4 230} 231 232define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) { 233; X86-LABEL: test_mm_maskz_expandloadu_epi16: 234; X86: # %bb.0: # %entry 235; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 236; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 237; X86-NEXT: kmovd %ecx, %k1 238; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} 239; X86-NEXT: retl 240; 241; X64-LABEL: test_mm_maskz_expandloadu_epi16: 242; X64: # %bb.0: # %entry 243; X64-NEXT: kmovd %edi, %k1 244; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1} {z} 245; X64-NEXT: retq 246entry: 247 %0 = bitcast i8* %__P to i16* 248 %1 = bitcast i8 %__U to <8 x i1> 249 %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer) 250 %3 = bitcast <8 x i16> %2 to <2 x i64> 251 ret <2 x i64> %3 252} 253 254define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) { 255; X86-LABEL: test_mm_mask_expandloadu_epi8: 256; X86: # %bb.0: # %entry 257; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 258; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 259; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} 260; X86-NEXT: retl 261; 262; X64-LABEL: test_mm_mask_expandloadu_epi8: 263; X64: # %bb.0: # %entry 264; X64-NEXT: kmovd %edi, %k1 265; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1} 266; X64-NEXT: retq 267entry: 268 %0 = bitcast <2 x i64> %__S to <16 x i8> 269 %1 = bitcast i16 %__U to <16 x i1> 270 %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0) 271 %3 = bitcast <16 x i8> %2 to <2 x i64> 272 ret <2 x i64> %3 273} 274 275define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) { 276; X86-LABEL: test_mm_maskz_expandloadu_epi8: 277; X86: # %bb.0: # %entry 278; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 279; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 280; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} 281; X86-NEXT: retl 282; 283; X64-LABEL: test_mm_maskz_expandloadu_epi8: 284; X64: # %bb.0: # %entry 285; X64-NEXT: kmovd %edi, %k1 286; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1} {z} 287; X64-NEXT: retq 288entry: 289 %0 = bitcast i16 %__U to <16 x i1> 290 %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer) 291 %2 = bitcast <16 x i8> %1 to <2 x i64> 292 ret <2 x i64> %2 293} 294 295define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) { 296; X86-LABEL: test_mm256_mask_compress_epi16: 297; X86: # %bb.0: # %entry 298; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 299; X86-NEXT: vpcompressw %ymm1, %ymm0 {%k1} 300; X86-NEXT: retl 301; 302; X64-LABEL: test_mm256_mask_compress_epi16: 303; X64: # %bb.0: # %entry 304; X64-NEXT: kmovd %edi, %k1 305; X64-NEXT: vpcompressw %ymm1, %ymm0 {%k1} 306; X64-NEXT: retq 307entry: 308 %0 = bitcast <4 x i64> %__D to <16 x i16> 309 %1 = bitcast <4 x i64> %__S to <16 x i16> 310 %2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U) 311 %3 = bitcast <16 x i16> %2 to <4 x i64> 312 ret <4 x i64> %3 313} 314 315define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) { 316; X86-LABEL: test_mm256_maskz_compress_epi16: 317; X86: # %bb.0: # %entry 318; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 319; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} 320; X86-NEXT: retl 321; 322; X64-LABEL: test_mm256_maskz_compress_epi16: 323; X64: # %bb.0: # %entry 324; X64-NEXT: kmovd %edi, %k1 325; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} 326; X64-NEXT: retq 327entry: 328 %0 = bitcast <4 x i64> %__D to <16 x i16> 329 %1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U) 330 %2 = bitcast <16 x i16> %1 to <4 x i64> 331 ret <4 x i64> %2 332} 333 334define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) { 335; X86-LABEL: test_mm256_mask_compress_epi8: 336; X86: # %bb.0: # %entry 337; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 338; X86-NEXT: vpcompressb %ymm1, %ymm0 {%k1} 339; X86-NEXT: retl 340; 341; X64-LABEL: test_mm256_mask_compress_epi8: 342; X64: # %bb.0: # %entry 343; X64-NEXT: kmovd %edi, %k1 344; X64-NEXT: vpcompressb %ymm1, %ymm0 {%k1} 345; X64-NEXT: retq 346entry: 347 %0 = bitcast <4 x i64> %__D to <32 x i8> 348 %1 = bitcast <4 x i64> %__S to <32 x i8> 349 %2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U) 350 %3 = bitcast <32 x i8> %2 to <4 x i64> 351 ret <4 x i64> %3 352} 353 354define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) { 355; X86-LABEL: test_mm256_maskz_compress_epi8: 356; X86: # %bb.0: # %entry 357; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 358; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} 359; X86-NEXT: retl 360; 361; X64-LABEL: test_mm256_maskz_compress_epi8: 362; X64: # %bb.0: # %entry 363; X64-NEXT: kmovd %edi, %k1 364; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} 365; X64-NEXT: retq 366entry: 367 %0 = bitcast <4 x i64> %__D to <32 x i8> 368 %1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U) 369 %2 = bitcast <32 x i8> %1 to <4 x i64> 370 ret <4 x i64> %2 371} 372 373define void @test_mm256_mask_compressstoreu_epi16(i8* %__P, i16 zeroext %__U, <4 x i64> %__D) { 374; X86-LABEL: test_mm256_mask_compressstoreu_epi16: 375; X86: # %bb.0: # %entry 376; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 377; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 378; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} 379; X86-NEXT: vzeroupper 380; X86-NEXT: retl 381; 382; X64-LABEL: test_mm256_mask_compressstoreu_epi16: 383; X64: # %bb.0: # %entry 384; X64-NEXT: kmovd %esi, %k1 385; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} 386; X64-NEXT: vzeroupper 387; X64-NEXT: retq 388entry: 389 %0 = bitcast <4 x i64> %__D to <16 x i16> 390 %1 = bitcast i8* %__P to i16* 391 %2 = bitcast i16 %__U to <16 x i1> 392 tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2) 393 ret void 394} 395 396define void @test_mm256_mask_compressstoreu_epi8(i8* %__P, i32 %__U, <4 x i64> %__D) { 397; X86-LABEL: test_mm256_mask_compressstoreu_epi8: 398; X86: # %bb.0: # %entry 399; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 400; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 401; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} 402; X86-NEXT: vzeroupper 403; X86-NEXT: retl 404; 405; X64-LABEL: test_mm256_mask_compressstoreu_epi8: 406; X64: # %bb.0: # %entry 407; X64-NEXT: kmovd %esi, %k1 408; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} 409; X64-NEXT: vzeroupper 410; X64-NEXT: retq 411entry: 412 %0 = bitcast <4 x i64> %__D to <32 x i8> 413 %1 = bitcast i32 %__U to <32 x i1> 414 tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1) 415 ret void 416} 417 418define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) { 419; X86-LABEL: test_mm256_mask_expand_epi16: 420; X86: # %bb.0: # %entry 421; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 422; X86-NEXT: vpexpandw %ymm1, %ymm0 {%k1} 423; X86-NEXT: retl 424; 425; X64-LABEL: test_mm256_mask_expand_epi16: 426; X64: # %bb.0: # %entry 427; X64-NEXT: kmovd %edi, %k1 428; X64-NEXT: vpexpandw %ymm1, %ymm0 {%k1} 429; X64-NEXT: retq 430entry: 431 %0 = bitcast <4 x i64> %__D to <16 x i16> 432 %1 = bitcast <4 x i64> %__S to <16 x i16> 433 %2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U) 434 %3 = bitcast <16 x i16> %2 to <4 x i64> 435 ret <4 x i64> %3 436} 437 438define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) { 439; X86-LABEL: test_mm256_maskz_expand_epi16: 440; X86: # %bb.0: # %entry 441; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 442; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} 443; X86-NEXT: retl 444; 445; X64-LABEL: test_mm256_maskz_expand_epi16: 446; X64: # %bb.0: # %entry 447; X64-NEXT: kmovd %edi, %k1 448; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} 449; X64-NEXT: retq 450entry: 451 %0 = bitcast <4 x i64> %__D to <16 x i16> 452 %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U) 453 %2 = bitcast <16 x i16> %1 to <4 x i64> 454 ret <4 x i64> %2 455} 456 457define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) { 458; X86-LABEL: test_mm256_mask_expand_epi8: 459; X86: # %bb.0: # %entry 460; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 461; X86-NEXT: vpexpandb %ymm1, %ymm0 {%k1} 462; X86-NEXT: retl 463; 464; X64-LABEL: test_mm256_mask_expand_epi8: 465; X64: # %bb.0: # %entry 466; X64-NEXT: kmovd %edi, %k1 467; X64-NEXT: vpexpandb %ymm1, %ymm0 {%k1} 468; X64-NEXT: retq 469entry: 470 %0 = bitcast <4 x i64> %__D to <32 x i8> 471 %1 = bitcast <4 x i64> %__S to <32 x i8> 472 %2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U) 473 %3 = bitcast <32 x i8> %2 to <4 x i64> 474 ret <4 x i64> %3 475} 476 477define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) { 478; X86-LABEL: test_mm256_maskz_expand_epi8: 479; X86: # %bb.0: # %entry 480; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 481; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} 482; X86-NEXT: retl 483; 484; X64-LABEL: test_mm256_maskz_expand_epi8: 485; X64: # %bb.0: # %entry 486; X64-NEXT: kmovd %edi, %k1 487; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} 488; X64-NEXT: retq 489entry: 490 %0 = bitcast <4 x i64> %__D to <32 x i8> 491 %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U) 492 %2 = bitcast <32 x i8> %1 to <4 x i64> 493 ret <4 x i64> %2 494} 495 496define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) { 497; X86-LABEL: test_mm256_mask_expandloadu_epi16: 498; X86: # %bb.0: # %entry 499; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 500; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 501; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} 502; X86-NEXT: retl 503; 504; X64-LABEL: test_mm256_mask_expandloadu_epi16: 505; X64: # %bb.0: # %entry 506; X64-NEXT: kmovd %edi, %k1 507; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1} 508; X64-NEXT: retq 509entry: 510 %0 = bitcast <4 x i64> %__S to <16 x i16> 511 %1 = bitcast i8* %__P to i16* 512 %2 = bitcast i16 %__U to <16 x i1> 513 %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0) 514 %4 = bitcast <16 x i16> %3 to <4 x i64> 515 ret <4 x i64> %4 516} 517 518define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) { 519; X86-LABEL: test_mm256_maskz_expandloadu_epi16: 520; X86: # %bb.0: # %entry 521; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 522; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 523; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} 524; X86-NEXT: retl 525; 526; X64-LABEL: test_mm256_maskz_expandloadu_epi16: 527; X64: # %bb.0: # %entry 528; X64-NEXT: kmovd %edi, %k1 529; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1} {z} 530; X64-NEXT: retq 531entry: 532 %0 = bitcast i8* %__P to i16* 533 %1 = bitcast i16 %__U to <16 x i1> 534 %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer) 535 %3 = bitcast <16 x i16> %2 to <4 x i64> 536 ret <4 x i64> %3 537} 538 539define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) { 540; X86-LABEL: test_mm256_mask_expandloadu_epi8: 541; X86: # %bb.0: # %entry 542; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 543; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 544; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} 545; X86-NEXT: retl 546; 547; X64-LABEL: test_mm256_mask_expandloadu_epi8: 548; X64: # %bb.0: # %entry 549; X64-NEXT: kmovd %edi, %k1 550; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1} 551; X64-NEXT: retq 552entry: 553 %0 = bitcast <4 x i64> %__S to <32 x i8> 554 %1 = bitcast i32 %__U to <32 x i1> 555 %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0) 556 %3 = bitcast <32 x i8> %2 to <4 x i64> 557 ret <4 x i64> %3 558} 559 560define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) { 561; X86-LABEL: test_mm256_maskz_expandloadu_epi8: 562; X86: # %bb.0: # %entry 563; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 564; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 565; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} 566; X86-NEXT: retl 567; 568; X64-LABEL: test_mm256_maskz_expandloadu_epi8: 569; X64: # %bb.0: # %entry 570; X64-NEXT: kmovd %edi, %k1 571; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1} {z} 572; X64-NEXT: retq 573entry: 574 %0 = bitcast i32 %__U to <32 x i1> 575 %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer) 576 %2 = bitcast <32 x i8> %1 to <4 x i64> 577 ret <4 x i64> %2 578} 579 580define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 581; X86-LABEL: test_mm256_mask_shldi_epi64: 582; X86: # %bb.0: # %entry 583; X86-NEXT: movb {{[0-9]+}}(%esp), %al 584; X86-NEXT: kmovd %eax, %k1 585; X86-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1} 586; X86-NEXT: retl 587; 588; X64-LABEL: test_mm256_mask_shldi_epi64: 589; X64: # %bb.0: # %entry 590; X64-NEXT: kmovd %edi, %k1 591; X64-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1} 592; X64-NEXT: retq 593entry: 594 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 47, i64 47, i64 47, i64 47>) 595 %1 = bitcast i8 %__U to <8 x i1> 596 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 597 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S 598 ret <4 x i64> %2 599} 600 601declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) 602 603define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 604; X86-LABEL: test_mm256_maskz_shldi_epi64: 605; X86: # %bb.0: # %entry 606; X86-NEXT: movb {{[0-9]+}}(%esp), %al 607; X86-NEXT: kmovd %eax, %k1 608; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} 609; X86-NEXT: retl 610; 611; X64-LABEL: test_mm256_maskz_shldi_epi64: 612; X64: # %bb.0: # %entry 613; X64-NEXT: kmovd %edi, %k1 614; X64-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} 615; X64-NEXT: retq 616entry: 617 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 63, i64 63, i64 63, i64 63>) 618 %1 = bitcast i8 %__U to <8 x i1> 619 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 620 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer 621 ret <4 x i64> %2 622} 623 624define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) { 625; CHECK-LABEL: test_mm256_shldi_epi64: 626; CHECK: # %bb.0: # %entry 627; CHECK-NEXT: vpshldq $31, %ymm1, %ymm0, %ymm0 628; CHECK-NEXT: ret{{[l|q]}} 629entry: 630 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 31, i64 31, i64 31, i64 31>) 631 ret <4 x i64> %0 632} 633 634define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 635; X86-LABEL: test_mm_mask_shldi_epi64: 636; X86: # %bb.0: # %entry 637; X86-NEXT: movb {{[0-9]+}}(%esp), %al 638; X86-NEXT: kmovd %eax, %k1 639; X86-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1} 640; X86-NEXT: retl 641; 642; X64-LABEL: test_mm_mask_shldi_epi64: 643; X64: # %bb.0: # %entry 644; X64-NEXT: kmovd %edi, %k1 645; X64-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1} 646; X64-NEXT: retq 647entry: 648 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 47, i64 47>) 649 %1 = bitcast i8 %__U to <8 x i1> 650 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 651 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S 652 ret <2 x i64> %2 653} 654 655declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 656 657define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 658; X86-LABEL: test_mm_maskz_shldi_epi64: 659; X86: # %bb.0: # %entry 660; X86-NEXT: movb {{[0-9]+}}(%esp), %al 661; X86-NEXT: kmovd %eax, %k1 662; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} 663; X86-NEXT: retl 664; 665; X64-LABEL: test_mm_maskz_shldi_epi64: 666; X64: # %bb.0: # %entry 667; X64-NEXT: kmovd %edi, %k1 668; X64-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} 669; X64-NEXT: retq 670entry: 671 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 63, i64 63>) 672 %1 = bitcast i8 %__U to <8 x i1> 673 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 674 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer 675 ret <2 x i64> %2 676} 677 678define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) { 679; CHECK-LABEL: test_mm_shldi_epi64: 680; CHECK: # %bb.0: # %entry 681; CHECK-NEXT: vpshldq $31, %xmm1, %xmm0, %xmm0 682; CHECK-NEXT: ret{{[l|q]}} 683entry: 684 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 31, i64 31>) 685 ret <2 x i64> %0 686} 687 688define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 689; X86-LABEL: test_mm256_mask_shldi_epi32: 690; X86: # %bb.0: # %entry 691; X86-NEXT: movb {{[0-9]+}}(%esp), %al 692; X86-NEXT: kmovd %eax, %k1 693; X86-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1} 694; X86-NEXT: retl 695; 696; X64-LABEL: test_mm256_mask_shldi_epi32: 697; X64: # %bb.0: # %entry 698; X64-NEXT: kmovd %edi, %k1 699; X64-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1} 700; X64-NEXT: retq 701entry: 702 %0 = bitcast <4 x i64> %__A to <8 x i32> 703 %1 = bitcast <4 x i64> %__B to <8 x i32> 704 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>) 705 %3 = bitcast <4 x i64> %__S to <8 x i32> 706 %4 = bitcast i8 %__U to <8 x i1> 707 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 708 %6 = bitcast <8 x i32> %5 to <4 x i64> 709 ret <4 x i64> %6 710} 711 712declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) 713 714define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 715; X86-LABEL: test_mm256_maskz_shldi_epi32: 716; X86: # %bb.0: # %entry 717; X86-NEXT: movb {{[0-9]+}}(%esp), %al 718; X86-NEXT: kmovd %eax, %k1 719; X86-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} 720; X86-NEXT: retl 721; 722; X64-LABEL: test_mm256_maskz_shldi_epi32: 723; X64: # %bb.0: # %entry 724; X64-NEXT: kmovd %edi, %k1 725; X64-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} 726; X64-NEXT: retq 727entry: 728 %0 = bitcast <4 x i64> %__A to <8 x i32> 729 %1 = bitcast <4 x i64> %__B to <8 x i32> 730 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>) 731 %3 = bitcast i8 %__U to <8 x i1> 732 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer 733 %5 = bitcast <8 x i32> %4 to <4 x i64> 734 ret <4 x i64> %5 735} 736 737define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) { 738; CHECK-LABEL: test_mm256_shldi_epi32: 739; CHECK: # %bb.0: # %entry 740; CHECK-NEXT: vpshldd $31, %ymm1, %ymm0, %ymm0 741; CHECK-NEXT: ret{{[l|q]}} 742entry: 743 %0 = bitcast <4 x i64> %__A to <8 x i32> 744 %1 = bitcast <4 x i64> %__B to <8 x i32> 745 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>) 746 %3 = bitcast <8 x i32> %2 to <4 x i64> 747 ret <4 x i64> %3 748} 749 750define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 751; X86-LABEL: test_mm_mask_shldi_epi32: 752; X86: # %bb.0: # %entry 753; X86-NEXT: movb {{[0-9]+}}(%esp), %al 754; X86-NEXT: kmovd %eax, %k1 755; X86-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1} 756; X86-NEXT: retl 757; 758; X64-LABEL: test_mm_mask_shldi_epi32: 759; X64: # %bb.0: # %entry 760; X64-NEXT: kmovd %edi, %k1 761; X64-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1} 762; X64-NEXT: retq 763entry: 764 %0 = bitcast <2 x i64> %__A to <4 x i32> 765 %1 = bitcast <2 x i64> %__B to <4 x i32> 766 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>) 767 %3 = bitcast <2 x i64> %__S to <4 x i32> 768 %4 = bitcast i8 %__U to <8 x i1> 769 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 770 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3 771 %6 = bitcast <4 x i32> %5 to <2 x i64> 772 ret <2 x i64> %6 773} 774 775declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 776 777define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 778; X86-LABEL: test_mm_maskz_shldi_epi32: 779; X86: # %bb.0: # %entry 780; X86-NEXT: movb {{[0-9]+}}(%esp), %al 781; X86-NEXT: kmovd %eax, %k1 782; X86-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} 783; X86-NEXT: retl 784; 785; X64-LABEL: test_mm_maskz_shldi_epi32: 786; X64: # %bb.0: # %entry 787; X64-NEXT: kmovd %edi, %k1 788; X64-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} 789; X64-NEXT: retq 790entry: 791 %0 = bitcast <2 x i64> %__A to <4 x i32> 792 %1 = bitcast <2 x i64> %__B to <4 x i32> 793 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 794 %3 = bitcast i8 %__U to <8 x i1> 795 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 796 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer 797 %5 = bitcast <4 x i32> %4 to <2 x i64> 798 ret <2 x i64> %5 799} 800 801define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) { 802; CHECK-LABEL: test_mm_shldi_epi32: 803; CHECK: # %bb.0: # %entry 804; CHECK-NEXT: vpshldd $31, %xmm1, %xmm0, %xmm0 805; CHECK-NEXT: ret{{[l|q]}} 806entry: 807 %0 = bitcast <2 x i64> %__A to <4 x i32> 808 %1 = bitcast <2 x i64> %__B to <4 x i32> 809 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 31, i32 31, i32 31, i32 31>) 810 %3 = bitcast <4 x i32> %2 to <2 x i64> 811 ret <2 x i64> %3 812} 813 814define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 815; X86-LABEL: test_mm256_mask_shldi_epi16: 816; X86: # %bb.0: # %entry 817; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 818; X86-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1} 819; X86-NEXT: retl 820; 821; X64-LABEL: test_mm256_mask_shldi_epi16: 822; X64: # %bb.0: # %entry 823; X64-NEXT: kmovd %edi, %k1 824; X64-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1} 825; X64-NEXT: retq 826entry: 827 %0 = bitcast <4 x i64> %__A to <16 x i16> 828 %1 = bitcast <4 x i64> %__B to <16 x i16> 829 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) 830 %3 = bitcast <4 x i64> %__S to <16 x i16> 831 %4 = bitcast i16 %__U to <16 x i1> 832 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 833 %6 = bitcast <16 x i16> %5 to <4 x i64> 834 ret <4 x i64> %6 835} 836 837declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) 838 839define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 840; X86-LABEL: test_mm256_maskz_shldi_epi16: 841; X86: # %bb.0: # %entry 842; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 843; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z} 844; X86-NEXT: retl 845; 846; X64-LABEL: test_mm256_maskz_shldi_epi16: 847; X64: # %bb.0: # %entry 848; X64-NEXT: kmovd %edi, %k1 849; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z} 850; X64-NEXT: retq 851entry: 852 %0 = bitcast <4 x i64> %__A to <16 x i16> 853 %1 = bitcast <4 x i64> %__B to <16 x i16> 854 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 855 %3 = bitcast i16 %__U to <16 x i1> 856 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer 857 %5 = bitcast <16 x i16> %4 to <4 x i64> 858 ret <4 x i64> %5 859} 860 861define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) { 862; CHECK-LABEL: test_mm256_shldi_epi16: 863; CHECK: # %bb.0: # %entry 864; CHECK-NEXT: vpshldw $15, %ymm1, %ymm0, %ymm0 865; CHECK-NEXT: ret{{[l|q]}} 866entry: 867 %0 = bitcast <4 x i64> %__A to <16 x i16> 868 %1 = bitcast <4 x i64> %__B to <16 x i16> 869 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>) 870 %3 = bitcast <16 x i16> %2 to <4 x i64> 871 ret <4 x i64> %3 872} 873 874define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 875; X86-LABEL: test_mm_mask_shldi_epi16: 876; X86: # %bb.0: # %entry 877; X86-NEXT: movb {{[0-9]+}}(%esp), %al 878; X86-NEXT: kmovd %eax, %k1 879; X86-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1} 880; X86-NEXT: retl 881; 882; X64-LABEL: test_mm_mask_shldi_epi16: 883; X64: # %bb.0: # %entry 884; X64-NEXT: kmovd %edi, %k1 885; X64-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1} 886; X64-NEXT: retq 887entry: 888 %0 = bitcast <2 x i64> %__A to <8 x i16> 889 %1 = bitcast <2 x i64> %__B to <8 x i16> 890 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) 891 %3 = bitcast <2 x i64> %__S to <8 x i16> 892 %4 = bitcast i8 %__U to <8 x i1> 893 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 894 %6 = bitcast <8 x i16> %5 to <2 x i64> 895 ret <2 x i64> %6 896} 897 898declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) 899 900define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 901; X86-LABEL: test_mm_maskz_shldi_epi16: 902; X86: # %bb.0: # %entry 903; X86-NEXT: movb {{[0-9]+}}(%esp), %al 904; X86-NEXT: kmovd %eax, %k1 905; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} 906; X86-NEXT: retl 907; 908; X64-LABEL: test_mm_maskz_shldi_epi16: 909; X64: # %bb.0: # %entry 910; X64-NEXT: kmovd %edi, %k1 911; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} 912; X64-NEXT: retq 913entry: 914 %0 = bitcast <2 x i64> %__A to <8 x i16> 915 %1 = bitcast <2 x i64> %__B to <8 x i16> 916 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 917 %3 = bitcast i8 %__U to <8 x i1> 918 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer 919 %5 = bitcast <8 x i16> %4 to <2 x i64> 920 ret <2 x i64> %5 921} 922 923define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) { 924; CHECK-LABEL: test_mm_shldi_epi16: 925; CHECK: # %bb.0: # %entry 926; CHECK-NEXT: vpshldw $15, %xmm1, %xmm0, %xmm0 927; CHECK-NEXT: ret{{[l|q]}} 928entry: 929 %0 = bitcast <2 x i64> %__A to <8 x i16> 930 %1 = bitcast <2 x i64> %__B to <8 x i16> 931 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>) 932 %3 = bitcast <8 x i16> %2 to <2 x i64> 933 ret <2 x i64> %3 934} 935 936define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 937; X86-LABEL: test_mm256_mask_shrdi_epi64: 938; X86: # %bb.0: # %entry 939; X86-NEXT: movb {{[0-9]+}}(%esp), %al 940; X86-NEXT: kmovd %eax, %k1 941; X86-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1} 942; X86-NEXT: retl 943; 944; X64-LABEL: test_mm256_mask_shrdi_epi64: 945; X64: # %bb.0: # %entry 946; X64-NEXT: kmovd %edi, %k1 947; X64-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1} 948; X64-NEXT: retq 949entry: 950 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 47, i64 47, i64 47, i64 47>) 951 %1 = bitcast i8 %__U to <8 x i1> 952 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 953 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S 954 ret <4 x i64> %2 955} 956 957declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) 958 959define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 960; X86-LABEL: test_mm256_maskz_shrdi_epi64: 961; X86: # %bb.0: # %entry 962; X86-NEXT: movb {{[0-9]+}}(%esp), %al 963; X86-NEXT: kmovd %eax, %k1 964; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} 965; X86-NEXT: retl 966; 967; X64-LABEL: test_mm256_maskz_shrdi_epi64: 968; X64: # %bb.0: # %entry 969; X64-NEXT: kmovd %edi, %k1 970; X64-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} 971; X64-NEXT: retq 972entry: 973 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 63, i64 63, i64 63, i64 63>) 974 %1 = bitcast i8 %__U to <8 x i1> 975 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 976 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer 977 ret <4 x i64> %2 978} 979 980define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) { 981; CHECK-LABEL: test_mm256_shrdi_epi64: 982; CHECK: # %bb.0: # %entry 983; CHECK-NEXT: vpshrdq $31, %ymm1, %ymm0, %ymm0 984; CHECK-NEXT: ret{{[l|q]}} 985entry: 986 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 31, i64 31, i64 31, i64 31>) 987 ret <4 x i64> %0 988} 989 990define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 991; X86-LABEL: test_mm_mask_shrdi_epi64: 992; X86: # %bb.0: # %entry 993; X86-NEXT: movb {{[0-9]+}}(%esp), %al 994; X86-NEXT: kmovd %eax, %k1 995; X86-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1} 996; X86-NEXT: retl 997; 998; X64-LABEL: test_mm_mask_shrdi_epi64: 999; X64: # %bb.0: # %entry 1000; X64-NEXT: kmovd %edi, %k1 1001; X64-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1} 1002; X64-NEXT: retq 1003entry: 1004 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 47, i64 47>) 1005 %1 = bitcast i8 %__U to <8 x i1> 1006 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1007 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S 1008 ret <2 x i64> %2 1009} 1010 1011declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 1012 1013define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1014; X86-LABEL: test_mm_maskz_shrdi_epi64: 1015; X86: # %bb.0: # %entry 1016; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1017; X86-NEXT: kmovd %eax, %k1 1018; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} 1019; X86-NEXT: retl 1020; 1021; X64-LABEL: test_mm_maskz_shrdi_epi64: 1022; X64: # %bb.0: # %entry 1023; X64-NEXT: kmovd %edi, %k1 1024; X64-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} 1025; X64-NEXT: retq 1026entry: 1027 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 63, i64 63>) 1028 %1 = bitcast i8 %__U to <8 x i1> 1029 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1030 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer 1031 ret <2 x i64> %2 1032} 1033 1034define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) { 1035; CHECK-LABEL: test_mm_shrdi_epi64: 1036; CHECK: # %bb.0: # %entry 1037; CHECK-NEXT: vpshrdq $31, %xmm1, %xmm0, %xmm0 1038; CHECK-NEXT: ret{{[l|q]}} 1039entry: 1040 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 31, i64 31>) 1041 ret <2 x i64> %0 1042} 1043 1044define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1045; X86-LABEL: test_mm256_mask_shrdi_epi32: 1046; X86: # %bb.0: # %entry 1047; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1048; X86-NEXT: kmovd %eax, %k1 1049; X86-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1} 1050; X86-NEXT: retl 1051; 1052; X64-LABEL: test_mm256_mask_shrdi_epi32: 1053; X64: # %bb.0: # %entry 1054; X64-NEXT: kmovd %edi, %k1 1055; X64-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1} 1056; X64-NEXT: retq 1057entry: 1058 %0 = bitcast <4 x i64> %__A to <8 x i32> 1059 %1 = bitcast <4 x i64> %__B to <8 x i32> 1060 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>) 1061 %3 = bitcast <4 x i64> %__S to <8 x i32> 1062 %4 = bitcast i8 %__U to <8 x i1> 1063 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 1064 %6 = bitcast <8 x i32> %5 to <4 x i64> 1065 ret <4 x i64> %6 1066} 1067 1068declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) 1069 1070define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1071; X86-LABEL: test_mm256_maskz_shrdi_epi32: 1072; X86: # %bb.0: # %entry 1073; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1074; X86-NEXT: kmovd %eax, %k1 1075; X86-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} 1076; X86-NEXT: retl 1077; 1078; X64-LABEL: test_mm256_maskz_shrdi_epi32: 1079; X64: # %bb.0: # %entry 1080; X64-NEXT: kmovd %edi, %k1 1081; X64-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} 1082; X64-NEXT: retq 1083entry: 1084 %0 = bitcast <4 x i64> %__A to <8 x i32> 1085 %1 = bitcast <4 x i64> %__B to <8 x i32> 1086 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>) 1087 %3 = bitcast i8 %__U to <8 x i1> 1088 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer 1089 %5 = bitcast <8 x i32> %4 to <4 x i64> 1090 ret <4 x i64> %5 1091} 1092 1093define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) { 1094; CHECK-LABEL: test_mm256_shrdi_epi32: 1095; CHECK: # %bb.0: # %entry 1096; CHECK-NEXT: vpshrdd $31, %ymm1, %ymm0, %ymm0 1097; CHECK-NEXT: ret{{[l|q]}} 1098entry: 1099 %0 = bitcast <4 x i64> %__A to <8 x i32> 1100 %1 = bitcast <4 x i64> %__B to <8 x i32> 1101 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>) 1102 %3 = bitcast <8 x i32> %2 to <4 x i64> 1103 ret <4 x i64> %3 1104} 1105 1106define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1107; X86-LABEL: test_mm_mask_shrdi_epi32: 1108; X86: # %bb.0: # %entry 1109; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1110; X86-NEXT: kmovd %eax, %k1 1111; X86-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1} 1112; X86-NEXT: retl 1113; 1114; X64-LABEL: test_mm_mask_shrdi_epi32: 1115; X64: # %bb.0: # %entry 1116; X64-NEXT: kmovd %edi, %k1 1117; X64-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1} 1118; X64-NEXT: retq 1119entry: 1120 %0 = bitcast <2 x i64> %__A to <4 x i32> 1121 %1 = bitcast <2 x i64> %__B to <4 x i32> 1122 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 7, i32 7, i32 7, i32 7>) 1123 %3 = bitcast <2 x i64> %__S to <4 x i32> 1124 %4 = bitcast i8 %__U to <8 x i1> 1125 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1126 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3 1127 %6 = bitcast <4 x i32> %5 to <2 x i64> 1128 ret <2 x i64> %6 1129} 1130 1131declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 1132 1133define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1134; X86-LABEL: test_mm_maskz_shrdi_epi32: 1135; X86: # %bb.0: # %entry 1136; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1137; X86-NEXT: kmovd %eax, %k1 1138; X86-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} 1139; X86-NEXT: retl 1140; 1141; X64-LABEL: test_mm_maskz_shrdi_epi32: 1142; X64: # %bb.0: # %entry 1143; X64-NEXT: kmovd %edi, %k1 1144; X64-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} 1145; X64-NEXT: retq 1146entry: 1147 %0 = bitcast <2 x i64> %__A to <4 x i32> 1148 %1 = bitcast <2 x i64> %__B to <4 x i32> 1149 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 1150 %3 = bitcast i8 %__U to <8 x i1> 1151 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1152 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer 1153 %5 = bitcast <4 x i32> %4 to <2 x i64> 1154 ret <2 x i64> %5 1155} 1156 1157define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) { 1158; CHECK-LABEL: test_mm_shrdi_epi32: 1159; CHECK: # %bb.0: # %entry 1160; CHECK-NEXT: vpshrdd $31, %xmm1, %xmm0, %xmm0 1161; CHECK-NEXT: ret{{[l|q]}} 1162entry: 1163 %0 = bitcast <2 x i64> %__A to <4 x i32> 1164 %1 = bitcast <2 x i64> %__B to <4 x i32> 1165 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 31, i32 31, i32 31, i32 31>) 1166 %3 = bitcast <4 x i32> %2 to <2 x i64> 1167 ret <2 x i64> %3 1168} 1169 1170define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1171; X86-LABEL: test_mm256_mask_shrdi_epi16: 1172; X86: # %bb.0: # %entry 1173; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1174; X86-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1} 1175; X86-NEXT: retl 1176; 1177; X64-LABEL: test_mm256_mask_shrdi_epi16: 1178; X64: # %bb.0: # %entry 1179; X64-NEXT: kmovd %edi, %k1 1180; X64-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1} 1181; X64-NEXT: retq 1182entry: 1183 %0 = bitcast <4 x i64> %__A to <16 x i16> 1184 %1 = bitcast <4 x i64> %__B to <16 x i16> 1185 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) 1186 %3 = bitcast <4 x i64> %__S to <16 x i16> 1187 %4 = bitcast i16 %__U to <16 x i1> 1188 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3 1189 %6 = bitcast <16 x i16> %5 to <4 x i64> 1190 ret <4 x i64> %6 1191} 1192 1193declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) 1194 1195define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1196; X86-LABEL: test_mm256_maskz_shrdi_epi16: 1197; X86: # %bb.0: # %entry 1198; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1199; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z} 1200; X86-NEXT: retl 1201; 1202; X64-LABEL: test_mm256_maskz_shrdi_epi16: 1203; X64: # %bb.0: # %entry 1204; X64-NEXT: kmovd %edi, %k1 1205; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z} 1206; X64-NEXT: retq 1207entry: 1208 %0 = bitcast <4 x i64> %__A to <16 x i16> 1209 %1 = bitcast <4 x i64> %__B to <16 x i16> 1210 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 1211 %3 = bitcast i16 %__U to <16 x i1> 1212 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer 1213 %5 = bitcast <16 x i16> %4 to <4 x i64> 1214 ret <4 x i64> %5 1215} 1216 1217define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) { 1218; CHECK-LABEL: test_mm256_shrdi_epi16: 1219; CHECK: # %bb.0: # %entry 1220; CHECK-NEXT: vpshrdw $15, %ymm1, %ymm0, %ymm0 1221; CHECK-NEXT: ret{{[l|q]}} 1222entry: 1223 %0 = bitcast <4 x i64> %__A to <16 x i16> 1224 %1 = bitcast <4 x i64> %__B to <16 x i16> 1225 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>) 1226 %3 = bitcast <16 x i16> %2 to <4 x i64> 1227 ret <4 x i64> %3 1228} 1229 1230define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1231; X86-LABEL: test_mm_mask_shrdi_epi16: 1232; X86: # %bb.0: # %entry 1233; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1234; X86-NEXT: kmovd %eax, %k1 1235; X86-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1} 1236; X86-NEXT: retl 1237; 1238; X64-LABEL: test_mm_mask_shrdi_epi16: 1239; X64: # %bb.0: # %entry 1240; X64-NEXT: kmovd %edi, %k1 1241; X64-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1} 1242; X64-NEXT: retq 1243entry: 1244 %0 = bitcast <2 x i64> %__A to <8 x i16> 1245 %1 = bitcast <2 x i64> %__B to <8 x i16> 1246 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) 1247 %3 = bitcast <2 x i64> %__S to <8 x i16> 1248 %4 = bitcast i8 %__U to <8 x i1> 1249 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3 1250 %6 = bitcast <8 x i16> %5 to <2 x i64> 1251 ret <2 x i64> %6 1252} 1253 1254declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) 1255 1256define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1257; X86-LABEL: test_mm_maskz_shrdi_epi16: 1258; X86: # %bb.0: # %entry 1259; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1260; X86-NEXT: kmovd %eax, %k1 1261; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} 1262; X86-NEXT: retl 1263; 1264; X64-LABEL: test_mm_maskz_shrdi_epi16: 1265; X64: # %bb.0: # %entry 1266; X64-NEXT: kmovd %edi, %k1 1267; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} 1268; X64-NEXT: retq 1269entry: 1270 %0 = bitcast <2 x i64> %__A to <8 x i16> 1271 %1 = bitcast <2 x i64> %__B to <8 x i16> 1272 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 1273 %3 = bitcast i8 %__U to <8 x i1> 1274 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer 1275 %5 = bitcast <8 x i16> %4 to <2 x i64> 1276 ret <2 x i64> %5 1277} 1278 1279define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) { 1280; CHECK-LABEL: test_mm_shrdi_epi16: 1281; CHECK: # %bb.0: # %entry 1282; CHECK-NEXT: vpshrdw $15, %xmm1, %xmm0, %xmm0 1283; CHECK-NEXT: ret{{[l|q]}} 1284entry: 1285 %0 = bitcast <2 x i64> %__A to <8 x i16> 1286 %1 = bitcast <2 x i64> %__B to <8 x i16> 1287 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>) 1288 %3 = bitcast <8 x i16> %2 to <2 x i64> 1289 ret <2 x i64> %3 1290} 1291 1292define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1293; X86-LABEL: test_mm256_mask_shldv_epi64: 1294; X86: # %bb.0: # %entry 1295; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1296; X86-NEXT: kmovd %eax, %k1 1297; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} 1298; X86-NEXT: retl 1299; 1300; X64-LABEL: test_mm256_mask_shldv_epi64: 1301; X64: # %bb.0: # %entry 1302; X64-NEXT: kmovd %edi, %k1 1303; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} 1304; X64-NEXT: retq 1305entry: 1306 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) 1307 %1 = bitcast i8 %__U to <8 x i1> 1308 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1309 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S 1310 ret <4 x i64> %2 1311} 1312 1313define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1314; X86-LABEL: test_mm256_maskz_shldv_epi64: 1315; X86: # %bb.0: # %entry 1316; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1317; X86-NEXT: kmovd %eax, %k1 1318; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1319; X86-NEXT: retl 1320; 1321; X64-LABEL: test_mm256_maskz_shldv_epi64: 1322; X64: # %bb.0: # %entry 1323; X64-NEXT: kmovd %edi, %k1 1324; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1325; X64-NEXT: retq 1326entry: 1327 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) 1328 %1 = bitcast i8 %__U to <8 x i1> 1329 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1330 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer 1331 ret <4 x i64> %2 1332} 1333 1334define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1335; CHECK-LABEL: test_mm256_shldv_epi64: 1336; CHECK: # %bb.0: # %entry 1337; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 1338; CHECK-NEXT: ret{{[l|q]}} 1339entry: 1340 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) 1341 ret <4 x i64> %0 1342} 1343 1344define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1345; X86-LABEL: test_mm_mask_shldv_epi64: 1346; X86: # %bb.0: # %entry 1347; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1348; X86-NEXT: kmovd %eax, %k1 1349; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} 1350; X86-NEXT: retl 1351; 1352; X64-LABEL: test_mm_mask_shldv_epi64: 1353; X64: # %bb.0: # %entry 1354; X64-NEXT: kmovd %edi, %k1 1355; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} 1356; X64-NEXT: retq 1357entry: 1358 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) 1359 %1 = bitcast i8 %__U to <8 x i1> 1360 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1361 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S 1362 ret <2 x i64> %2 1363} 1364 1365define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1366; X86-LABEL: test_mm_maskz_shldv_epi64: 1367; X86: # %bb.0: # %entry 1368; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1369; X86-NEXT: kmovd %eax, %k1 1370; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1371; X86-NEXT: retl 1372; 1373; X64-LABEL: test_mm_maskz_shldv_epi64: 1374; X64: # %bb.0: # %entry 1375; X64-NEXT: kmovd %edi, %k1 1376; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1377; X64-NEXT: retq 1378entry: 1379 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) 1380 %1 = bitcast i8 %__U to <8 x i1> 1381 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1382 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer 1383 ret <2 x i64> %2 1384} 1385 1386define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1387; CHECK-LABEL: test_mm_shldv_epi64: 1388; CHECK: # %bb.0: # %entry 1389; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 1390; CHECK-NEXT: ret{{[l|q]}} 1391entry: 1392 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) 1393 ret <2 x i64> %0 1394} 1395 1396define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1397; X86-LABEL: test_mm256_mask_shldv_epi32: 1398; X86: # %bb.0: # %entry 1399; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1400; X86-NEXT: kmovd %eax, %k1 1401; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} 1402; X86-NEXT: retl 1403; 1404; X64-LABEL: test_mm256_mask_shldv_epi32: 1405; X64: # %bb.0: # %entry 1406; X64-NEXT: kmovd %edi, %k1 1407; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} 1408; X64-NEXT: retq 1409entry: 1410 %0 = bitcast <4 x i64> %__S to <8 x i32> 1411 %1 = bitcast <4 x i64> %__A to <8 x i32> 1412 %2 = bitcast <4 x i64> %__B to <8 x i32> 1413 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) 1414 %4 = bitcast i8 %__U to <8 x i1> 1415 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0 1416 %6 = bitcast <8 x i32> %5 to <4 x i64> 1417 ret <4 x i64> %6 1418} 1419 1420define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1421; X86-LABEL: test_mm256_maskz_shldv_epi32: 1422; X86: # %bb.0: # %entry 1423; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1424; X86-NEXT: kmovd %eax, %k1 1425; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1426; X86-NEXT: retl 1427; 1428; X64-LABEL: test_mm256_maskz_shldv_epi32: 1429; X64: # %bb.0: # %entry 1430; X64-NEXT: kmovd %edi, %k1 1431; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1432; X64-NEXT: retq 1433entry: 1434 %0 = bitcast <4 x i64> %__S to <8 x i32> 1435 %1 = bitcast <4 x i64> %__A to <8 x i32> 1436 %2 = bitcast <4 x i64> %__B to <8 x i32> 1437 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) 1438 %4 = bitcast i8 %__U to <8 x i1> 1439 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer 1440 %6 = bitcast <8 x i32> %5 to <4 x i64> 1441 ret <4 x i64> %6 1442} 1443 1444define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1445; CHECK-LABEL: test_mm256_shldv_epi32: 1446; CHECK: # %bb.0: # %entry 1447; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 1448; CHECK-NEXT: ret{{[l|q]}} 1449entry: 1450 %0 = bitcast <4 x i64> %__S to <8 x i32> 1451 %1 = bitcast <4 x i64> %__A to <8 x i32> 1452 %2 = bitcast <4 x i64> %__B to <8 x i32> 1453 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2) 1454 %4 = bitcast <8 x i32> %3 to <4 x i64> 1455 ret <4 x i64> %4 1456} 1457 1458define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1459; X86-LABEL: test_mm_mask_shldv_epi32: 1460; X86: # %bb.0: # %entry 1461; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1462; X86-NEXT: kmovd %eax, %k1 1463; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} 1464; X86-NEXT: retl 1465; 1466; X64-LABEL: test_mm_mask_shldv_epi32: 1467; X64: # %bb.0: # %entry 1468; X64-NEXT: kmovd %edi, %k1 1469; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} 1470; X64-NEXT: retq 1471entry: 1472 %0 = bitcast <2 x i64> %__S to <4 x i32> 1473 %1 = bitcast <2 x i64> %__A to <4 x i32> 1474 %2 = bitcast <2 x i64> %__B to <4 x i32> 1475 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) 1476 %4 = bitcast i8 %__U to <8 x i1> 1477 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1478 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0 1479 %6 = bitcast <4 x i32> %5 to <2 x i64> 1480 ret <2 x i64> %6 1481} 1482 1483define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1484; X86-LABEL: test_mm_maskz_shldv_epi32: 1485; X86: # %bb.0: # %entry 1486; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1487; X86-NEXT: kmovd %eax, %k1 1488; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1489; X86-NEXT: retl 1490; 1491; X64-LABEL: test_mm_maskz_shldv_epi32: 1492; X64: # %bb.0: # %entry 1493; X64-NEXT: kmovd %edi, %k1 1494; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1495; X64-NEXT: retq 1496entry: 1497 %0 = bitcast <2 x i64> %__S to <4 x i32> 1498 %1 = bitcast <2 x i64> %__A to <4 x i32> 1499 %2 = bitcast <2 x i64> %__B to <4 x i32> 1500 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) 1501 %4 = bitcast i8 %__U to <8 x i1> 1502 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1503 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer 1504 %6 = bitcast <4 x i32> %5 to <2 x i64> 1505 ret <2 x i64> %6 1506} 1507 1508define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1509; CHECK-LABEL: test_mm_shldv_epi32: 1510; CHECK: # %bb.0: # %entry 1511; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 1512; CHECK-NEXT: ret{{[l|q]}} 1513entry: 1514 %0 = bitcast <2 x i64> %__S to <4 x i32> 1515 %1 = bitcast <2 x i64> %__A to <4 x i32> 1516 %2 = bitcast <2 x i64> %__B to <4 x i32> 1517 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) 1518 %4 = bitcast <4 x i32> %3 to <2 x i64> 1519 ret <2 x i64> %4 1520} 1521 1522define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1523; X86-LABEL: test_mm256_mask_shldv_epi16: 1524; X86: # %bb.0: # %entry 1525; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1526; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} 1527; X86-NEXT: retl 1528; 1529; X64-LABEL: test_mm256_mask_shldv_epi16: 1530; X64: # %bb.0: # %entry 1531; X64-NEXT: kmovd %edi, %k1 1532; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} 1533; X64-NEXT: retq 1534entry: 1535 %0 = bitcast <4 x i64> %__S to <16 x i16> 1536 %1 = bitcast <4 x i64> %__A to <16 x i16> 1537 %2 = bitcast <4 x i64> %__B to <16 x i16> 1538 %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) 1539 %4 = bitcast i16 %__U to <16 x i1> 1540 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 1541 %6 = bitcast <16 x i16> %5 to <4 x i64> 1542 ret <4 x i64> %6 1543} 1544 1545define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1546; X86-LABEL: test_mm256_maskz_shldv_epi16: 1547; X86: # %bb.0: # %entry 1548; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1549; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1550; X86-NEXT: retl 1551; 1552; X64-LABEL: test_mm256_maskz_shldv_epi16: 1553; X64: # %bb.0: # %entry 1554; X64-NEXT: kmovd %edi, %k1 1555; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1556; X64-NEXT: retq 1557entry: 1558 %0 = bitcast <4 x i64> %__S to <16 x i16> 1559 %1 = bitcast <4 x i64> %__A to <16 x i16> 1560 %2 = bitcast <4 x i64> %__B to <16 x i16> 1561 %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) 1562 %4 = bitcast i16 %__U to <16 x i1> 1563 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer 1564 %6 = bitcast <16 x i16> %5 to <4 x i64> 1565 ret <4 x i64> %6 1566} 1567 1568define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1569; CHECK-LABEL: test_mm256_shldv_epi16: 1570; CHECK: # %bb.0: # %entry 1571; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 1572; CHECK-NEXT: ret{{[l|q]}} 1573entry: 1574 %0 = bitcast <4 x i64> %__S to <16 x i16> 1575 %1 = bitcast <4 x i64> %__A to <16 x i16> 1576 %2 = bitcast <4 x i64> %__B to <16 x i16> 1577 %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2) 1578 %4 = bitcast <16 x i16> %3 to <4 x i64> 1579 ret <4 x i64> %4 1580} 1581 1582define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1583; X86-LABEL: test_mm_mask_shldv_epi16: 1584; X86: # %bb.0: # %entry 1585; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1586; X86-NEXT: kmovd %eax, %k1 1587; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} 1588; X86-NEXT: retl 1589; 1590; X64-LABEL: test_mm_mask_shldv_epi16: 1591; X64: # %bb.0: # %entry 1592; X64-NEXT: kmovd %edi, %k1 1593; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} 1594; X64-NEXT: retq 1595entry: 1596 %0 = bitcast <2 x i64> %__S to <8 x i16> 1597 %1 = bitcast <2 x i64> %__A to <8 x i16> 1598 %2 = bitcast <2 x i64> %__B to <8 x i16> 1599 %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) 1600 %4 = bitcast i8 %__U to <8 x i1> 1601 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 1602 %6 = bitcast <8 x i16> %5 to <2 x i64> 1603 ret <2 x i64> %6 1604} 1605 1606define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1607; X86-LABEL: test_mm_maskz_shldv_epi16: 1608; X86: # %bb.0: # %entry 1609; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1610; X86-NEXT: kmovd %eax, %k1 1611; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1612; X86-NEXT: retl 1613; 1614; X64-LABEL: test_mm_maskz_shldv_epi16: 1615; X64: # %bb.0: # %entry 1616; X64-NEXT: kmovd %edi, %k1 1617; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1618; X64-NEXT: retq 1619entry: 1620 %0 = bitcast <2 x i64> %__S to <8 x i16> 1621 %1 = bitcast <2 x i64> %__A to <8 x i16> 1622 %2 = bitcast <2 x i64> %__B to <8 x i16> 1623 %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) 1624 %4 = bitcast i8 %__U to <8 x i1> 1625 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer 1626 %6 = bitcast <8 x i16> %5 to <2 x i64> 1627 ret <2 x i64> %6 1628} 1629 1630define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1631; CHECK-LABEL: test_mm_shldv_epi16: 1632; CHECK: # %bb.0: # %entry 1633; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 1634; CHECK-NEXT: ret{{[l|q]}} 1635entry: 1636 %0 = bitcast <2 x i64> %__S to <8 x i16> 1637 %1 = bitcast <2 x i64> %__A to <8 x i16> 1638 %2 = bitcast <2 x i64> %__B to <8 x i16> 1639 %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) 1640 %4 = bitcast <8 x i16> %3 to <2 x i64> 1641 ret <2 x i64> %4 1642} 1643 1644define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1645; X86-LABEL: test_mm256_mask_shrdv_epi64: 1646; X86: # %bb.0: # %entry 1647; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1648; X86-NEXT: kmovd %eax, %k1 1649; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} 1650; X86-NEXT: retl 1651; 1652; X64-LABEL: test_mm256_mask_shrdv_epi64: 1653; X64: # %bb.0: # %entry 1654; X64-NEXT: kmovd %edi, %k1 1655; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} 1656; X64-NEXT: retq 1657entry: 1658 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1659 %1 = bitcast i8 %__U to <8 x i1> 1660 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1661 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S 1662 ret <4 x i64> %2 1663} 1664 1665define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1666; X86-LABEL: test_mm256_maskz_shrdv_epi64: 1667; X86: # %bb.0: # %entry 1668; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1669; X86-NEXT: kmovd %eax, %k1 1670; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1671; X86-NEXT: retl 1672; 1673; X64-LABEL: test_mm256_maskz_shrdv_epi64: 1674; X64: # %bb.0: # %entry 1675; X64-NEXT: kmovd %edi, %k1 1676; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1677; X64-NEXT: retq 1678entry: 1679 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1680 %1 = bitcast i8 %__U to <8 x i1> 1681 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1682 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer 1683 ret <4 x i64> %2 1684} 1685 1686define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1687; CHECK-LABEL: test_mm256_shrdv_epi64: 1688; CHECK: # %bb.0: # %entry 1689; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 1690; CHECK-NEXT: ret{{[l|q]}} 1691entry: 1692 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1693 ret <4 x i64> %0 1694} 1695 1696define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1697; X86-LABEL: test_mm_mask_shrdv_epi64: 1698; X86: # %bb.0: # %entry 1699; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1700; X86-NEXT: kmovd %eax, %k1 1701; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1702; X86-NEXT: retl 1703; 1704; X64-LABEL: test_mm_mask_shrdv_epi64: 1705; X64: # %bb.0: # %entry 1706; X64-NEXT: kmovd %edi, %k1 1707; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1708; X64-NEXT: retq 1709entry: 1710 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1711 %1 = bitcast i8 %__U to <8 x i1> 1712 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1713 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S 1714 ret <2 x i64> %2 1715} 1716 1717define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1718; X86-LABEL: test_mm_maskz_shrdv_epi64: 1719; X86: # %bb.0: # %entry 1720; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1721; X86-NEXT: kmovd %eax, %k1 1722; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1723; X86-NEXT: retl 1724; 1725; X64-LABEL: test_mm_maskz_shrdv_epi64: 1726; X64: # %bb.0: # %entry 1727; X64-NEXT: kmovd %edi, %k1 1728; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1729; X64-NEXT: retq 1730entry: 1731 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1732 %1 = bitcast i8 %__U to <8 x i1> 1733 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1734 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer 1735 ret <2 x i64> %2 1736} 1737 1738define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1739; CHECK-LABEL: test_mm_shrdv_epi64: 1740; CHECK: # %bb.0: # %entry 1741; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 1742; CHECK-NEXT: ret{{[l|q]}} 1743entry: 1744 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1745 ret <2 x i64> %0 1746} 1747 1748define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1749; X86-LABEL: test_mm256_mask_shrdv_epi32: 1750; X86: # %bb.0: # %entry 1751; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1752; X86-NEXT: kmovd %eax, %k1 1753; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1754; X86-NEXT: retl 1755; 1756; X64-LABEL: test_mm256_mask_shrdv_epi32: 1757; X64: # %bb.0: # %entry 1758; X64-NEXT: kmovd %edi, %k1 1759; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1760; X64-NEXT: retq 1761entry: 1762 %0 = bitcast <4 x i64> %__S to <8 x i32> 1763 %1 = bitcast <4 x i64> %__A to <8 x i32> 1764 %2 = bitcast <4 x i64> %__B to <8 x i32> 1765 %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1766 %4 = bitcast i8 %__U to <8 x i1> 1767 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0 1768 %6 = bitcast <8 x i32> %5 to <4 x i64> 1769 ret <4 x i64> %6 1770} 1771 1772define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1773; X86-LABEL: test_mm256_maskz_shrdv_epi32: 1774; X86: # %bb.0: # %entry 1775; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1776; X86-NEXT: kmovd %eax, %k1 1777; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1778; X86-NEXT: retl 1779; 1780; X64-LABEL: test_mm256_maskz_shrdv_epi32: 1781; X64: # %bb.0: # %entry 1782; X64-NEXT: kmovd %edi, %k1 1783; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1784; X64-NEXT: retq 1785entry: 1786 %0 = bitcast <4 x i64> %__S to <8 x i32> 1787 %1 = bitcast <4 x i64> %__A to <8 x i32> 1788 %2 = bitcast <4 x i64> %__B to <8 x i32> 1789 %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1790 %4 = bitcast i8 %__U to <8 x i1> 1791 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer 1792 %6 = bitcast <8 x i32> %5 to <4 x i64> 1793 ret <4 x i64> %6 1794} 1795 1796define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1797; CHECK-LABEL: test_mm256_shrdv_epi32: 1798; CHECK: # %bb.0: # %entry 1799; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 1800; CHECK-NEXT: ret{{[l|q]}} 1801entry: 1802 %0 = bitcast <4 x i64> %__S to <8 x i32> 1803 %1 = bitcast <4 x i64> %__A to <8 x i32> 1804 %2 = bitcast <4 x i64> %__B to <8 x i32> 1805 %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1806 %4 = bitcast <8 x i32> %3 to <4 x i64> 1807 ret <4 x i64> %4 1808} 1809 1810define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1811; X86-LABEL: test_mm_mask_shrdv_epi32: 1812; X86: # %bb.0: # %entry 1813; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1814; X86-NEXT: kmovd %eax, %k1 1815; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1816; X86-NEXT: retl 1817; 1818; X64-LABEL: test_mm_mask_shrdv_epi32: 1819; X64: # %bb.0: # %entry 1820; X64-NEXT: kmovd %edi, %k1 1821; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1822; X64-NEXT: retq 1823entry: 1824 %0 = bitcast <2 x i64> %__S to <4 x i32> 1825 %1 = bitcast <2 x i64> %__A to <4 x i32> 1826 %2 = bitcast <2 x i64> %__B to <4 x i32> 1827 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1828 %4 = bitcast i8 %__U to <8 x i1> 1829 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1830 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0 1831 %6 = bitcast <4 x i32> %5 to <2 x i64> 1832 ret <2 x i64> %6 1833} 1834 1835define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1836; X86-LABEL: test_mm_maskz_shrdv_epi32: 1837; X86: # %bb.0: # %entry 1838; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1839; X86-NEXT: kmovd %eax, %k1 1840; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1841; X86-NEXT: retl 1842; 1843; X64-LABEL: test_mm_maskz_shrdv_epi32: 1844; X64: # %bb.0: # %entry 1845; X64-NEXT: kmovd %edi, %k1 1846; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1847; X64-NEXT: retq 1848entry: 1849 %0 = bitcast <2 x i64> %__S to <4 x i32> 1850 %1 = bitcast <2 x i64> %__A to <4 x i32> 1851 %2 = bitcast <2 x i64> %__B to <4 x i32> 1852 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1853 %4 = bitcast i8 %__U to <8 x i1> 1854 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1855 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer 1856 %6 = bitcast <4 x i32> %5 to <2 x i64> 1857 ret <2 x i64> %6 1858} 1859 1860define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1861; CHECK-LABEL: test_mm_shrdv_epi32: 1862; CHECK: # %bb.0: # %entry 1863; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 1864; CHECK-NEXT: ret{{[l|q]}} 1865entry: 1866 %0 = bitcast <2 x i64> %__S to <4 x i32> 1867 %1 = bitcast <2 x i64> %__A to <4 x i32> 1868 %2 = bitcast <2 x i64> %__B to <4 x i32> 1869 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1870 %4 = bitcast <4 x i32> %3 to <2 x i64> 1871 ret <2 x i64> %4 1872} 1873 1874define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1875; X86-LABEL: test_mm256_mask_shrdv_epi16: 1876; X86: # %bb.0: # %entry 1877; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1878; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1879; X86-NEXT: retl 1880; 1881; X64-LABEL: test_mm256_mask_shrdv_epi16: 1882; X64: # %bb.0: # %entry 1883; X64-NEXT: kmovd %edi, %k1 1884; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1885; X64-NEXT: retq 1886entry: 1887 %0 = bitcast <4 x i64> %__S to <16 x i16> 1888 %1 = bitcast <4 x i64> %__A to <16 x i16> 1889 %2 = bitcast <4 x i64> %__B to <16 x i16> 1890 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1891 %4 = bitcast i16 %__U to <16 x i1> 1892 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 1893 %6 = bitcast <16 x i16> %5 to <4 x i64> 1894 ret <4 x i64> %6 1895} 1896 1897define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1898; X86-LABEL: test_mm256_maskz_shrdv_epi16: 1899; X86: # %bb.0: # %entry 1900; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1901; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1902; X86-NEXT: retl 1903; 1904; X64-LABEL: test_mm256_maskz_shrdv_epi16: 1905; X64: # %bb.0: # %entry 1906; X64-NEXT: kmovd %edi, %k1 1907; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1908; X64-NEXT: retq 1909entry: 1910 %0 = bitcast <4 x i64> %__S to <16 x i16> 1911 %1 = bitcast <4 x i64> %__A to <16 x i16> 1912 %2 = bitcast <4 x i64> %__B to <16 x i16> 1913 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1914 %4 = bitcast i16 %__U to <16 x i1> 1915 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer 1916 %6 = bitcast <16 x i16> %5 to <4 x i64> 1917 ret <4 x i64> %6 1918} 1919 1920define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1921; CHECK-LABEL: test_mm256_shrdv_epi16: 1922; CHECK: # %bb.0: # %entry 1923; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 1924; CHECK-NEXT: ret{{[l|q]}} 1925entry: 1926 %0 = bitcast <4 x i64> %__S to <16 x i16> 1927 %1 = bitcast <4 x i64> %__A to <16 x i16> 1928 %2 = bitcast <4 x i64> %__B to <16 x i16> 1929 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1930 %4 = bitcast <16 x i16> %3 to <4 x i64> 1931 ret <4 x i64> %4 1932} 1933 1934define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1935; X86-LABEL: test_mm_mask_shrdv_epi16: 1936; X86: # %bb.0: # %entry 1937; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1938; X86-NEXT: kmovd %eax, %k1 1939; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1940; X86-NEXT: retl 1941; 1942; X64-LABEL: test_mm_mask_shrdv_epi16: 1943; X64: # %bb.0: # %entry 1944; X64-NEXT: kmovd %edi, %k1 1945; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1946; X64-NEXT: retq 1947entry: 1948 %0 = bitcast <2 x i64> %__S to <8 x i16> 1949 %1 = bitcast <2 x i64> %__A to <8 x i16> 1950 %2 = bitcast <2 x i64> %__B to <8 x i16> 1951 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1952 %4 = bitcast i8 %__U to <8 x i1> 1953 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 1954 %6 = bitcast <8 x i16> %5 to <2 x i64> 1955 ret <2 x i64> %6 1956} 1957 1958define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1959; X86-LABEL: test_mm_maskz_shrdv_epi16: 1960; X86: # %bb.0: # %entry 1961; X86-NEXT: movb {{[0-9]+}}(%esp), %al 1962; X86-NEXT: kmovd %eax, %k1 1963; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1964; X86-NEXT: retl 1965; 1966; X64-LABEL: test_mm_maskz_shrdv_epi16: 1967; X64: # %bb.0: # %entry 1968; X64-NEXT: kmovd %edi, %k1 1969; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1970; X64-NEXT: retq 1971entry: 1972 %0 = bitcast <2 x i64> %__S to <8 x i16> 1973 %1 = bitcast <2 x i64> %__A to <8 x i16> 1974 %2 = bitcast <2 x i64> %__B to <8 x i16> 1975 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1976 %4 = bitcast i8 %__U to <8 x i1> 1977 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer 1978 %6 = bitcast <8 x i16> %5 to <2 x i64> 1979 ret <2 x i64> %6 1980} 1981 1982define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1983; CHECK-LABEL: test_mm_shrdv_epi16: 1984; CHECK: # %bb.0: # %entry 1985; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 1986; CHECK-NEXT: ret{{[l|q]}} 1987entry: 1988 %0 = bitcast <2 x i64> %__S to <8 x i16> 1989 %1 = bitcast <2 x i64> %__A to <8 x i16> 1990 %2 = bitcast <2 x i64> %__B to <8 x i16> 1991 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1992 %4 = bitcast <8 x i16> %3 to <2 x i64> 1993 ret <2 x i64> %4 1994} 1995 1996declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8) 1997declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16) 1998declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>) 1999declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) 2000declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8) 2001declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16) 2002declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>) 2003declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>) 2004declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16) 2005declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32) 2006declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>) 2007declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) 2008declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16) 2009declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32) 2010declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>) 2011declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>) 2012