; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; Funnel-shift patterns (shl + lshr + or) expected to be combined into the
; AVX512VBMI2 concat-shift instructions. In every lane the two shift amounts
; sum to the element width. Non-uniform amounts select the variable form
; (VPSHLDV*, amounts loaded from a constant pool); splat amounts select the
; immediate form (VPSHLD*).

define <8 x i64> @avx512_funnel_shift_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.LCPI.*}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{.*}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 = shl <8 x i64> %a0, <i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33>
  %2 = lshr <8 x i64> %a1, <i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31>
  %3 = or <8 x i64> %1, %2
  ret <8 x i64> %3
}

define <8 x i64> @avx512_funnel_shift_q_512_splat(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <8 x i64> %a0, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
  %2 = lshr <8 x i64> %a1, <i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33>
  %3 = or <8 x i64> %1, %2
  ret <8 x i64> %3
}

define <16 x i32> @avx512_funnel_shift_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.LCPI.*}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{.*}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 = shl <16 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <16 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
  %3 = or <16 x i32> %1, %2
  ret <16 x i32> %3
}

define <16 x i32> @avx512_funnel_shift_d_512_splat(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <16 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <16 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %3 = or <16 x i32> %1, %2
  ret <16 x i32> %3
}

define <32 x i16> @avx512_funnel_shift_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.LCPI.*}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{.*}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 = shl <32 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <32 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <32 x i16> %1, %2
  ret <32 x i16> %3
}

define <32 x i16> @avx512_funnel_shift_w_512_splat(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <32 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <32 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <32 x i16> %1, %2
  ret <32 x i16> %3
}