; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
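; The shl/lshr/or sequences below open-code funnel shifts that should lower to
; VPSHLDV (per-element shift amounts) or VPSHLD (uniform immediate amount).
; For illustration only (not part of the checked output), the q_128_splat body
; is equivalent to calling the funnel-shift intrinsic directly:
;   %r = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> <i64 31, i64 31>)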

define <2 x i64> @avx512_funnel_shift_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <2 x i64> %a0, <i64 31, i64 33>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 31>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <4 x i64> %a0, <i64 31, i64 33, i64 31, i64 33>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 31, i64 33, i64 31>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

define <2 x i64> @avx512_funnel_shift_q_128_splat(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <2 x i64> %a0, <i64 31, i64 31>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 33>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256_splat(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <4 x i64> %a0, <i64 31, i64 31, i64 31, i64 31>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 33, i64 33, i64 33>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

define <4 x i32> @avx512_funnel_shift_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <4 x i32> %a0, <i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 15, i32 17, i32 15>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <8 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

define <4 x i32> @avx512_funnel_shift_d_128_splat(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 17, i32 17, i32 17>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256_splat(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <8 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

define <8 x i16> @avx512_funnel_shift_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <8 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <16 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}

define <8 x i16> @avx512_funnel_shift_w_128_splat(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256_splat(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <16 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}