; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

define <2 x i64> @avx512_funnel_shift_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 =  shl <2 x i64> %a0, <i64 31, i64 33>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 31>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 =  shl <4 x i64> %a0, <i64 31, i64 33, i64 31, i64 33>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 31, i64 33, i64 31>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

define <2 x i64> @avx512_funnel_shift_q_128_splat(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <2 x i64> %a0, <i64 31, i64 31>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 33>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256_splat(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <4 x i64> %a0, <i64 31, i64 31, i64 31, i64 31>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 33, i64 33, i64 33>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

define <4 x i32> @avx512_funnel_shift_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 =  shl <4 x i32> %a0, <i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 15, i32 17, i32 15>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 =  shl <8 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

define <4 x i32> @avx512_funnel_shift_d_128_splat(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 17, i32 17, i32 17>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256_splat(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <8 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

define <8 x i16> @avx512_funnel_shift_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.LCPI.*}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{.*}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 =  shl <8 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.LCPI.*}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 =  shl <16 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}

define <8 x i16> @avx512_funnel_shift_w_128_splat(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256_splat(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <16 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}