• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
3
4; These test cases demonstrate cases where vpternlog could benefit from being commuted.
5
; Declaration of the ternary-logic intrinsic called by the tests below: three
; <16 x i32> operands followed by an i32 immediate, returning <16 x i32>.
6declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
7
; Baseline with no masking and no loads: the intrinsic is called with operands
; (%x0, %x1, %x2) and immediate 114, and the expected vpternlogd keeps 114.
8define <16 x i32> @vpternlog_v16i32_012(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
9; CHECK-LABEL: vpternlog_v16i32_012:
10; CHECK:       ## %bb.0:
11; CHECK-NEXT:    vpternlogd $114, %zmm2, %zmm1, %zmm0
12; CHECK-NEXT:    retq
13  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
14  ret <16 x i32> %1
15}
16
; The first two intrinsic operands are swapped, giving (%x1, %x0, %x2). The
; expected vpternlogd keeps the register order of the baseline test and uses
; immediate 78 in place of the IR's 114.
17define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
18; CHECK-LABEL: vpternlog_v16i32_102:
19; CHECK:       ## %bb.0:
20; CHECK-NEXT:    vpternlogd $78, %zmm2, %zmm1, %zmm0
21; CHECK-NEXT:    retq
22  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
23  ret <16 x i32> %1
24}
25
; The first and third intrinsic operands are swapped, giving (%x2, %x1, %x0).
; The expected vpternlogd still writes %zmm0, lists %zmm1 before %zmm2, and uses
; immediate 92 in place of the IR's 114.
26define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
27; CHECK-LABEL: vpternlog_v16i32_210:
28; CHECK:       ## %bb.0:
29; CHECK-NEXT:    vpternlogd $92, %zmm1, %zmm2, %zmm0
30; CHECK-NEXT:    retq
31  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
32  ret <16 x i32> %1
33}
34
; Intrinsic operands are (%x0, %x1, %x2) with %x0 loaded from %x0ptr. The
; expected vpternlogd takes the load as its memory operand, (%rdi), and uses
; immediate 46 in place of the IR's 114.
35define <16 x i32> @vpternlog_v16i32_012_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
36; CHECK-LABEL: vpternlog_v16i32_012_load0:
37; CHECK:       ## %bb.0:
38; CHECK-NEXT:    vpternlogd $46, (%rdi), %zmm1, %zmm0
39; CHECK-NEXT:    retq
40  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
41  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
42  ret <16 x i32> %1
43}
44
; Intrinsic operands are (%x0, %x1, %x2) with %x1 loaded from %x1ptr. The
; expected vpternlogd takes the load as its memory operand, (%rdi), and uses
; immediate 116 in place of the IR's 114.
45define <16 x i32> @vpternlog_v16i32_012_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
46; CHECK-LABEL: vpternlog_v16i32_012_load1:
47; CHECK:       ## %bb.0:
48; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0
49; CHECK-NEXT:    retq
50  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
51  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
52  ret <16 x i32> %1
53}
54
; Intrinsic operands are (%x0, %x1, %x2) with %x2 loaded from %x2ptr. The
; expected vpternlogd takes the load as its memory operand, (%rdi), and keeps
; the IR's immediate 114.
55define <16 x i32> @vpternlog_v16i32_012_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
56; CHECK-LABEL: vpternlog_v16i32_012_load2:
57; CHECK:       ## %bb.0:
58; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0
59; CHECK-NEXT:    retq
60  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
61  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
62  ret <16 x i32> %1
63}
64
; Intrinsic operands are (%x1, %x0, %x2) with %x0 (the middle operand) loaded
; from %x0ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 116 in place of the IR's 114.
65define <16 x i32> @vpternlog_v16i32_102_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
66; CHECK-LABEL: vpternlog_v16i32_102_load0:
67; CHECK:       ## %bb.0:
68; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0
69; CHECK-NEXT:    retq
70  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
71  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
72  ret <16 x i32> %1
73}
74
; Intrinsic operands are (%x1, %x0, %x2) with %x1 (the first operand) loaded
; from %x1ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 46 in place of the IR's 114.
75define <16 x i32> @vpternlog_v16i32_102_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
76; CHECK-LABEL: vpternlog_v16i32_102_load1:
77; CHECK:       ## %bb.0:
78; CHECK-NEXT:    vpternlogd $46, (%rdi), %zmm1, %zmm0
79; CHECK-NEXT:    retq
80  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
81  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
82  ret <16 x i32> %1
83}
84
; Intrinsic operands are (%x1, %x0, %x2) with %x2 (the last operand) loaded from
; %x2ptr. The expected vpternlogd takes the load as its memory operand, (%rdi),
; and uses immediate 78 in place of the IR's 114.
85define <16 x i32> @vpternlog_v16i32_102_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
86; CHECK-LABEL: vpternlog_v16i32_102_load2:
87; CHECK:       ## %bb.0:
88; CHECK-NEXT:    vpternlogd $78, (%rdi), %zmm1, %zmm0
89; CHECK-NEXT:    retq
90  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
91  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
92  ret <16 x i32> %1
93}
94
; Intrinsic operands are (%x2, %x1, %x0) with %x0 (the last operand) loaded from
; %x0ptr. The expected vpternlogd takes the load as its memory operand, (%rdi),
; and uses immediate 78 in place of the IR's 114.
95define <16 x i32> @vpternlog_v16i32_210_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
96; CHECK-LABEL: vpternlog_v16i32_210_load0:
97; CHECK:       ## %bb.0:
98; CHECK-NEXT:    vpternlogd $78, (%rdi), %zmm1, %zmm0
99; CHECK-NEXT:    retq
100  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
101  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
102  ret <16 x i32> %1
103}
104
; Intrinsic operands are (%x2, %x1, %x0) with %x1 (the middle operand) loaded
; from %x1ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 92 in place of the IR's 114.
105define <16 x i32> @vpternlog_v16i32_210_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
106; CHECK-LABEL: vpternlog_v16i32_210_load1:
107; CHECK:       ## %bb.0:
108; CHECK-NEXT:    vpternlogd $92, (%rdi), %zmm1, %zmm0
109; CHECK-NEXT:    retq
110  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
111  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
112  ret <16 x i32> %1
113}
114
; Intrinsic operands are (%x2, %x1, %x0) with %x2 (the first operand) loaded
; from %x2ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 58 in place of the IR's 114.
115define <16 x i32> @vpternlog_v16i32_210_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
116; CHECK-LABEL: vpternlog_v16i32_210_load2:
117; CHECK:       ## %bb.0:
118; CHECK-NEXT:    vpternlogd $58, (%rdi), %zmm1, %zmm0
119; CHECK-NEXT:    retq
120  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
121  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
122  ret <16 x i32> %1
123}
124
; Intrinsic operands are (%x0, %x2, %x1) with %x0 (the first operand) loaded
; from %x0ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 58 in place of the IR's 114.
125define <16 x i32> @vpternlog_v16i32_021_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
126; CHECK-LABEL: vpternlog_v16i32_021_load0:
127; CHECK:       ## %bb.0:
128; CHECK-NEXT:    vpternlogd $58, (%rdi), %zmm1, %zmm0
129; CHECK-NEXT:    retq
130  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
131  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
132  ret <16 x i32> %1
133}
134
; Intrinsic operands are (%x0, %x2, %x1) with %x1 (the last operand) loaded from
; %x1ptr. The expected vpternlogd takes the load as its memory operand, (%rdi),
; and keeps the IR's immediate 114.
135define <16 x i32> @vpternlog_v16i32_021_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
136; CHECK-LABEL: vpternlog_v16i32_021_load1:
137; CHECK:       ## %bb.0:
138; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0
139; CHECK-NEXT:    retq
140  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
141  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
142  ret <16 x i32> %1
143}
144
; Intrinsic operands are (%x0, %x2, %x1) with %x2 (the middle operand) loaded
; from %x2ptr. The expected vpternlogd takes the load as its memory operand,
; (%rdi), and uses immediate 116 in place of the IR's 114.
145define <16 x i32> @vpternlog_v16i32_021_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
146; CHECK-LABEL: vpternlog_v16i32_021_load2:
147; CHECK:       ## %bb.0:
148; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0
149; CHECK-NEXT:    retq
150  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
151  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
152  ret <16 x i32> %1
153}
154
; Merge-masked baseline: the intrinsic result is selected per lane by %mask,
; with %x0 (the first intrinsic operand) kept on inactive lanes. The expected
; code moves the mask into %k1 and issues a {%k1}-masked vpternlogd with the
; IR's immediate 114.
155define <16 x i32> @vpternlog_v16i32_012_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
156; CHECK-LABEL: vpternlog_v16i32_012_mask:
157; CHECK:       ## %bb.0:
158; CHECK-NEXT:    kmovd %edi, %k1
159; CHECK-NEXT:    vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1}
160; CHECK-NEXT:    retq
161  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
162  %2 = bitcast i16 %mask to <16 x i1>
163  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
164  ret <16 x i32> %3
165}
166
; Intrinsic operands are (%x1, %x0, %x2) and the select keeps %x1 (the first
; intrinsic operand) on inactive lanes. The expected masked vpternlogd writes
; %zmm1 with the IR's immediate 114, followed by a vmovdqa64 copy into %zmm0.
167define <16 x i32> @vpternlog_v16i32_102_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
168; CHECK-LABEL: vpternlog_v16i32_102_mask:
169; CHECK:       ## %bb.0:
170; CHECK-NEXT:    kmovd %edi, %k1
171; CHECK-NEXT:    vpternlogd $114, %zmm2, %zmm0, %zmm1 {%k1}
172; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
173; CHECK-NEXT:    retq
174  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
175  %2 = bitcast i16 %mask to <16 x i1>
176  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
177  ret <16 x i32> %3
178}
179
; Intrinsic operands are (%x2, %x1, %x0) and the select keeps %x2 (the first
; intrinsic operand) on inactive lanes. The expected masked vpternlogd writes
; %zmm2 with the IR's immediate 114, followed by a vmovdqa64 copy into %zmm0.
180define <16 x i32> @vpternlog_v16i32_210_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
181; CHECK-LABEL: vpternlog_v16i32_210_mask:
182; CHECK:       ## %bb.0:
183; CHECK-NEXT:    kmovd %edi, %k1
184; CHECK-NEXT:    vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
185; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
186; CHECK-NEXT:    retq
187  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
188  %2 = bitcast i16 %mask to <16 x i1>
189  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
190  ret <16 x i32> %3
191}
192
; Intrinsic operands are (%x0, %x1, %x2) but the select keeps %x1 (the second
; intrinsic operand) on inactive lanes. The expected masked vpternlogd writes
; %zmm1 with immediate 78 in place of the IR's 114, followed by a vmovdqa64 copy
; into %zmm0.
193define <16 x i32> @vpternlog_v16i32_012_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
194; CHECK-LABEL: vpternlog_v16i32_012_mask1:
195; CHECK:       ## %bb.0:
196; CHECK-NEXT:    kmovd %edi, %k1
197; CHECK-NEXT:    vpternlogd $78, %zmm2, %zmm0, %zmm1 {%k1}
198; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
199; CHECK-NEXT:    retq
200  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
201  %mask.cast = bitcast i16 %mask to <16 x i1>
202  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
203  ret <16 x i32> %res2
204}
205
; Intrinsic operands are (%x0, %x1, %x2) but the select keeps %x2 (the third
; intrinsic operand) on inactive lanes. The expected masked vpternlogd writes
; %zmm2 with immediate 58 in place of the IR's 114, followed by a vmovdqa64 copy
; into %zmm0.
206define <16 x i32> @vpternlog_v16i32_012_mask2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
207; CHECK-LABEL: vpternlog_v16i32_012_mask2:
208; CHECK:       ## %bb.0:
209; CHECK-NEXT:    kmovd %edi, %k1
210; CHECK-NEXT:    vpternlogd $58, %zmm0, %zmm1, %zmm2 {%k1}
211; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
212; CHECK-NEXT:    retq
213  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
214  %mask.cast = bitcast i16 %mask to <16 x i1>
215  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
216  ret <16 x i32> %res2
217}
218
; %x0 is loaded from %x0ptr and is both the first intrinsic operand and the
; value the select keeps on inactive lanes. The expected code does not fold the
; load: it loads into %zmm2 with a separate vmovdqa64, runs a register-only
; masked vpternlogd with the IR's immediate 114, then copies %zmm2 into %zmm0.
219define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
220; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
221; CHECK:       ## %bb.0:
222; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
223; CHECK-NEXT:    kmovd %esi, %k1
224; CHECK-NEXT:    vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
225; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
226; CHECK-NEXT:    retq
227  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
228  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
229  %2 = bitcast i16 %mask to <16 x i1>
230  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
231  ret <16 x i32> %3
232}
233
; Intrinsic operands are (%x0, %x1, %x2) with %x0 loaded from %x0ptr, the IR
; immediate is 33, and the select keeps %x1 on inactive lanes. The expected
; code folds the load into a masked vpternlogd whose destination holds %x1.
;
; The instruction operands are therefore (%x1, %x2, load of %x0), while the
; intrinsic operands are (load of %x0, %x1, %x2). Immediate 33 has bits 0 and 5
; set, i.e. the intrinsic yields 1 for inputs (0,0,0) and (1,0,1). In
; instruction operand order those inputs are (0,0,0) and (0,1,1), which are
; immediate bits 0 and 3, giving 9. This is the same operand arrangement as
; vpternlog_v16i32_012_load0, where 114 maps to 46 under the same rule.
;
; NOTE(review): the assertion previously recorded $65 (bits 0 and 6), which
; encodes a different boolean function from the IR. The value below was
; corrected by hand; confirm llc emits $9 and regenerate the assertions with
; utils/update_llc_test_checks.py. If llc still emits $65, that is a miscompile
; in the compiler, not in this test.
234define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
235; CHECK-LABEL: vpternlog_v16i32_012_load0_mask1:
236; CHECK:       ## %bb.0:
237; CHECK-NEXT:    kmovd %esi, %k1
238; CHECK-NEXT:    vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1}
239; CHECK-NEXT:    retq
240  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
241  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
242  %mask.cast = bitcast i16 %mask to <16 x i1>
243  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
244  ret <16 x i32> %res2
245}
246
; Intrinsic operands are (%x0, %x1, %x2) with %x0 loaded from %x0ptr, the IR
; immediate is 33, and the select keeps %x2 on inactive lanes. The expected
; masked vpternlogd folds the load as (%rdi), writes %zmm1 with immediate 33,
; and is followed by a vmovdqa64 copy into %zmm0.
247define <16 x i32> @vpternlog_v16i32_012_load0_mask2(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
248; CHECK-LABEL: vpternlog_v16i32_012_load0_mask2:
249; CHECK:       ## %bb.0:
250; CHECK-NEXT:    kmovd %esi, %k1
251; CHECK-NEXT:    vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
252; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
253; CHECK-NEXT:    retq
254  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
255  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
256  %mask.cast = bitcast i16 %mask to <16 x i1>
257  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
258  ret <16 x i32> %res2
259}
260
; Intrinsic operands are (%x0, %x1, %x2) with %x1 loaded from %x1ptr, and the
; select keeps %x0 on inactive lanes. The expected masked vpternlogd folds the
; load as (%rdi) and uses immediate 116 in place of the IR's 114.
261define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
262; CHECK-LABEL: vpternlog_v16i32_012_load1_mask:
263; CHECK:       ## %bb.0:
264; CHECK-NEXT:    kmovd %esi, %k1
265; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
266; CHECK-NEXT:    retq
267  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
268  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
269  %2 = bitcast i16 %mask to <16 x i1>
270  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
271  ret <16 x i32> %3
272}
273
; Intrinsic operands are (%x0, %x1, %x2) with %x1 loaded from %x1ptr, the IR
; immediate is 33, and the select keeps %x2 on inactive lanes. The expected
; code folds the load into a masked vpternlogd whose destination holds %x2,
; then copies the result into %zmm0.
;
; The instruction operands are therefore (%x2, %x0, load of %x1), while the
; intrinsic operands are (%x0, load of %x1, %x2). Immediate 33 has bits 0 and 5
; set, i.e. the intrinsic yields 1 for inputs (0,0,0) and (1,0,1). In
; instruction operand order those inputs are (0,0,0) and (1,1,0), which are
; immediate bits 0 and 6, giving 65.
;
; NOTE(review): the assertion previously recorded $9 (bits 0 and 3), which
; encodes a different boolean function from the IR. The value below was
; corrected by hand; confirm llc emits $65 and regenerate the assertions with
; utils/update_llc_test_checks.py. If llc still emits $9, that is a miscompile
; in the compiler, not in this test.
274define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
275; CHECK-LABEL: vpternlog_v16i32_012_load1_mask2:
276; CHECK:       ## %bb.0:
277; CHECK-NEXT:    kmovd %esi, %k1
278; CHECK-NEXT:    vpternlogd $65, (%rdi), %zmm0, %zmm1 {%k1}
279; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
280; CHECK-NEXT:    retq
281  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
282  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
283  %mask.cast = bitcast i16 %mask to <16 x i1>
284  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
285  ret <16 x i32> %res2
286}
287
; Intrinsic operands are (%x0, %x1, %x2) with %x2 loaded from %x2ptr, and the
; select keeps %x0 on inactive lanes. The expected masked vpternlogd folds the
; load as (%rdi) and keeps the IR's immediate 114.
288define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
289; CHECK-LABEL: vpternlog_v16i32_012_load2_mask:
290; CHECK:       ## %bb.0:
291; CHECK-NEXT:    kmovd %esi, %k1
292; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
293; CHECK-NEXT:    retq
294  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
295  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
296  %2 = bitcast i16 %mask to <16 x i1>
297  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
298  ret <16 x i32> %3
299}
300
; Intrinsic operands are (%x0, %x1, %x2) with %x2 loaded from %x2ptr, the IR
; immediate is 33, and the select keeps %x1 on inactive lanes. The expected
; masked vpternlogd folds the load as (%rdi), writes %zmm1 with immediate 9,
; and is followed by a vmovdqa64 copy into %zmm0.
301define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
302; CHECK-LABEL: vpternlog_v16i32_012_load2_mask1:
303; CHECK:       ## %bb.0:
304; CHECK-NEXT:    kmovd %esi, %k1
305; CHECK-NEXT:    vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
306; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
307; CHECK-NEXT:    retq
308  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
309  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
310  %mask.cast = bitcast i16 %mask to <16 x i1>
311  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
312  ret <16 x i32> %res2
313}
314
; Intrinsic operands are (%x1, %x0, %x2) with %x0 loaded from %x0ptr, and the
; select keeps %x1 (the first intrinsic operand) on inactive lanes. The expected
; masked vpternlogd folds the load as (%rdi) and uses immediate 116 in place of
; the IR's 114.
315define <16 x i32> @vpternlog_v16i32_102_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
316; CHECK-LABEL: vpternlog_v16i32_102_load0_mask:
317; CHECK:       ## %bb.0:
318; CHECK-NEXT:    kmovd %esi, %k1
319; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
320; CHECK-NEXT:    retq
321  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
322  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
323  %2 = bitcast i16 %mask to <16 x i1>
324  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
325  ret <16 x i32> %3
326}
327
; Intrinsic operands are (%x1, %x0, %x2); %x1 is loaded from %x1ptr and is also
; the value the select keeps on inactive lanes. The expected code does not fold
; the load: it loads into %zmm2 with a separate vmovdqa64, runs a register-only
; masked vpternlogd with the IR's immediate 114, then copies %zmm2 into %zmm0.
328define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
329; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
330; CHECK:       ## %bb.0:
331; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
332; CHECK-NEXT:    kmovd %esi, %k1
333; CHECK-NEXT:    vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
334; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
335; CHECK-NEXT:    retq
336  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
337  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
338  %2 = bitcast i16 %mask to <16 x i1>
339  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
340  ret <16 x i32> %3
341}
342
; Intrinsic operands are (%x1, %x0, %x2) with %x2 loaded from %x2ptr, and the
; select keeps %x1 on inactive lanes. The expected masked vpternlogd folds the
; load as (%rdi), writes %zmm1 with the IR's immediate 114, and is followed by
; a vmovdqa64 copy into %zmm0.
343define <16 x i32> @vpternlog_v16i32_102_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
344; CHECK-LABEL: vpternlog_v16i32_102_load2_mask:
345; CHECK:       ## %bb.0:
346; CHECK-NEXT:    kmovd %esi, %k1
347; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
348; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
349; CHECK-NEXT:    retq
350  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
351  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
352  %2 = bitcast i16 %mask to <16 x i1>
353  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
354  ret <16 x i32> %3
355}
356
; Intrinsic operands are (%x2, %x1, %x0) with %x0 loaded from %x0ptr, and the
; select keeps %x2 (the first intrinsic operand) on inactive lanes. The expected
; masked vpternlogd folds the load as (%rdi), writes %zmm1 with the IR's
; immediate 114, and is followed by a vmovdqa64 copy into %zmm0.
357define <16 x i32> @vpternlog_v16i32_210_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
358; CHECK-LABEL: vpternlog_v16i32_210_load0_mask:
359; CHECK:       ## %bb.0:
360; CHECK-NEXT:    kmovd %esi, %k1
361; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
362; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
363; CHECK-NEXT:    retq
364  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
365  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
366  %2 = bitcast i16 %mask to <16 x i1>
367  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
368  ret <16 x i32> %3
369}
370
; Intrinsic operands are (%x2, %x1, %x0) with %x1 loaded from %x1ptr, and the
; select keeps %x2 on inactive lanes. The expected masked vpternlogd folds the
; load as (%rdi), writes %zmm1 with immediate 116 in place of the IR's 114, and
; is followed by a vmovdqa64 copy into %zmm0.
371define <16 x i32> @vpternlog_v16i32_210_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
372; CHECK-LABEL: vpternlog_v16i32_210_load1_mask:
373; CHECK:       ## %bb.0:
374; CHECK-NEXT:    kmovd %esi, %k1
375; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm0, %zmm1 {%k1}
376; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
377; CHECK-NEXT:    retq
378  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
379  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
380  %2 = bitcast i16 %mask to <16 x i1>
381  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
382  ret <16 x i32> %3
383}
384
; Intrinsic operands are (%x2, %x1, %x0); %x2 is loaded from %x2ptr and is also
; the value the select keeps on inactive lanes. The expected code does not fold
; the load: it loads into %zmm2 with a separate vmovdqa64, runs a register-only
; masked vpternlogd with the IR's immediate 114, then copies %zmm2 into %zmm0.
385define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
386; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
387; CHECK:       ## %bb.0:
388; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
389; CHECK-NEXT:    kmovd %esi, %k1
390; CHECK-NEXT:    vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
391; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
392; CHECK-NEXT:    retq
393  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
394  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
395  %2 = bitcast i16 %mask to <16 x i1>
396  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
397  ret <16 x i32> %3
398}
399
; Intrinsic operands are (%x0, %x2, %x1); %x0 is loaded from %x0ptr and is also
; the value the select keeps on inactive lanes. The expected code does not fold
; the load: it loads into %zmm2 with a separate vmovdqa64, runs a register-only
; masked vpternlogd with the IR's immediate 114, then copies %zmm2 into %zmm0.
400define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
401; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
402; CHECK:       ## %bb.0:
403; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
404; CHECK-NEXT:    kmovd %esi, %k1
405; CHECK-NEXT:    vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
406; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
407; CHECK-NEXT:    retq
408  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
409  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
410  %2 = bitcast i16 %mask to <16 x i1>
411  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
412  ret <16 x i32> %3
413}
414
; Intrinsic operands are (%x0, %x2, %x1) with %x1 (the last operand) loaded from
; %x1ptr, and the select keeps %x0 on inactive lanes. The expected masked
; vpternlogd folds the load as (%rdi) and keeps the IR's immediate 114.
415define <16 x i32> @vpternlog_v16i32_021_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
416; CHECK-LABEL: vpternlog_v16i32_021_load1_mask:
417; CHECK:       ## %bb.0:
418; CHECK-NEXT:    kmovd %esi, %k1
419; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
420; CHECK-NEXT:    retq
421  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
422  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
423  %2 = bitcast i16 %mask to <16 x i1>
424  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
425  ret <16 x i32> %3
426}
427
; Intrinsic operands are (%x0, %x2, %x1) with %x2 (the middle operand) loaded
; from %x2ptr, and the select keeps %x0 on inactive lanes. The expected masked
; vpternlogd folds the load as (%rdi) and uses immediate 116 in place of the
; IR's 114.
428define <16 x i32> @vpternlog_v16i32_021_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
429; CHECK-LABEL: vpternlog_v16i32_021_load2_mask:
430; CHECK:       ## %bb.0:
431; CHECK-NEXT:    kmovd %esi, %k1
432; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
433; CHECK-NEXT:    retq
434  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
435  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
436  %2 = bitcast i16 %mask to <16 x i1>
437  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
438  ret <16 x i32> %3
439}
440
; Zero-masked baseline: the intrinsic result is selected per lane by %mask, with
; zeroinitializer on inactive lanes. The expected vpternlogd carries
; {%k1} {z} and keeps the IR's immediate 114.
441define <16 x i32> @vpternlog_v16i32_012_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
442; CHECK-LABEL: vpternlog_v16i32_012_maskz:
443; CHECK:       ## %bb.0:
444; CHECK-NEXT:    kmovd %edi, %k1
445; CHECK-NEXT:    vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1} {z}
446; CHECK-NEXT:    retq
447  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
448  %2 = bitcast i16 %mask to <16 x i1>
449  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
450  ret <16 x i32> %3
451}
452
; Zero-masked variant with intrinsic operands (%x1, %x0, %x2). The expected
; {%k1} {z} vpternlogd keeps the baseline register order and uses immediate 78
; in place of the IR's 114.
453define <16 x i32> @vpternlog_v16i32_102_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
454; CHECK-LABEL: vpternlog_v16i32_102_maskz:
455; CHECK:       ## %bb.0:
456; CHECK-NEXT:    kmovd %edi, %k1
457; CHECK-NEXT:    vpternlogd $78, %zmm2, %zmm1, %zmm0 {%k1} {z}
458; CHECK-NEXT:    retq
459  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
460  %2 = bitcast i16 %mask to <16 x i1>
461  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
462  ret <16 x i32> %3
463}
464
; Zero-masked variant with intrinsic operands (%x2, %x1, %x0). The expected
; {%k1} {z} vpternlogd writes %zmm0, lists %zmm1 before %zmm2, and uses
; immediate 92 in place of the IR's 114.
465define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
466; CHECK-LABEL: vpternlog_v16i32_210_maskz:
467; CHECK:       ## %bb.0:
468; CHECK-NEXT:    kmovd %edi, %k1
469; CHECK-NEXT:    vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z}
470; CHECK-NEXT:    retq
471  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
472  %2 = bitcast i16 %mask to <16 x i1>
473  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
474  ret <16 x i32> %3
475}
476
; Zero-masked variant of the 012_load0 case: operands (%x0, %x1, %x2) with %x0
; loaded from %x0ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 46.
477define <16 x i32> @vpternlog_v16i32_012_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
478; CHECK-LABEL: vpternlog_v16i32_012_load0_maskz:
479; CHECK:       ## %bb.0:
480; CHECK-NEXT:    kmovd %esi, %k1
481; CHECK-NEXT:    vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
482; CHECK-NEXT:    retq
483  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
484  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
485  %2 = bitcast i16 %mask to <16 x i1>
486  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
487  ret <16 x i32> %3
488}
489
; Zero-masked variant of the 012_load1 case: operands (%x0, %x1, %x2) with %x1
; loaded from %x1ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 116.
490define <16 x i32> @vpternlog_v16i32_012_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
491; CHECK-LABEL: vpternlog_v16i32_012_load1_maskz:
492; CHECK:       ## %bb.0:
493; CHECK-NEXT:    kmovd %esi, %k1
494; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
495; CHECK-NEXT:    retq
496  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
497  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
498  %2 = bitcast i16 %mask to <16 x i1>
499  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
500  ret <16 x i32> %3
501}
502
; Zero-masked variant of the 012_load2 case: operands (%x0, %x1, %x2) with %x2
; loaded from %x2ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and keeps immediate 114.
503define <16 x i32> @vpternlog_v16i32_012_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
504; CHECK-LABEL: vpternlog_v16i32_012_load2_maskz:
505; CHECK:       ## %bb.0:
506; CHECK-NEXT:    kmovd %esi, %k1
507; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
508; CHECK-NEXT:    retq
509  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
510  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
511  %2 = bitcast i16 %mask to <16 x i1>
512  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
513  ret <16 x i32> %3
514}
515
; Zero-masked variant of the 102_load0 case: operands (%x1, %x0, %x2) with %x0
; loaded from %x0ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 116.
516define <16 x i32> @vpternlog_v16i32_102_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
517; CHECK-LABEL: vpternlog_v16i32_102_load0_maskz:
518; CHECK:       ## %bb.0:
519; CHECK-NEXT:    kmovd %esi, %k1
520; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
521; CHECK-NEXT:    retq
522  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
523  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
524  %2 = bitcast i16 %mask to <16 x i1>
525  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
526  ret <16 x i32> %3
527}
528
; Zero-masked variant of the 102_load1 case: operands (%x1, %x0, %x2) with %x1
; loaded from %x1ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 46.
529define <16 x i32> @vpternlog_v16i32_102_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
530; CHECK-LABEL: vpternlog_v16i32_102_load1_maskz:
531; CHECK:       ## %bb.0:
532; CHECK-NEXT:    kmovd %esi, %k1
533; CHECK-NEXT:    vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
534; CHECK-NEXT:    retq
535  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
536  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
537  %2 = bitcast i16 %mask to <16 x i1>
538  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
539  ret <16 x i32> %3
540}
541
; Zero-masked variant of the 102_load2 case: operands (%x1, %x0, %x2) with %x2
; loaded from %x2ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 78.
542define <16 x i32> @vpternlog_v16i32_102_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
543; CHECK-LABEL: vpternlog_v16i32_102_load2_maskz:
544; CHECK:       ## %bb.0:
545; CHECK-NEXT:    kmovd %esi, %k1
546; CHECK-NEXT:    vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
547; CHECK-NEXT:    retq
548  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
549  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
550  %2 = bitcast i16 %mask to <16 x i1>
551  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
552  ret <16 x i32> %3
553}
554
; Zero-masked variant of the 210_load0 case: operands (%x2, %x1, %x0) with %x0
; loaded from %x0ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 78.
555define <16 x i32> @vpternlog_v16i32_210_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
556; CHECK-LABEL: vpternlog_v16i32_210_load0_maskz:
557; CHECK:       ## %bb.0:
558; CHECK-NEXT:    kmovd %esi, %k1
559; CHECK-NEXT:    vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
560; CHECK-NEXT:    retq
561  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
562  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
563  %2 = bitcast i16 %mask to <16 x i1>
564  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
565  ret <16 x i32> %3
566}
567
; Zero-masked variant of the 210_load1 case: operands (%x2, %x1, %x0) with %x1
; loaded from %x1ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 92.
568define <16 x i32> @vpternlog_v16i32_210_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
569; CHECK-LABEL: vpternlog_v16i32_210_load1_maskz:
570; CHECK:       ## %bb.0:
571; CHECK-NEXT:    kmovd %esi, %k1
572; CHECK-NEXT:    vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1} {z}
573; CHECK-NEXT:    retq
574  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
575  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
576  %2 = bitcast i16 %mask to <16 x i1>
577  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
578  ret <16 x i32> %3
579}
580
; Zero-masked variant of the 210_load2 case: operands (%x2, %x1, %x0) with %x2
; loaded from %x2ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 58.
581define <16 x i32> @vpternlog_v16i32_210_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
582; CHECK-LABEL: vpternlog_v16i32_210_load2_maskz:
583; CHECK:       ## %bb.0:
584; CHECK-NEXT:    kmovd %esi, %k1
585; CHECK-NEXT:    vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
586; CHECK-NEXT:    retq
587  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
588  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
589  %2 = bitcast i16 %mask to <16 x i1>
590  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
591  ret <16 x i32> %3
592}
593
; Zero-masked variant of the 021_load0 case: operands (%x0, %x2, %x1) with %x0
; loaded from %x0ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 58.
594define <16 x i32> @vpternlog_v16i32_021_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
595; CHECK-LABEL: vpternlog_v16i32_021_load0_maskz:
596; CHECK:       ## %bb.0:
597; CHECK-NEXT:    kmovd %esi, %k1
598; CHECK-NEXT:    vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
599; CHECK-NEXT:    retq
600  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
601  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
602  %2 = bitcast i16 %mask to <16 x i1>
603  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
604  ret <16 x i32> %3
605}
606
; Zero-masked variant of the 021_load1 case: operands (%x0, %x2, %x1) with %x1
; loaded from %x1ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and keeps immediate 114.
607define <16 x i32> @vpternlog_v16i32_021_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
608; CHECK-LABEL: vpternlog_v16i32_021_load1_maskz:
609; CHECK:       ## %bb.0:
610; CHECK-NEXT:    kmovd %esi, %k1
611; CHECK-NEXT:    vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
612; CHECK-NEXT:    retq
613  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
614  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
615  %2 = bitcast i16 %mask to <16 x i1>
616  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
617  ret <16 x i32> %3
618}
619
; Zero-masked variant of the 021_load2 case: operands (%x0, %x2, %x1) with %x2
; loaded from %x2ptr and zeroinitializer on inactive lanes. The expected
; {%k1} {z} vpternlogd folds the load as (%rdi) and uses immediate 116.
620define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
621; CHECK-LABEL: vpternlog_v16i32_021_load2_maskz:
622; CHECK:       ## %bb.0:
623; CHECK-NEXT:    kmovd %esi, %k1
624; CHECK-NEXT:    vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
625; CHECK-NEXT:    retq
626  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
627  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
628  %2 = bitcast i16 %mask to <16 x i1>
629  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
630  ret <16 x i32> %3
631}
632
; Intrinsic operands are (%x0, %x1, %x2) where %x0 is an i32 loaded from %ptr_x0
; and splatted to all 16 lanes (insertelement into lane 0, then shufflevector
; with an all-zero mask). The expected vpternlogd uses the broadcast memory
; operand (%rdi){1to16} and immediate 46 in place of the IR's 114.
633define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
634; CHECK-LABEL: vpternlog_v16i32_012_broadcast0:
635; CHECK:       ## %bb.0:
636; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0
637; CHECK-NEXT:    retq
638  %x0_scalar = load i32, i32* %ptr_x0
639  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
640  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
641  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
642  ret <16 x i32> %1
643}
644
; Intrinsic operands are (%x0, %x1, %x2) where %x1 is an i32 loaded from %ptr_x1
; and splatted to all 16 lanes. The expected vpternlogd uses the broadcast
; memory operand (%rdi){1to16} and immediate 116 in place of the IR's 114.
645define <16 x i32> @vpternlog_v16i32_012_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
646; CHECK-LABEL: vpternlog_v16i32_012_broadcast1:
647; CHECK:       ## %bb.0:
648; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0
649; CHECK-NEXT:    retq
650  %x1_scalar = load i32, i32* %ptr_x1
651  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
652  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
653  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
654  ret <16 x i32> %1
655}
656
; Intrinsic operands are (%x0, %x1, %x2) where %x2 is an i32 loaded from %ptr_x2
; and splatted to all 16 lanes. The expected vpternlogd uses the broadcast
; memory operand (%rdi){1to16} and keeps the IR's immediate 114.
657define <16 x i32> @vpternlog_v16i32_012_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
658; CHECK-LABEL: vpternlog_v16i32_012_broadcast2:
659; CHECK:       ## %bb.0:
660; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0
661; CHECK-NEXT:    retq
662  %x2_scalar = load i32, i32* %ptr_x2
663  %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
664  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
665  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
666  ret <16 x i32> %1
667}
668
; Intrinsic operands are (%x1, %x0, %x2) where %x0 (the middle operand) is an
; i32 loaded from %ptr_x0 and splatted to all 16 lanes. The expected vpternlogd
; uses the broadcast memory operand (%rdi){1to16} and immediate 116 in place of
; the IR's 114.
669define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
670; CHECK-LABEL: vpternlog_v16i32_102_broadcast0:
671; CHECK:       ## %bb.0:
672; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0
673; CHECK-NEXT:    retq
674  %x0_scalar = load i32, i32* %ptr_x0
675  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
676  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
677  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
678  ret <16 x i32> %1
679}
680
; Calls the pternlog.d.512 intrinsic as (splat, %x0, %x2) with immediate 114, where the
; first operand (%x1) is an i32 loaded from %ptr_x1 and splatted to all 16 lanes.
; The expected assembly folds the load as a {1to16} memory operand and uses immediate $46.
681define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
682; CHECK-LABEL: vpternlog_v16i32_102_broadcast1:
683; CHECK:       ## %bb.0:
684; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0
685; CHECK-NEXT:    retq
686  %x1_scalar = load i32, i32* %ptr_x1
687  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
688  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
689  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
690  ret <16 x i32> %1
691}
692
; Calls the pternlog.d.512 intrinsic as (%x1, %x0, splat) with immediate 114, where the
; third operand (%x2) is an i32 loaded from %ptr_x2 and splatted to all 16 lanes.
; The expected assembly folds the load as a {1to16} memory operand and uses immediate $78.
693define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
694; CHECK-LABEL: vpternlog_v16i32_102_broadcast2:
695; CHECK:       ## %bb.0:
696; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0
697; CHECK-NEXT:    retq
698  %x2_scalar = load i32, i32* %ptr_x2
699  %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
700  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
701  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
702  ret <16 x i32> %1
703}
704
; Calls the pternlog.d.512 intrinsic as (%x2, %x1, splat) with immediate 114, where the
; third operand (%x0) is an i32 loaded from %ptr_x0 and splatted to all 16 lanes.
; The expected assembly folds the load as a {1to16} memory operand and uses immediate $78.
705define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
706; CHECK-LABEL: vpternlog_v16i32_210_broadcast0:
707; CHECK:       ## %bb.0:
708; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0
709; CHECK-NEXT:    retq
710  %x0_scalar = load i32, i32* %ptr_x0
711  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
712  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
713  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
714  ret <16 x i32> %1
715}
716
; Calls the pternlog.d.512 intrinsic as (%x2, splat, %x0) with immediate 114, where the
; second operand (%x1) is an i32 loaded from %ptr_x1 and splatted to all 16 lanes.
; The expected assembly folds the load as a {1to16} memory operand and uses immediate $92.
717define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
718; CHECK-LABEL: vpternlog_v16i32_210_broadcast1:
719; CHECK:       ## %bb.0:
720; CHECK-NEXT:    vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0
721; CHECK-NEXT:    retq
722  %x1_scalar = load i32, i32* %ptr_x1
723  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
724  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
725  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
726  ret <16 x i32> %1
727}
728
; Calls the pternlog.d.512 intrinsic as (splat, %x1, %x0) with immediate 114, where the
; first operand (%x2) is an i32 loaded from %ptr_x2 and splatted to all 16 lanes.
; The expected assembly folds the load as a {1to16} memory operand and uses immediate $58.
729define <16 x i32> @vpternlog_v16i32_210_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
730; CHECK-LABEL: vpternlog_v16i32_210_broadcast2:
731; CHECK:       ## %bb.0:
732; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0
733; CHECK-NEXT:    retq
734  %x2_scalar = load i32, i32* %ptr_x2
735  %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
736  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
737  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
738  ret <16 x i32> %1
739}
740
; Merge-masked variant: intrinsic called as (splat, %x1, %x2) with immediate 114, and lanes
; whose %mask bit is clear take the splat %x0 (the first operand, loaded from %x0ptr).
; The expected assembly does not fold the broadcast here: it emits a separate vpbroadcastd,
; a masked vpternlogd with immediate $114, and a vmovdqa64 copy into zmm0.
741define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
742; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask:
743; CHECK:       ## %bb.0:
744; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
745; CHECK-NEXT:    kmovd %esi, %k1
746; CHECK-NEXT:    vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
747; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
748; CHECK-NEXT:    retq
749  %x0scalar = load i32, i32* %x0ptr
750  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
751  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
752  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
753  %2 = bitcast i16 %mask to <16 x i1>
754  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
755  ret <16 x i32> %3
756}
757
; Merge-masked variant: intrinsic called as (%x0, splat, %x2) with immediate 114, where the
; second operand is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is clear
; take %x0 (the first operand).
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $116.
758define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
759; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask:
760; CHECK:       ## %bb.0:
761; CHECK-NEXT:    kmovd %esi, %k1
762; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
763; CHECK-NEXT:    retq
764  %x1scalar = load i32, i32* %x1ptr
765  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
766  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
767  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
768  %2 = bitcast i16 %mask to <16 x i1>
769  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
770  ret <16 x i32> %3
771}
772
; Merge-masked variant: intrinsic called as (%x0, %x1, splat) with immediate 114, where the
; third operand is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is clear
; take %x0 (the first operand).
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $114.
773define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
774; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask:
775; CHECK:       ## %bb.0:
776; CHECK-NEXT:    kmovd %esi, %k1
777; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
778; CHECK-NEXT:    retq
779  %x2scalar = load i32, i32* %x2ptr
780  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
781  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
782  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
783  %2 = bitcast i16 %mask to <16 x i1>
784  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
785  ret <16 x i32> %3
786}
787
; Merge-masked variant: intrinsic called as (%x1, splat, %x2) with immediate 114, where the
; second operand (%x0) is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is
; clear take %x1 (the first operand).
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $116.
788define <16 x i32> @vpternlog_v16i32_102_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
789; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_mask:
790; CHECK:       ## %bb.0:
791; CHECK-NEXT:    kmovd %esi, %k1
792; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
793; CHECK-NEXT:    retq
794  %x0scalar = load i32, i32* %x0ptr
795  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
796  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
797  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
798  %2 = bitcast i16 %mask to <16 x i1>
799  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
800  ret <16 x i32> %3
801}
802
; Merge-masked variant: intrinsic called as (splat, %x0, %x2) with immediate 114, and lanes
; whose %mask bit is clear take the splat %x1 (the first operand, loaded from %x1ptr).
; The expected assembly does not fold the broadcast here: it emits a separate vpbroadcastd,
; a masked vpternlogd with immediate $114, and a vmovdqa64 copy into zmm0.
803define <16 x i32> @vpternlog_v16i32_102_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
804; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_mask:
805; CHECK:       ## %bb.0:
806; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
807; CHECK-NEXT:    kmovd %esi, %k1
808; CHECK-NEXT:    vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
809; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
810; CHECK-NEXT:    retq
811  %x1scalar = load i32, i32* %x1ptr
812  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
813  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
814  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
815  %2 = bitcast i16 %mask to <16 x i1>
816  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
817  ret <16 x i32> %3
818}
819
; Merge-masked variant: intrinsic called as (%x1, %x0, splat) with immediate 114, where the
; third operand is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is clear
; take %x1 (the first operand).
; The expected assembly folds the load as a {1to16} operand with immediate $114, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
820define <16 x i32> @vpternlog_v16i32_102_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
821; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_mask:
822; CHECK:       ## %bb.0:
823; CHECK-NEXT:    kmovd %esi, %k1
824; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
825; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
826; CHECK-NEXT:    retq
827  %x2scalar = load i32, i32* %x2ptr
828  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
829  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
830  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
831  %2 = bitcast i16 %mask to <16 x i1>
832  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
833  ret <16 x i32> %3
834}
835
; Merge-masked variant: intrinsic called as (%x2, %x1, splat) with immediate 114, where the
; third operand (%x0) is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is
; clear take %x2 (the first operand).
; The expected assembly folds the load as a {1to16} operand with immediate $114, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
836define <16 x i32> @vpternlog_v16i32_210_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
837; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_mask:
838; CHECK:       ## %bb.0:
839; CHECK-NEXT:    kmovd %esi, %k1
840; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
841; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
842; CHECK-NEXT:    retq
843  %x0scalar = load i32, i32* %x0ptr
844  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
845  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
846  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
847  %2 = bitcast i16 %mask to <16 x i1>
848  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
849  ret <16 x i32> %3
850}
851
; Merge-masked variant: intrinsic called as (%x2, splat, %x0) with immediate 114, where the
; second operand (%x1) is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is
; clear take %x2 (the first operand).
; The expected assembly folds the load as a {1to16} operand with immediate $116, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
852define <16 x i32> @vpternlog_v16i32_210_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
853; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_mask:
854; CHECK:       ## %bb.0:
855; CHECK-NEXT:    kmovd %esi, %k1
856; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
857; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
858; CHECK-NEXT:    retq
859  %x1scalar = load i32, i32* %x1ptr
860  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
861  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
862  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
863  %2 = bitcast i16 %mask to <16 x i1>
864  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
865  ret <16 x i32> %3
866}
867
; Merge-masked variant: intrinsic called as (splat, %x1, %x0) with immediate 114, and lanes
; whose %mask bit is clear take the splat %x2 (the first operand, loaded from %x2ptr).
; The expected assembly does not fold the broadcast here: it emits a separate vpbroadcastd,
; a masked vpternlogd with immediate $114, and a vmovdqa64 copy into zmm0.
868define <16 x i32> @vpternlog_v16i32_210_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
869; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_mask:
870; CHECK:       ## %bb.0:
871; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
872; CHECK-NEXT:    kmovd %esi, %k1
873; CHECK-NEXT:    vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
874; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
875; CHECK-NEXT:    retq
876  %x2scalar = load i32, i32* %x2ptr
877  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
878  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
879  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
880  %2 = bitcast i16 %mask to <16 x i1>
881  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
882  ret <16 x i32> %3
883}
884
; Merge-masked variant: intrinsic called as (splat, %x2, %x1) with immediate 114, and lanes
; whose %mask bit is clear take the splat %x0 (the first operand, loaded from %x0ptr).
; The expected assembly does not fold the broadcast here: it emits a separate vpbroadcastd,
; a masked vpternlogd with immediate $114, and a vmovdqa64 copy into zmm0.
885define <16 x i32> @vpternlog_v16i32_021_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
886; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_mask:
887; CHECK:       ## %bb.0:
888; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
889; CHECK-NEXT:    kmovd %esi, %k1
890; CHECK-NEXT:    vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
891; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
892; CHECK-NEXT:    retq
893  %x0scalar = load i32, i32* %x0ptr
894  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
895  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
896  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
897  %2 = bitcast i16 %mask to <16 x i1>
898  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
899  ret <16 x i32> %3
900}
901
; Merge-masked variant: intrinsic called as (%x0, %x2, splat) with immediate 114, where the
; third operand (%x1) is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is
; clear take %x0 (the first operand).
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $114.
902define <16 x i32> @vpternlog_v16i32_021_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
903; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_mask:
904; CHECK:       ## %bb.0:
905; CHECK-NEXT:    kmovd %esi, %k1
906; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
907; CHECK-NEXT:    retq
908  %x1scalar = load i32, i32* %x1ptr
909  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
910  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
911  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
912  %2 = bitcast i16 %mask to <16 x i1>
913  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
914  ret <16 x i32> %3
915}
916
; Merge-masked variant: intrinsic called as (%x0, splat, %x1) with immediate 114, where the
; second operand (%x2) is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is
; clear take %x0 (the first operand).
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $116.
917define <16 x i32> @vpternlog_v16i32_021_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
918; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_mask:
919; CHECK:       ## %bb.0:
920; CHECK-NEXT:    kmovd %esi, %k1
921; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
922; CHECK-NEXT:    retq
923  %x2scalar = load i32, i32* %x2ptr
924  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
925  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
926  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
927  %2 = bitcast i16 %mask to <16 x i1>
928  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
929  ret <16 x i32> %3
930}
931
; Zero-masked variant: intrinsic called as (splat, %x1, %x2) with immediate 114, where the
; first operand is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is clear
; are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $46.
932define <16 x i32> @vpternlog_v16i32_012_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
933; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_maskz:
934; CHECK:       ## %bb.0:
935; CHECK-NEXT:    kmovd %esi, %k1
936; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
937; CHECK-NEXT:    retq
938  %x0scalar = load i32, i32* %x0ptr
939  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
940  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
941  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
942  %2 = bitcast i16 %mask to <16 x i1>
943  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
944  ret <16 x i32> %3
945}
946
; Zero-masked variant: intrinsic called as (%x0, splat, %x2) with immediate 114, where the
; second operand is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is clear
; are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $116.
947define <16 x i32> @vpternlog_v16i32_012_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
948; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_maskz:
949; CHECK:       ## %bb.0:
950; CHECK-NEXT:    kmovd %esi, %k1
951; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
952; CHECK-NEXT:    retq
953  %x1scalar = load i32, i32* %x1ptr
954  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
955  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
956  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
957  %2 = bitcast i16 %mask to <16 x i1>
958  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
959  ret <16 x i32> %3
960}
961
; Zero-masked variant: intrinsic called as (%x0, %x1, splat) with immediate 114, where the
; third operand is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is clear
; are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $114.
962define <16 x i32> @vpternlog_v16i32_012_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
963; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_maskz:
964; CHECK:       ## %bb.0:
965; CHECK-NEXT:    kmovd %esi, %k1
966; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
967; CHECK-NEXT:    retq
968  %x2scalar = load i32, i32* %x2ptr
969  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
970  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
971  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
972  %2 = bitcast i16 %mask to <16 x i1>
973  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
974  ret <16 x i32> %3
975}
976
; Zero-masked variant: intrinsic called as (%x1, splat, %x2) with immediate 114, where the
; second operand (%x0) is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $116.
977define <16 x i32> @vpternlog_v16i32_102_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
978; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_maskz:
979; CHECK:       ## %bb.0:
980; CHECK-NEXT:    kmovd %esi, %k1
981; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
982; CHECK-NEXT:    retq
983  %x0scalar = load i32, i32* %x0ptr
984  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
985  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
986  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
987  %2 = bitcast i16 %mask to <16 x i1>
988  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
989  ret <16 x i32> %3
990}
991
; Zero-masked variant: intrinsic called as (splat, %x0, %x2) with immediate 114, where the
; first operand (%x1) is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $46.
992define <16 x i32> @vpternlog_v16i32_102_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
993; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_maskz:
994; CHECK:       ## %bb.0:
995; CHECK-NEXT:    kmovd %esi, %k1
996; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
997; CHECK-NEXT:    retq
998  %x1scalar = load i32, i32* %x1ptr
999  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
1000  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1001  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
1002  %2 = bitcast i16 %mask to <16 x i1>
1003  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1004  ret <16 x i32> %3
1005}
1006
; Zero-masked variant: intrinsic called as (%x1, %x0, splat) with immediate 114, where the
; third operand is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is clear
; are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $78.
1007define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
1008; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz:
1009; CHECK:       ## %bb.0:
1010; CHECK-NEXT:    kmovd %esi, %k1
1011; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1012; CHECK-NEXT:    retq
1013  %x2scalar = load i32, i32* %x2ptr
1014  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
1015  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1016  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
1017  %2 = bitcast i16 %mask to <16 x i1>
1018  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1019  ret <16 x i32> %3
1020}
1021
; Zero-masked variant: intrinsic called as (%x2, %x1, splat) with immediate 114, where the
; third operand (%x0) is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $78.
1022define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
1023; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz:
1024; CHECK:       ## %bb.0:
1025; CHECK-NEXT:    kmovd %esi, %k1
1026; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1027; CHECK-NEXT:    retq
1028  %x0scalar = load i32, i32* %x0ptr
1029  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
1030  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1031  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
1032  %2 = bitcast i16 %mask to <16 x i1>
1033  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1034  ret <16 x i32> %3
1035}
1036
; Zero-masked variant: intrinsic called as (%x2, splat, %x0) with immediate 114, where the
; second operand (%x1) is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $92.
1037define <16 x i32> @vpternlog_v16i32_210_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
1038; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_maskz:
1039; CHECK:       ## %bb.0:
1040; CHECK-NEXT:    kmovd %esi, %k1
1041; CHECK-NEXT:    vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1042; CHECK-NEXT:    retq
1043  %x1scalar = load i32, i32* %x1ptr
1044  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
1045  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1046  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
1047  %2 = bitcast i16 %mask to <16 x i1>
1048  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1049  ret <16 x i32> %3
1050}
1051
; Zero-masked variant: intrinsic called as (splat, %x1, %x0) with immediate 114, where the
; first operand (%x2) is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $58.
1052define <16 x i32> @vpternlog_v16i32_210_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
1053; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_maskz:
1054; CHECK:       ## %bb.0:
1055; CHECK-NEXT:    kmovd %esi, %k1
1056; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1057; CHECK-NEXT:    retq
1058  %x2scalar = load i32, i32* %x2ptr
1059  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
1060  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1061  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
1062  %2 = bitcast i16 %mask to <16 x i1>
1063  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1064  ret <16 x i32> %3
1065}
1066
; Zero-masked variant: intrinsic called as (splat, %x2, %x1) with immediate 114, where the
; first operand (%x0) is an i32 loaded from %x0ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $58.
1067define <16 x i32> @vpternlog_v16i32_021_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
1068; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_maskz:
1069; CHECK:       ## %bb.0:
1070; CHECK-NEXT:    kmovd %esi, %k1
1071; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1072; CHECK-NEXT:    retq
1073  %x0scalar = load i32, i32* %x0ptr
1074  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
1075  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1076  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
1077  %2 = bitcast i16 %mask to <16 x i1>
1078  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1079  ret <16 x i32> %3
1080}
1081
; Zero-masked variant: intrinsic called as (%x0, %x2, splat) with immediate 114, where the
; third operand (%x1) is an i32 loaded from %x1ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $114.
1082define <16 x i32> @vpternlog_v16i32_021_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
1083; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_maskz:
1084; CHECK:       ## %bb.0:
1085; CHECK-NEXT:    kmovd %esi, %k1
1086; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1087; CHECK-NEXT:    retq
1088  %x1scalar = load i32, i32* %x1ptr
1089  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
1090  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1091  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
1092  %2 = bitcast i16 %mask to <16 x i1>
1093  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1094  ret <16 x i32> %3
1095}
1096
; Zero-masked variant: intrinsic called as (%x0, splat, %x1) with immediate 114, where the
; second operand (%x2) is an i32 loaded from %x2ptr and splatted; lanes whose %mask bit is
; clear are set to zero by the select.
; The expected assembly folds the load as a {1to16} operand under {%k1} {z} with immediate $116.
1097define <16 x i32> @vpternlog_v16i32_021_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
1098; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_maskz:
1099; CHECK:       ## %bb.0:
1100; CHECK-NEXT:    kmovd %esi, %k1
1101; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
1102; CHECK-NEXT:    retq
1103  %x2scalar = load i32, i32* %x2ptr
1104  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
1105  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1106  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
1107  %2 = bitcast i16 %mask to <16 x i1>
1108  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
1109  ret <16 x i32> %3
1110}
1111
; Merge-masked variant whose passthru is the second intrinsic operand: the intrinsic is
; called as (splat, %x1, %x2) with immediate 114, the first operand being an i32 loaded from
; %x0ptr and splatted; lanes whose %mask bit is clear take %x1.
; The expected assembly folds the load as a {1to16} operand under {%k1} with immediate $92.
1112define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask1(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
1113; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask1:
1114; CHECK:       ## %bb.0:
1115; CHECK-NEXT:    kmovd %esi, %k1
1116; CHECK-NEXT:    vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
1117; CHECK-NEXT:    retq
1118  %x0scalar = load i32, i32* %x0ptr
1119  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
1120  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1121  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
1122  %mask.cast = bitcast i16 %mask to <16 x i1>
1123  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
1124  ret <16 x i32> %res2
1125}
1126
; Merge-masked variant whose passthru is the third intrinsic operand: the intrinsic is
; called as (splat, %x1, %x2) with immediate 114, the first operand being an i32 loaded from
; %x0ptr and splatted; lanes whose %mask bit is clear take %x2.
; The expected assembly folds the load as a {1to16} operand with immediate $58, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
1127define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask2(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
1128; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask2:
1129; CHECK:       ## %bb.0:
1130; CHECK-NEXT:    kmovd %esi, %k1
1131; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1132; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
1133; CHECK-NEXT:    retq
1134  %x0scalar = load i32, i32* %x0ptr
1135  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
1136  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1137  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
1138  %mask.cast = bitcast i16 %mask to <16 x i1>
1139  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
1140  ret <16 x i32> %res2
1141}
1142
; Merge-masked variant whose passthru is the third intrinsic operand: the intrinsic is
; called as (%x0, splat, %x2) with immediate 114, the second operand being an i32 loaded
; from %x1ptr and splatted; lanes whose %mask bit is clear take %x2.
; The expected assembly folds the load as a {1to16} operand with immediate $46, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
1143define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask2(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
1144; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask2:
1145; CHECK:       ## %bb.0:
1146; CHECK-NEXT:    kmovd %esi, %k1
1147; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1148; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
1149; CHECK-NEXT:    retq
1150  %x1scalar = load i32, i32* %x1ptr
1151  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
1152  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1153  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
1154  %mask.cast = bitcast i16 %mask to <16 x i1>
1155  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
1156  ret <16 x i32> %res2
1157}
1158
; Merge-masked variant whose passthru is the second intrinsic operand: the intrinsic is
; called as (%x0, %x1, splat) with immediate 114, the third operand being an i32 loaded from
; %x2ptr and splatted; lanes whose %mask bit is clear take %x1.
; The expected assembly folds the load as a {1to16} operand with immediate $78, writes the
; masked result to zmm1, and then copies it to zmm0 with vmovdqa64.
1159define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask1(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
1160; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask1:
1161; CHECK:       ## %bb.0:
1162; CHECK-NEXT:    kmovd %esi, %k1
1163; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1164; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
1165; CHECK-NEXT:    retq
1166  %x2scalar = load i32, i32* %x2ptr
1167  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
1168  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1169  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
1170  %mask.cast = bitcast i16 %mask to <16 x i1>
1171  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
1172  ret <16 x i32> %res2
1173}
1174