; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512(i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512(i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512(i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512(i8*, i8, <8 x i64>, <8 x double>, i32)

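; All of the 512-bit gather/scatter intrinsics above share one operand order:
;   gather:  (passthru vector, i8* base, index vector, integer mask, i32 scale)
;   scatter: (i8* base, integer mask, index vector, value vector, i32 scale)
; The mask is i16 for 16 elements and i8 for 8, and the scale becomes the
; scale of the generated addressing mode, e.g. (%rsi,%zmm0,4).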
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x float> %x, i32 4)
  ret void
}
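; The gather above reads its mask from a copy (%k2): AVX-512 gathers and
; scatters use the mask register as a completion mask and leave it zeroed,
; so the original value in %k1 is kept live for the scatter.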

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x double> %x, i32 4)
  ret void
}
;;
;; Integer Gather/Scatter
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512(i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512(i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512(i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512(i8*, i8, <8 x i64>, <8 x i64>, i32)

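; Integer variants of the tests above: the dpi/dpq/qpi/qpq names appear to
; encode dword/qword indices crossed with i32/i64 elements, and these tests
; expect the vpgather*/vpscatter* instruction forms instead.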
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x i64> %x, i32 4)
  ret void
}

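; The *_execdomain tests below check that the moves surrounding an FP
; gather/scatter stay in the floating-point domain (vmovaps/vmovapd)
; instead of being emitted as integer-domain moves.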
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %zmm1
; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %ymm1
; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x float> %x, i32 4)
  ret void
}

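; An all-ones mask (i8 -1) needs no transfer from a GPR; it is materialized
; directly in a mask register with kxnorw %k, %k, %k.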
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 -1, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

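; The prefetch intrinsics take (mask, index vector, i8* base, scale, hint);
; the trailing hint operand selects the locality: 0 lowers to the *pf0*
; (T0) instructions and 1 to the *pf1* (T1) instructions.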
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}

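; The intrinsics below are the 128/256-bit (VL) gather variants. Names with
; "div" take a 64-bit index vector and select the q-indexed instructions;
; names with "siv" take a 32-bit index vector and select the d-indexed ones.
; The .sf/.df/.si/.di suffix names the element type being gathered.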
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

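; The matching VL scatter variants follow. Each test stores once under the
; incoming i8 mask and once under an all-ones mask, using two different
; scales.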
declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

define void @test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

define void @test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

define void @test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

define void @test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

define void @test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

define void @test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

define void @test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

define void @test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

define void @test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

define void @test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}