; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

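;;
;; Floating-Point Gather/Scatter
;;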
declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512(i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512(i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512(i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512(i8*, i8, <8 x i64>, <8 x double>, i32)

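; The gather_mask_* tests gather through a variable mask, offset the
; indices, and scatter the result through the same mask. Gather and scatter
; both clear their mask register as elements complete, so the compiler
; copies the mask (kmovq %k1, %k2) and lets the gather consume the copy,
; leaving %k1 intact for the scatter.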
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x double> %x, i32 4)
  ret void
}
;;
;; Integer Gather/Scatter
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512(i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512(i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512(i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512(i8*, i8, <8 x i64>, <8 x i64>, i32)

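; The integer element forms below follow the same gather/offset/scatter
; pattern using vpgather*/vpscatter*.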
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x i64> %x, i32 4)
  ret void
}

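; The *_execdomain tests check that masked FP gathers and scatters stay in
; the floating-point execution domain: the gathered or scattered value is
; moved with vmovaps/vmovapd rather than an integer-domain move.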
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %zmm1
; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %ymm1
; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x float> %x, i32 4)
  ret void
}

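; With a constant all-ones mask, the mask register is materialized with
; kxnorw %k0, %k0, %kN instead of being loaded from a GPR.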
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 -1, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

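; Gather/scatter prefetches: the trailing hint operand selects the
; locality level, 0 -> the T0 form (vgatherpf0*/vscatterpf0*) and
; 1 -> the T1 form (vgatherpf1*/vscatterpf1*).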
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    kxorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    kmovb %eax, %k1
; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    movb $120, %al
; CHECK-NEXT:    kmovb %eax, %k1
; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}

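;;
;; 128- and 256-bit Gather/Scatter (AVX512VL forms)
;;
; Each test calls the intrinsic twice, typically once with the variable i8
; mask and once with an all-ones mask at a different scale, and combines
; the results so that neither call can be folded away.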
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

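; The scatter counterparts store once through the variable mask and once
; through an all-ones mask (again folded to kxnorw), at different scales.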
declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

define void @test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

define void @test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

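; The scattersiv forms take dword indices, so the index register can be
; narrower than the data, e.g. vscatterdpd %ymm1, (%rdi,%xmm0,2).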
declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

define void @test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

define void @test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

define void @test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

define void @test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

define void @test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

define void @test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

define void @test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

define void @test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

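; Constant masks: all-ones folds to kxnorw, all-zeroes to kxorw, and other
; immediates go through a GPR (movb $N, %al / kmovb %eax, %k1). Note the
; scatter is emitted even when the mask is known to be all zeroes.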
define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    kmovb %eax, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    movb $96, %al
; CHECK-NEXT:    kmovb %eax, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

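; The same constant-mask coverage for the 512-bit gather, with 16-bit
; masks materialized via movw/kmovw.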
define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) {
; CHECK-LABEL: gather_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
; CHECK-NEXT:    kxorw %k0, %k0, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT:    movw $1, %ax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm4
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1}
; CHECK-NEXT:    movw $220, %ax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm0
; CHECK-NEXT:    vaddps %zmm4, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 -1, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 0, i32 4)
  %res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 1, i32 4)
  %res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 220, i32 4)

  %res4 = fadd <16 x float> %res, %res1
  %res5 = fadd <16 x float> %res3, %res2
  %res6 = fadd <16 x float> %res5, %res4
  ret <16 x float> %res6
}