; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s
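;
; These tests show, in broad terms, how -x86-speculative-load-hardening masks
; the operands of AVX2/AVX-512 gather intrinsics: the predicate state derived
; from %rsp (sarq $63) is OR'd into the gather base pointer and broadcast into
; the index vector before the gather executes, then merged back into %rsp
; (shlq $47 + orq) before returning.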

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(i8* %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(i8* %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(i8* %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(i8* %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(i8* %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(i8* %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(i8* %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(i8* %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(i8* %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(i8* %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(i8* %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(i8* %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(i8* %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(i8* %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(i8* %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(i8* %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

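; The 512-bit AVX-512 forms below (attribute #1) appear to follow the same
; pattern, but use an all-ones k-mask produced by kxnorw and, where the index
; vector is 512 bits wide, broadcast the predicate state directly from %rax
; with vpbroadcastq.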
declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}


declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32);

define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, i8* %b) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm1
; CHECK-NEXT:    vporq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, i8* %b, i32 4, i32 3)
  ret void
}

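; The gather3* intrinsics below (attribute #2) exercise the same hardening on
; the 128-bit and 256-bit AVX-512VL gather forms.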
declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

attributes #0 = { nounwind "target-features"="+avx2" }
attributes #1 = { nounwind "target-features"="+avx512f" }
attributes #2 = { nounwind "target-features"="+avx512vl" }