; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s
; Masked-gather intrinsic: per-lane loads through %ptrs where %masks is set;
; inactive lanes take the corresponding %passthro element. %align is the
; alignment operand of the intrinsic call.
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

; Gather two i32 elements through a <2 x i32*> loaded from %ptr.
; With AVX2 this selects vpgatherdd (i386 run, 32-bit pointers) or
; vpgatherqd (x86_64 runs, 64-bit pointers); the NOGATHER run is
; scalarized into a test/branch per mask lane plus a final vblendvpd
; against the passthru vector.
define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB0_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}
58
; Same v2i32 gather as above, but the result is widened to 4 lanes with a
; shufflevector (indices 2 and 3 select lanes of the undef second operand).
; Checks that the widening folds into the gather's output handling instead
; of emitting extra shuffles on the gather paths.
define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB1_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}
110
; Floating-point variant of the 2-element masked gather (same operand
; layout: pointers, alignment, mask, passthru).
declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

; Gather two float elements through a <2 x float*> loaded from %ptr.
; AVX2 runs use vgatherdps / vgatherqps; the NOGATHER run scalarizes with
; vmovss / vinsertps per active lane and blends with vblendvps.
define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB2_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}
160
; v2float gather widened to <4 x float> by shufflevector (high two indices
; pick lanes of the undef second operand). On the gather paths the widening
; is free; the NOGATHER path keeps a trailing vpermilps.
define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB3_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}
209
210
; 4-element i32 masked gather (pointers, alignment, mask, passthru).
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

; Unlike the tests above, the pointer vector arrives directly as an
; argument (in a ymm on x86_64, so the X64 run needs vzeroupper).
; AVX2 lowers to vpgatherdd / vpgatherqd; the NOGATHER run emits four
; load-and-insert branches followed by a vblendvps with the passthru.
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}
270
; 4-element float masked gather (pointers, alignment, mask, passthru).
declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

; Float version of masked_gather_v4i32: pointer vector passed by value.
; AVX2 lowers to vgatherdps / vgatherqps; the NOGATHER run uses vmovss /
; vinsertps per active lane and a final vblendvps.
define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT:  .LBB5_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}
330
; 8-element i32 masked gather (pointers, alignment, mask, passthru).
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

; Gather eight i32 elements through a <8 x i32*> loaded from %ptr.
; The i386 run fits all eight 32-bit pointers in one ymm and emits a single
; vpgatherdd; the x86_64 run needs two vpgatherqd ops (64-bit pointers span
; two ymm registers). The NOGATHER run emits eight branchy lane loads.
define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}
446
; 8-element float masked gather (pointers, alignment, mask, passthru).
declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

; Float counterpart of masked_gather_v8i32: one vgatherdps on i386, two
; vgatherqps ops on x86_64 (pointer vector spans two ymm registers), and
; eight scalarized conditional loads on the NOGATHER run.
define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}
563
; 4-element i64 masked gather (pointers, alignment, mask, passthru).
declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

; Gather four i64 elements through a <4 x i64*> loaded from %ptr.
; AVX2 runs sign-extend the mask to 64-bit lanes (vpslld + vpmovsxdq) and
; use vpgatherdq (i386) / vpgatherqq (x86_64); the NOGATHER run scalarizes
; into four conditional vpinsrq loads and a vblendvpd.
define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}
638
; 4-element double masked gather (pointers, alignment, mask, passthru).
declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

; Double counterpart of masked_gather_v4i64: vgatherdpd (i386) /
; vgatherqpd (x86_64) on AVX2; the NOGATHER run scalarizes with
; vmovsd / vmovhpd / vmovlpd lane loads and a final vblendvpd.
define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
; NOGATHER-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}
713
714declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)
715
716; Check lowering of a <2 x i64> masked gather through a vector of pointers:
716; AVX2 targets select vpgatherqq (the X86 run first widens the 32-bit pointer
716; vector with vpmovsxdq); the NOGATHER run falls back to per-lane mask tests
716; (vpextrb + testb) with scalar loads, merged into the passthru via vblendvpd.
716define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
717; X86-LABEL: masked_gather_v2i64:
718; X86:       # %bb.0: # %entry
719; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
720; X86-NEXT:    vpmovsxdq (%eax), %xmm2
721; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
722; X86-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
723; X86-NEXT:    vmovdqa %xmm1, %xmm0
724; X86-NEXT:    retl
725;
726; X64-LABEL: masked_gather_v2i64:
727; X64:       # %bb.0: # %entry
728; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
729; X64-NEXT:    vmovdqa (%rdi), %xmm2
730; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
731; X64-NEXT:    vmovdqa %xmm1, %xmm0
732; X64-NEXT:    retq
733;
734; NOGATHER-LABEL: masked_gather_v2i64:
735; NOGATHER:       # %bb.0: # %entry
736; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
737; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
738; NOGATHER-NEXT:    # implicit-def: $xmm2
739; NOGATHER-NEXT:    testb $1, %al
740; NOGATHER-NEXT:    je .LBB10_2
741; NOGATHER-NEXT:  # %bb.1: # %cond.load
742; NOGATHER-NEXT:    vmovq %xmm3, %rax
743; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
744; NOGATHER-NEXT:  .LBB10_2: # %else
745; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
746; NOGATHER-NEXT:    testb $1, %al
747; NOGATHER-NEXT:    je .LBB10_4
748; NOGATHER-NEXT:  # %bb.3: # %cond.load1
749; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
750; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm2
751; NOGATHER-NEXT:  .LBB10_4: # %else2
752; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
753; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
754; NOGATHER-NEXT:    retq
755entry:
756  %ld  = load <2 x i64*>, <2 x i64*>* %ptr
757  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
758  ret <2 x i64> %res
759}
760
761declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)
762
763; Same shape as the v2i64 case, but for <2 x double>: AVX2 targets should use
763; the FP gather vgatherqpd (X86 again sign-extends the 32-bit pointer vector
763; with vpmovsxdq); NOGATHER expands to per-lane mask tests with vmovsd/vmovhpd
763; scalar loads, blended with the passthru operand via vblendvpd.
763define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
764; X86-LABEL: masked_gather_v2double:
765; X86:       # %bb.0: # %entry
766; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
767; X86-NEXT:    vpmovsxdq (%eax), %xmm2
768; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
769; X86-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
770; X86-NEXT:    vmovapd %xmm1, %xmm0
771; X86-NEXT:    retl
772;
773; X64-LABEL: masked_gather_v2double:
774; X64:       # %bb.0: # %entry
775; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
776; X64-NEXT:    vmovapd (%rdi), %xmm2
777; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
778; X64-NEXT:    vmovapd %xmm1, %xmm0
779; X64-NEXT:    retq
780;
781; NOGATHER-LABEL: masked_gather_v2double:
782; NOGATHER:       # %bb.0: # %entry
783; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
784; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
785; NOGATHER-NEXT:    # implicit-def: $xmm2
786; NOGATHER-NEXT:    testb $1, %al
787; NOGATHER-NEXT:    je .LBB11_2
788; NOGATHER-NEXT:  # %bb.1: # %cond.load
789; NOGATHER-NEXT:    vmovq %xmm3, %rax
790; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
791; NOGATHER-NEXT:  .LBB11_2: # %else
792; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
793; NOGATHER-NEXT:    testb $1, %al
794; NOGATHER-NEXT:    je .LBB11_4
795; NOGATHER-NEXT:  # %bb.3: # %cond.load1
796; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
797; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
798; NOGATHER-NEXT:  .LBB11_4: # %else2
799; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
800; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
801; NOGATHER-NEXT:    retq
802entry:
803  %ld  = load <2 x double*>, <2 x double*>* %ptr
804  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
805  ret <2 x double> %res
806}
807
808