; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL

target triple = "x86_64-unknown-unknown"
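
; The tests below exercise lowering of 256-bit <4 x double> and <4 x i64>
; shuffles. Target-specific checks use the AVX1/AVX2/AVX512VL prefixes;
; checks shared by all three run lines use the ALL prefix.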

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq

  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT:    retq
  %a = load  <4 x double>,  <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0423:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # BB#0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_15uu:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3330:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3210:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0124:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0142:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0412:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2u35:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_z4z6:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_5zuz:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_15uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_11uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i64> %shuffle
}

define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL:         retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; ALL-LABEL: insert_reg_and_zero_v4i64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq %rdi, %xmm0
; ALL-NEXT:    retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4i64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX512VL-NEXT:    retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovapd (%rdi), %xmm0
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <4 x i64> %1 to <4 x double>
  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT:    retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}

define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0167:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0167:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0167:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuffle64
}

define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0145_bc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0145_bc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
  ret <4 x i64> %shuffle64
}

define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %tmp2
}