; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

; Splat: every result lane reads element 0 of %a; the mask never selects %b.
; Both targets are expected to emit a single vbroadcastsd.
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000000:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Splat of element 2 of %a. The expected code first extracts 128-bit lane 1 of
; ymm0 (vextractf128 $1) and then broadcasts its low double with vbroadcastsd.
define <8 x double> @shuffle_v8f64_22222222(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22222222:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x double> %shuffle
}

; Splat of element 4 of %a. The expected code extracts 128-bit lane 2 of zmm0
; (vextractf32x4 $2) and then broadcasts its low double with vbroadcastsd.
define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_44444444:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Splat of element 4 again, but the arguments arrive as <8 x i64> and are
; bitcast to <8 x double> before the shuffle. The expected code is the same
; vextractf32x4 + vbroadcastsd pair as for shuffle_v8f64_44444444.
define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8f64_44444444_bc:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %tmp0 = bitcast <8 x i64> %a to <8 x double>
  %tmp1 = bitcast <8 x i64> %b to <8 x double>
  %shuffle = shufflevector <8 x double> %tmp0, <8 x double> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 6, which reads element 1.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd.
; The two targets are checked separately; the i386 checks list the index
; constant as sixteen values, each index followed by a 0.
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000010:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000010:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 5, which reads element 2.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000200:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000200:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 4, which reads element 3.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00003000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00003000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 3, which reads element 4.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00040000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00040000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 2, which reads element 5.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00500000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00500000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Every lane reads element 0 of %a except lane 1, which reads element 6.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_06000000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_06000000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 reads element 7 of %a; every other lane reads element 0.
; Instead of loading an index constant, the expected code puts 7 in eax, moves
; it into xmm1 (vmovq on x86-64, vmovd on i386) and permutes with vpermq.
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_70000000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movl $7, %eax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_70000000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl $7, %eax
; AVX512F-32-NEXT:    vmovd %eax, %xmm1
; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Repeats the low pair of each group of four elements of %a: lanes 0-3 read
; [0,1,0,1] and lanes 4-7 read [4,5,4,5]. Both targets expect a single vpermpd
; on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01014545:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Duplicates each of elements 0..3 of %a into two adjacent lanes.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00112233:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00112233:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x double> %shuffle
}

; Lanes 0-3 read element 0 of %a and lanes 4-7 read element 1.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00001111:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00001111:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x double> %shuffle
}

; Two-input blend: even lanes come from %b (mask 8,10,12,14) and odd lanes from
; %a (mask 1,3,5,7), each at its own lane position. Both targets expect a single
; vshufpd that takes the even elements from zmm1 and the odd ones from zmm0.
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_81a3c5e7:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x double> %shuffle
}

; Alternates element 0 of %a (even lanes) with element 0 of %b (odd lanes).
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08080808:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08080808:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x double> %shuffle
}

; Lanes 0-3 alternate element 0 of %a and element 0 of %b; lanes 4-7 alternate
; element 4 of %a and element 4 of %b (mask value 12).
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08084c4c:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08084c4c:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x double> %shuffle
}

; Lanes 0-1 read element 0 of %b, lanes 2-3 read elements 2,3 of %a, lanes 4-5
; read element 4 of %b and lanes 6-7 read elements 6,7 of %a.
; Expected: vpermi2pd followed by a vmovapd that copies the result into zmm0.
; The index constant in the checks is the IR mask with the two sources'
; numbering exchanged (IR 8 becomes 0, IR 2 becomes 10, and so on).
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_8823cc67:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Result lanes are b[1],b[0],a[3],a[2],b[5],b[4],a[7],a[6]: each adjacent pair
; is swapped, with pairs taken alternately from %b and %a.
; Expected: vpermi2pd followed by a vmovapd that copies the result into zmm0.
; The index constant in the checks is the IR mask with the two sources'
; numbering exchanged.
define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_9832dc76:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Result lanes are b[1],b[0],a[1],a[0],b[5],b[4],a[5],a[4].
; Expected: vpermi2pd followed by a vmovapd that copies the result into zmm0.
; The index constant in the checks is the IR mask with the two sources'
; numbering exchanged.
define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_9810dc54:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Interleaves elements 0,1 and 4,5 of %a with the same elements of %b:
; a[0],b[0],a[1],b[1],a[4],b[4],a[5],b[5].
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08194c5d:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08194c5d:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x double> %shuffle
}

; Interleaves elements 2,3 and 6,7 of %a with the same elements of %b:
; a[2],b[2],a[3],b[3],a[6],b[6],a[7],b[7].
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_2a3b6e7f:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_2a3b6e7f:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x double> %shuffle
}

; Interleaves elements 0..3 of %a with elements 0..3 of %b:
; a[0],b[0],a[1],b[1],a[2],b[2],a[3],b[3].
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08192a3b:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08192a3b:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x double> %shuffle
}

; Result lanes are a[0],b[0],b[1],b[1],a[1],b[2],b[3],b[3].
; Expected: vpermi2pd followed by a vmovapd that copies the result into zmm0.
; The index constant in the checks is the IR mask with the two sources'
; numbering exchanged (IR 0 becomes 8, IR 8 becomes 0, and so on).
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08991abb:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x double> %shuffle
}

; Even lanes read elements 0..3 of %a in order; odd lanes read elements 1,3,5,7
; of %b (mask 9,11,13,15).
; Expected: an index constant in zmm2 equal to the IR mask, consumed by
; vpermt2pd; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_091b2d3f:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_091b2d3f:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x double> %shuffle
}

; Lane 0 reads element 0 of %a and lane 4 reads element 1 of %a; every other
; lane reads %b at its own lane position (b[1],b[2],b[3],b[5],b[6],b[7]).
; Expected: vpermi2pd followed by a vmovapd that copies the result into zmm0.
; The index constant in the checks is the IR mask with the two sources'
; numbering exchanged.
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_09ab1def:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [0,0,0,1] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_00014445:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [0,0,2,0] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_00204464:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [0,3,0,0] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_03004744:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [1,0,0,0] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_10005444:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [2,2,0,0] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_22006644:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that applies the pattern [3,3,3,0] to elements 0-3
; and the same pattern, offset by 4, to elements 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_33307774:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Reverses elements 0-3 of %a into lanes 0-3 and elements 4-7 into lanes 4-7.
; Both targets expect a single vpermpd on zmm0 with no separate index register.
define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_32107654:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Identity on %a except that lane 1 reads element 0 and lane 5 reads element 4,
; so every lane stays within its own adjacent pair of elements.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_00234467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Duplicates each even-numbered element of %a into the odd lane that follows it.
; Both targets expect a single vmovddup.
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_00224466:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Swaps the two elements of every adjacent pair of %a.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_10325476:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Duplicates each odd-numbered element of %a into the even lane before it.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_11335577:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x double> %shuffle
}

; Swaps elements 0,1 and elements 4,5 of %a; lanes 2,3,6,7 are unchanged.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_10235467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Swaps elements 0,1 and elements 4,5 of %a, and duplicates element 2 into
; lane 3 and element 6 into lane 7.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_10225466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [0,0,0,1] and lanes 4-7 read [5,4,4,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00015444:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00015444:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [0,0,2,0] and lanes 4-7 read [4,6,4,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00204644:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00204644:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [0,3,0,0] and lanes 4-7 read [4,4,7,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_03004474:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_03004474:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a: lanes 0-3 read [1,0,0,0] and lanes 4-7 all read
; element 4.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10004444:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10004444:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [2,2,0,0] and lanes 4-7 read [6,4,4,6].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_22006446:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22006446:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [3,3,3,0] and lanes 4-7 read [7,4,7,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_33307474:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_33307474:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Reverses elements 0-3 of %a into lanes 0-3; lanes 4-7 are unchanged.
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_32104567:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_32104567:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [0,0,2,3] and lanes 4-7 read [6,7,4,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00236744:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00236744:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with a different pattern in each half of the
; result: lanes 0-3 read [0,0,2,2] and lanes 4-7 read [6,6,4,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd;
; the two targets differ in how that constant is printed.
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00226644:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00226644:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Swaps elements 0,1 and elements 2,3 of %a; lanes 4-7 are unchanged.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_10324567:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Duplicates element 1 into lanes 0-1 and element 3 into lanes 2-3 of the
; result; lanes 4-7 of %a are unchanged.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_11334567:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Identity on %a except that elements 4 and 5 are swapped.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_01235467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Lanes 0-3 of %a are unchanged; elements 4 and 5 are swapped and element 6 is
; duplicated into lane 7.
; Both targets expect a single vpermilpd.
define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_01235466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a with undef mask entries in lanes 3 and 5; the
; defined lanes read [0,0,2,_,6,_,4,4].
; Expected: an index constant loaded with vmovaps feeding a variable vpermpd.
; The checks show the undef lanes as "u" entries in the printed constant.
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_002u6u44:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-source <8 x double> shuffle with mask 0,0,u,u,6,6,u,u (four undef
; lanes, %b never selected). The checks expect a vpermpd that takes its indices
; from zmm1, which is filled by vmovaps; undef lanes are printed as "u". The
; i386 checks list the same indices as sixteen 32-bit values.
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}
764
; Single-source <8 x double> shuffle that swaps elements 0/1 and 2/3, keeps 4/5
; and leaves lanes 6 and 7 undef (%b is never selected). Both run lines expect
; a single vpermilpd; the undef lanes show up as 6,6 in the expected output.
define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_103245uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}
774
; Single-source <8 x double> shuffle with mask 1,1,3,3,u,u,6,7 (%b is never
; selected). Both run lines expect a single vpermilpd; the undef lanes 4 and 5
; show up as 4,4 in the expected output.
define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_1133uu67:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x double> %shuffle
}
784
; Single-source <8 x double> shuffle with mask 0,u,u,3,5,4,u,u (%b is never
; selected). Both run lines expect a single vpermilpd; the undef lanes show up
; as 0, 2, 6 and 6 in the expected output.
define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_0uu354uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}
794
; Single-source <8 x double> shuffle with mask u,u,u,3,u,u,6,6, i.e. only lanes
; 3, 6 and 7 are defined (%b is never selected). Both run lines expect a single
; vpermilpd; the undef lanes show up as 0,0,2 and 4,4 in the expected output.
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
;
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x double> %shuffle
}
804
; Two-source <8 x double> shuffle with mask 12,3,4,8,12,13,10,0 (indices 0-7
; pick from %a, 8-15 from %b). The checks expect vpermi2pd with zmm1 listed
; before zmm0, so the expected index constant is the IR mask with the two
; index ranges exchanged (12 becomes 4, 3 becomes 11, ...), followed by a
; vmovapd that copies the result from zmm2 into zmm0. The i386 checks list the
; same indices as sixteen 32-bit values.
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_c348cda0:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
; AVX512F-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
; AVX512F-32-NEXT:    vpermi2pd %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
  ret <8 x double> %shuffle
}
823
; Two-source <8 x double> shuffle with mask 15,5,1,1,2,3,5,10 (indices 0-7 pick
; from %a, 8-15 from %b). The checks expect vpermt2pd writing zmm0 in place,
; with an index constant equal to the IR mask. The i386 checks list the same
; indices as sixteen 32-bit values.
define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_f511235a:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [15,5,1,1,2,3,5,10]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_f511235a:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [15,0,5,0,1,0,1,0,2,0,3,0,5,0,10,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
  ret <8 x double> %shuffle
}
840
; Interleaves elements 1, 2, 5 and 6 of %a with element 0 of a constant vector
; whose only defined element is 0.0 (%b is not used by the shufflevector). The
; checks expect zmm1 to be zeroed with vxorpd and then a vpermt2pd whose index
; constant uses 8 for every zero lane. The i386 checks list the same indices as
; sixteen 32-bit values.
define <8 x double> @shuffle_v8f64_1z2z5z6z(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_1z2z5z6z:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vmovapd {{.*#+}} zmm2 = [1,8,2,8,5,8,6,8]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_1z2z5z6z:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,8,0,2,0,8,0,5,0,8,0,6,0,8,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> <double 0.000000e+00, double undef, double undef, double undef, double undef, double undef, double undef, double undef>, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 5, i32 8, i32 6, i32 8>
  ret <8 x double> %shuffle
}
858
; Splats element 0 of %a across all eight <8 x i64> lanes (%b is never
; selected). Both run lines expect a single vbroadcastsd from xmm0.
define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_00000000:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
868
; Splats element 4 of %a across all eight <8 x i64> lanes (%b is never
; selected). Both run lines expect 128-bit chunk 2 (elements 4 and 5) to be
; extracted with vextractf32x4 and its low element broadcast with vbroadcastsd.
define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_44444444:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
878
; Splats element 6 of %a across all eight <8 x i64> lanes (%b is never
; selected). Both run lines expect 128-bit chunk 3 (elements 6 and 7) to be
; extracted with vextractf32x4 and its low element broadcast with vbroadcastsd.
define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_66666666:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
  ret <8 x i64> %shuffle
}
888
; Splat of element 0 of %a except that lane 6 takes element 1 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00000010:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00000010:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x i64> %shuffle
}
905
; Splat of element 0 of %a except that lane 5 takes element 2 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00000200:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00000200:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
922
; Splat of element 0 of %a except that lane 4 takes element 3 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00003000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00003000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
939
; Splat of element 0 of %a except that lane 3 takes element 4 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00040000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00040000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
956
; Splat of element 0 of %a except that lane 2 takes element 5 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00500000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00500000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
973
; Splat of element 0 of %a except that lane 1 takes element 6 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_06000000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_06000000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
990
; Lane 0 takes element 7 of %a and every other lane takes element 0 (%b is
; never selected). Unlike the neighbouring tests, the checks expect the index
; register to be built from the scalar 7 (movl into eax, then vmovq on x86-64
; or vmovd on i386 into xmm1) and the permute to be a vpermq.
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_70000000:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movl $7, %eax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_70000000:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl $7, %eax
; AVX512F-32-NEXT:    vmovd %eax, %xmm1
; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
1009
; Single-source <8 x i64> shuffle that repeats the element pair 0,1 in lanes
; 0-3 and the pair 4,5 in lanes 4-7 (%b is never selected). Both run lines
; expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01014545:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
; ALL-NEXT:    ret{{[l|q]}}

  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i64> %shuffle
}
1019
; Same 0,1,0,1,4,5,4,5 mask as shuffle_v8i64_01014545, but the source vector is
; loaded from %ptr (%b is never selected). The checks expect the vpermpd to
; read its source straight from memory; on i386 the pointer is first loaded
; from the stack into eax.
define <8 x i64> @shuffle_v8i64_01014545_mem(<8 x i64>* %ptr, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01014545_mem:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_01014545_mem:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
; AVX512F-32-NEXT:    retl

  %a = load <8 x i64>, <8 x i64>* %ptr
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i64> %shuffle
}
1036
; Duplicates each of elements 0-3 of %a into two adjacent lanes (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00112233:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00112233:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i64> %shuffle
}
1053
; Fills lanes 0-3 with element 0 of %a and lanes 4-7 with element 1 (%b is
; never selected). The checks expect a vpermpd that takes its indices from
; zmm1, which is filled by vmovaps. The i386 checks list the same indices as
; sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00001111:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00001111:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i64> %shuffle
}
1070
; Lane-wise blend of two <8 x i64> vectors where even lanes come from %b
; (indices 8,10,12,14) and odd lanes from %a (indices 1,3,5,7), with every
; element staying in its own lane. Both run lines expect the byte -86 (bit
; pattern 10101010) to be moved into k1 and a masked vpblendmq.
define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_81a3c5e7:
; ALL:       # %bb.0:
; ALL-NEXT:    movb $-86, %al
; ALL-NEXT:    kmovw %eax, %k1
; ALL-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i64> %shuffle
}
1082
; Two-source <8 x i64> shuffle that alternates element 0 of %a (index 0) with
; element 0 of %b (index 8) across all lanes. The checks expect vpermt2q
; writing zmm0 in place, with an index constant equal to the IR mask. The i386
; checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08080808:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08080808:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x i64> %shuffle
}
1099
; Two-source <8 x i64> shuffle that alternates element 0 of %a and %b in lanes
; 0-3 and element 4 of %a and %b in lanes 4-7 (mask 0,8,0,8,4,12,4,12). The
; checks expect vpermt2q writing zmm0 in place, with an index constant equal to
; the IR mask. The i386 checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08084c4c:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08084c4c:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x i64> %shuffle
}
1116
; Two-source <8 x i64> shuffle with mask 8,8,2,3,12,12,6,7 (indices 0-7 pick
; from %a, 8-15 from %b). The checks expect vpermi2q with zmm1 listed before
; zmm0, so the expected index constant is the IR mask with the two index ranges
; exchanged (8 becomes 0, 2 becomes 10, ...), followed by a vmovdqa64 that
; copies the result from zmm2 into zmm0. The i386 checks list the same indices
; as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_8823cc67:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x i64> %shuffle
}
1135
; Two-source <8 x i64> shuffle with mask 9,8,3,2,13,12,7,6 (indices 0-7 pick
; from %a, 8-15 from %b). The checks expect vpermi2q with zmm1 listed before
; zmm0, so the expected index constant is the IR mask with the two index ranges
; exchanged (9 becomes 1, 3 becomes 11, ...), followed by a vmovdqa64 that
; copies the result from zmm2 into zmm0. The i386 checks list the same indices
; as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_9832dc76:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x i64> %shuffle
}
1154
; Two-source <8 x i64> shuffle with mask 9,8,1,0,13,12,5,4 (indices 0-7 pick
; from %a, 8-15 from %b). The checks expect vpermi2q with zmm1 listed before
; zmm0, so the expected index constant is the IR mask with the two index ranges
; exchanged (9 becomes 1, 1 becomes 9, ...), followed by a vmovdqa64 that
; copies the result from zmm2 into zmm0. The i386 checks list the same indices
; as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_9810dc54:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x i64> %shuffle
}
1173
; Two-source <8 x i64> shuffle that interleaves elements 0, 1, 4 and 5 of %a
; with the same-numbered elements of %b (mask 0,8,1,9,4,12,5,13). The checks
; expect vpermt2q writing zmm0 in place, with an index constant equal to the IR
; mask. The i386 checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08194c5d:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08194c5d:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x i64> %shuffle
}
1190
; Two-source <8 x i64> shuffle that interleaves elements 2, 3, 6 and 7 of %a
; with the same-numbered elements of %b (mask 2,10,3,11,6,14,7,15). The checks
; expect vpermt2q writing zmm0 in place, with an index constant equal to the IR
; mask. The i386 checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_2a3b6e7f:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_2a3b6e7f:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i64> %shuffle
}
1207
; Two-source <8 x i64> shuffle that interleaves elements 0-3 of %a with
; elements 0-3 of %b (mask 0,8,1,9,2,10,3,11). The checks expect vpermt2q
; writing zmm0 in place, with an index constant equal to the IR mask. The i386
; checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08192a3b:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08192a3b:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i64> %shuffle
}
1224
; Two-source <8 x i64> shuffle with mask 0,8,9,9,1,10,11,11 (indices 0-7 pick
; from %a, 8-15 from %b). The checks expect vpermi2q with zmm1 listed before
; zmm0, so the expected index constant is the IR mask with the two index ranges
; exchanged (0 becomes 8, 8 becomes 0, ...), followed by a vmovdqa64 that
; copies the result from zmm2 into zmm0. The i386 checks list the same indices
; as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08991abb:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x i64> %shuffle
}
1243
; Two-source <8 x i64> shuffle whose even lanes take elements 0-3 of %a and
; whose odd lanes keep the odd elements of %b in place (mask
; 0,9,1,11,2,13,3,15). The checks expect vpermt2q writing zmm0 in place, with
; an index constant equal to the IR mask. The i386 checks list the same indices
; as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_091b2d3f:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_091b2d3f:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x i64> %shuffle
}
1260
; Two-source <8 x i64> shuffle that keeps %b in place except for lanes 0 and 4,
; which take elements 0 and 1 of %a (mask 0,9,10,11,1,13,14,15). The checks
; expect vpermi2q with zmm1 listed before zmm0, so the expected index constant
; is the IR mask with the two index ranges exchanged (0 becomes 8, 9 becomes
; 1, ...), followed by a vmovdqa64 that copies the result from zmm2 into zmm0.
; The i386 checks list the same indices as sixteen 32-bit values.
define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_09ab1def:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x i64> %shuffle
}
1279
; Single-source <8 x i64> shuffle that applies the pattern 0,0,0,1 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_00014445:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x i64> %shuffle
}
1289
; Single-source <8 x i64> shuffle that applies the pattern 0,0,2,0 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_00204464:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x i64> %shuffle
}
1299
; Single-source <8 x i64> shuffle that applies the pattern 0,3,0,0 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_03004744:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1309
; Single-source <8 x i64> shuffle that applies the pattern 1,0,0,0 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_10005444:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1319
; Single-source <8 x i64> shuffle that applies the pattern 2,2,0,0 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_22006644:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1329
; Single-source <8 x i64> shuffle that applies the pattern 3,3,3,0 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_33307774:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x i64> %shuffle
}
1339
; Single-source <8 x i64> shuffle that reverses elements 0-3 and, separately,
; elements 4-7 (%b is never selected). Both run lines expect a single vpermpd
; with no separate index register.
define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_32107654:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i64> %shuffle
}
1349
; Single-source <8 x i64> shuffle that applies the pattern 0,0,2,3 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_00234467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x i64> %shuffle
}
1359
; Single-source <8 x i64> shuffle that copies every even element over its odd
; neighbour (%b is never selected). Both run lines expect a single vpermilps,
; so the expected permutation is written in terms of sixteen 32-bit elements.
define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_00224466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i64> %shuffle
}
1369
; Single-source <8 x i64> shuffle that swaps the two elements of every adjacent
; pair (%b is never selected). Both run lines expect a single vpermilps, so the
; expected permutation is written in terms of sixteen 32-bit elements.
define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_10325476:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i64> %shuffle
}
1379
; Single-source <8 x i64> shuffle that copies every odd element over its even
; neighbour (%b is never selected). Both run lines expect a single vpermilps,
; so the expected permutation is written in terms of sixteen 32-bit elements.
define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_11335577:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x i64> %shuffle
}
1389
; Single-source <8 x i64> shuffle that swaps elements 0/1 and 4/5 and keeps the
; rest in place, i.e. pattern 1,0,2,3 applied to both groups of four elements
; (%b is never selected). Both run lines expect a single vpermpd with no
; separate index register.
define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_10235467:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i64> %shuffle
}
1399
; Single-source <8 x i64> shuffle that applies the pattern 1,0,2,2 to elements
; 0-3 and the same pattern (offset by 4) to elements 4-7; %b is never selected.
; Both run lines expect a single vpermpd with no separate index register.
define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
;
; ALL-LABEL: shuffle_v8i64_10225466:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
; ALL-NEXT:    ret{{[l|q]}}
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i64> %shuffle
}
1409
; Single-source <8 x i64> shuffle with mask 0,0,0,1,5,4,4,4; the pattern used
; for elements 0-3 differs from the one used for elements 4-7 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00015444:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00015444:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1426
; Single-source <8 x i64> shuffle with mask 0,0,2,0,4,6,4,4; the pattern used
; for elements 0-3 differs from the one used for elements 4-7 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_00204644:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00204644:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1443
; Single-source <8 x i64> shuffle with mask 0,3,0,0,4,4,7,4; the pattern used
; for elements 0-3 differs from the one used for elements 4-7 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_03004474:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_03004474:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x i64> %shuffle
}
1460
; Single-source <8 x i64> shuffle with mask 1,0,0,0,4,4,4,4; the pattern used
; for elements 0-3 differs from the one used for elements 4-7 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_10004444:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_10004444:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}
1477
; Single-source <8 x i64> shuffle with mask 2,2,0,0,6,4,4,6; the pattern used
; for elements 0-3 differs from the one used for elements 4-7 (%b is never
; selected). The checks expect a vpermpd that takes its indices from zmm1,
; which is filled by vmovaps. The i386 checks list the same indices as sixteen
; 32-bit values.
define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_22006446:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8i64_22006446:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x i64> %shuffle
}
1494
; Permute of %a alone with mask 3,3,3,0,7,4,7,4 (no lane of %b is selected).
; Both targets expect a constant index vector followed by vpermpd.
1495define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
1496;
1497; AVX512F-LABEL: shuffle_v8i64_33307474:
1498; AVX512F:       # %bb.0:
1499; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
1500; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1501; AVX512F-NEXT:    retq
1502;
1503; AVX512F-32-LABEL: shuffle_v8i64_33307474:
1504; AVX512F-32:       # %bb.0:
1505; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
1506; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1507; AVX512F-32-NEXT:    retl
1508  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1509  ret <8 x i64> %shuffle
1510}
1511
; Reverses lanes 0-3 of %a and keeps lanes 4-7 in place (mask 3,2,1,0,4,5,6,7).
; Both targets expect a constant index vector followed by vpermpd.
1512define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
1513;
1514; AVX512F-LABEL: shuffle_v8i64_32104567:
1515; AVX512F:       # %bb.0:
1516; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
1517; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1518; AVX512F-NEXT:    retq
1519;
1520; AVX512F-32-LABEL: shuffle_v8i64_32104567:
1521; AVX512F-32:       # %bb.0:
1522; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
1523; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1524; AVX512F-32-NEXT:    retl
1525  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1526  ret <8 x i64> %shuffle
1527}
1528
; Permute of %a alone with mask 0,0,2,3,6,7,4,4 (no lane of %b is selected).
; Both targets expect a constant index vector followed by vpermpd.
1529define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
1530;
1531; AVX512F-LABEL: shuffle_v8i64_00236744:
1532; AVX512F:       # %bb.0:
1533; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
1534; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1535; AVX512F-NEXT:    retq
1536;
1537; AVX512F-32-LABEL: shuffle_v8i64_00236744:
1538; AVX512F-32:       # %bb.0:
1539; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
1540; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1541; AVX512F-32-NEXT:    retl
1542  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1543  ret <8 x i64> %shuffle
1544}
1545
; Permute of %a alone with mask 0,0,2,2,6,6,4,4 (no lane of %b is selected).
; Both targets expect a constant index vector followed by vpermpd.
1546define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
1547;
1548; AVX512F-LABEL: shuffle_v8i64_00226644:
1549; AVX512F:       # %bb.0:
1550; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
1551; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1552; AVX512F-NEXT:    retq
1553;
1554; AVX512F-32-LABEL: shuffle_v8i64_00226644:
1555; AVX512F-32:       # %bb.0:
1556; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
1557; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1558; AVX512F-32-NEXT:    retl
1559  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1560  ret <8 x i64> %shuffle
1561}
1562
; Swaps adjacent lanes within 0-3 of %a and keeps lanes 4-7 in place
; (mask 1,0,3,2,4,5,6,7). Both targets expect constant indices plus vpermpd.
1563define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
1564;
1565; AVX512F-LABEL: shuffle_v8i64_10324567:
1566; AVX512F:       # %bb.0:
1567; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
1568; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1569; AVX512F-NEXT:    retq
1570;
1571; AVX512F-32-LABEL: shuffle_v8i64_10324567:
1572; AVX512F-32:       # %bb.0:
1573; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
1574; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1575; AVX512F-32-NEXT:    retl
1576  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1577  ret <8 x i64> %shuffle
1578}
1579
; Duplicates the odd lanes within 0-3 of %a and keeps lanes 4-7 in place
; (mask 1,1,3,3,4,5,6,7). Both targets expect constant indices plus vpermpd.
1580define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
1581;
1582; AVX512F-LABEL: shuffle_v8i64_11334567:
1583; AVX512F:       # %bb.0:
1584; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
1585; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1586; AVX512F-NEXT:    retq
1587;
1588; AVX512F-32-LABEL: shuffle_v8i64_11334567:
1589; AVX512F-32:       # %bb.0:
1590; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
1591; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1592; AVX512F-32-NEXT:    retl
1593  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1594  ret <8 x i64> %shuffle
1595}
1596
; Identity on %a except that lanes 4 and 5 are swapped (mask 0,1,2,3,5,4,6,7).
; Both targets expect a constant index vector followed by vpermpd.
1597define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
1598;
1599; AVX512F-LABEL: shuffle_v8i64_01235467:
1600; AVX512F:       # %bb.0:
1601; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
1602; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1603; AVX512F-NEXT:    retq
1604;
1605; AVX512F-32-LABEL: shuffle_v8i64_01235467:
1606; AVX512F-32:       # %bb.0:
1607; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
1608; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1609; AVX512F-32-NEXT:    retl
1610  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1611  ret <8 x i64> %shuffle
1612}
1613
; Permute of %a alone with mask 0,1,2,3,5,4,6,6 (no lane of %b is selected).
; Both targets expect a constant index vector followed by vpermpd.
1614define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
1615;
1616; AVX512F-LABEL: shuffle_v8i64_01235466:
1617; AVX512F:       # %bb.0:
1618; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
1619; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1620; AVX512F-NEXT:    retq
1621;
1622; AVX512F-32-LABEL: shuffle_v8i64_01235466:
1623; AVX512F-32:       # %bb.0:
1624; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
1625; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1626; AVX512F-32-NEXT:    retl
1627  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1628  ret <8 x i64> %shuffle
1629}
1630
; Permute of %a with undef lanes 3 and 5 (mask 0,0,2,u,6,u,4,4). The expected
; index constant keeps those positions as 'u' and is followed by vpermpd.
1631define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
1632;
1633; AVX512F-LABEL: shuffle_v8i64_002u6u44:
1634; AVX512F:       # %bb.0:
1635; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
1636; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1637; AVX512F-NEXT:    retq
1638;
1639; AVX512F-32-LABEL: shuffle_v8i64_002u6u44:
1640; AVX512F-32:       # %bb.0:
1641; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
1642; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1643; AVX512F-32-NEXT:    retl
1644  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1645  ret <8 x i64> %shuffle
1646}
1647
; Permute of %a with undef lanes 2,3,6,7 (mask 0,0,u,u,6,6,u,u). The expected
; index constant keeps those positions as 'u' and is followed by vpermpd.
1648define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
1649;
1650; AVX512F-LABEL: shuffle_v8i64_00uu66uu:
1651; AVX512F:       # %bb.0:
1652; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
1653; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1654; AVX512F-NEXT:    retq
1655;
1656; AVX512F-32-LABEL: shuffle_v8i64_00uu66uu:
1657; AVX512F-32:       # %bb.0:
1658; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
1659; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1660; AVX512F-32-NEXT:    retl
1661  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1662  ret <8 x i64> %shuffle
1663}
1664
; Permute of %a with undef lanes 6 and 7 (mask 1,0,3,2,4,5,u,u). The expected
; index constant keeps those positions as 'u' and is followed by vpermpd.
1665define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
1666;
1667; AVX512F-LABEL: shuffle_v8i64_103245uu:
1668; AVX512F:       # %bb.0:
1669; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
1670; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1671; AVX512F-NEXT:    retq
1672;
1673; AVX512F-32-LABEL: shuffle_v8i64_103245uu:
1674; AVX512F-32:       # %bb.0:
1675; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
1676; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1677; AVX512F-32-NEXT:    retl
1678  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1679  ret <8 x i64> %shuffle
1680}
1681
; Permute of %a with undef lanes 4 and 5 (mask 1,1,3,3,u,u,6,7). The expected
; index constant keeps those positions as 'u' and is followed by vpermpd.
1682define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
1683;
1684; AVX512F-LABEL: shuffle_v8i64_1133uu67:
1685; AVX512F:       # %bb.0:
1686; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
1687; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1688; AVX512F-NEXT:    retq
1689;
1690; AVX512F-32-LABEL: shuffle_v8i64_1133uu67:
1691; AVX512F-32:       # %bb.0:
1692; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
1693; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1694; AVX512F-32-NEXT:    retl
1695  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1696  ret <8 x i64> %shuffle
1697}
1698
; Permute of %a with undef lanes 1,2,6,7 (mask 0,u,u,3,5,4,u,u). The expected
; index constant keeps those positions as 'u' and is followed by vpermpd.
1699define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
1700;
1701; AVX512F-LABEL: shuffle_v8i64_0uu354uu:
1702; AVX512F:       # %bb.0:
1703; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
1704; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1705; AVX512F-NEXT:    retq
1706;
1707; AVX512F-32-LABEL: shuffle_v8i64_0uu354uu:
1708; AVX512F-32:       # %bb.0:
1709; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
1710; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1711; AVX512F-32-NEXT:    retl
1712  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1713  ret <8 x i64> %shuffle
1714}
1715
; Mostly-undef permute of %a: only lanes 3, 6 and 7 are defined
; (mask u,u,u,3,u,u,6,6). Expected: index constant with 'u' slots, then vpermpd.
1716define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
1717;
1718; AVX512F-LABEL: shuffle_v8i64_uuu3uu66:
1719; AVX512F:       # %bb.0:
1720; AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
1721; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1722; AVX512F-NEXT:    retq
1723;
1724; AVX512F-32-LABEL: shuffle_v8i64_uuu3uu66:
1725; AVX512F-32:       # %bb.0:
1726; AVX512F-32-NEXT:    vmovaps {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
1727; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
1728; AVX512F-32-NEXT:    retl
1729  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1730  ret <8 x i64> %shuffle
1731}
1732
; Two-source shuffle drawing lanes from both %a and %b
; (mask 6,12,10,10,8,7,14,5). Expected: a constant index vector in zmm2,
; vpermi2q writing into zmm2, then a move of zmm2 to the return register.
; NOTE(review): the expected constant (14,4,2,2,0,15,6,13) differs from the IR
; mask; it looks like the operands were commuted - confirm before hand-editing.
1733define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
1734;
1735; AVX512F-LABEL: shuffle_v8i64_6caa87e5:
1736; AVX512F:       # %bb.0:
1737; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
1738; AVX512F-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
1739; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
1740; AVX512F-NEXT:    retq
1741;
1742; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
1743; AVX512F-32:       # %bb.0:
1744; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
1745; AVX512F-32-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
1746; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
1747; AVX512F-32-NEXT:    retl
1748  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1749  ret <8 x i64> %shuffle
1750}
1751
; Interleaves the even lanes of %a and %b (mask 0,8,2,10,4,12,6,14).
; Expected on both targets: a single vunpcklpd.
1752define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
1753;
1754; ALL-LABEL: shuffle_v8f64_082a4c6e:
1755; ALL:       # %bb.0:
1756; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1757; ALL-NEXT:    ret{{[l|q]}}
1758  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1759  ret <8 x double> %shuffle
1760}
1761
; Even lanes of %a interleaved with zeros: the second shuffle operand is
; zeroinitializer, so index 8 always selects 0.0 and %b is not referenced.
; Expected: vxorps to zero a register, then vunpcklpd.
1762define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
1763;
1764; ALL-LABEL: shuffle_v8f64_0z2z4z6z:
1765; ALL:       # %bb.0:
1766; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1767; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1768; ALL-NEXT:    ret{{[l|q]}}
1769  %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
1770  ret <8 x double> %shuffle
1771}
1772
; Integer (<8 x i64>) form of the even-lane interleave of %a and %b
; (mask 0,8,2,10,4,12,6,14). The expectation is still a single vunpcklpd.
1773define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
1774;
1775; ALL-LABEL: shuffle_v8i64_082a4c6e:
1776; ALL:       # %bb.0:
1777; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1778; ALL-NEXT:    ret{{[l|q]}}
1779  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1780  ret <8 x i64> %shuffle
1781}
1782
; Zeros interleaved with the even lanes of %b. The first shuffle operand is
; zeroinitializer, so indices 7,5,3,1 all select zero and %a is not referenced.
; Expected: vxorps to zero zmm0's low register, then vunpcklpd with zmm1.
1783define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) {
1784;
1785; ALL-LABEL: shuffle_v8i64_z8zazcze:
1786; ALL:       # %bb.0:
1787; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1788; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1789; ALL-NEXT:    ret{{[l|q]}}
1790  %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32><i32 7, i32 8, i32 5, i32 10, i32 3, i32 12, i32 1, i32 14>
1791  ret <8 x i64> %shuffle
1792}
1793
; Interleaves the odd lanes of %a and %b (mask 1,9,3,11,5,13,7,15).
; Expected on both targets: a single vunpckhpd.
1794define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
1795;
1796; ALL-LABEL: shuffle_v8f64_193b5d7f:
1797; ALL:       # %bb.0:
1798; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1799; ALL-NEXT:    ret{{[l|q]}}
1800  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1801  ret <8 x double> %shuffle
1802}
1803
; Zeros interleaved with the odd lanes of %b. The first shuffle operand is
; zeroinitializer (index 0 selects 0.0), so %a is not referenced.
; Expected: vxorps to zero a register, then vunpckhpd.
1804define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
1805;
1806; ALL-LABEL: shuffle_v8f64_z9zbzdzf:
1807; ALL:       # %bb.0:
1808; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1809; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1810; ALL-NEXT:    ret{{[l|q]}}
1811  %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
1812  ret <8 x double> %shuffle
1813}
1814
; Integer (<8 x i64>) form of the odd-lane interleave of %a and %b
; (mask 1,9,3,11,5,13,7,15). The expectation is still a single vunpckhpd.
1815define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
1816;
1817; ALL-LABEL: shuffle_v8i64_193b5d7f:
1818; ALL:       # %bb.0:
1819; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1820; ALL-NEXT:    ret{{[l|q]}}
1821  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1822  ret <8 x i64> %shuffle
1823}
1824
; Odd lanes of %a interleaved with zeros. The second shuffle operand is
; zeroinitializer, so indices 8 and 15 both select zero and %b is not
; referenced. Expected: vxorps to zero a register, then vunpckhpd.
1825define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) {
1826;
1827; ALL-LABEL: shuffle_v8i64_1z3z5z7z:
1828; ALL:       # %bb.0:
1829; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1830; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1831; ALL-NEXT:    ret{{[l|q]}}
1832  %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 15, i32 5, i32 8, i32 7, i32 15>
1833  ret <8 x i64> %shuffle
1834}
1835
; Shuffle that moves whole pairs of lanes: pairs (0,1),(4,5) from %x and
; (2,3),(0,1) from %x1 (mask 0,1,4,5,10,11,8,9). Expected: one vshuff64x2.
1836define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
1837; ALL-LABEL: test_vshuff64x2_512:
1838; ALL:       # %bb.0:
1839; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
1840; ALL-NEXT:    ret{{[l|q]}}
1841  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1842  ret <8 x double> %res
1843}
1844
; Same pairwise shuffle as mask 0,1,4,5,10,11,8,9, then a select against
; zeroinitializer under %mask. Expected: the <8 x i1> mask is turned into %k1
; (vpmovsxwq / vpsllq $63 / vptestmq) and the shuffle is zero-masked ({z}).
1845define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
1846; ALL-LABEL: test_vshuff64x2_512_maskz:
1847; ALL:       # %bb.0:
1848; ALL-NEXT:    vpmovsxwq %xmm2, %zmm2
1849; ALL-NEXT:    vpsllq $63, %zmm2, %zmm2
1850; ALL-NEXT:    vptestmq %zmm2, %zmm2, %k1
1851; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
1852; ALL-NEXT:    ret{{[l|q]}}
1853  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1854  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
1855  ret <8 x double> %res
1856}
1857
; Integer pairwise shuffle (mask 0,1,4,5,10,11,8,9) followed by a select that
; falls back to %x where %mask is false. Expected: %k1 built from the mask and
; a merge-masked vshufi64x2 writing into zmm0 (which holds %x).
1858define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
1859; ALL-LABEL: test_vshufi64x2_512_mask:
1860; ALL:       # %bb.0:
1861; ALL-NEXT:    vpmovsxwq %xmm2, %zmm2
1862; ALL-NEXT:    vpsllq $63, %zmm2, %zmm2
1863; ALL-NEXT:    vptestmq %zmm2, %zmm2, %k1
1864; ALL-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
1865; ALL-NEXT:    ret{{[l|q]}}
1866  %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1867  %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
1868  ret <8 x i64> %res
1869}
1870
; Pairwise shuffle whose second source is loaded from %ptr (align 1).
; Expected: vshuff64x2 with the load appearing as a 'mem' operand; the i386
; expectation first has a movl from the stack into %eax.
1871define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
1872; AVX512F-LABEL: test_vshuff64x2_512_mem:
1873; AVX512F:       # %bb.0:
1874; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
1875; AVX512F-NEXT:    retq
1876;
1877; AVX512F-32-LABEL: test_vshuff64x2_512_mem:
1878; AVX512F-32:       # %bb.0:
1879; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1880; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
1881; AVX512F-32-NEXT:    retl
1882  %x1   = load <8 x double>,<8 x double> *%ptr,align 1
1883  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1884  ret <8 x double> %res
1885}
1886
; Pairwise shuffle with a loaded second source (%ptr, align 1), then a select
; that falls back to %x where %mask is false. Expected: %k1 built from the
; mask and a merge-masked vshuff64x2 with a 'mem' operand; the i386
; expectation also has a movl from the stack into %eax before the shuffle.
1887define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
1888; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
1889; AVX512F:       # %bb.0:
1890; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
1891; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
1892; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
1893; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
1894; AVX512F-NEXT:    retq
1895;
1896; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
1897; AVX512F-32:       # %bb.0:
1898; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
1899; AVX512F-32-NEXT:    vpsllq $63, %zmm1, %zmm1
1900; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
1901; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1902; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
1903; AVX512F-32-NEXT:    retl
1904  %x1 = load <8 x double>,<8 x double> *%ptr,align 1
1905  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1906  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> %x
1907  ret <8 x double> %res
1908}
1909
; Pairwise shuffle with a loaded second source (%ptr, align 1), then a select
; against zeroinitializer under %mask. Expected: %k1 built from the mask and
; a zero-masked ({z}) vshuff64x2 with a 'mem' operand; the i386 expectation
; also has a movl from the stack into %eax before the shuffle.
1910define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
1911; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
1912; AVX512F:       # %bb.0:
1913; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
1914; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
1915; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
1916; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
1917; AVX512F-NEXT:    retq
1918;
1919; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
1920; AVX512F-32:       # %bb.0:
1921; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
1922; AVX512F-32-NEXT:    vpsllq $63, %zmm1, %zmm1
1923; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
1924; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1925; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
1926; AVX512F-32-NEXT:    retl
1927  %x1 = load <8 x double>,<8 x double> *%ptr,align 1
1928  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
1929  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
1930  ret <8 x double> %res
1931}
1932
; Swaps the two lowest lane pairs of %a1 and keeps lanes 4-7
; (mask 2,3,0,1,4,5,6,7; second operand undef, %a0 not referenced).
; Expected: a single vshuff64x2 reading zmm1.
1933define <8 x double> @shuffle_v8f64_23014567(<8 x double> %a0, <8 x double> %a1) {
1934; ALL-LABEL: shuffle_v8f64_23014567:
1935; ALL:       # %bb.0:
1936; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,4,5,6,7]
1937; ALL-NEXT:    ret{{[l|q]}}
1938  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
1939  ret <8 x double> %1
1940}
1941
; As mask 2,3,0,1,u,u,6,7 on %a1: lanes 4 and 5 are undef, and the expected
; vshuff64x2 shows them filled with elements 0,1 of zmm1. %a0 is not referenced.
1942define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1) {
1943; ALL-LABEL: shuffle_v8f64_2301uu67:
1944; ALL:       # %bb.0:
1945; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,0,1,6,7]
1946; ALL-NEXT:    ret{{[l|q]}}
1947  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 6, i32 7>
1948  ret <8 x double> %1
1949}
1950
; Only the low four result lanes are defined (mask 2,3,0,1,u,u,u,u on %a1).
; Expected: a 256-bit vpermpd on ymm1 rather than a 512-bit shuffle.
1951define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
1952; ALL-LABEL: shuffle_v8f64_2301uuuu:
1953; ALL:       # %bb.0:
1954; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm1[2,3,0,1]
1955; ALL-NEXT:    ret{{[l|q]}}
1956  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
1957  ret <8 x double> %1
1958}
1959
; Only the high four result lanes are defined (mask u,u,u,u,2,3,0,1 on %a1).
; Expected: vshuff64x2 taking its (don't-care) low half from zmm0 and
; elements 2,3,0,1 from zmm1.
; NOTE(review): the function name spells three 'u' but the mask has four undef
; lanes; the name is left as-is because the label expectation depends on it.
1960define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) {
1961; ALL-LABEL: shuffle_v8f64_uuu2301:
1962; ALL:       # %bb.0:
1963; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
1964; ALL-NEXT:    ret{{[l|q]}}
1965  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1>
1966  ret <8 x double> %1
1967}
1968
; Keeps lane 0 of %a and zeroes lanes 1-7 (index 8 selects from
; zeroinitializer). Expected: vxorps plus a 128-bit vblendps on xmm registers.
1969define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
1970; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
1971; ALL:       # %bb.0:
1972; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1973; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1974; ALL-NEXT:    ret{{[l|q]}}
1975  %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1976  ret <8 x i64> %shuffle
1977}
1978
; Floating-point form: keeps lane 0 of %a and zeroes lanes 1-7 (index 8
; selects from zeroinitializer). Expected: vxorps plus a 128-bit vblendps.
1979define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
1980; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
1981; ALL:       # %bb.0:
1982; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1983; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1984; ALL-NEXT:    ret{{[l|q]}}
1985  %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1986  ret <8 x double> %shuffle
1987}
1988
; Shifts the concatenation of %a and %b down by one lane: lanes 1-7 of %a
; followed by lane 0 of %b (mask 1..8). Expected: a single valignq.
1989define <8 x i64> @shuffle_v8i64_12345678(<8 x i64> %a, <8 x i64> %b) {
1990;
1991; ALL-LABEL: shuffle_v8i64_12345678:
1992; ALL:       # %bb.0:
1993; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7],zmm1[0]
1994; ALL-NEXT:    ret{{[l|q]}}
1995  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
1996  ret <8 x i64> %shuffle
1997}
1998
; Rotates %a by one lane (mask 1,2,3,4,5,6,7,0; second operand undef).
; Expected: a single valignq using zmm0 for both inputs.
1999define <8 x i64> @shuffle_v8i64_12345670(<8 x i64> %a) {
2000;
2001; ALL-LABEL: shuffle_v8i64_12345670:
2002; ALL:       # %bb.0:
2003; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,0]
2004; ALL-NEXT:    ret{{[l|q]}}
2005  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
2006  ret <8 x i64> %shuffle
2007}
2008
; Two-source one-lane shift (mask 1..8) merged into %passthru under an i8
; mask that is bitcast to <8 x i1>. Expected: kmovw into %k1 (from %edi on
; x86-64; via movzbl from the stack on i386), a merge-masked valignq into
; zmm2, then a move of zmm2 to zmm0.
2009define <8 x i64> @mask_shuffle_v8i64_12345678(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passthru, i8 %mask) {
2010;
2011; AVX512F-LABEL: mask_shuffle_v8i64_12345678:
2012; AVX512F:       # %bb.0:
2013; AVX512F-NEXT:    kmovw %edi, %k1
2014; AVX512F-NEXT:    valignq {{.*#+}} zmm2 {%k1} = zmm0[1,2,3,4,5,6,7],zmm1[0]
2015; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
2016; AVX512F-NEXT:    retq
2017;
2018; AVX512F-32-LABEL: mask_shuffle_v8i64_12345678:
2019; AVX512F-32:       # %bb.0:
2020; AVX512F-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2021; AVX512F-32-NEXT:    kmovw %eax, %k1
2022; AVX512F-32-NEXT:    valignq {{.*#+}} zmm2 {%k1} = zmm0[1,2,3,4,5,6,7],zmm1[0]
2023; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
2024; AVX512F-32-NEXT:    retl
2025  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
2026  %mask.cast = bitcast i8 %mask to <8 x i1>
2027  %res = select <8 x i1> %mask.cast, <8 x i64> %shuffle, <8 x i64> %passthru
2028  ret <8 x i64> %res
2029}
2030
; Single-source one-lane rotate (mask 1,2,3,4,5,6,7,0) merged into %passthru
; under an i8 mask bitcast to <8 x i1>. Expected: kmovw into %k1, a
; merge-masked valignq into zmm1, then a move of zmm1 to zmm0.
2031define <8 x i64> @mask_shuffle_v8i64_12345670(<8 x i64> %a, <8 x i64> %passthru, i8 %mask) {
2032;
2033; AVX512F-LABEL: mask_shuffle_v8i64_12345670:
2034; AVX512F:       # %bb.0:
2035; AVX512F-NEXT:    kmovw %edi, %k1
2036; AVX512F-NEXT:    valignq {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,4,5,6,7,0]
2037; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm0
2038; AVX512F-NEXT:    retq
2039;
2040; AVX512F-32-LABEL: mask_shuffle_v8i64_12345670:
2041; AVX512F-32:       # %bb.0:
2042; AVX512F-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2043; AVX512F-32-NEXT:    kmovw %eax, %k1
2044; AVX512F-32-NEXT:    valignq {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,4,5,6,7,0]
2045; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm0
2046; AVX512F-32-NEXT:    retl
2047  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
2048  %mask.cast = bitcast i8 %mask to <8 x i1>
2049  %res = select <8 x i1> %mask.cast, <8 x i64> %shuffle, <8 x i64> %passthru
2050  ret <8 x i64> %res
2051}
2052
; Two-source one-lane shift (mask 1..8) selected against zeroinitializer under
; an i8 mask bitcast to <8 x i1>. Expected: kmovw into %k1 and a zero-masked
; ({z}) valignq writing zmm0 directly.
2053define <8 x i64> @maskz_shuffle_v8i64_12345678(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2054;
2055; AVX512F-LABEL: maskz_shuffle_v8i64_12345678:
2056; AVX512F:       # %bb.0:
2057; AVX512F-NEXT:    kmovw %edi, %k1
2058; AVX512F-NEXT:    valignq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,4,5,6,7],zmm1[0]
2059; AVX512F-NEXT:    retq
2060;
2061; AVX512F-32-LABEL: maskz_shuffle_v8i64_12345678:
2062; AVX512F-32:       # %bb.0:
2063; AVX512F-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2064; AVX512F-32-NEXT:    kmovw %eax, %k1
2065; AVX512F-32-NEXT:    valignq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,4,5,6,7],zmm1[0]
2066; AVX512F-32-NEXT:    retl
2067  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
2068  %mask.cast = bitcast i8 %mask to <8 x i1>
2069  %res = select <8 x i1> %mask.cast, <8 x i64> %shuffle, <8 x i64> zeroinitializer
2070  ret <8 x i64> %res
2071}
2072
; Single-source one-lane rotate (mask 1,2,3,4,5,6,7,0) selected against
; zeroinitializer under an i8 mask bitcast to <8 x i1>. Expected: kmovw into
; %k1 and a zero-masked ({z}) valignq writing zmm0 directly.
2073define <8 x i64> @maskz_shuffle_v8i64_12345670(<8 x i64> %a, i8 %mask) {
2074;
2075; AVX512F-LABEL: maskz_shuffle_v8i64_12345670:
2076; AVX512F:       # %bb.0:
2077; AVX512F-NEXT:    kmovw %edi, %k1
2078; AVX512F-NEXT:    valignq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,4,5,6,7,0]
2079; AVX512F-NEXT:    retq
2080;
2081; AVX512F-32-LABEL: maskz_shuffle_v8i64_12345670:
2082; AVX512F-32:       # %bb.0:
2083; AVX512F-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2084; AVX512F-32-NEXT:    kmovw %eax, %k1
2085; AVX512F-32-NEXT:    valignq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,4,5,6,7,0]
2086; AVX512F-32-NEXT:    retl
2087  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
2088  %mask.cast = bitcast i8 %mask to <8 x i1>
2089  %res = select <8 x i1> %mask.cast, <8 x i64> %shuffle, <8 x i64> zeroinitializer
2090  ret <8 x i64> %res
2091}
2092
; Low four lanes of %a followed by the low four lanes of %b
; (mask 0,1,2,3,8,9,10,11). Expected: vinsertf64x4 $1 of ymm1 into zmm0.
2093define <8 x double> @shuffle_v8f64_012389AB(<8 x double> %a, <8 x double> %b) {
2094; ALL-LABEL: shuffle_v8f64_012389AB:
2095; ALL:       # %bb.0:
2096; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
2097; ALL-NEXT:    ret{{[l|q]}}
2098  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2099  ret <8 x double> %shuffle
2100}
2101
; Low four lanes of %b followed by the low four lanes of %a
; (mask 8,9,10,11,0,1,2,3). Expected: vinsertf64x4 $1 of ymm0 into zmm1.
2102define <8 x double> @shuffle_v8f64_89AB0123(<8 x double> %a, <8 x double> %b) {
2103; ALL-LABEL: shuffle_v8f64_89AB0123:
2104; ALL:       # %bb.0:
2105; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
2106; ALL-NEXT:    ret{{[l|q]}}
2107  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
2108  ret <8 x double> %shuffle
2109}
2110
; Repeats the low four lanes of %a in both halves (mask 0,1,2,3,0,1,2,3; no
; lane of %b is selected). Expected: vinsertf64x4 $1 of ymm0 into zmm0.
2111define <8 x double> @shuffle_v8f64_01230123(<8 x double> %a, <8 x double> %b) {
2112; ALL-LABEL: shuffle_v8f64_01230123:
2113; ALL:       # %bb.0:
2114; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
2115; ALL-NEXT:    ret{{[l|q]}}
2116  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
2117  ret <8 x double> %shuffle
2118}
2119
; Integer form: low four lanes of %a followed by the low four lanes of %b
; (mask 0,1,2,3,8,9,10,11). Expected: vinsertf64x4 $1 of ymm1 into zmm0.
2120define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) {
2121; ALL-LABEL: shuffle_v8i64_012389AB:
2122; ALL:       # %bb.0:
2123; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
2124; ALL-NEXT:    ret{{[l|q]}}
2125  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2126  ret <8 x i64> %shuffle
2127}
2128
; Integer form: low four lanes of %b followed by the low four lanes of %a
; (mask 8,9,10,11,0,1,2,3). Expected: vinsertf64x4 $1 of ymm0 into zmm1.
2129define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) {
2130; ALL-LABEL: shuffle_v8i64_89AB0123:
2131; ALL:       # %bb.0:
2132; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
2133; ALL-NEXT:    ret{{[l|q]}}
2134  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
2135  ret <8 x i64> %shuffle
2136}
2137
; Integer form: repeats the low four lanes of %a in both halves
; (mask 0,1,2,3,0,1,2,3). Expected: vinsertf64x4 $1 of ymm0 into zmm0.
2138define <8 x i64> @shuffle_v8i64_01230123(<8 x i64> %a, <8 x i64> %b) {
2139; ALL-LABEL: shuffle_v8i64_01230123:
2140; ALL:       # %bb.0:
2141; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
2142; ALL-NEXT:    ret{{[l|q]}}
2143  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
2144  ret <8 x i64> %shuffle
2145}
2146
; Replaces lanes 0-1 of %a with lanes 0-1 of %b (mask 8,9,2,3,4,5,6,7).
; Expected: vinsertf32x4 with immediate $0.
2147define <8 x double> @shuffle_v8f64_89234567(<8 x double> %a, <8 x double> %b) {
2148; ALL-LABEL: shuffle_v8f64_89234567:
2149; ALL:       # %bb.0:
2150; ALL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
2151; ALL-NEXT:    ret{{[l|q]}}
2152  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2153  ret <8 x double> %shuffle
2154}
2155
; Replaces lanes 2-3 of %a with lanes 0-1 of %b (mask 0,1,8,9,4,5,6,7).
; Expected: vinsertf32x4 with immediate $1.
2156define <8 x double> @shuffle_v8f64_01894567(<8 x double> %a, <8 x double> %b) {
2157; ALL-LABEL: shuffle_v8f64_01894567:
2158; ALL:       # %bb.0:
2159; ALL-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
2160; ALL-NEXT:    ret{{[l|q]}}
2161  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
2162  ret <8 x double> %shuffle
2163}
2164
; Replaces lanes 4-5 of %a with lanes 0-1 of %b (mask 0,1,2,3,8,9,6,7).
; Expected: vinsertf32x4 with immediate $2.
2165define <8 x double> @shuffle_v8f64_01238967(<8 x double> %a, <8 x double> %b) {
2166; ALL-LABEL: shuffle_v8f64_01238967:
2167; ALL:       # %bb.0:
2168; ALL-NEXT:    vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
2169; ALL-NEXT:    ret{{[l|q]}}
2170  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
2171  ret <8 x double> %shuffle
2172}
2173
; Replaces lanes 6-7 of %a with lanes 0-1 of %b (mask 0,1,2,3,4,5,8,9).
; Expected: vinsertf32x4 with immediate $3.
2174define <8 x double> @shuffle_v8f64_01234589(<8 x double> %a, <8 x double> %b) {
2175; ALL-LABEL: shuffle_v8f64_01234589:
2176; ALL:       # %bb.0:
2177; ALL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
2178; ALL-NEXT:    ret{{[l|q]}}
2179  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
2180  ret <8 x double> %shuffle
2181}
2182
; Integer form: replaces lanes 0-1 of %a with lanes 0-1 of %b
; (mask 8,9,2,3,4,5,6,7). Expected: vinsertf32x4 with immediate $0.
2183define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) {
2184; ALL-LABEL: shuffle_v8i64_89234567:
2185; ALL:       # %bb.0:
2186; ALL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
2187; ALL-NEXT:    ret{{[l|q]}}
2188  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2189  ret <8 x i64> %shuffle
2190}
2191
; Integer form: replaces lanes 2-3 of %a with lanes 0-1 of %b
; (mask 0,1,8,9,4,5,6,7). Expected: vinsertf32x4 with immediate $1.
2192define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) {
2193; ALL-LABEL: shuffle_v8i64_01894567:
2194; ALL:       # %bb.0:
2195; ALL-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
2196; ALL-NEXT:    ret{{[l|q]}}
2197  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
2198  ret <8 x i64> %shuffle
2199}
2200
; Two-input v8i64 shuffle: result elements 4-5 are taken from elements 0-1 of
; %b (mask indices 8 and 9); every other element keeps its position from %a.
; The expected code is a single vinsertf32x4 with immediate 2.
2201define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) {
2202; ALL-LABEL: shuffle_v8i64_01238967:
2203; ALL:       # %bb.0:
2204; ALL-NEXT:    vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
2205; ALL-NEXT:    ret{{[l|q]}}
2206  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
2207  ret <8 x i64> %shuffle
2208}
2209
; Two-input v8i64 shuffle: result elements 6-7 are taken from elements 0-1 of
; %b (mask indices 8 and 9); elements 0-5 keep their position from %a.
; The expected code is a single vinsertf32x4 with immediate 3.
2210define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) {
2211; ALL-LABEL: shuffle_v8i64_01234589:
2212; ALL:       # %bb.0:
2213; ALL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
2214; ALL-NEXT:    ret{{[l|q]}}
2215  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
2216  ret <8 x i64> %shuffle
2217}
2218
; Widening shuffle: splats element 2 of a <4 x double> input across all eight
; elements of a <8 x double> result (the second shuffle operand is undef).
; The expected code extracts the upper 128 bits of ymm0 with vextractf128 and
; then broadcasts the low double of that with vbroadcastsd.
2219define <8 x double> @shuffle_v4f64_v8f64_22222222(<4 x double> %a) {
2220; ALL-LABEL: shuffle_v4f64_v8f64_22222222:
2221; ALL:       # %bb.0:
2222; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
2223; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
2224; ALL-NEXT:    ret{{[l|q]}}
2225  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
2226  ret <8 x double> %shuffle
2227}
2228
; Widening shuffle: repeats the two elements of a <2 x i64> input four times to
; fill a <8 x i64> result (mask 0,1,0,1,0,1,0,1; second operand is undef).
; The expected code doubles the value twice: vinsertf128 copies xmm0 into the
; upper half of ymm0, then vinsertf64x4 copies ymm0 into the upper half of zmm0.
2229define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
2230; ALL-LABEL: shuffle_v2i64_v8i64_01010101:
2231; ALL:       # %bb.0:
2232; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2233; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2234; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
2235; ALL-NEXT:    ret{{[l|q]}}
2236  %shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
2237  ret <8 x i64> %shuffle
2238}
2239
; Widening shuffle: repeats the two elements of a <2 x double> input four times
; to fill a <8 x double> result (mask 0,1,0,1,0,1,0,1; second operand is undef).
; The expected code is the same vinsertf128 + vinsertf64x4 pair that the checks
; list for the <2 x i64> variant of this test.
2240define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
2241; ALL-LABEL: shuffle_v2f64_v8f64_01010101:
2242; ALL:       # %bb.0:
2243; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2244; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2245; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
2246; ALL-NEXT:    ret{{[l|q]}}
2247  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
2248  ret <8 x double> %shuffle
2249}
2250
2251; FIXME: Could use vcompresspd here.
; Narrowing shuffle: gathers elements 2, 3, 4 and 6 of a <8 x double> input
; into a <4 x double> result. The expected code takes elements 2-3 with
; vextractf128, takes the upper 256 bits with vextractf64x4, permutes them so
; elements 4 and 6 sit in the upper half, and blends the two halves together.
2252define <4 x double> @test_v8f64_2346 (<8 x double> %v) {
2253; ALL-LABEL: test_v8f64_2346:
2254; ALL:       # %bb.0:
2255; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
2256; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
2257; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
2258; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2259; ALL-NEXT:    ret{{[l|q]}}
2260  %res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
2261  ret <4 x double> %res
2262}
2263
2264; FIXME: Could use vcompresspd here.
; Narrowing shuffle: extracts elements 3 and 4 of a <8 x double> input into a
; <2 x double> result. The two elements live in different 128-bit lanes, so the
; expected code extracts lane 2 (elements 4-5) and lane 1 (elements 2-3) and
; combines the high double of one with the low double of the other via vshufpd.
; A vzeroupper is expected before the return.
2265define <2 x double> @test_v8f64_34 (<8 x double> %v) {
2266; ALL-LABEL: test_v8f64_34:
2267; ALL:       # %bb.0:
2268; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
2269; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
2270; ALL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2271; ALL-NEXT:    vzeroupper
2272; ALL-NEXT:    ret{{[l|q]}}
2273  %res = shufflevector <8 x double> %v, <8 x double> undef, <2 x i32> <i32 3, i32 4>
2274  ret <2 x double> %res
2275}
2276
2277; FIXME: Could use vpcompressq here.
; Narrowing shuffle: gathers elements 1, 2, 5 and 7 of a <8 x i64> input into a
; <4 x i64> result. The expected code permutes the upper 256 bits so elements
; 5 and 7 land in its upper half, permutes the lower 256 bits so elements 1 and
; 2 land in its lower half, and then blends the two halves together.
2278define <4 x i64> @test_v8i64_1257 (<8 x i64> %v) {
2279; ALL-LABEL: test_v8i64_1257:
2280; ALL:       # %bb.0:
2281; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
2282; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,3]
2283; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,2,3]
2284; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2285; ALL-NEXT:    ret{{[l|q]}}
2286  %res = shufflevector <8 x i64> %v, <8 x i64> undef, <4 x i32> <i32 1, i32 2, i32 5, i32 7>
2287  ret <4 x i64> %res
2288}
2289
; Narrowing shuffle: extracts elements 2 and 5 of a <8 x i64> input into a
; <2 x i64> result. The expected code blends element 5 (from the extracted
; upper 256 bits) into 64-bit slot 1 of ymm0, then uses vpermpd to move
; element 2 into slot 0 so the pair sits in xmm0. A vzeroupper is expected
; before the return.
2290define <2 x i64> @test_v8i64_2_5 (<8 x i64> %v) {
2291; ALL-LABEL: test_v8i64_2_5:
2292; ALL:       # %bb.0:
2293; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
2294; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
2295; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
2296; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2297; ALL-NEXT:    vzeroupper
2298; ALL-NEXT:    ret{{[l|q]}}
2299  %res = shufflevector <8 x i64> %v, <8 x i64> undef, <2 x i32> <i32 2, i32 5>
2300  ret <2 x i64> %res
2301}
2302
; Keeps elements 0-1 of %a and fills result elements 2-7 from elements 0-1 of
; the constant second operand (mask indices 8 and 9), which are both zero.
; The expected code loads the mask value 3 into %k1 and issues a zero-masked
; vpexpandq, so only the two low elements survive.
2303define <8 x i64> @test_v8i64_insert_zero_128(<8 x i64> %a) {
2304; ALL-LABEL: test_v8i64_insert_zero_128:
2305; ALL:       # %bb.0:
2306; ALL-NEXT:    movb $3, %al
2307; ALL-NEXT:    kmovw %eax, %k1
2308; ALL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
2309; ALL-NEXT:    ret{{[l|q]}}
2310  %res = shufflevector <8 x i64> %a, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
2311  ret <8 x i64> %res
2312}
2313
; Keeps elements 0-3 of %a and fills result elements 4-7 from elements 0-1 of
; the constant second operand (mask indices 8 and 9), which are both zero.
; The expected code is a single vmovaps %ymm0, %ymm0.
; NOTE(review): this presumably relies on a VEX-encoded ymm write clearing the
; upper 256 bits of the zmm register - confirm against the ISA reference.
2314define <8 x i64> @test_v8i64_insert_zero_256(<8 x i64> %a) {
2315; ALL-LABEL: test_v8i64_insert_zero_256:
2316; ALL:       # %bb.0:
2317; ALL-NEXT:    vmovaps %ymm0, %ymm0
2318; ALL-NEXT:    ret{{[l|q]}}
2319  %res = shufflevector <8 x i64> %a, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
2320  ret <8 x i64> %res
2321}
2322