; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

; Splat of lane 0 of %a into every result lane (%b is unused).
; Both targets are expected to emit a single vbroadcastsd.
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Splat of lane 2 of %a. The expected code first extracts 128-bit chunk 1
; (lanes 2-3) and then broadcasts its low element.
define <8 x double> @shuffle_v8f64_22222222(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_22222222:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vextractf32x4 $1, %zmm0, %xmm0
; AVX512F-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22222222:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vextractf32x4 $1, %zmm0, %xmm0
; AVX512F-32-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x double> %shuffle
}

; Splat of lane 4 of %a. The expected code first extracts 128-bit chunk 2
; (lanes 4-5) and then broadcasts its low element.
define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_44444444:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_44444444:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-32-NEXT:    vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 6, which takes lane 1.
; Expected lowering: constant index vector + variable vpermpd. On i386 the
; index constant is printed as sixteen 32-bit values.
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000010:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000010:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 5, which takes lane 2.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000200:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000200:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 4, which takes lane 3.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00003000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00003000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 3, which takes lane 4.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00040000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00040000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 2, which takes lane 5.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00500000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00500000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Lane 0 of %a everywhere except result lane 1, which takes lane 6.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_06000000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_06000000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Result lane 0 takes lane 7 of %a; every other lane takes lane 0.
; Here the expected code materializes the index vector in registers (zeroed
; vector with the immediate 7 inserted into its lowest element) instead of
; loading a constant, then performs a variable vpermpd.
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_70000000:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    movl $7, %eax
; AVX512F-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
; AVX512F-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_70000000:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT:    movl $7, %eax
; AVX512F-32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
; AVX512F-32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512F-32-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that repeats the low pair of each 256-bit half
; (0,1,0,1 then 4,5,4,5). Expected lowering is one vpermpd with no separate
; index register.
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_01014545:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_01014545:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Duplicates each of lanes 0-3 of %a into adjacent result lanes.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00112233:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00112233:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x double> %shuffle
}

; Low four result lanes take lane 0 of %a, high four take lane 1.
; Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00001111:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00001111:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x double> %shuffle
}

; Two-input blend: even result lanes come from %b (mask indices 8,10,12,14),
; odd result lanes from %a. Expected lowering is a single vshufpd.
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_81a3c5e7:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_81a3c5e7:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x double> %shuffle
}

; Two-input shuffle alternating lane 0 of %a with lane 0 of %b (mask index 8).
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08080808:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08080808:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x double> %shuffle
}

; Two-input shuffle: low half alternates lane 0 of %a / lane 0 of %b, high
; half alternates lane 4 of %a / lane 4 of %b (mask index 12).
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08084c4c:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08084c4c:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x double> %shuffle
}

; Two-input shuffle taking lanes 0,0 and 4,4 from %b and lanes 2,3 and 6,7
; from %a. The expected code uses vpermt2pd with the operands swapped (the
; index constant numbers %b's lanes 0-7 and %a's lanes 8-15), so the result
; lands in zmm1 and is copied to zmm0.
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_8823cc67:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Two-input shuffle: swapped pairs from %b (lanes 1,0 and 5,4) interleaved
; with swapped pairs from %a (lanes 3,2 and 7,6). The expected code uses
; vpermt2pd with the operands swapped, then copies the result to zmm0.
define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_9832dc76:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Two-input shuffle: swapped pairs from %b (lanes 1,0 and 5,4) interleaved
; with the same swapped pairs from %a. The expected code uses vpermt2pd with
; the operands swapped, then copies the result to zmm0.
define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_9810dc54:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Two-input interleave of lanes 0,1 and 4,5 of %a with the same lanes of %b.
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08194c5d:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08194c5d:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x double> %shuffle
}

; Two-input interleave of lanes 2,3 and 6,7 of %a with the same lanes of %b.
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_2a3b6e7f:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_2a3b6e7f:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x double> %shuffle
}

; Two-input interleave of lanes 0-3 of %a with lanes 0-3 of %b.
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08192a3b:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08192a3b:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x double> %shuffle
}

; Two-input shuffle taking lanes 0 and 1 of %a (result lanes 0 and 4) and
; lanes 0,1,1 / 2,3,3 of %b elsewhere. The expected code uses vpermt2pd with
; the operands swapped, then copies the result to zmm0.
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08991abb:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x double> %shuffle
}

; Two-input shuffle: even result lanes take lanes 0-3 of %a, odd result lanes
; take the odd lanes 1,3,5,7 of %b (mask indices 9,11,13,15).
; Expected lowering: constant index vector + vpermt2pd.
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_091b2d3f:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_091b2d3f:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x double> %shuffle
}

; Mostly %b in place (lanes 1-3 and 5-7), with lanes 0 and 1 of %a inserted at
; result lanes 0 and 4. The expected code uses vpermt2pd with the operands
; swapped, then copies the result to zmm0.
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_09ab1def:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 0,0,0,1 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00014445:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00014445:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 0,0,2,0 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00204464:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00204464:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 0,3,0,0 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_03004744:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_03004744:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 1,0,0,0 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10005444:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10005444:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 2,2,0,0 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_22006644:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22006644:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a applying the pattern 3,3,3,0 within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_33307774:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_33307774:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a reversing the four lanes within each 256-bit
; half. Expected lowering is one vpermpd with no separate index register.
define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_32107654:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_32107654:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a that only moves elements within 128-bit pairs
; (lanes 1 and 5 are replaced by lanes 0 and 4). Expected lowering is one
; vpermilpd.
define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00234467:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00234467:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Duplicates every even lane of %a into the following odd lane.
; Expected lowering is one vmovddup.
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00224466:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00224466:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Swaps the two elements of every 128-bit pair of %a.
; Expected lowering is one vpermilpd.
define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10325476:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10325476:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Duplicates every odd lane of %a into the preceding even lane.
; Expected lowering is one vpermilpd.
define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_11335577:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_11335577:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x double> %shuffle
}

; Swaps the first 128-bit pair of each 256-bit half of %a (lanes 0/1 and 4/5)
; and leaves the other pairs in place. Expected lowering is one vpermilpd.
define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10235467:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10235467:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Within each 256-bit half of %a: swap the first pair and duplicate the low
; element of the second pair. Expected lowering is one vpermilpd.
define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10225466:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10225466:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a whose two 256-bit halves use different patterns
; (0,0,0,1 vs 1,0,0,0 relative to each half). Expected lowering: constant
; index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00015444:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00015444:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a whose two 256-bit halves use different patterns
; (0,0,2,0 vs 0,2,0,0 relative to each half). Expected lowering: constant
; index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00204644:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00204644:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a whose two 256-bit halves use different patterns
; (0,3,0,0 vs 0,0,3,0 relative to each half). Expected lowering: constant
; index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_03004474:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_03004474:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a: low half is 1,0,0,0 and the high half is a splat
; of lane 4. Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_10004444:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10004444:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a whose two 256-bit halves use different patterns
; (2,2,0,0 vs 2,0,0,2 relative to each half). Expected lowering: constant
; index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_22006446:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22006446:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a whose two 256-bit halves use different patterns
; (3,3,3,0 vs 3,0,3,0 relative to each half). Expected lowering: constant
; index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_33307474:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_33307474:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a reversing the low four lanes and leaving the high
; four lanes in place. Expected lowering: constant index vector + variable
; vpermpd.
define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_32104567:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_32104567:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Single-input shuffle of %a: low half is 0,0,2,3 and the high half is
; 6,7,4,4. Expected lowering: constant index vector + variable vpermpd.
define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_00236744:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00236744:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Single-input v8f64 shuffle (only %a is read) with mask <0,0,2,2,6,6,4,4>.
; Expected lowering on both targets is a constant index load plus vpermpd.
759define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
760;
761; AVX512F-LABEL: shuffle_v8f64_00226644:
762; AVX512F:       # BB#0:
763; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
764; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
765; AVX512F-NEXT:    retq
766;
767; AVX512F-32-LABEL: shuffle_v8f64_00226644:
768; AVX512F-32:       # BB#0:
769; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
770; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
771; AVX512F-32-NEXT:    retl
772  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
773  ret <8 x double> %shuffle
774}
775
; Single-input v8f64 shuffle (only %a is read): swaps elements 0/1 and 2/3 and
; leaves elements 4-7 in place. Both targets expect a single vpermilpd.
776define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
777;
778; AVX512F-LABEL: shuffle_v8f64_10324567:
779; AVX512F:       # BB#0:
780; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
781; AVX512F-NEXT:    retq
782;
783; AVX512F-32-LABEL: shuffle_v8f64_10324567:
784; AVX512F-32:       # BB#0:
785; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
786; AVX512F-32-NEXT:    retl
787  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
788  ret <8 x double> %shuffle
789}
790
; Single-input v8f64 shuffle (only %a is read): duplicates element 1 into slot 0
; and element 3 into slot 2; elements 4-7 stay put. Expects a single vpermilpd.
791define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
792;
793; AVX512F-LABEL: shuffle_v8f64_11334567:
794; AVX512F:       # BB#0:
795; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
796; AVX512F-NEXT:    retq
797;
798; AVX512F-32-LABEL: shuffle_v8f64_11334567:
799; AVX512F-32:       # BB#0:
800; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
801; AVX512F-32-NEXT:    retl
802  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
803  ret <8 x double> %shuffle
804}
805
; Single-input v8f64 shuffle (only %a is read): swaps elements 4 and 5 and
; leaves every other element in place. Both targets expect a single vpermilpd.
806define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
807;
808; AVX512F-LABEL: shuffle_v8f64_01235467:
809; AVX512F:       # BB#0:
810; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
811; AVX512F-NEXT:    retq
812;
813; AVX512F-32-LABEL: shuffle_v8f64_01235467:
814; AVX512F-32:       # BB#0:
815; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
816; AVX512F-32-NEXT:    retl
817  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
818  ret <8 x double> %shuffle
819}
820
; Single-input v8f64 shuffle (only %a is read): swaps elements 4 and 5 and
; duplicates element 6 into slot 7. Both targets expect a single vpermilpd.
821define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
822;
823; AVX512F-LABEL: shuffle_v8f64_01235466:
824; AVX512F:       # BB#0:
825; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
826; AVX512F-NEXT:    retq
827;
828; AVX512F-32-LABEL: shuffle_v8f64_01235466:
829; AVX512F-32:       # BB#0:
830; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
831; AVX512F-32-NEXT:    retl
832  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
833  ret <8 x double> %shuffle
834}
835
; Single-input v8f64 shuffle with undef mask elements at positions 3 and 5.
; The checked index constant keeps those entries as 'u' and feeds vpermpd.
836define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
837;
838; AVX512F-LABEL: shuffle_v8f64_002u6u44:
839; AVX512F:       # BB#0:
840; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
841; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
842; AVX512F-NEXT:    retq
843;
844; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
845; AVX512F-32:       # BB#0:
846; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
847; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
848; AVX512F-32-NEXT:    retl
849  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
850  ret <8 x double> %shuffle
851}
852
; Single-input v8f64 shuffle with undef mask elements at positions 2, 3, 6 and 7.
; The checked index constant keeps those entries as 'u' and feeds vpermpd.
853define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
854;
855; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
856; AVX512F:       # BB#0:
857; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
858; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
859; AVX512F-NEXT:    retq
860;
861; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
862; AVX512F-32:       # BB#0:
863; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
864; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
865; AVX512F-32-NEXT:    retl
866  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
867  ret <8 x double> %shuffle
868}
869
; Single-input v8f64 shuffle with the last two mask elements undef.
; Both targets expect a single vpermilpd; its printed mask shows 6,6 in the undef slots.
870define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
871;
872; AVX512F-LABEL: shuffle_v8f64_103245uu:
873; AVX512F:       # BB#0:
874; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
875; AVX512F-NEXT:    retq
876;
877; AVX512F-32-LABEL: shuffle_v8f64_103245uu:
878; AVX512F-32:       # BB#0:
879; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
880; AVX512F-32-NEXT:    retl
881  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
882  ret <8 x double> %shuffle
883}
884
; Single-input v8f64 shuffle with undef mask elements at positions 4 and 5.
; Both targets expect a single vpermilpd; its printed mask shows 4,4 in the undef slots.
885define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
886;
887; AVX512F-LABEL: shuffle_v8f64_1133uu67:
888; AVX512F:       # BB#0:
889; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
890; AVX512F-NEXT:    retq
891;
892; AVX512F-32-LABEL: shuffle_v8f64_1133uu67:
893; AVX512F-32:       # BB#0:
894; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
895; AVX512F-32-NEXT:    retl
896  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
897  ret <8 x double> %shuffle
898}
899
; Single-input v8f64 shuffle with undef mask elements at positions 1, 2, 6 and 7.
; Both targets expect a single vpermilpd with printed mask [0,0,2,3,5,4,6,6].
900define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
901;
902; AVX512F-LABEL: shuffle_v8f64_0uu354uu:
903; AVX512F:       # BB#0:
904; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
905; AVX512F-NEXT:    retq
906;
907; AVX512F-32-LABEL: shuffle_v8f64_0uu354uu:
908; AVX512F-32:       # BB#0:
909; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
910; AVX512F-32-NEXT:    retl
911  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
912  ret <8 x double> %shuffle
913}
914
; Single-input v8f64 shuffle where only positions 3, 6 and 7 are defined (3,6,6).
; Both targets expect a single vpermilpd with printed mask [0,0,2,3,4,4,6,6].
915define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
916;
917; AVX512F-LABEL: shuffle_v8f64_uuu3uu66:
918; AVX512F:       # BB#0:
919; AVX512F-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
920; AVX512F-NEXT:    retq
921;
922; AVX512F-32-LABEL: shuffle_v8f64_uuu3uu66:
923; AVX512F-32:       # BB#0:
924; AVX512F-32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
925; AVX512F-32-NEXT:    retl
926  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
927  ret <8 x double> %shuffle
928}
929
; Two-input v8f64 shuffle: mask indices 0-7 select from %a, 8-15 from %b.
; The checked vpermt2pd writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
930define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
931;
932; AVX512F-LABEL: shuffle_v8f64_c348cda0:
933; AVX512F:       # BB#0:
934; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
935; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
936; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
937; AVX512F-NEXT:    retq
938;
939; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
940; AVX512F-32:       # BB#0:
941; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
942; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
943; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
944; AVX512F-32-NEXT:    retl
945  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
946  ret <8 x double> %shuffle
947}
948
; Two-input v8f64 shuffle: mask indices 0-7 select from %a, 8-15 from %b.
; The checked vpermt2pd writes zmm0 directly and its index constant equals the IR mask.
949define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
950;
951; AVX512F-LABEL: shuffle_v8f64_f511235a:
952; AVX512F:       # BB#0:
953; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,5,1,1,2,3,5,10]
954; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
955; AVX512F-NEXT:    retq
956;
957; AVX512F-32-LABEL: shuffle_v8f64_f511235a:
958; AVX512F-32:       # BB#0:
959; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,0,5,0,1,0,1,0,2,0,3,0,5,0,10,0]
960; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
961; AVX512F-32-NEXT:    retl
962  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
963  ret <8 x double> %shuffle
964}
965
; Splat of element 0 of %a across all eight i64 lanes (%b is not selected).
; Both targets expect a single vpbroadcastq from xmm0.
966define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
967;
968; AVX512F-LABEL: shuffle_v8i64_00000000:
969; AVX512F:       # BB#0:
970; AVX512F-NEXT:    vpbroadcastq %xmm0, %zmm0
971; AVX512F-NEXT:    retq
972;
973; AVX512F-32-LABEL: shuffle_v8i64_00000000:
974; AVX512F-32:       # BB#0:
975; AVX512F-32-NEXT:    vpbroadcastq %xmm0, %zmm0
976; AVX512F-32-NEXT:    retl
977  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
978  ret <8 x i64> %shuffle
979}
980
; Splat of element 4 of %a across all eight i64 lanes (%b is not selected).
; Both targets expect vextracti32x4 $2 to fetch the 128-bit chunk, then vpbroadcastq.
981define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) {
982; AVX512F-LABEL: shuffle_v8i64_44444444:
983; AVX512F:       # BB#0:
984; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
985; AVX512F-NEXT:    vpbroadcastq %xmm0, %zmm0
986; AVX512F-NEXT:    retq
987;
988; AVX512F-32-LABEL: shuffle_v8i64_44444444:
989; AVX512F-32:       # BB#0:
990; AVX512F-32-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
991; AVX512F-32-NEXT:    vpbroadcastq %xmm0, %zmm0
992; AVX512F-32-NEXT:    retl
993  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
994  ret <8 x i64> %shuffle
995}
996
; Splat of element 6 of %a across all eight i64 lanes (%b is not selected).
; Both targets expect vextracti32x4 $3 to fetch the 128-bit chunk, then vpbroadcastq.
997define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) {
998; AVX512F-LABEL: shuffle_v8i64_66666666:
999; AVX512F:       # BB#0:
1000; AVX512F-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
1001; AVX512F-NEXT:    vpbroadcastq %xmm0, %zmm0
1002; AVX512F-NEXT:    retq
1003;
1004; AVX512F-32-LABEL: shuffle_v8i64_66666666:
1005; AVX512F-32:       # BB#0:
1006; AVX512F-32-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
1007; AVX512F-32-NEXT:    vpbroadcastq %xmm0, %zmm0
1008; AVX512F-32-NEXT:    retl
1009  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
1010  ret <8 x i64> %shuffle
1011}
1012
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 6, which takes element 1.
; Expected lowering on both targets is a constant index load plus vpermq.
1013define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
1014;
1015; AVX512F-LABEL: shuffle_v8i64_00000010:
1016; AVX512F:       # BB#0:
1017; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
1018; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1019; AVX512F-NEXT:    retq
1020;
1021; AVX512F-32-LABEL: shuffle_v8i64_00000010:
1022; AVX512F-32:       # BB#0:
1023; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
1024; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1025; AVX512F-32-NEXT:    retl
1026  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1027  ret <8 x i64> %shuffle
1028}
1029
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 5, which takes element 2.
; Expected lowering on both targets is a constant index load plus vpermq.
1030define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
1031;
1032; AVX512F-LABEL: shuffle_v8i64_00000200:
1033; AVX512F:       # BB#0:
1034; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
1035; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1036; AVX512F-NEXT:    retq
1037;
1038; AVX512F-32-LABEL: shuffle_v8i64_00000200:
1039; AVX512F-32:       # BB#0:
1040; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
1041; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1042; AVX512F-32-NEXT:    retl
1043  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1044  ret <8 x i64> %shuffle
1045}
1046
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 4, which takes element 3.
; Expected lowering on both targets is a constant index load plus vpermq.
1047define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
1048;
1049; AVX512F-LABEL: shuffle_v8i64_00003000:
1050; AVX512F:       # BB#0:
1051; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
1052; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1053; AVX512F-NEXT:    retq
1054;
1055; AVX512F-32-LABEL: shuffle_v8i64_00003000:
1056; AVX512F-32:       # BB#0:
1057; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
1058; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1059; AVX512F-32-NEXT:    retl
1060  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
1061  ret <8 x i64> %shuffle
1062}
1063
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 3, which takes element 4.
; Expected lowering on both targets is a constant index load plus vpermq.
1064define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
1065;
1066; AVX512F-LABEL: shuffle_v8i64_00040000:
1067; AVX512F:       # BB#0:
1068; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
1069; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1070; AVX512F-NEXT:    retq
1071;
1072; AVX512F-32-LABEL: shuffle_v8i64_00040000:
1073; AVX512F-32:       # BB#0:
1074; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
1075; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1076; AVX512F-32-NEXT:    retl
1077  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
1078  ret <8 x i64> %shuffle
1079}
1080
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 2, which takes element 5.
; Expected lowering on both targets is a constant index load plus vpermq.
1081define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
1082;
1083; AVX512F-LABEL: shuffle_v8i64_00500000:
1084; AVX512F:       # BB#0:
1085; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
1086; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1087; AVX512F-NEXT:    retq
1088;
1089; AVX512F-32-LABEL: shuffle_v8i64_00500000:
1090; AVX512F-32:       # BB#0:
1091; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
1092; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1093; AVX512F-32-NEXT:    retl
1094  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
1095  ret <8 x i64> %shuffle
1096}
1097
; Single-input v8i64 shuffle: element 0 of %a everywhere except slot 1, which takes element 6.
; Expected lowering on both targets is a constant index load plus vpermq.
1098define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
1099;
1100; AVX512F-LABEL: shuffle_v8i64_06000000:
1101; AVX512F:       # BB#0:
1102; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
1103; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1104; AVX512F-NEXT:    retq
1105;
1106; AVX512F-32-LABEL: shuffle_v8i64_06000000:
1107; AVX512F-32:       # BB#0:
1108; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
1109; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1110; AVX512F-32-NEXT:    retl
1111  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1112  ret <8 x i64> %shuffle
1113}
1114
; Single-input v8i64 shuffle: slot 0 takes element 7 of %a, all other slots element 0.
; Unlike the neighbouring tests, the checks show the vpermq index vector being
; built in registers (zeroing xor, movl $7, element insert, vinserti32x4)
; instead of being loaded as a constant.
1115define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
1116;
1117; AVX512F-LABEL: shuffle_v8i64_70000000:
1118; AVX512F:       # BB#0:
1119; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
1120; AVX512F-NEXT:    movl $7, %eax
1121; AVX512F-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
1122; AVX512F-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
1123; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1124; AVX512F-NEXT:    retq
1125;
1126; AVX512F-32-LABEL: shuffle_v8i64_70000000:
1127; AVX512F-32:       # BB#0:
1128; AVX512F-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1129; AVX512F-32-NEXT:    movl $7, %eax
1130; AVX512F-32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
1131; AVX512F-32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
1132; AVX512F-32-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
1133; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1134; AVX512F-32-NEXT:    retl
1135  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1136  ret <8 x i64> %shuffle
1137}
1138
; Single-input v8i64 shuffle: repeats the element pair (0,1) in slots 0-3 and the
; pair (4,5) in slots 4-7. Both targets expect a single vshufi64x2.
1139define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
1140; AVX512F-LABEL: shuffle_v8i64_01014545:
1141; AVX512F:       # BB#0:
1142; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1143; AVX512F-NEXT:    retq
1144;
1145; AVX512F-32-LABEL: shuffle_v8i64_01014545:
1146; AVX512F-32:       # BB#0:
1147; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1148; AVX512F-32-NEXT:    retl
1149
1150  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
1151  ret <8 x i64> %shuffle
1152}
1153
; Single-input v8i64 shuffle: duplicates each of elements 0-3 of %a into adjacent slots.
; Expected lowering on both targets is a constant index load plus vpermq.
1154define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
1155;
1156; AVX512F-LABEL: shuffle_v8i64_00112233:
1157; AVX512F:       # BB#0:
1158; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
1159; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1160; AVX512F-NEXT:    retq
1161;
1162; AVX512F-32-LABEL: shuffle_v8i64_00112233:
1163; AVX512F-32:       # BB#0:
1164; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
1165; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1166; AVX512F-32-NEXT:    retl
1167  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1168  ret <8 x i64> %shuffle
1169}
1170
; Single-input v8i64 shuffle: element 0 of %a in slots 0-3 and element 1 in slots 4-7.
; Expected lowering on both targets is a constant index load plus vpermq.
1171define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
1172;
1173; AVX512F-LABEL: shuffle_v8i64_00001111:
1174; AVX512F:       # BB#0:
1175; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
1176; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1177; AVX512F-NEXT:    retq
1178;
1179; AVX512F-32-LABEL: shuffle_v8i64_00001111:
1180; AVX512F-32:       # BB#0:
1181; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
1182; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1183; AVX512F-32-NEXT:    retl
1184  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1185  ret <8 x i64> %shuffle
1186}
1187
; Two-input v8i64 shuffle: even slots come from %b (indices 8,10,12,14), odd slots from %a.
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1188define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
1189;
1190; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
1191; AVX512F:       # BB#0:
1192; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
1193; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1194; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1195; AVX512F-NEXT:    retq
1196;
1197; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
1198; AVX512F-32:       # BB#0:
1199; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
1200; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1201; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1202; AVX512F-32-NEXT:    retl
1203  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1204  ret <8 x i64> %shuffle
1205}
1206
; Two-input v8i64 shuffle: alternates element 0 of %a (index 0) and element 0 of %b (index 8).
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1207define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
1208;
1209; AVX512F-LABEL: shuffle_v8i64_08080808:
1210; AVX512F:       # BB#0:
1211; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
1212; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1213; AVX512F-NEXT:    retq
1214;
1215; AVX512F-32-LABEL: shuffle_v8i64_08080808:
1216; AVX512F-32:       # BB#0:
1217; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
1218; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1219; AVX512F-32-NEXT:    retl
1220  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1221  ret <8 x i64> %shuffle
1222}
1223
; Two-input v8i64 shuffle: mask indices 0-7 select from %a, 8-15 from %b (mask <0,8,0,8,4,12,4,12>).
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1224define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
1225;
1226; AVX512F-LABEL: shuffle_v8i64_08084c4c:
1227; AVX512F:       # BB#0:
1228; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
1229; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1230; AVX512F-NEXT:    retq
1231;
1232; AVX512F-32-LABEL: shuffle_v8i64_08084c4c:
1233; AVX512F-32:       # BB#0:
1234; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
1235; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1236; AVX512F-32-NEXT:    retl
1237  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1238  ret <8 x i64> %shuffle
1239}
1240
; Two-input v8i64 shuffle: mask indices 0-7 select from %a, 8-15 from %b (mask <8,8,2,3,12,12,6,7>).
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1241define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
1242;
1243; AVX512F-LABEL: shuffle_v8i64_8823cc67:
1244; AVX512F:       # BB#0:
1245; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
1246; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1247; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1248; AVX512F-NEXT:    retq
1249;
1250; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
1251; AVX512F-32:       # BB#0:
1252; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
1253; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1254; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1255; AVX512F-32-NEXT:    retl
1256  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1257  ret <8 x i64> %shuffle
1258}
1259
; Two-input v8i64 shuffle: mask indices 0-7 select from %a, 8-15 from %b (mask <9,8,3,2,13,12,7,6>).
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1260define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
1261;
1262; AVX512F-LABEL: shuffle_v8i64_9832dc76:
1263; AVX512F:       # BB#0:
1264; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
1265; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1266; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1267; AVX512F-NEXT:    retq
1268;
1269; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
1270; AVX512F-32:       # BB#0:
1271; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
1272; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1273; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1274; AVX512F-32-NEXT:    retl
1275  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1276  ret <8 x i64> %shuffle
1277}
1278
; Two-input v8i64 shuffle: mask indices 0-7 select from %a, 8-15 from %b (mask <9,8,1,0,13,12,5,4>).
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1279define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
1280;
1281; AVX512F-LABEL: shuffle_v8i64_9810dc54:
1282; AVX512F:       # BB#0:
1283; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
1284; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1285; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1286; AVX512F-NEXT:    retq
1287;
1288; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
1289; AVX512F-32:       # BB#0:
1290; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
1291; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1292; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1293; AVX512F-32-NEXT:    retl
1294  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1295  ret <8 x i64> %shuffle
1296}
1297
; Two-input v8i64 shuffle interleaving elements 0,1,4,5 of %a with the same elements of %b.
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1298define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
1299;
1300; AVX512F-LABEL: shuffle_v8i64_08194c5d:
1301; AVX512F:       # BB#0:
1302; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
1303; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1304; AVX512F-NEXT:    retq
1305;
1306; AVX512F-32-LABEL: shuffle_v8i64_08194c5d:
1307; AVX512F-32:       # BB#0:
1308; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
1309; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1310; AVX512F-32-NEXT:    retl
1311  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1312  ret <8 x i64> %shuffle
1313}
1314
; Two-input v8i64 shuffle interleaving elements 2,3,6,7 of %a with the same elements of %b.
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1315define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
1316;
1317; AVX512F-LABEL: shuffle_v8i64_2a3b6e7f:
1318; AVX512F:       # BB#0:
1319; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
1320; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1321; AVX512F-NEXT:    retq
1322;
1323; AVX512F-32-LABEL: shuffle_v8i64_2a3b6e7f:
1324; AVX512F-32:       # BB#0:
1325; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
1326; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1327; AVX512F-32-NEXT:    retl
1328  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1329  ret <8 x i64> %shuffle
1330}
1331
; Two-input v8i64 shuffle interleaving elements 0-3 of %a with elements 0-3 of %b.
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1332define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
1333;
1334; AVX512F-LABEL: shuffle_v8i64_08192a3b:
1335; AVX512F:       # BB#0:
1336; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
1337; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1338; AVX512F-NEXT:    retq
1339;
1340; AVX512F-32-LABEL: shuffle_v8i64_08192a3b:
1341; AVX512F-32:       # BB#0:
1342; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
1343; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1344; AVX512F-32-NEXT:    retl
1345  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1346  ret <8 x i64> %shuffle
1347}
1348
; Two-input v8i64 shuffle: mask indices 0-7 select from %a, 8-15 from %b (mask <0,8,9,9,1,10,11,11>).
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1349define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
1350;
1351; AVX512F-LABEL: shuffle_v8i64_08991abb:
1352; AVX512F:       # BB#0:
1353; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
1354; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1355; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1356; AVX512F-NEXT:    retq
1357;
1358; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
1359; AVX512F-32:       # BB#0:
1360; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
1361; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1362; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1363; AVX512F-32-NEXT:    retl
1364  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1365  ret <8 x i64> %shuffle
1366}
1367
; Two-input v8i64 shuffle: even slots take elements 0-3 of %a, odd slots take indices 9,11,13,15 (%b).
; The checked vpermt2q writes zmm0 directly and its index constant equals the IR mask.
1368define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
1369;
1370; AVX512F-LABEL: shuffle_v8i64_091b2d3f:
1371; AVX512F:       # BB#0:
1372; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
1373; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1374; AVX512F-NEXT:    retq
1375;
1376; AVX512F-32-LABEL: shuffle_v8i64_091b2d3f:
1377; AVX512F-32:       # BB#0:
1378; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
1379; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
1380; AVX512F-32-NEXT:    retl
1381  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1382  ret <8 x i64> %shuffle
1383}
1384
; Two-input v8i64 shuffle: slots 0 and 4 take elements 0 and 1 of %a; the rest come from %b.
; The checked vpermt2q writes zmm1 (copied to zmm0 afterwards) and its index
; constant is the IR mask with every index offset by 8, i.e. the inputs swapped.
1385define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
1386;
1387; AVX512F-LABEL: shuffle_v8i64_09ab1def:
1388; AVX512F:       # BB#0:
1389; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
1390; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1391; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1392; AVX512F-NEXT:    retq
1393;
1394; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
1395; AVX512F-32:       # BB#0:
1396; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
1397; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1398; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1399; AVX512F-32-NEXT:    retl
1400  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1401  ret <8 x i64> %shuffle
1402}
1403
; Single-input v8i64 shuffle applying the pattern 0,0,0,1 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1404define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
1405;
1406; AVX512F-LABEL: shuffle_v8i64_00014445:
1407; AVX512F:       # BB#0:
1408; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
1409; AVX512F-NEXT:    retq
1410;
1411; AVX512F-32-LABEL: shuffle_v8i64_00014445:
1412; AVX512F-32:       # BB#0:
1413; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
1414; AVX512F-32-NEXT:    retl
1415  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1416  ret <8 x i64> %shuffle
1417}
1418
; Single-input v8i64 shuffle applying the pattern 0,0,2,0 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1419define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
1420;
1421; AVX512F-LABEL: shuffle_v8i64_00204464:
1422; AVX512F:       # BB#0:
1423; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
1424; AVX512F-NEXT:    retq
1425;
1426; AVX512F-32-LABEL: shuffle_v8i64_00204464:
1427; AVX512F-32:       # BB#0:
1428; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
1429; AVX512F-32-NEXT:    retl
1430  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1431  ret <8 x i64> %shuffle
1432}
1433
; Single-input v8i64 shuffle applying the pattern 0,3,0,0 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1434define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
1435;
1436; AVX512F-LABEL: shuffle_v8i64_03004744:
1437; AVX512F:       # BB#0:
1438; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
1439; AVX512F-NEXT:    retq
1440;
1441; AVX512F-32-LABEL: shuffle_v8i64_03004744:
1442; AVX512F-32:       # BB#0:
1443; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
1444; AVX512F-32-NEXT:    retl
1445  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1446  ret <8 x i64> %shuffle
1447}
1448
; Single-input v8i64 shuffle applying the pattern 1,0,0,0 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1449define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
1450;
1451; AVX512F-LABEL: shuffle_v8i64_10005444:
1452; AVX512F:       # BB#0:
1453; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
1454; AVX512F-NEXT:    retq
1455;
1456; AVX512F-32-LABEL: shuffle_v8i64_10005444:
1457; AVX512F-32:       # BB#0:
1458; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
1459; AVX512F-32-NEXT:    retl
1460  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1461  ret <8 x i64> %shuffle
1462}
1463
; Single-input v8i64 shuffle applying the pattern 2,2,0,0 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1464define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
1465;
1466; AVX512F-LABEL: shuffle_v8i64_22006644:
1467; AVX512F:       # BB#0:
1468; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
1469; AVX512F-NEXT:    retq
1470;
1471; AVX512F-32-LABEL: shuffle_v8i64_22006644:
1472; AVX512F-32:       # BB#0:
1473; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
1474; AVX512F-32-NEXT:    retl
1475  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1476  ret <8 x i64> %shuffle
1477}
1478
; Single-input v8i64 shuffle applying the pattern 3,3,3,0 within each group of four elements.
; Both targets expect a single vpermq with no separate index-constant load.
1479define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
1480;
1481; AVX512F-LABEL: shuffle_v8i64_33307774:
1482; AVX512F:       # BB#0:
1483; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
1484; AVX512F-NEXT:    retq
1485;
1486; AVX512F-32-LABEL: shuffle_v8i64_33307774:
1487; AVX512F-32:       # BB#0:
1488; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
1489; AVX512F-32-NEXT:    retl
1490  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1491  ret <8 x i64> %shuffle
1492}
1493
; Single-input v8i64 shuffle reversing the elements within each group of four.
; Both targets expect a single vpermq with no separate index-constant load.
1494define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
1495;
1496; AVX512F-LABEL: shuffle_v8i64_32107654:
1497; AVX512F:       # BB#0:
1498; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
1499; AVX512F-NEXT:    retq
1500;
1501; AVX512F-32-LABEL: shuffle_v8i64_32107654:
1502; AVX512F-32:       # BB#0:
1503; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
1504; AVX512F-32-NEXT:    retl
1505  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1506  ret <8 x i64> %shuffle
1507}
1508
; Copies lane 0 over lane 1 and lane 4 over lane 5 of %a, leaving the other lanes in place (mask <0,0,2,3,4,4,6,7>).
; %b is never indexed; both runs expect one vpermq with no index-vector load.
1509define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
1510;
1511; AVX512F-LABEL: shuffle_v8i64_00234467:
1512; AVX512F:       # BB#0:
1513; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
1514; AVX512F-NEXT:    retq
1515;
1516; AVX512F-32-LABEL: shuffle_v8i64_00234467:
1517; AVX512F-32:       # BB#0:
1518; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
1519; AVX512F-32-NEXT:    retl
1520  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1521  ret <8 x i64> %shuffle
1522}
1523
; Duplicates every even lane of %a into the following odd lane (mask <0,0,2,2,4,4,6,6>); %b is never indexed.
; Both runs expect a single vpshufd, whose printed mask lists sixteen indices (two per 64-bit lane).
1524define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
1525;
1526; AVX512F-LABEL: shuffle_v8i64_00224466:
1527; AVX512F:       # BB#0:
1528; AVX512F-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
1529; AVX512F-NEXT:    retq
1530;
1531; AVX512F-32-LABEL: shuffle_v8i64_00224466:
1532; AVX512F-32:       # BB#0:
1533; AVX512F-32-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
1534; AVX512F-32-NEXT:    retl
1535  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1536  ret <8 x i64> %shuffle
1537}
1538
; Swaps the two elements of every adjacent lane pair of %a (mask <1,0,3,2,5,4,7,6>); %b is never indexed.
; Both runs expect a single vpshufd, whose printed mask lists sixteen indices (two per 64-bit lane).
1539define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
1540;
1541; AVX512F-LABEL: shuffle_v8i64_10325476:
1542; AVX512F:       # BB#0:
1543; AVX512F-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
1544; AVX512F-NEXT:    retq
1545;
1546; AVX512F-32-LABEL: shuffle_v8i64_10325476:
1547; AVX512F-32:       # BB#0:
1548; AVX512F-32-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
1549; AVX512F-32-NEXT:    retl
1550  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1551  ret <8 x i64> %shuffle
1552}
1553
; Duplicates every odd lane of %a into the preceding even lane (mask <1,1,3,3,5,5,7,7>); %b is never indexed.
; Both runs expect a single vpshufd, whose printed mask lists sixteen indices (two per 64-bit lane).
1554define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
1555;
1556; AVX512F-LABEL: shuffle_v8i64_11335577:
1557; AVX512F:       # BB#0:
1558; AVX512F-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
1559; AVX512F-NEXT:    retq
1560;
1561; AVX512F-32-LABEL: shuffle_v8i64_11335577:
1562; AVX512F-32:       # BB#0:
1563; AVX512F-32-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
1564; AVX512F-32-NEXT:    retl
1565  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1566  ret <8 x i64> %shuffle
1567}
1568
; Swaps lanes 0/1 and lanes 4/5 of %a, leaving the other lanes in place (mask <1,0,2,3,5,4,6,7>); %b is never indexed.
; Both runs expect one vpermq with no index-vector load.
1569define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
1570;
1571; AVX512F-LABEL: shuffle_v8i64_10235467:
1572; AVX512F:       # BB#0:
1573; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
1574; AVX512F-NEXT:    retq
1575;
1576; AVX512F-32-LABEL: shuffle_v8i64_10235467:
1577; AVX512F-32:       # BB#0:
1578; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
1579; AVX512F-32-NEXT:    retl
1580  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1581  ret <8 x i64> %shuffle
1582}
1583
; Single-source shuffle of %a with mask <1,0,2,2,5,4,6,6>; %b is never indexed.
; Lanes 4-7 repeat the lane 0-3 pattern offset by 4; both runs expect one vpermq with no index-vector load.
1584define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
1585;
1586; AVX512F-LABEL: shuffle_v8i64_10225466:
1587; AVX512F:       # BB#0:
1588; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
1589; AVX512F-NEXT:    retq
1590;
1591; AVX512F-32-LABEL: shuffle_v8i64_10225466:
1592; AVX512F-32:       # BB#0:
1593; AVX512F-32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
1594; AVX512F-32-NEXT:    retl
1595  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1596  ret <8 x i64> %shuffle
1597}
1598
; Single-source shuffle of %a with mask <0,0,0,1,5,4,4,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1599define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
1600;
1601; AVX512F-LABEL: shuffle_v8i64_00015444:
1602; AVX512F:       # BB#0:
1603; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
1604; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1605; AVX512F-NEXT:    retq
1606;
1607; AVX512F-32-LABEL: shuffle_v8i64_00015444:
1608; AVX512F-32:       # BB#0:
1609; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
1610; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1611; AVX512F-32-NEXT:    retl
1612  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1613  ret <8 x i64> %shuffle
1614}
1615
; Single-source shuffle of %a with mask <0,0,2,0,4,6,4,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1616define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
1617;
1618; AVX512F-LABEL: shuffle_v8i64_00204644:
1619; AVX512F:       # BB#0:
1620; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
1621; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1622; AVX512F-NEXT:    retq
1623;
1624; AVX512F-32-LABEL: shuffle_v8i64_00204644:
1625; AVX512F-32:       # BB#0:
1626; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
1627; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1628; AVX512F-32-NEXT:    retl
1629  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1630  ret <8 x i64> %shuffle
1631}
1632
; Single-source shuffle of %a with mask <0,3,0,0,4,4,7,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1633define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
1634;
1635; AVX512F-LABEL: shuffle_v8i64_03004474:
1636; AVX512F:       # BB#0:
1637; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
1638; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1639; AVX512F-NEXT:    retq
1640;
1641; AVX512F-32-LABEL: shuffle_v8i64_03004474:
1642; AVX512F-32:       # BB#0:
1643; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
1644; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1645; AVX512F-32-NEXT:    retl
1646  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1647  ret <8 x i64> %shuffle
1648}
1649
; Single-source shuffle of %a with mask <1,0,0,0,4,4,4,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1650define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
1651;
1652; AVX512F-LABEL: shuffle_v8i64_10004444:
1653; AVX512F:       # BB#0:
1654; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
1655; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1656; AVX512F-NEXT:    retq
1657;
1658; AVX512F-32-LABEL: shuffle_v8i64_10004444:
1659; AVX512F-32:       # BB#0:
1660; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
1661; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1662; AVX512F-32-NEXT:    retl
1663  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1664  ret <8 x i64> %shuffle
1665}
1666
; Single-source shuffle of %a with mask <2,2,0,0,6,4,4,6>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1667define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
1668;
1669; AVX512F-LABEL: shuffle_v8i64_22006446:
1670; AVX512F:       # BB#0:
1671; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
1672; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1673; AVX512F-NEXT:    retq
1674;
1675; AVX512F-32-LABEL: shuffle_v8i64_22006446:
1676; AVX512F-32:       # BB#0:
1677; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
1678; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1679; AVX512F-32-NEXT:    retl
1680  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1681  ret <8 x i64> %shuffle
1682}
1683
; Single-source shuffle of %a with mask <3,3,3,0,7,4,7,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1684define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
1685;
1686; AVX512F-LABEL: shuffle_v8i64_33307474:
1687; AVX512F:       # BB#0:
1688; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
1689; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1690; AVX512F-NEXT:    retq
1691;
1692; AVX512F-32-LABEL: shuffle_v8i64_33307474:
1693; AVX512F-32:       # BB#0:
1694; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
1695; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1696; AVX512F-32-NEXT:    retl
1697  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1698  ret <8 x i64> %shuffle
1699}
1700
; Reverses lanes 0-3 of %a and keeps lanes 4-7 in place (mask <3,2,1,0,4,5,6,7>); %b is never indexed.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1701define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
1702;
1703; AVX512F-LABEL: shuffle_v8i64_32104567:
1704; AVX512F:       # BB#0:
1705; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
1706; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1707; AVX512F-NEXT:    retq
1708;
1709; AVX512F-32-LABEL: shuffle_v8i64_32104567:
1710; AVX512F-32:       # BB#0:
1711; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
1712; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1713; AVX512F-32-NEXT:    retl
1714  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1715  ret <8 x i64> %shuffle
1716}
1717
; Single-source shuffle of %a with mask <0,0,2,3,6,7,4,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1718define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
1719;
1720; AVX512F-LABEL: shuffle_v8i64_00236744:
1721; AVX512F:       # BB#0:
1722; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
1723; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1724; AVX512F-NEXT:    retq
1725;
1726; AVX512F-32-LABEL: shuffle_v8i64_00236744:
1727; AVX512F-32:       # BB#0:
1728; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
1729; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1730; AVX512F-32-NEXT:    retl
1731  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1732  ret <8 x i64> %shuffle
1733}
1734
; Single-source shuffle of %a with mask <0,0,2,2,6,6,4,4>; the lane 4-7 pattern is not the lane 0-3 pattern offset by 4.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1735define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
1736;
1737; AVX512F-LABEL: shuffle_v8i64_00226644:
1738; AVX512F:       # BB#0:
1739; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
1740; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1741; AVX512F-NEXT:    retq
1742;
1743; AVX512F-32-LABEL: shuffle_v8i64_00226644:
1744; AVX512F-32:       # BB#0:
1745; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
1746; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1747; AVX512F-32-NEXT:    retl
1748  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1749  ret <8 x i64> %shuffle
1750}
1751
; Swaps adjacent pairs within lanes 0-3 of %a and keeps lanes 4-7 in place (mask <1,0,3,2,4,5,6,7>); %b is never indexed.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1752define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
1753;
1754; AVX512F-LABEL: shuffle_v8i64_10324567:
1755; AVX512F:       # BB#0:
1756; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
1757; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1758; AVX512F-NEXT:    retq
1759;
1760; AVX512F-32-LABEL: shuffle_v8i64_10324567:
1761; AVX512F-32:       # BB#0:
1762; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
1763; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1764; AVX512F-32-NEXT:    retl
1765  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1766  ret <8 x i64> %shuffle
1767}
1768
; Duplicates the odd lanes within lanes 0-3 of %a and keeps lanes 4-7 in place (mask <1,1,3,3,4,5,6,7>); %b is never indexed.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1769define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
1770;
1771; AVX512F-LABEL: shuffle_v8i64_11334567:
1772; AVX512F:       # BB#0:
1773; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
1774; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1775; AVX512F-NEXT:    retq
1776;
1777; AVX512F-32-LABEL: shuffle_v8i64_11334567:
1778; AVX512F-32:       # BB#0:
1779; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
1780; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1781; AVX512F-32-NEXT:    retl
1782  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1783  ret <8 x i64> %shuffle
1784}
1785
; Keeps every lane of %a in place except lanes 4 and 5, which are swapped (mask <0,1,2,3,5,4,6,7>); %b is never indexed.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1786define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
1787;
1788; AVX512F-LABEL: shuffle_v8i64_01235467:
1789; AVX512F:       # BB#0:
1790; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
1791; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1792; AVX512F-NEXT:    retq
1793;
1794; AVX512F-32-LABEL: shuffle_v8i64_01235467:
1795; AVX512F-32:       # BB#0:
1796; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
1797; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1798; AVX512F-32-NEXT:    retl
1799  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1800  ret <8 x i64> %shuffle
1801}
1802
; Keeps lanes 0-3 of %a in place and permutes lanes 4-7 as <5,4,6,6> (mask <0,1,2,3,5,4,6,6>); %b is never indexed.
; Both runs expect the indices to be loaded into zmm1 and applied by a register-indexed vpermq.
; The i386 run prints that constant as sixteen values, each index followed by a 0.
1803define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
1804;
1805; AVX512F-LABEL: shuffle_v8i64_01235466:
1806; AVX512F:       # BB#0:
1807; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
1808; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1809; AVX512F-NEXT:    retq
1810;
1811; AVX512F-32-LABEL: shuffle_v8i64_01235466:
1812; AVX512F-32:       # BB#0:
1813; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
1814; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1815; AVX512F-32-NEXT:    retl
1816  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1817  ret <8 x i64> %shuffle
1818}
1819
; Single-source shuffle of %a with undef lanes 3 and 5 (mask <0,0,2,undef,6,undef,4,4>; 'u' in the name marks undef).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1820define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
1821;
1822; AVX512F-LABEL: shuffle_v8i64_002u6u44:
1823; AVX512F:       # BB#0:
1824; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
1825; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1826; AVX512F-NEXT:    retq
1827;
1828; AVX512F-32-LABEL: shuffle_v8i64_002u6u44:
1829; AVX512F-32:       # BB#0:
1830; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
1831; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1832; AVX512F-32-NEXT:    retl
1833  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1834  ret <8 x i64> %shuffle
1835}
1836
; Single-source shuffle of %a with undef lanes 2, 3, 6 and 7 (mask <0,0,undef,undef,6,6,undef,undef>).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1837define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
1838;
1839; AVX512F-LABEL: shuffle_v8i64_00uu66uu:
1840; AVX512F:       # BB#0:
1841; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
1842; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1843; AVX512F-NEXT:    retq
1844;
1845; AVX512F-32-LABEL: shuffle_v8i64_00uu66uu:
1846; AVX512F-32:       # BB#0:
1847; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
1848; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1849; AVX512F-32-NEXT:    retl
1850  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1851  ret <8 x i64> %shuffle
1852}
1853
; Single-source shuffle of %a with undef lanes 6 and 7 (mask <1,0,3,2,4,5,undef,undef>).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1854define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
1855;
1856; AVX512F-LABEL: shuffle_v8i64_103245uu:
1857; AVX512F:       # BB#0:
1858; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
1859; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1860; AVX512F-NEXT:    retq
1861;
1862; AVX512F-32-LABEL: shuffle_v8i64_103245uu:
1863; AVX512F-32:       # BB#0:
1864; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
1865; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1866; AVX512F-32-NEXT:    retl
1867  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1868  ret <8 x i64> %shuffle
1869}
1870
; Single-source shuffle of %a with undef lanes 4 and 5 (mask <1,1,3,3,undef,undef,6,7>).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1871define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
1872;
1873; AVX512F-LABEL: shuffle_v8i64_1133uu67:
1874; AVX512F:       # BB#0:
1875; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
1876; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1877; AVX512F-NEXT:    retq
1878;
1879; AVX512F-32-LABEL: shuffle_v8i64_1133uu67:
1880; AVX512F-32:       # BB#0:
1881; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
1882; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1883; AVX512F-32-NEXT:    retl
1884  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1885  ret <8 x i64> %shuffle
1886}
1887
; Single-source shuffle of %a with undef lanes 1, 2, 6 and 7 (mask <0,undef,undef,3,5,4,undef,undef>).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1888define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
1889;
1890; AVX512F-LABEL: shuffle_v8i64_0uu354uu:
1891; AVX512F:       # BB#0:
1892; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
1893; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1894; AVX512F-NEXT:    retq
1895;
1896; AVX512F-32-LABEL: shuffle_v8i64_0uu354uu:
1897; AVX512F-32:       # BB#0:
1898; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
1899; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1900; AVX512F-32-NEXT:    retl
1901  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1902  ret <8 x i64> %shuffle
1903}
1904
; Single-source shuffle of %a in which only lanes 3, 6 and 7 are defined (mask <undef,undef,undef,3,undef,undef,6,6>).
; Both runs expect a register-indexed vpermq whose index constant keeps the undef positions as 'u'.
; The i386 run prints the constant as sixteen values: each index followed by a 0, each undef as 'u,u'.
1905define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
1906;
1907; AVX512F-LABEL: shuffle_v8i64_uuu3uu66:
1908; AVX512F:       # BB#0:
1909; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
1910; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1911; AVX512F-NEXT:    retq
1912;
1913; AVX512F-32-LABEL: shuffle_v8i64_uuu3uu66:
1914; AVX512F-32:       # BB#0:
1915; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
1916; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
1917; AVX512F-32-NEXT:    retl
1918  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1919  ret <8 x i64> %shuffle
1920}
1921
; Two-source shuffle: the hex digits in the name spell the mask <6,12,10,10,8,7,14,5>, where indices 8-15 select from %b.
; Both runs expect a vpermt2q that overwrites zmm1, followed by a copy of zmm1 into zmm0.
; The expected index constant is the IR mask with every index moved by 8 modulo 16 (6->14, 12->4, ...), i.e. the two sources swap roles.
1922define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
1923;
1924; AVX512F-LABEL: shuffle_v8i64_6caa87e5:
1925; AVX512F:       # BB#0:
1926; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
1927; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1928; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1929; AVX512F-NEXT:    retq
1930;
1931; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
1932; AVX512F-32:       # BB#0:
1933; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
1934; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
1935; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1936; AVX512F-32-NEXT:    retl
1937  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1938  ret <8 x i64> %shuffle
1939}
1940
; Interleaves the even lanes of %a with the even lanes of %b (mask <0,8,2,10,4,12,6,14>; indices 8-15 select from %b).
; Both runs expect a single vunpcklpd.
1941define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
1942;
1943; AVX512F-LABEL: shuffle_v8f64_082a4c6e:
1944; AVX512F:       # BB#0:
1945; AVX512F-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1946; AVX512F-NEXT:    retq
1947;
1948; AVX512F-32-LABEL: shuffle_v8f64_082a4c6e:
1949; AVX512F-32:       # BB#0:
1950; AVX512F-32-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1951; AVX512F-32-NEXT:    retl
1952  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1953  ret <8 x double> %shuffle
1954}
1955
; Keeps the even lanes of %a and fills every odd lane with zero ('z' in the name marks a zero lane).
; The second shuffle operand is zeroinitializer, so index 8 in the mask selects a zero element; %b is unused.
; Both runs expect zmm1 to be zeroed with vpxord and then combined by a single vunpcklpd.
1956define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
1957;
1958; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z:
1959; AVX512F:       # BB#0:
1960; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
1961; AVX512F-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1962; AVX512F-NEXT:    retq
1963;
1964; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z:
1965; AVX512F-32:       # BB#0:
1966; AVX512F-32-NEXT:    vpxord %zmm1, %zmm1, %zmm1
1967; AVX512F-32-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1968; AVX512F-32-NEXT:    retl
1969  %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
1970  ret <8 x double> %shuffle
1971}
1972
; Interleaves the even lanes of %a with the even lanes of %b (mask <0,8,2,10,4,12,6,14>; indices 8-15 select from %b).
; Both runs expect a single vpunpcklqdq.
1973define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
1974;
1975; AVX512F-LABEL: shuffle_v8i64_082a4c6e:
1976; AVX512F:       # BB#0:
1977; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1978; AVX512F-NEXT:    retq
1979;
1980; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e:
1981; AVX512F-32:       # BB#0:
1982; AVX512F-32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1983; AVX512F-32-NEXT:    retl
1984  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1985  ret <8 x i64> %shuffle
1986}
1987
; Fills every even lane with zero and every odd lane with an even lane of %b ('z' in the name marks a zero lane).
; The first shuffle operand is zeroinitializer, so mask indices 7, 5, 3 and 1 each select a zero element; %a is unused.
; Both runs expect zmm0 to be zeroed with vpxord and then combined by a single vpunpcklqdq.
1988define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) {
1989;
1990; AVX512F-LABEL: shuffle_v8i64_z8zazcze:
1991; AVX512F:       # BB#0:
1992; AVX512F-NEXT:    vpxord %zmm0, %zmm0, %zmm0
1993; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1994; AVX512F-NEXT:    retq
1995;
1996; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze:
1997; AVX512F-32:       # BB#0:
1998; AVX512F-32-NEXT:    vpxord %zmm0, %zmm0, %zmm0
1999; AVX512F-32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2000; AVX512F-32-NEXT:    retl
2001  %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32><i32 7, i32 8, i32 5, i32 10, i32 3, i32 12, i32 1, i32 14>
2002  ret <8 x i64> %shuffle
2003}
2004
; Interleaves the odd lanes of %a with the odd lanes of %b (mask <1,9,3,11,5,13,7,15>; indices 8-15 select from %b).
; Both runs expect a single vunpckhpd.
2005define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
2006;
2007; AVX512F-LABEL: shuffle_v8f64_193b5d7f:
2008; AVX512F:       # BB#0:
2009; AVX512F-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2010; AVX512F-NEXT:    retq
2011;
2012; AVX512F-32-LABEL: shuffle_v8f64_193b5d7f:
2013; AVX512F-32:       # BB#0:
2014; AVX512F-32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2015; AVX512F-32-NEXT:    retl
2016  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2017  ret <8 x double> %shuffle
2018}
2019
; Fills every even lane with zero and every odd lane with the matching odd lane of %b ('z' in the name marks a zero lane).
; The first shuffle operand is zeroinitializer, so index 0 in the mask selects a zero element; %a is unused.
; Both runs expect zmm0 to be zeroed with vpxord and then combined by a single vunpckhpd.
2020define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
2021;
2022; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf:
2023; AVX512F:       # BB#0:
2024; AVX512F-NEXT:    vpxord %zmm0, %zmm0, %zmm0
2025; AVX512F-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2026; AVX512F-NEXT:    retq
2027;
2028; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf:
2029; AVX512F-32:       # BB#0:
2030; AVX512F-32-NEXT:    vpxord %zmm0, %zmm0, %zmm0
2031; AVX512F-32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2032; AVX512F-32-NEXT:    retl
2033  %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
2034  ret <8 x double> %shuffle
2035}
2036
; Interleaves the odd lanes of %a with the odd lanes of %b (mask <1,9,3,11,5,13,7,15>; indices 8-15 select from %b).
; Both runs expect a single vpunpckhqdq.
2037define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
2038;
2039; AVX512F-LABEL: shuffle_v8i64_193b5d7f:
2040; AVX512F:       # BB#0:
2041; AVX512F-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2042; AVX512F-NEXT:    retq
2043;
2044; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f:
2045; AVX512F-32:       # BB#0:
2046; AVX512F-32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2047; AVX512F-32-NEXT:    retl
2048  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2049  ret <8 x i64> %shuffle
2050}
2051
; Moves the odd lanes of %a into the even result lanes and fills every odd result lane with zero ('z' in the name marks a zero lane).
; The second shuffle operand is zeroinitializer, so mask indices 8 and 15 each select a zero element; %b is unused.
; Both runs expect zmm1 to be zeroed with vpxord and then combined by a single vpunpckhqdq.
2052define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) {
2053;
2054; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z:
2055; AVX512F:       # BB#0:
2056; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
2057; AVX512F-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2058; AVX512F-NEXT:    retq
2059;
2060; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z:
2061; AVX512F-32:       # BB#0:
2062; AVX512F-32-NEXT:    vpxord %zmm1, %zmm1, %zmm1
2063; AVX512F-32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2064; AVX512F-32-NEXT:    retl
2065  %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 15, i32 5, i32 8, i32 7, i32 15>
2066  ret <8 x i64> %shuffle
2067}
2068
; Two-source shuffle that moves whole element pairs: pairs {0,1} and {4,5} of %x, then pairs {2,3} and {0,1} of %x1
; (mask <0,1,4,5,10,11,8,9>; indices 8-15 select from %x1). Both runs expect a single vshuff64x2.
2069define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
2070; AVX512F-LABEL: test_vshuff64x2_512:
2071; AVX512F:       # BB#0:
2072; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2073; AVX512F-NEXT:    retq
2074;
2075; AVX512F-32-LABEL: test_vshuff64x2_512:
2076; AVX512F-32:       # BB#0:
2077; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2078; AVX512F-32-NEXT:    retl
2079  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2080  ret <8 x double> %res
2081}
2082
; Zero-masking variant: the <0,1,4,5,10,11,8,9> shuffle of %x/%x1 is selected per lane by %mask, with zero in unselected lanes.
; Both runs expect the <8 x i1> mask to be widened and tested into k1, then a vshuff64x2 with {%k1} {z}.
; The runs differ only in the shift step: vpsllq $63 on x86_64, vpsllvq with a constant-pool operand on i386.
2083define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
2084; AVX512F-LABEL: test_vshuff64x2_512_maskz:
2085; AVX512F:       # BB#0:
2086; AVX512F-NEXT:    vpmovsxwq %xmm2, %zmm2
2087; AVX512F-NEXT:    vpsllq $63, %zmm2, %zmm2
2088; AVX512F-NEXT:    vptestmq %zmm2, %zmm2, %k1
2089; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2090; AVX512F-NEXT:    retq
2091;
2092; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
2093; AVX512F-32:       # BB#0:
2094; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
2095; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
2096; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
2097; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2098; AVX512F-32-NEXT:    retl
2099  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2100  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
2101  ret <8 x double> %res
2102}
2103
; Merge-masking integer variant: the <0,1,4,5,10,11,8,9> shuffle of %x/%x1 is selected per lane by %mask, with %x kept in unselected lanes.
; Both runs expect the <8 x i1> mask to be widened and tested into k1, then a vshufi64x2 with {%k1} writing over zmm0.
; The runs differ only in the shift step: vpsllq $63 on x86_64, vpsllvq with a constant-pool operand on i386.
2104define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
2105; AVX512F-LABEL: test_vshufi64x2_512_mask:
2106; AVX512F:       # BB#0:
2107; AVX512F-NEXT:    vpmovsxwq %xmm2, %zmm2
2108; AVX512F-NEXT:    vpsllq $63, %zmm2, %zmm2
2109; AVX512F-NEXT:    vptestmq %zmm2, %zmm2, %k1
2110; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2111; AVX512F-NEXT:    retq
2112;
2113; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
2114; AVX512F-32:       # BB#0:
2115; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
2116; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
2117; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
2118; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2119; AVX512F-32-NEXT:    retl
2120  %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2121  %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
2122  ret <8 x i64> %res
2123}
2124
; Memory-operand variant: the second shuffle source is loaded from %ptr with align 1, then shuffled with mask <0,1,4,5,10,11,8,9>.
; Both runs expect the load to appear as the 'mem' operand of a single vshuff64x2.
; The i386 run additionally expects the pointer to be read from the stack into eax first.
2125define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
2126; AVX512F-LABEL: test_vshuff64x2_512_mem:
2127; AVX512F:       # BB#0:
2128; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2129; AVX512F-NEXT:    retq
2130;
2131; AVX512F-32-LABEL: test_vshuff64x2_512_mem:
2132; AVX512F-32:       # BB#0:
2133; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2134; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2135; AVX512F-32-NEXT:    retl
2136  %x1   = load <8 x double>,<8 x double> *%ptr,align 1
2137  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2138  ret <8 x double> %res
2139}
2140
; Memory-operand, merge-masking variant: the second source is loaded from %ptr (align 1), shuffled with mask
; <0,1,4,5,10,11,8,9>, and selected per lane by %mask with %x kept in unselected lanes.
; Both runs expect the mask to be widened and tested into k1, then a vshuff64x2 with {%k1} and a 'mem' operand.
; The i386 run uses vpsllvq with a constant-pool operand for the shift and reads the pointer from the stack into eax.
2141define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
2142; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
2143; AVX512F:       # BB#0:
2144; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
2145; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
2146; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
2147; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
2148; AVX512F-NEXT:    retq
2149;
2150; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
2151; AVX512F-32:       # BB#0:
2152; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
2153; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
2154; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
2155; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2156; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
2157; AVX512F-32-NEXT:    retl
2158  %x1 = load <8 x double>,<8 x double> *%ptr,align 1
2159  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2160  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> %x
2161  ret <8 x double> %res
2162}
2163
; Memory-operand, zero-masking variant: the second source is loaded from %ptr (align 1), shuffled with mask
; <0,1,4,5,10,11,8,9>, and selected per lane by %mask with zero in unselected lanes.
; Both runs expect the mask to be widened and tested into k1, then a vshuff64x2 with {%k1} {z} and a 'mem' operand.
; The i386 run uses vpsllvq with a constant-pool operand for the shift and reads the pointer from the stack into eax.
2164define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
2165; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
2166; AVX512F:       # BB#0:
2167; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
2168; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
2169; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
2170; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
2171; AVX512F-NEXT:    retq
2172;
2173; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
2174; AVX512F-32:       # BB#0:
2175; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
2176; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
2177; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
2178; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2179; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
2180; AVX512F-32-NEXT:    retl
2181  %x1 = load <8 x double>,<8 x double> *%ptr,align 1
2182  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
2183  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
2184  ret <8 x double> %res
2185}
2186
; <16 x float> shuffle that keeps elements 0-7 of %x, then takes elements 4-7 and 0-3 of %x1 (mask indices 20-23, 16-19).
; NOTE(review): despite the vshuff32x4 in the name, both runs currently expect vshuff64x2, with the mask printed
; in 64-bit element units (zmm0[0,1,2,3],zmm1[2,3,0,1]) - confirm this is the intended instruction choice.
2187define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
2188; AVX512F-LABEL: test_vshuff32x4_512:
2189; AVX512F:       # BB#0:
2190; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2191; AVX512F-NEXT:    retq
2192;
2193; AVX512F-32-LABEL: test_vshuff32x4_512:
2194; AVX512F-32:       # BB#0:
2195; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2196; AVX512F-32-NEXT:    retl
2197  %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
2198  ret <16 x float> %res
2199}
2200
; Unary shuffle of the second argument %a1 (other operand undef; %a0 is unused): swaps element pairs {0,1} and {2,3}
; and keeps lanes 4-7 in place (mask <2,3,0,1,4,5,6,7>). Both runs expect a single vshuff64x2 reading only zmm1.
2201define <8 x double> @shuffle_v8f64_23014567(<8 x double> %a0, <8 x double> %a1) {
2202; AVX512F-LABEL: shuffle_v8f64_23014567:
2203; AVX512F:       # BB#0:
2204; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,4,5,6,7]
2205; AVX512F-NEXT:    retq
2206;
2207; AVX512F-32-LABEL: shuffle_v8f64_23014567:
2208; AVX512F-32:       # BB#0:
2209; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,4,5,6,7]
2210; AVX512F-32-NEXT:    retl
2211  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
2212  ret <8 x double> %1
2213}
2214
; Unary shuffle of %a1 with undef lanes 4 and 5 (mask <2,3,0,1,undef,undef,6,7>; %a0 is unused).
; Both runs expect a single vshuff64x2 reading only zmm1, with the undef lanes printed as elements 0,1.
2215define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1) {
2216; AVX512F-LABEL: shuffle_v8f64_2301uu67:
2217; AVX512F:       # BB#0:
2218; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,0,1,6,7]
2219; AVX512F-NEXT:    retq
2220;
2221; AVX512F-32-LABEL: shuffle_v8f64_2301uu67:
2222; AVX512F-32:       # BB#0:
2223; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,0,1,6,7]
2224; AVX512F-32-NEXT:    retl
2225  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 6, i32 7>
2226  ret <8 x double> %1
2227}
2228
; Unary shuffle of %a1 in which only lanes 0-3 are defined (mask <2,3,0,1,undef,undef,undef,undef>; %a0 is unused).
; Both runs expect a single vpermpd reading only zmm1, with the undef lanes printed as 6,7,4,5 (the lane 0-3 pattern offset by 4).
2229define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
2230; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
2231; AVX512F:       # BB#0:
2232; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2233; AVX512F-NEXT:    retq
2234;
2235; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
2236; AVX512F-32:       # BB#0:
2237; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2238; AVX512F-32-NEXT:    retl
2239  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
2240  ret <8 x double> %1
2241}
2242
; Unary shuffle of %a1 in which only lanes 4-7 are defined (mask <undef,undef,undef,undef,2,3,0,1>).
; Both runs expect a single vshuff64x2 that takes the undef lanes from zmm0 and the defined lanes from zmm1.
; NOTE(review): the name spells only three 'u's although the mask has four leading undef lanes; it looks like a
; typo for uuuu2301 but is left unchanged so the symbol and its label checks keep matching.
2243define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) {
2244; AVX512F-LABEL: shuffle_v8f64_uuu2301:
2245; AVX512F:       # BB#0:
2246; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
2247; AVX512F-NEXT:    retq
2248;
2249; AVX512F-32-LABEL: shuffle_v8f64_uuu2301:
2250; AVX512F-32:       # BB#0:
2251; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
2252; AVX512F-32-NEXT:    retl
2253  %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1>
2254  ret <8 x double> %1
2255}
2256