; NOTE: web-viewer navigation chrome ("Home", "Raw", "Download", ...) removed — it was scrape residue, not part of the test file.
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

target triple = "x86_64-unknown-unknown"

; Shuffle <8 x double> with mask 00000000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000000:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00000010 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000010:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00000200 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000200:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00003000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00003000:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00040000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00040000:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00500000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00500000:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 06000000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_06000000:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 70000000 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_70000000:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 01014545 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01014545:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00112233 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00112233:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00001111 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00001111:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 81a3c5e7 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_81a3c5e7:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 08080808 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08080808:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 08084c4c (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08084c4c:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 8823cc67 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_8823cc67:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 9832dc76 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_9832dc76:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 9810dc54 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_9810dc54:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm2
; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 08194c5d (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08194c5d:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 2a3b6e7f (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 08192a3b (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08192a3b:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 08991abb (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08991abb:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 091b2d3f (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_091b2d3f:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 09ab1def (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_09ab1def:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00014445 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00014445:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00204464 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204464:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 03004744 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10005444 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10005444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 22006644 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 33307774 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307774:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 32107654 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32107654:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00234467 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00234467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00224466 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00224466:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10325476 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10325476:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 11335577 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11335577:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10235467 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10235467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10225466 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10225466:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00015444 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00015444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00204644 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 03004474 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10004444 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10004444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 22006446 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006446:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 33307474 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 32104567 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32104567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00236744 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00236744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00226644 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00226644:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 10324567 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10324567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 11334567 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11334567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 01235467 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235467:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 01235466 (per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235466:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 002u6u44 (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_002u6u44:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 00uu66uu (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00uu66uu:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 103245uu (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_103245uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 1133uu67 (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_1133uu67:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask 0uu354uu (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_0uu354uu:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x double> %shuffle
}

; Shuffle <8 x double> with mask uuu3uu66 (u = undef; per-lane source index; 0-7 = %a, 8-f = %b).
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x double> %shuffle
}

; Two-source v8f64 shuffles with elements drawn from both %a and %b in an
; irregular order. The CHECK lines show the expected multi-step lowering via
; vperm2f128 / vbroadcastsd / vblendpd on 256-bit halves.
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_c348cda0:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vbroadcastsd %xmm1, %ymm4
; ALL-NEXT:    vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
  ret <8 x double> %shuffle
}

define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_f511235a:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
; ALL-NEXT:    vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; ALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
  ret <8 x double> %shuffle
}
721
; Single-source v8i64 shuffles dominated by element 0 (broadcast-like masks).
; Per the CHECK lines these lower to vpbroadcastq (zmm or ymm) optionally
; combined with vpermq / vpblendd and vinserti64x4.
define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000000:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastq %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000010:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00000200:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00003000:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00040000:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00500000:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_06000000:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_70000000:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01014545:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00112233:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00001111:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i64> %shuffle
}
849
; Two-source v8i64 shuffles mixing elements of %a and %b within each 128/256-bit
; lane. The CHECK lines show lowering through vpblendd / vpbroadcastq /
; vpshufd on 256-bit halves, recombined with vinserti64x4.
define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_81a3c5e7:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08080808:
; ALL:       # BB#0:
; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08084c4c:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm2, %ymm2
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_8823cc67:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_9832dc76:
; ALL:       # BB#0:
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_9810dc54:
; ALL:       # BB#0:
; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm2
; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x i64> %shuffle
}
936
; Two-source v8i64 interleave-style shuffles (unpack / zip patterns). The CHECK
; lines show lowering via per-half vpermq permutes followed by vpblendd, then
; vinserti64x4 to recombine.
define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08194c5d:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08192a3b:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08991abb:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_091b2d3f:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_09ab1def:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x i64> %shuffle
}
1025
; Single-source v8i64 shuffles where both 256-bit halves use the SAME 4-element
; permutation. Per the CHECK lines, each half is permuted (vpermq/vpshufd) and
; the halves are recombined with vinserti64x4.
define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00014445:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204464:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10005444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307774:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32107654:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00234467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00224466:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10325476:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11335577:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10235467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10225466:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i64> %shuffle
}
1181
; Single-source v8i64 shuffles where the two 256-bit halves use DIFFERENT
; 4-element permutations. Per the CHECK lines, each half gets its own
; vpermq/vpshufd/vpbroadcastq before recombination with vinserti64x4.
define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00015444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10004444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006446:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32104567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00236744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00226644:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10324567:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11334567:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235467:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i64> %shuffle
}

define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235466:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i64> %shuffle
}
1332
1333define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
1334; ALL-LABEL: shuffle_v8i64_002u6u44:
1335; ALL:       # BB#0:
1336; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
1337; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1338; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
1339; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1340; ALL-NEXT:    retq
1341  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1342  ret <8 x i64> %shuffle
1343}
1344
1345define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
1346; ALL-LABEL: shuffle_v8i64_00uu66uu:
1347; ALL:       # BB#0:
1348; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
1349; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1350; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
1351; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1352; ALL-NEXT:    retq
1353  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1354  ret <8 x i64> %shuffle
1355}
1356
; Mask <1,0,3,2,4,5,u,u>: qword pairs in the low half are swapped (done with a
; single in-lane vpshufd); lanes 4-5 keep elements 4,5, so the extracted high
; half is reinserted without a permute.
1357define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
1358; ALL-LABEL: shuffle_v8i64_103245uu:
1359; ALL:       # BB#0:
1360; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
1361; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1362; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1363; ALL-NEXT:    retq
1364  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1365  ret <8 x i64> %shuffle
1366}
1367
; Mask <1,1,3,3,u,u,6,7>: odd qwords of the low half are duplicated via an
; in-lane vpshufd; lanes 6-7 keep elements 6,7, so the extracted high half is
; reinserted unchanged.
1368define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
1369; ALL-LABEL: shuffle_v8i64_1133uu67:
1370; ALL:       # BB#0:
1371; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
1372; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1373; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
1374; ALL-NEXT:    retq
1375  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1376  ret <8 x i64> %shuffle
1377}
1378
; Mask <0,u,u,3,5,4,u,u>: the low half passes through; elements 5,4 of the
; high half are swapped with an in-lane vpshufd on the extracted ymm before
; reinsertion.
1379define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
1380; ALL-LABEL: shuffle_v8i64_0uu354uu:
1381; ALL:       # BB#0:
1382; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1383; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
1384; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1385; ALL-NEXT:    retq
1386  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1387  ret <8 x i64> %shuffle
1388}
1389
; Mask <u,u,u,3,u,u,6,6>: only lanes 3, 6, 7 are defined. Lane 3 passes
; through; element 6 is duplicated into lanes 6-7 by vpshufd on the extracted
; high half.
1390define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
1391; ALL-LABEL: shuffle_v8i64_uuu3uu66:
1392; ALL:       # BB#0:
1393; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1394; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
1395; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1396; ALL-NEXT:    retq
1397  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1398  ret <8 x i64> %shuffle
1399}
1400
; Mask <6,12,10,10,8,7,14,5>: a two-input, cross-128-bit-lane shuffle mixing
; %a (indices 5-7) and %b (indices 8,10,12,14). No single shuffle instruction
; matches, so the expected lowering is an extract/vperm2i128/vpshufd sequence
; stitched together with vpblendd and a final 256-bit insert.
1401define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
1402; ALL-LABEL: shuffle_v8i64_6caa87e5:
1403; ALL:       # BB#0:
1404; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1405; ALL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1406; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
1407; ALL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
1408; ALL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2,3],ymm3[4,5],ymm0[6,7]
1409; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
1410; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
1411; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
1412; ALL-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
1413; ALL-NEXT:    retq
1414  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1415  ret <8 x i64> %shuffle
1416}
1417
; Mask <0,8,2,10,4,12,6,14>: interleave the even elements of %a and %b.
; This is exactly the unpack-low pattern, so it must lower to a single
; 512-bit vunpcklpd.
1418define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
1419; ALL-LABEL: shuffle_v8f64_082a4c6e:
1420; ALL:       # BB#0:
1421; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1422; ALL-NEXT:    retq
1423  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1424  ret <8 x double> %shuffle
1425}
1426
; Integer twin of shuffle_v8f64_082a4c6e: same even-element interleave mask
; <0,8,2,10,4,12,6,14>, expected to lower to a single 512-bit vpunpcklqdq.
1427define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
1428; ALL-LABEL: shuffle_v8i64_082a4c6e:
1429; ALL:       # BB#0:
1430; ALL-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1431; ALL-NEXT:    retq
1432  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1433  ret <8 x i64> %shuffle
1434}
1435
; Mask <1,9,3,11,5,13,7,15>: interleave the odd elements of %a and %b.
; This is the unpack-high pattern, so it must lower to a single 512-bit
; vunpckhpd.
1436define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
1437; ALL-LABEL: shuffle_v8f64_193b5d7f:
1438; ALL:       # BB#0:
1439; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1440; ALL-NEXT:    retq
1441  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1442  ret <8 x double> %shuffle
1443}
1444
; Integer twin of shuffle_v8f64_193b5d7f: same odd-element interleave mask
; <1,9,3,11,5,13,7,15>, expected to lower to a single 512-bit vpunpckhqdq.
1445define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
1446; ALL-LABEL: shuffle_v8i64_193b5d7f:
1447; ALL:       # BB#0:
1448; ALL-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1449; ALL-NEXT:    retq
1450  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1451  ret <8 x i64> %shuffle
1452}
1453