• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
3; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4
5target triple = "x86_64-unknown-unknown"
6
; Mask <0,0,0,0,0,0,0,0>: every result lane takes element 0 of %a; %b is unused.
; The +avx run expects a 128-bit vpermilps splat widened with vinsertf128,
; while the +avx2 run expects a single vbroadcastss.
7define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
8; AVX1-LABEL: shuffle_v8f32_00000000:
9; AVX1:       # BB#0:
10; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
11; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
12; AVX1-NEXT:    retq
13;
14; AVX2-LABEL: shuffle_v8f32_00000000:
15; AVX2:       # BB#0:
16; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
17; AVX2-NEXT:    retq
18  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19  ret <8 x float> %shuffle
20}
21
; Element 0 of %a in every result lane except lane 6, which takes element 1.
; All sources sit in the low 128 bits of %a: the +avx run expects two xmm
; vpermilps results joined by vinsertf128, the +avx2 run a constant index
; vector fed to vpermps.
22define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
23; AVX1-LABEL: shuffle_v8f32_00000010:
24; AVX1:       # BB#0:
25; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
26; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
27; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
28; AVX1-NEXT:    retq
29;
30; AVX2-LABEL: shuffle_v8f32_00000010:
31; AVX2:       # BB#0:
32; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
33; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
34; AVX2-NEXT:    retq
35  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
36  ret <8 x float> %shuffle
37}
38
; Element 0 of %a in every result lane except lane 5, which takes element 2.
; Sources are all in the low 128 bits of %a: the +avx run expects two xmm
; vpermilps results joined by vinsertf128, the +avx2 run a vpermps with a
; constant index vector.
39define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
40; AVX1-LABEL: shuffle_v8f32_00000200:
41; AVX1:       # BB#0:
42; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
43; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
44; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
45; AVX1-NEXT:    retq
46;
47; AVX2-LABEL: shuffle_v8f32_00000200:
48; AVX2:       # BB#0:
49; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
50; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
51; AVX2-NEXT:    retq
52  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
53  ret <8 x float> %shuffle
54}
55
; Element 0 of %a in every result lane except lane 4, which takes element 3.
; Sources are all in the low 128 bits of %a: the +avx run expects two xmm
; vpermilps results joined by vinsertf128, the +avx2 run a vpermps with a
; constant index vector.
56define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
57; AVX1-LABEL: shuffle_v8f32_00003000:
58; AVX1:       # BB#0:
59; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
60; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
61; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
62; AVX1-NEXT:    retq
63;
64; AVX2-LABEL: shuffle_v8f32_00003000:
65; AVX2:       # BB#0:
66; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
67; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
68; AVX2-NEXT:    retq
69  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
70  ret <8 x float> %shuffle
71}
72
; Element 0 of %a in every result lane except lane 3, which takes element 4,
; i.e. a value from the upper 128 bits of %a lands in the lower result half.
; The +avx run expects a vperm2f128 half swap combined with vpermilps and
; vblendps; the +avx2 run expects one vpermps with a constant index vector.
73define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
74; AVX1-LABEL: shuffle_v8f32_00040000:
75; AVX1:       # BB#0:
76; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
77; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
78; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
79; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
80; AVX1-NEXT:    retq
81;
82; AVX2-LABEL: shuffle_v8f32_00040000:
83; AVX2:       # BB#0:
84; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
85; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
86; AVX2-NEXT:    retq
87  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
88  ret <8 x float> %shuffle
89}
90
; Element 0 of %a in every result lane except lane 2, which takes element 5
; (a value from the upper 128 bits of %a placed in the lower result half).
; The +avx run expects vperm2f128, vblendps and vpermilps; the +avx2 run
; expects one vpermps with a constant index vector.
91define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
92; AVX1-LABEL: shuffle_v8f32_00500000:
93; AVX1:       # BB#0:
94; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
95; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
96; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
97; AVX1-NEXT:    retq
98;
99; AVX2-LABEL: shuffle_v8f32_00500000:
100; AVX2:       # BB#0:
101; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
102; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
103; AVX2-NEXT:    retq
104  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
105  ret <8 x float> %shuffle
106}
107
; Element 0 of %a in every result lane except lane 1, which takes element 6
; (a value from the upper 128 bits of %a placed in the lower result half).
; The +avx run expects vperm2f128, vblendpd and vpermilps; the +avx2 run
; expects one vpermps with a constant index vector.
108define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
109; AVX1-LABEL: shuffle_v8f32_06000000:
110; AVX1:       # BB#0:
111; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
112; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
113; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
114; AVX1-NEXT:    retq
115;
116; AVX2-LABEL: shuffle_v8f32_06000000:
117; AVX2:       # BB#0:
118; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
119; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
120; AVX2-NEXT:    retq
121  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
122  ret <8 x float> %shuffle
123}
124
; Element 7 of %a in result lane 0 and element 0 of %a in every other lane.
; The +avx run expects vperm2f128, vblendpd and vpermilps. The +avx2 run
; expects the vpermps index vector to be built in registers (movl $7 then
; vmovd) instead of being loaded as a constant.
125define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
126; AVX1-LABEL: shuffle_v8f32_70000000:
127; AVX1:       # BB#0:
128; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
129; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
130; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
131; AVX1-NEXT:    retq
132;
133; AVX2-LABEL: shuffle_v8f32_70000000:
134; AVX2:       # BB#0:
135; AVX2-NEXT:    movl $7, %eax
136; AVX2-NEXT:    vmovd %eax, %xmm1
137; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
138; AVX2-NEXT:    retq
139  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
140  ret <8 x float> %shuffle
141}
142
; Repeats the element pair 0,1 of %a across the low result half and the pair
; 4,5 across the high half. Both runs expect a single vmovddup.
143define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
144; ALL-LABEL: shuffle_v8f32_01014545:
145; ALL:       # BB#0:
146; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
147; ALL-NEXT:    retq
148  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
149  ret <8 x float> %shuffle
150}
151
; Duplicates each of elements 0..3 of %a into adjacent result lanes, so the
; high result half is filled from the low 128 bits of %a. The +avx run expects
; two xmm vpermilps results joined by vinsertf128; the +avx2 run expects one
; vpermps with a constant index vector.
152define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
153; AVX1-LABEL: shuffle_v8f32_00112233:
154; AVX1:       # BB#0:
155; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
156; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
157; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
158; AVX1-NEXT:    retq
159;
160; AVX2-LABEL: shuffle_v8f32_00112233:
161; AVX2:       # BB#0:
162; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
163; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
164; AVX2-NEXT:    retq
165  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
166  ret <8 x float> %shuffle
167}
168
; Low result half is a splat of element 0 of %a, high half a splat of
; element 1. The +avx run expects two xmm vpermilps splats joined by
; vinsertf128; the +avx2 run expects one vpermps with a constant index vector.
169define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
170; AVX1-LABEL: shuffle_v8f32_00001111:
171; AVX1:       # BB#0:
172; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
173; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
174; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
175; AVX1-NEXT:    retq
176;
177; AVX2-LABEL: shuffle_v8f32_00001111:
178; AVX2:       # BB#0:
179; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
180; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
181; AVX2-NEXT:    retq
182  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
183  ret <8 x float> %shuffle
184}
185
; Even result lanes come from %b (mask indices 8,10,12,14) and odd lanes from
; %a, each element staying in its own lane position. Both runs expect a single
; vblendps.
186define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
187; ALL-LABEL: shuffle_v8f32_81a3c5e7:
188; ALL:       # BB#0:
189; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
190; ALL-NEXT:    retq
191  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
192  ret <8 x float> %shuffle
193}
194
; Alternates element 0 of %a with element 0 of %b (mask index 8) across all
; eight lanes. The +avx run expects vshufps + vpermilps on xmm registers
; followed by vinsertf128; the +avx2 run expects vinsertps to form the pair
; and vbroadcastsd to replicate it.
195define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
196; AVX1-LABEL: shuffle_v8f32_08080808:
197; AVX1:       # BB#0:
198; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
199; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
200; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
201; AVX1-NEXT:    retq
202;
203; AVX2-LABEL: shuffle_v8f32_08080808:
204; AVX2:       # BB#0:
205; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
206; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
207; AVX2-NEXT:    retq
208  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
209  ret <8 x float> %shuffle
210}
211
; Within each 128-bit half, alternates the half's first element of %a with the
; half's first element of %b (a0,b0,a0,b0 | a4,b4,a4,b4). Both runs expect a
; vshufps followed by a vpermilps.
212define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
213; ALL-LABEL: shuffle_v8f32_08084c4c:
214; ALL:       # BB#0:
215; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
216; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
217; ALL-NEXT:    retq
218  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
219  ret <8 x float> %shuffle
220}
221
; Per 128-bit half: two copies of the half's first %b element, then the last
; two %a elements of that half in place (b0,b0,a2,a3 | b4,b4,a6,a7). Both runs
; expect a single vshufps.
222define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
223; ALL-LABEL: shuffle_v8f32_8823cc67:
224; ALL:       # BB#0:
225; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
226; ALL-NEXT:    retq
227  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
228  ret <8 x float> %shuffle
229}
230
; Per 128-bit half: the first two %b elements swapped, then the last two %a
; elements swapped (b1,b0,a3,a2 | b5,b4,a7,a6). Both runs expect a single
; vshufps.
231define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
232; ALL-LABEL: shuffle_v8f32_9832dc76:
233; ALL:       # BB#0:
234; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
235; ALL-NEXT:    retq
236  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
237  ret <8 x float> %shuffle
238}
239
; Per 128-bit half: the first two %b elements swapped, then the first two %a
; elements swapped (b1,b0,a1,a0 | b5,b4,a5,a4). Both runs expect a single
; vshufps.
240define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
241; ALL-LABEL: shuffle_v8f32_9810dc54:
242; ALL:       # BB#0:
243; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
244; ALL-NEXT:    retq
245  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
246  ret <8 x float> %shuffle
247}
248
; Interleaves the first two elements of each 128-bit half of %a with the
; matching elements of %b (a0,b0,a1,b1 | a4,b4,a5,b5). Both runs expect a
; single vunpcklps.
249define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
250; ALL-LABEL: shuffle_v8f32_08194c5d:
251; ALL:       # BB#0:
252; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
253; ALL-NEXT:    retq
254  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
255  ret <8 x float> %shuffle
256}
257
; Interleaves the last two elements of each 128-bit half of %a with the
; matching elements of %b (a2,b2,a3,b3 | a6,b6,a7,b7). Both runs expect a
; single vunpckhps.
258define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
259; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
260; ALL:       # BB#0:
261; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
262; ALL-NEXT:    retq
263  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
264  ret <8 x float> %shuffle
265}
266
; Interleaves elements 0..3 of %a with elements 0..3 of %b across the whole
; 256-bit result (a0,b0,a1,b1,a2,b2,a3,b3), so the high result half is built
; from the low 128 bits of both inputs. The +avx run expects xmm
; vunpckhps/vunpcklps joined by vinsertf128; the +avx2 run expects one vpermps
; per input followed by vblendps.
267define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
268; AVX1-LABEL: shuffle_v8f32_08192a3b:
269; AVX1:       # BB#0:
270; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
271; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
272; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
273; AVX1-NEXT:    retq
274;
275; AVX2-LABEL: shuffle_v8f32_08192a3b:
276; AVX2:       # BB#0:
277; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
278; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
279; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
280; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
281; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
282; AVX2-NEXT:    retq
283  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
284  ret <8 x float> %shuffle
285}
286
; Result is a0,b0,b1,b1,a1,b2,b3,b3: lanes 0 and 4 come from %a, all other
; lanes from the low four elements of %b. The +avx run expects a sequence of
; xmm vshufps/vblendpd/vpermilps joined by vinsertf128; the +avx2 run expects
; one vpermps per input followed by vblendps.
287define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
288; AVX1-LABEL: shuffle_v8f32_08991abb:
289; AVX1:       # BB#0:
290; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
291; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
292; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
293; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
294; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
295; AVX1-NEXT:    retq
296;
297; AVX2-LABEL: shuffle_v8f32_08991abb:
298; AVX2:       # BB#0:
299; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
300; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
301; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
302; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
303; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
304; AVX2-NEXT:    retq
305  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
306  ret <8 x float> %shuffle
307}
308
; Result is a0,b1,a1,b3,a2,b5,a3,b7: odd lanes keep the %b element already in
; that lane position, even lanes receive elements 0..3 of %a spread out.
; Both runs end with the same vblendps; the +avx run first rearranges %a with
; two xmm vpermilps and vinsertf128, the +avx2 run with a single vpermps.
309define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
310; AVX1-LABEL: shuffle_v8f32_091b2d3f:
311; AVX1:       # BB#0:
312; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
313; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
314; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
315; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
316; AVX1-NEXT:    retq
317;
318; AVX2-LABEL: shuffle_v8f32_091b2d3f:
319; AVX2:       # BB#0:
320; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
321; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
322; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
323; AVX2-NEXT:    retq
324  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
325  ret <8 x float> %shuffle
326}
327
; Result is a0,b1,b2,b3,a1,b5,b6,b7: lanes 0 and 4 take elements 0 and 1 of
; %a, every other lane keeps the %b element already in that position.
; Both runs end with the same vblendps; the +avx run first places a1 with an
; xmm vmovshdup and vinsertf128, the +avx2 run with a single vpermps.
328define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
329; AVX1-LABEL: shuffle_v8f32_09ab1def:
330; AVX1:       # BB#0:
331; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
332; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
333; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
334; AVX1-NEXT:    retq
335;
336; AVX2-LABEL: shuffle_v8f32_09ab1def:
337; AVX2:       # BB#0:
338; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
339; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
340; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
341; AVX2-NEXT:    retq
342  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
343  ret <8 x float> %shuffle
344}
345
; Single-input shuffle of %a applying the pattern 0,0,0,1 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
346define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
347; ALL-LABEL: shuffle_v8f32_00014445:
348; ALL:       # BB#0:
349; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
350; ALL-NEXT:    retq
351  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
352  ret <8 x float> %shuffle
353}
354
; Single-input shuffle of %a applying the pattern 0,0,2,0 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
355define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
356; ALL-LABEL: shuffle_v8f32_00204464:
357; ALL:       # BB#0:
358; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
359; ALL-NEXT:    retq
360  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
361  ret <8 x float> %shuffle
362}
363
; Single-input shuffle of %a applying the pattern 0,3,0,0 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
364define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
365; ALL-LABEL: shuffle_v8f32_03004744:
366; ALL:       # BB#0:
367; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
368; ALL-NEXT:    retq
369  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
370  ret <8 x float> %shuffle
371}
372
; Single-input shuffle of %a applying the pattern 1,0,0,0 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
373define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
374; ALL-LABEL: shuffle_v8f32_10005444:
375; ALL:       # BB#0:
376; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
377; ALL-NEXT:    retq
378  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
379  ret <8 x float> %shuffle
380}
381
; Single-input shuffle of %a applying the pattern 2,2,0,0 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
382define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
383; ALL-LABEL: shuffle_v8f32_22006644:
384; ALL:       # BB#0:
385; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
386; ALL-NEXT:    retq
387  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
388  ret <8 x float> %shuffle
389}
390
; Single-input shuffle of %a applying the pattern 3,3,3,0 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
391define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
392; ALL-LABEL: shuffle_v8f32_33307774:
393; ALL:       # BB#0:
394; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
395; ALL-NEXT:    retq
396  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
397  ret <8 x float> %shuffle
398}
399
; Reverses the four elements of %a inside each 128-bit half without moving
; data between halves. Both runs expect a single vpermilps.
400define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
401; ALL-LABEL: shuffle_v8f32_32107654:
402; ALL:       # BB#0:
403; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
404; ALL-NEXT:    retq
405  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
406  ret <8 x float> %shuffle
407}
408
; Single-input shuffle of %a applying the pattern 0,0,2,3 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
409define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
410; ALL-LABEL: shuffle_v8f32_00234467:
411; ALL:       # BB#0:
412; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
413; ALL-NEXT:    retq
414  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
415  ret <8 x float> %shuffle
416}
417
; Duplicates each even-indexed element of %a into the following odd lane
; (0,0,2,2,4,4,6,6). Both runs expect a single vmovsldup.
418define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
419; ALL-LABEL: shuffle_v8f32_00224466:
420; ALL:       # BB#0:
421; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
422; ALL-NEXT:    retq
423  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
424  ret <8 x float> %shuffle
425}
426
; Swaps each adjacent pair of elements of %a (1,0,3,2,5,4,7,6). Both runs
; expect a single vpermilps.
427define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
428; ALL-LABEL: shuffle_v8f32_10325476:
429; ALL:       # BB#0:
430; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
431; ALL-NEXT:    retq
432  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
433  ret <8 x float> %shuffle
434}
435
; Duplicates each odd-indexed element of %a into the preceding even lane
; (1,1,3,3,5,5,7,7). Both runs expect a single vmovshdup.
436define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
437; ALL-LABEL: shuffle_v8f32_11335577:
438; ALL:       # BB#0:
439; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
440; ALL-NEXT:    retq
441  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
442  ret <8 x float> %shuffle
443}
444
; Swaps the first two elements of each 128-bit half of %a and leaves the last
; two in place (1,0,2,3 | 5,4,6,7). Both runs expect a single vpermilps.
445define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
446; ALL-LABEL: shuffle_v8f32_10235467:
447; ALL:       # BB#0:
448; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
449; ALL-NEXT:    retq
450  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
451  ret <8 x float> %shuffle
452}
453
; Single-input shuffle of %a applying the pattern 1,0,2,2 inside each 128-bit
; half (the high half uses the same pattern offset by 4). Both runs expect a
; single vpermilps.
454define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
455; ALL-LABEL: shuffle_v8f32_10225466:
456; ALL:       # BB#0:
457; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
458; ALL-NEXT:    retq
459  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
460  ret <8 x float> %shuffle
461}
462
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 0,0,0,1 in the low half and 1,0,0,0 (as 5,4,4,4)
; in the high half. Both runs still expect a single vpermilps.
463define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
464; ALL-LABEL: shuffle_v8f32_00015444:
465; ALL:       # BB#0:
466; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
467; ALL-NEXT:    retq
468  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
469  ret <8 x float> %shuffle
470}
471
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 0,0,2,0 in the low half and 0,2,0,0 (as 4,6,4,4)
; in the high half. Both runs still expect a single vpermilps.
472define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
473; ALL-LABEL: shuffle_v8f32_00204644:
474; ALL:       # BB#0:
475; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
476; ALL-NEXT:    retq
477  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
478  ret <8 x float> %shuffle
479}
480
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 0,3,0,0 in the low half and 0,0,3,0 (as 4,4,7,4)
; in the high half. Both runs still expect a single vpermilps.
481define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
482; ALL-LABEL: shuffle_v8f32_03004474:
483; ALL:       # BB#0:
484; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
485; ALL-NEXT:    retq
486  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
487  ret <8 x float> %shuffle
488}
489
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 1,0,0,0 in the low half and a splat of element 4
; in the high half. Both runs still expect a single vpermilps.
490define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
491; ALL-LABEL: shuffle_v8f32_10004444:
492; ALL:       # BB#0:
493; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
494; ALL-NEXT:    retq
495  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
496  ret <8 x float> %shuffle
497}
498
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 2,2,0,0 in the low half and 2,0,0,2 (as 6,4,4,6)
; in the high half. Both runs still expect a single vpermilps.
499define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
500; ALL-LABEL: shuffle_v8f32_22006446:
501; ALL:       # BB#0:
502; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
503; ALL-NEXT:    retq
504  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
505  ret <8 x float> %shuffle
506}
507
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 3,3,3,0 in the low half and 3,0,3,0 (as 7,4,7,4)
; in the high half. Both runs still expect a single vpermilps.
508define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
509; ALL-LABEL: shuffle_v8f32_33307474:
510; ALL:       # BB#0:
511; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
512; ALL-NEXT:    retq
513  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
514  ret <8 x float> %shuffle
515}
516
; Reverses the low four elements of %a and leaves the high four in place.
; The two halves use different in-half patterns; both runs still expect a
; single vpermilps.
517define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
518; ALL-LABEL: shuffle_v8f32_32104567:
519; ALL:       # BB#0:
520; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
521; ALL-NEXT:    retq
522  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
523  ret <8 x float> %shuffle
524}
525
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 0,0,2,3 in the low half and 2,3,0,0 (as 6,7,4,4)
; in the high half. Both runs still expect a single vpermilps.
526define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
527; ALL-LABEL: shuffle_v8f32_00236744:
528; ALL:       # BB#0:
529; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
530; ALL-NEXT:    retq
531  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
532  ret <8 x float> %shuffle
533}
534
; Single-input shuffle of %a that stays inside each 128-bit half but uses a
; different pattern per half: 0,0,2,2 in the low half and 2,2,0,0 (as 6,6,4,4)
; in the high half. Both runs still expect a single vpermilps.
535define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
536; ALL-LABEL: shuffle_v8f32_00226644:
537; ALL:       # BB#0:
538; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
539; ALL-NEXT:    retq
540  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
541  ret <8 x float> %shuffle
542}
543
; Swaps adjacent element pairs in the low 128 bits of %a and leaves the high
; four elements in place. Both runs expect a single vpermilps.
544define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
545; ALL-LABEL: shuffle_v8f32_10324567:
546; ALL:       # BB#0:
547; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
548; ALL-NEXT:    retq
549  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
550  ret <8 x float> %shuffle
551}
552
; Duplicates the odd elements in the low 128 bits of %a (1,1,3,3) and leaves
; the high four elements in place. Both runs expect a single vpermilps.
553define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
554; ALL-LABEL: shuffle_v8f32_11334567:
555; ALL:       # BB#0:
556; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
557; ALL-NEXT:    retq
558  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
559  ret <8 x float> %shuffle
560}
561
; Leaves the low four elements of %a in place and swaps only elements 4 and 5
; in the high half. Both runs expect a single vpermilps.
562define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
563; ALL-LABEL: shuffle_v8f32_01235467:
564; ALL:       # BB#0:
565; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
566; ALL-NEXT:    retq
567  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
568  ret <8 x float> %shuffle
569}
570
; Leaves the low four elements of %a in place and applies the in-half pattern
; 1,0,2,2 (as 5,4,6,6) to the high half. Both runs expect a single vpermilps.
571define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
572; ALL-LABEL: shuffle_v8f32_01235466:
573; ALL:       # BB#0:
574; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
575; ALL-NEXT:    retq
576  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
577  ret <8 x float> %shuffle
578}
579
; In-half single-input shuffle of %a with undef mask entries in lanes 3 and 5.
; Both runs expect a single vpermilps whose printed operand shows the undef
; lanes as 'u'.
580define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
581; ALL-LABEL: shuffle_v8f32_002u6u44:
582; ALL:       # BB#0:
583; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
584; ALL-NEXT:    retq
585  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
586  ret <8 x float> %shuffle
587}
588
; In-half single-input shuffle of %a where the last two lanes of each 128-bit
; half are undef. Both runs expect a single vpermilps whose printed operand
; shows the undef lanes as 'u'.
589define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
590; ALL-LABEL: shuffle_v8f32_00uu66uu:
591; ALL:       # BB#0:
592; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
593; ALL-NEXT:    retq
594  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
595  ret <8 x float> %shuffle
596}
597
; Swaps adjacent pairs in the low half of %a, keeps elements 4 and 5 in place
; and leaves lanes 6 and 7 undef. Both runs expect a single vpermilps whose
; printed operand shows the undef lanes as 'u'.
598define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
599; ALL-LABEL: shuffle_v8f32_103245uu:
600; ALL:       # BB#0:
601; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
602; ALL-NEXT:    retq
603  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
604  ret <8 x float> %shuffle
605}
606
; Duplicates the odd elements of the low half of %a, leaves lanes 4 and 5
; undef and keeps elements 6 and 7 in place. Both runs expect a single
; vpermilps whose printed operand shows the undef lanes as 'u'.
607define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
608; ALL-LABEL: shuffle_v8f32_1133uu67:
609; ALL:       # BB#0:
610; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
611; ALL-NEXT:    retq
612  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
613  ret <8 x float> %shuffle
614}
615
; In-half single-input shuffle of %a with four undef lanes (1, 2, 6 and 7);
; the defined lanes are 0, 3 in the low half and 5, 4 in the high half.
; Both runs expect a single vpermilps whose printed operand shows the undef
; lanes as 'u'.
616define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
617; ALL-LABEL: shuffle_v8f32_0uu354uu:
618; ALL:       # BB#0:
619; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
620; ALL-NEXT:    retq
621  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
622  ret <8 x float> %shuffle
623}
624
; Mostly-undef single-input shuffle of %a: only lane 3 (element 3) and lanes
; 6 and 7 (both element 6) are defined. Both runs expect a single vpermilps
; whose printed operand shows the undef lanes as 'u'.
625define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
626; ALL-LABEL: shuffle_v8f32_uuu3uu66:
627; ALL:       # BB#0:
628; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
629; ALL-NEXT:    retq
630  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
631  ret <8 x float> %shuffle
632}
633
; Irregular two-input shuffle producing b4,a3,a4,b0,b4,b5,b2,a0, which moves
; elements of both %a and %b across the 128-bit halves. Both runs end with the
; same vblendps that takes lanes 1, 2 and 7 from the rearranged %a and the
; remaining lanes from the rearranged %b. The +avx run rearranges the inputs
; with vperm2f128/vshufps/vpermilps/vblendpd, the +avx2 run with a
; vpermilps + vpermpd pair per input.
634define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
635; AVX1-LABEL: shuffle_v8f32_c348cda0:
636; AVX1:       # BB#0:
637; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
638; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
639; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
640; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
641; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
642; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
643; AVX1-NEXT:    retq
644;
645; AVX2-LABEL: shuffle_v8f32_c348cda0:
646; AVX2:       # BB#0:
647; AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
648; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
649; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
650; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,1]
651; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
652; AVX2-NEXT:    retq
653  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
654  ret <8 x float> %shuffle
655}
656
; Irregular two-input shuffle producing b7,a5,a1,a1,a2,a3,a5,b2: lanes 0 and 7
; come from %b, the six middle lanes from %a, with data crossing the 128-bit
; halves. Both runs end with the same vblendps taking lanes 0 and 7 from the
; rearranged %b. The +avx2 run rearranges %a with a single vpermps, whereas
; the +avx run needs vperm2f128, vpermilpd, vpermilps and vblendpd for it.
657define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
658; AVX1-LABEL: shuffle_v8f32_f511235a:
659; AVX1:       # BB#0:
660; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
661; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
662; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
663; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
664; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
665; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
666; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
667; AVX1-NEXT:    retq
668;
669; AVX2-LABEL: shuffle_v8f32_f511235a:
670; AVX2:       # BB#0:
671; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
672; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
673; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
674; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
675; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
676; AVX2-NEXT:    retq
677  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
678  ret <8 x float> %shuffle
679}
680
; Writes the reversed low four elements of %a (3,2,1,0) into both 128-bit
; halves of the result. Both runs start with an xmm vpermilps reversal; the
; +avx run then duplicates it with vinsertf128, the +avx2 run with vpermpd.
681define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
682; AVX1-LABEL: shuffle_v8f32_32103210:
683; AVX1:       # BB#0:
684; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
685; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
686; AVX1-NEXT:    retq
687;
688; AVX2-LABEL: shuffle_v8f32_32103210:
689; AVX2:       # BB#0:
690; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
691; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
692; AVX2-NEXT:    retq
693  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
694  ret <8 x float> %shuffle
695}
696
; Writes the reversed high four elements of %a (7,6,5,4) into both 128-bit
; halves of the result. Both runs expect an in-half vpermilps reversal
; followed by a vperm2f128 that replicates the high half.
697define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
698; ALL-LABEL: shuffle_v8f32_76547654:
699; ALL:       # BB#0:
700; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
701; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
702; ALL-NEXT:    retq
703  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
704  ret <8 x float> %shuffle
705}
706
; Full reversal of the eight elements of %a. Both runs expect an in-half
; vpermilps reversal followed by a vperm2f128 that swaps the two 128-bit
; halves.
707define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
708; ALL-LABEL: shuffle_v8f32_76543210:
709; ALL:       # BB#0:
710; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
711; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
712; ALL-NEXT:    retq
713  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
714  ret <8 x float> %shuffle
715}
716
; Low half of the result: low four elements of %a reversed (3,2,1,0).
; High half: low four elements of %b reversed (mask indices 11,10,9,8).
717define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
718; ALL-LABEL: shuffle_v8f32_3210ba98:
719; ALL:       # BB#0:
720; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
721; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
722; ALL-NEXT:    retq
723  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
724  ret <8 x float> %shuffle
725}
726
; Low half of the result: low four elements of %a reversed (3,2,1,0).
; High half: high four elements of %b reversed (mask indices 15,14,13,12).
727define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
728; ALL-LABEL: shuffle_v8f32_3210fedc:
729; ALL:       # BB#0:
730; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
731; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
732; ALL-NEXT:    retq
733  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
734  ret <8 x float> %shuffle
735}
736
; Low half of the result: high four elements of %a reversed (7,6,5,4).
; High half: high four elements of %b reversed (mask indices 15,14,13,12).
737define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
738; ALL-LABEL: shuffle_v8f32_7654fedc:
739; ALL:       # BB#0:
740; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
741; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
742; ALL-NEXT:    retq
743  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
744  ret <8 x float> %shuffle
745}
746
; Low half of the result: high four elements of %b reversed (mask indices
; 15,14,13,12). High half: high four elements of %a reversed (7,6,5,4).
747define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
748; ALL-LABEL: shuffle_v8f32_fedc7654:
749; ALL:       # BB#0:
750; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
751; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
752; ALL-NEXT:    retq
753  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
754  ret <8 x float> %shuffle
755}
756
; Gathers the odd-indexed elements of %truc (1,3,5,7) into the low half of the
; result and the odd-indexed elements of %tchose (mask indices 9,11,13,15) into
; the high half.
; NOTE(review): the name suggests a regression test for bug report PR21138;
; the report itself is not visible here - confirm against the tracker.
757define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
758; AVX1-LABEL: PR21138:
759; AVX1:       # BB#0:
760; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
761; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
762; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
763; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
764; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
765; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
766; AVX1-NEXT:    retq
767;
768; AVX2-LABEL: PR21138:
769; AVX2:       # BB#0:
770; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[1,3,1,3,5,7,5,7]
771; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
772; AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,3,5,7,5,7]
773; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
774; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
775; AVX2-NEXT:    retq
776  %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
777  ret <8 x float> %shuffle
778}
779
; Low half of the result: low four elements of %b reversed (mask indices
; 11,10,9,8). High half: high four elements of %a reversed (7,6,5,4).
780define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
781; ALL-LABEL: shuffle_v8f32_ba987654:
782; ALL:       # BB#0:
783; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
784; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
785; ALL-NEXT:    retq
786  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
787  ret <8 x float> %shuffle
788}
789
; Low half of the result: low four elements of %b reversed (mask indices
; 11,10,9,8). High half: low four elements of %a reversed (3,2,1,0).
790define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
791; ALL-LABEL: shuffle_v8f32_ba983210:
792; ALL:       # BB#0:
793; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
794; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
795; ALL-NEXT:    retq
796  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
797  ret <8 x float> %shuffle
798}
799
; Per 128-bit half, interleaves the low elements of %b and %a with %b first:
; (b0,a0,undef,a1 | b4,a4,undef,a5). Result lanes 2 and 6 are undef, and the
; expected output is a single vunpcklps with the operands swapped.
800define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
801; ALL-LABEL: shuffle_v8f32_80u1c4u5:
802; ALL:       # BB#0:
803; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
804; ALL-NEXT:    retq
805  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
806  ret <8 x float> %shuffle
807}
808
; Per 128-bit half, interleaves the high elements of %b and %a with %b first:
; (b2,a2,undef,a3 | b6,a6,b7,a7). Result lane 2 is undef, and the expected
; output is a single vunpckhps with the operands swapped.
809define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
810; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
811; ALL:       # BB#0:
812; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
813; ALL-NEXT:    retq
814  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
815  ret <8 x float> %shuffle
816}
817
; The low half of the result is undef; the high half is element 1 of %a
; repeated four times. No mask index selects from %b.
818define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
819; ALL-LABEL: shuffle_v8f32_uuuu1111:
820; ALL:       # BB#0:
821; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
822; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
823; ALL-NEXT:    retq
824  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
825  ret <8 x float> %shuffle
826}
827
; Splats element 4 of %a (the first element of its upper 128-bit half) to all
; eight result lanes. No mask index selects from %b.
828define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
829; AVX1-LABEL: shuffle_v8f32_44444444:
830; AVX1:       # BB#0:
831; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
832; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
833; AVX1-NEXT:    retq
834;
835; AVX2-LABEL: shuffle_v8f32_44444444:
836; AVX2:       # BB#0:
837; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
838; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
839; AVX2-NEXT:    retq
840  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
841  ret <8 x float> %shuffle
842}
843
; Low half of the result is (a1,a1,b0,b0); the high half is undef, so the
; expected output only operates on the 128-bit xmm registers.
844define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
845; ALL-LABEL: shuffle_v8f32_1188uuuu:
846; ALL:       # BB#0:
847; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
848; ALL-NEXT:    retq
849  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
850  ret <8 x float> %shuffle
851}
852
; The low half of the result is undef; the high half is the low four elements
; of %a in reversed order (3,2,1,0). No mask index selects from %b.
853define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
854; ALL-LABEL: shuffle_v8f32_uuuu3210:
855; ALL:       # BB#0:
856; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
857; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
858; ALL-NEXT:    retq
859  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
860  ret <8 x float> %shuffle
861}
862
; The low half of the result is undef; the high half is (a1,a1,b0,b0), built
; from the low 128 bits of both inputs.
863define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
864; ALL-LABEL: shuffle_v8f32_uuuu1188:
865; ALL:       # BB#0:
866; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
867; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
868; ALL-NEXT:    retq
869  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
870  ret <8 x float> %shuffle
871}
872
; Low half of the result is element 1 of %a repeated four times; the high half
; is undef. No mask index selects from %b.
873define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
874; ALL-LABEL: shuffle_v8f32_1111uuuu:
875; ALL:       # BB#0:
876; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
877; ALL-NEXT:    retq
878  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
879  ret <8 x float> %shuffle
880}
881
; Low half of the result is element 5 of %a (upper 128-bit half) repeated four
; times; the high half is undef. No mask index selects from %b.
882define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
883; ALL-LABEL: shuffle_v8f32_5555uuuu:
884; ALL:       # BB#0:
885; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
886; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
887; ALL-NEXT:    retq
888  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
889  ret <8 x float> %shuffle
890}
891
; Integer variant: splats element 0 of %a to all eight i32 lanes. No mask index
; selects from %b.
892define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
893; AVX1-LABEL: shuffle_v8i32_00000000:
894; AVX1:       # BB#0:
895; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
896; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
897; AVX1-NEXT:    retq
898;
899; AVX2-LABEL: shuffle_v8i32_00000000:
900; AVX2:       # BB#0:
901; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
902; AVX2-NEXT:    retq
903  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
904  ret <8 x i32> %shuffle
905}
906
; Every result lane is element 0 of %a except lane 6, which takes element 1.
; No mask index selects from %b.
907define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
908; AVX1-LABEL: shuffle_v8i32_00000010:
909; AVX1:       # BB#0:
910; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
911; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
912; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
913; AVX1-NEXT:    retq
914;
915; AVX2-LABEL: shuffle_v8i32_00000010:
916; AVX2:       # BB#0:
917; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
918; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
919; AVX2-NEXT:    retq
920  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
921  ret <8 x i32> %shuffle
922}
923
; Every result lane is element 0 of %a except lane 5, which takes element 2.
; No mask index selects from %b.
924define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
925; AVX1-LABEL: shuffle_v8i32_00000200:
926; AVX1:       # BB#0:
927; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
928; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
929; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
930; AVX1-NEXT:    retq
931;
932; AVX2-LABEL: shuffle_v8i32_00000200:
933; AVX2:       # BB#0:
934; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
935; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
936; AVX2-NEXT:    retq
937  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
938  ret <8 x i32> %shuffle
939}
940
; Every result lane is element 0 of %a except lane 4, which takes element 3.
; No mask index selects from %b.
941define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
942; AVX1-LABEL: shuffle_v8i32_00003000:
943; AVX1:       # BB#0:
944; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
945; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
946; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
947; AVX1-NEXT:    retq
948;
949; AVX2-LABEL: shuffle_v8i32_00003000:
950; AVX2:       # BB#0:
951; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
952; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
953; AVX2-NEXT:    retq
954  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
955  ret <8 x i32> %shuffle
956}
957
; Every result lane is element 0 of %a except lane 3, which takes element 4
; from the upper 128-bit half of %a. No mask index selects from %b.
958define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
959; AVX1-LABEL: shuffle_v8i32_00040000:
960; AVX1:       # BB#0:
961; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
962; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
963; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
964; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
965; AVX1-NEXT:    retq
966;
967; AVX2-LABEL: shuffle_v8i32_00040000:
968; AVX2:       # BB#0:
969; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
970; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
971; AVX2-NEXT:    retq
972  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
973  ret <8 x i32> %shuffle
974}
975
; Every result lane is element 0 of %a except lane 2, which takes element 5
; from the upper 128-bit half of %a. No mask index selects from %b.
976define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
977; AVX1-LABEL: shuffle_v8i32_00500000:
978; AVX1:       # BB#0:
979; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
980; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
981; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
982; AVX1-NEXT:    retq
983;
984; AVX2-LABEL: shuffle_v8i32_00500000:
985; AVX2:       # BB#0:
986; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
987; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
988; AVX2-NEXT:    retq
989  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
990  ret <8 x i32> %shuffle
991}
992
; Every result lane is element 0 of %a except lane 1, which takes element 6
; from the upper 128-bit half of %a. No mask index selects from %b.
993define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
994; AVX1-LABEL: shuffle_v8i32_06000000:
995; AVX1:       # BB#0:
996; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
997; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
998; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
999; AVX1-NEXT:    retq
1000;
1001; AVX2-LABEL: shuffle_v8i32_06000000:
1002; AVX2:       # BB#0:
1003; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
1004; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1005; AVX2-NEXT:    retq
1006  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1007  ret <8 x i32> %shuffle
1008}
1009
; Every result lane is element 0 of %a except lane 0, which takes element 7
; from the upper 128-bit half of %a. No mask index selects from %b.
1010define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
1011; AVX1-LABEL: shuffle_v8i32_70000000:
1012; AVX1:       # BB#0:
1013; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1014; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1015; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
1016; AVX1-NEXT:    retq
1017;
1018; AVX2-LABEL: shuffle_v8i32_70000000:
1019; AVX2:       # BB#0:
1020; AVX2-NEXT:    movl $7, %eax
1021; AVX2-NEXT:    vmovd %eax, %xmm1
1022; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1023; AVX2-NEXT:    retq
1024  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1025  ret <8 x i32> %shuffle
1026}
1027
; Within each 128-bit half of %a, duplicates the low pair of elements
; (0,1,0,1 | 4,5,4,5). No mask index selects from %b.
1028define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
1029; AVX1-LABEL: shuffle_v8i32_01014545:
1030; AVX1:       # BB#0:
1031; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1032; AVX1-NEXT:    retq
1033;
1034; AVX2-LABEL: shuffle_v8i32_01014545:
1035; AVX2:       # BB#0:
1036; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1037; AVX2-NEXT:    retq
1038  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
1039  ret <8 x i32> %shuffle
1040}
1041
; Duplicates each of the low four elements of %a into adjacent result lanes
; (0,0,1,1,2,2,3,3). No mask index selects from %b.
1042define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
1043; AVX1-LABEL: shuffle_v8i32_00112233:
1044; AVX1:       # BB#0:
1045; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
1046; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1047; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1048; AVX1-NEXT:    retq
1049;
1050; AVX2-LABEL: shuffle_v8i32_00112233:
1051; AVX2:       # BB#0:
1052; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1053; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1054; AVX2-NEXT:    retq
1055  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1056  ret <8 x i32> %shuffle
1057}
1058
; Low half of the result is element 0 of %a repeated four times; the high half
; is element 1 of %a repeated four times. No mask index selects from %b.
1059define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1060; AVX1-LABEL: shuffle_v8i32_00001111:
1061; AVX1:       # BB#0:
1062; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
1063; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1064; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1065; AVX1-NEXT:    retq
1066;
1067; AVX2-LABEL: shuffle_v8i32_00001111:
1068; AVX2:       # BB#0:
1069; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1070; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1071; AVX2-NEXT:    retq
1072  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1073  ret <8 x i32> %shuffle
1074}
1075
; Pure blend: even result lanes come from %b and odd lanes from %a, with every
; element staying in its original lane position.
1076define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1077; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1078; AVX1:       # BB#0:
1079; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1080; AVX1-NEXT:    retq
1081;
1082; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1083; AVX2:       # BB#0:
1084; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1085; AVX2-NEXT:    retq
1086  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1087  ret <8 x i32> %shuffle
1088}
1089
; Alternates element 0 of %a and element 0 of %b across all eight result lanes
; (a0,b0,a0,b0,a0,b0,a0,b0).
1090define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1091; AVX1-LABEL: shuffle_v8i32_08080808:
1092; AVX1:       # BB#0:
1093; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1094; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1095; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1096; AVX1-NEXT:    retq
1097;
1098; AVX2-LABEL: shuffle_v8i32_08080808:
1099; AVX2:       # BB#0:
1100; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1101; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
1102; AVX2-NEXT:    retq
1103  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1104  ret <8 x i32> %shuffle
1105}
1106
; Per 128-bit half, alternates the first element of that half of %a with the
; first element of the same half of %b (a0,b0,a0,b0 | a4,b4,a4,b4).
1107define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1108; AVX1-LABEL: shuffle_v8i32_08084c4c:
1109; AVX1:       # BB#0:
1110; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1111; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1112; AVX1-NEXT:    retq
1113;
1114; AVX2-LABEL: shuffle_v8i32_08084c4c:
1115; AVX2:       # BB#0:
1116; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1117; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1118; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1119; AVX2-NEXT:    retq
1120  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1121  ret <8 x i32> %shuffle
1122}
1123
; Per 128-bit half: the first element of %b's half twice, then the upper two
; elements of %a's half in place (b0,b0,a2,a3 | b4,b4,a6,a7).
1124define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1125; AVX1-LABEL: shuffle_v8i32_8823cc67:
1126; AVX1:       # BB#0:
1127; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1128; AVX1-NEXT:    retq
1129;
1130; AVX2-LABEL: shuffle_v8i32_8823cc67:
1131; AVX2:       # BB#0:
1132; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1133; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1134; AVX2-NEXT:    retq
1135  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1136  ret <8 x i32> %shuffle
1137}
1138
; Per 128-bit half: the low pair of %b swapped, then the high pair of %a
; swapped (b1,b0,a3,a2 | b5,b4,a7,a6).
1139define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1140; AVX1-LABEL: shuffle_v8i32_9832dc76:
1141; AVX1:       # BB#0:
1142; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1143; AVX1-NEXT:    retq
1144;
1145; AVX2-LABEL: shuffle_v8i32_9832dc76:
1146; AVX2:       # BB#0:
1147; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1148; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1149; AVX2-NEXT:    retq
1150  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1151  ret <8 x i32> %shuffle
1152}
1153
; Per 128-bit half: the low pair of %b swapped, then the low pair of %a
; swapped (b1,b0,a1,a0 | b5,b4,a5,a4).
1154define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1155; AVX1-LABEL: shuffle_v8i32_9810dc54:
1156; AVX1:       # BB#0:
1157; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1158; AVX1-NEXT:    retq
1159;
1160; AVX2-LABEL: shuffle_v8i32_9810dc54:
1161; AVX2:       # BB#0:
1162; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1163; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1164; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1165; AVX2-NEXT:    retq
1166  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1167  ret <8 x i32> %shuffle
1168}
1169
; Per 128-bit half, interleaves the low two elements of %a with the low two
; elements of %b (a0,b0,a1,b1 | a4,b4,a5,b5) - an unpack-low pattern.
1170define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1171; AVX1-LABEL: shuffle_v8i32_08194c5d:
1172; AVX1:       # BB#0:
1173; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1174; AVX1-NEXT:    retq
1175;
1176; AVX2-LABEL: shuffle_v8i32_08194c5d:
1177; AVX2:       # BB#0:
1178; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1179; AVX2-NEXT:    retq
1180  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1181  ret <8 x i32> %shuffle
1182}
1183
; Per 128-bit half, interleaves the high two elements of %a with the high two
; elements of %b (a2,b2,a3,b3 | a6,b6,a7,b7) - an unpack-high pattern.
1184define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1185; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1186; AVX1:       # BB#0:
1187; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1188; AVX1-NEXT:    retq
1189;
1190; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1191; AVX2:       # BB#0:
1192; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1193; AVX2-NEXT:    retq
1194  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1195  ret <8 x i32> %shuffle
1196}
1197
; Interleaves the low four elements of %a with the low four elements of %b
; across the whole 256-bit result (a0,b0,a1,b1,a2,b2,a3,b3). Elements 2 and 3
; of each input move into the high 128-bit half.
1198define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1199; AVX1-LABEL: shuffle_v8i32_08192a3b:
1200; AVX1:       # BB#0:
1201; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1202; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1203; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1204; AVX1-NEXT:    retq
1205;
1206; AVX2-LABEL: shuffle_v8i32_08192a3b:
1207; AVX2:       # BB#0:
1208; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1209; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
1210; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1211; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1212; AVX2-NEXT:    retq
1213  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1214  ret <8 x i32> %shuffle
1215}
1216
; Result is (a0,b0,b1,b1,a1,b2,b3,b3): lanes 0 and 4 take elements 0 and 1 of
; %a, the remaining lanes are drawn from the low four elements of %b.
1217define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1218; AVX1-LABEL: shuffle_v8i32_08991abb:
1219; AVX1:       # BB#0:
1220; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1221; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1222; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1223; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1224; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
1225; AVX1-NEXT:    retq
1226;
1227; AVX2-LABEL: shuffle_v8i32_08991abb:
1228; AVX2:       # BB#0:
1229; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1230; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
1231; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1232; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
1233; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1234; AVX2-NEXT:    retq
1235  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1236  ret <8 x i32> %shuffle
1237}
1238
; Even result lanes take elements 0,1,2,3 of %a in order; odd result lanes
; keep the odd elements of %b in place (a0,b1,a1,b3,a2,b5,a3,b7).
1239define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1240; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1241; AVX1:       # BB#0:
1242; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
1243; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
1244; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
1245; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1246; AVX1-NEXT:    retq
1247;
1248; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1249; AVX2:       # BB#0:
1250; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1251; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1252; AVX2-NEXT:    retq
1253  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1254  ret <8 x i32> %shuffle
1255}
1256
; Result lanes 0 and 4 take elements 0 and 1 of %a; every other lane keeps the
; element of %b already at that position (a0,b1,b2,b3,a1,b5,b6,b7).
1257define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1258; AVX1-LABEL: shuffle_v8i32_09ab1def:
1259; AVX1:       # BB#0:
1260; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1261; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1262; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1263; AVX1-NEXT:    retq
1264;
1265; AVX2-LABEL: shuffle_v8i32_09ab1def:
1266; AVX2:       # BB#0:
1267; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1268; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
1269; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1270; AVX2-NEXT:    retq
1271  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1272  ret <8 x i32> %shuffle
1273}
1274
; Single-input shuffle that applies the same within-half pattern [0,0,0,1] to
; both 128-bit halves of %a. No mask index selects from %b.
1275define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1276; AVX1-LABEL: shuffle_v8i32_00014445:
1277; AVX1:       # BB#0:
1278; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1279; AVX1-NEXT:    retq
1280;
1281; AVX2-LABEL: shuffle_v8i32_00014445:
1282; AVX2:       # BB#0:
1283; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1284; AVX2-NEXT:    retq
1285  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1286  ret <8 x i32> %shuffle
1287}
1288
; Single-input shuffle that applies the same within-half pattern [0,0,2,0] to
; both 128-bit halves of %a. No mask index selects from %b.
1289define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1290; AVX1-LABEL: shuffle_v8i32_00204464:
1291; AVX1:       # BB#0:
1292; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1293; AVX1-NEXT:    retq
1294;
1295; AVX2-LABEL: shuffle_v8i32_00204464:
1296; AVX2:       # BB#0:
1297; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1298; AVX2-NEXT:    retq
1299  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1300  ret <8 x i32> %shuffle
1301}
1302
; Single-input shuffle that applies the same within-half pattern [0,3,0,0] to
; both 128-bit halves of %a. No mask index selects from %b.
1303define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1304; AVX1-LABEL: shuffle_v8i32_03004744:
1305; AVX1:       # BB#0:
1306; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1307; AVX1-NEXT:    retq
1308;
1309; AVX2-LABEL: shuffle_v8i32_03004744:
1310; AVX2:       # BB#0:
1311; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1312; AVX2-NEXT:    retq
1313  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1314  ret <8 x i32> %shuffle
1315}
1316
; Single-input shuffle that applies the same within-half pattern [1,0,0,0] to
; both 128-bit halves of %a. No mask index selects from %b.
1317define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1318; AVX1-LABEL: shuffle_v8i32_10005444:
1319; AVX1:       # BB#0:
1320; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1321; AVX1-NEXT:    retq
1322;
1323; AVX2-LABEL: shuffle_v8i32_10005444:
1324; AVX2:       # BB#0:
1325; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1326; AVX2-NEXT:    retq
1327  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1328  ret <8 x i32> %shuffle
1329}
1330
; Single-input shuffle that applies the same within-half pattern [2,2,0,0] to
; both 128-bit halves of %a. No mask index selects from %b.
1331define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1332; AVX1-LABEL: shuffle_v8i32_22006644:
1333; AVX1:       # BB#0:
1334; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1335; AVX1-NEXT:    retq
1336;
1337; AVX2-LABEL: shuffle_v8i32_22006644:
1338; AVX2:       # BB#0:
1339; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1340; AVX2-NEXT:    retq
1341  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1342  ret <8 x i32> %shuffle
1343}
1344
; Single-input shuffle that applies the same within-half pattern [3,3,3,0] to
; both 128-bit halves of %a. No mask index selects from %b.
1345define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1346; AVX1-LABEL: shuffle_v8i32_33307774:
1347; AVX1:       # BB#0:
1348; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1349; AVX1-NEXT:    retq
1350;
1351; AVX2-LABEL: shuffle_v8i32_33307774:
1352; AVX2:       # BB#0:
1353; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1354; AVX2-NEXT:    retq
1355  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1356  ret <8 x i32> %shuffle
1357}
1358
; Reverses the four elements inside each 128-bit half of %a independently
; (pattern [3,2,1,0] per half). No mask index selects from %b.
1359define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1360; AVX1-LABEL: shuffle_v8i32_32107654:
1361; AVX1:       # BB#0:
1362; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1363; AVX1-NEXT:    retq
1364;
1365; AVX2-LABEL: shuffle_v8i32_32107654:
1366; AVX2:       # BB#0:
1367; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1368; AVX2-NEXT:    retq
1369  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1370  ret <8 x i32> %shuffle
1371}
1372
; Single-input shuffle that applies the same within-half pattern [0,0,2,3] to
; both 128-bit halves of %a. No mask index selects from %b.
1373define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1374; AVX1-LABEL: shuffle_v8i32_00234467:
1375; AVX1:       # BB#0:
1376; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1377; AVX1-NEXT:    retq
1378;
1379; AVX2-LABEL: shuffle_v8i32_00234467:
1380; AVX2:       # BB#0:
1381; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1382; AVX2-NEXT:    retq
1383  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1384  ret <8 x i32> %shuffle
1385}
1386
; Duplicates every even-indexed element of %a into the following odd lane
; (0,0,2,2,4,4,6,6). No mask index selects from %b.
1387define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1388; AVX1-LABEL: shuffle_v8i32_00224466:
1389; AVX1:       # BB#0:
1390; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1391; AVX1-NEXT:    retq
1392;
1393; AVX2-LABEL: shuffle_v8i32_00224466:
1394; AVX2:       # BB#0:
1395; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1396; AVX2-NEXT:    retq
1397  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1398  ret <8 x i32> %shuffle
1399}
1400
; Swaps each adjacent pair of elements of %a (1,0,3,2,5,4,7,6). No mask index
; selects from %b.
1401define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1402; AVX1-LABEL: shuffle_v8i32_10325476:
1403; AVX1:       # BB#0:
1404; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1405; AVX1-NEXT:    retq
1406;
1407; AVX2-LABEL: shuffle_v8i32_10325476:
1408; AVX2:       # BB#0:
1409; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1410; AVX2-NEXT:    retq
1411  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1412  ret <8 x i32> %shuffle
1413}
1414
; Duplicates every odd-indexed element of %a into the preceding even lane
; (1,1,3,3,5,5,7,7). No mask index selects from %b.
1415define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1416; AVX1-LABEL: shuffle_v8i32_11335577:
1417; AVX1:       # BB#0:
1418; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1419; AVX1-NEXT:    retq
1420;
1421; AVX2-LABEL: shuffle_v8i32_11335577:
1422; AVX2:       # BB#0:
1423; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1424; AVX2-NEXT:    retq
1425  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1426  ret <8 x i32> %shuffle
1427}
1428
; Swaps only the low pair of elements inside each 128-bit half of %a (pattern
; [1,0,2,3] per half). No mask index selects from %b.
1429define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1430; AVX1-LABEL: shuffle_v8i32_10235467:
1431; AVX1:       # BB#0:
1432; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1433; AVX1-NEXT:    retq
1434;
1435; AVX2-LABEL: shuffle_v8i32_10235467:
1436; AVX2:       # BB#0:
1437; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1438; AVX2-NEXT:    retq
1439  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1440  ret <8 x i32> %shuffle
1441}
1442
; Single-input shuffle with the per-lane pattern 1,0,2,2 repeated in both
; 128-bit lanes. AVX1 expects one vpermilps, AVX2 one vpshufd.
1443define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1444; AVX1-LABEL: shuffle_v8i32_10225466:
1445; AVX1:       # BB#0:
1446; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1447; AVX1-NEXT:    retq
1448;
1449; AVX2-LABEL: shuffle_v8i32_10225466:
1450; AVX2:       # BB#0:
1451; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1452; AVX2-NEXT:    retq
1453  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1454  ret <8 x i32> %shuffle
1455}
1456
; Single-input shuffle that stays within each 128-bit lane but uses a different
; pattern per lane (0,0,0,1 low; 1,0,0,0 high). AVX1 expects one vpermilps;
; AVX2 expects an index-vector load (vmovdqa) feeding vpermd.
1457define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1458; AVX1-LABEL: shuffle_v8i32_00015444:
1459; AVX1:       # BB#0:
1460; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1461; AVX1-NEXT:    retq
1462;
1463; AVX2-LABEL: shuffle_v8i32_00015444:
1464; AVX2:       # BB#0:
1465; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1466; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1467; AVX2-NEXT:    retq
1468  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1469  ret <8 x i32> %shuffle
1470}
1471
; Single-input, lane-local shuffle with different per-lane patterns
; (0,0,2,0 low; 0,2,0,0 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1472define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1473; AVX1-LABEL: shuffle_v8i32_00204644:
1474; AVX1:       # BB#0:
1475; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1476; AVX1-NEXT:    retq
1477;
1478; AVX2-LABEL: shuffle_v8i32_00204644:
1479; AVX2:       # BB#0:
1480; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1481; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1482; AVX2-NEXT:    retq
1483  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1484  ret <8 x i32> %shuffle
1485}
1486
; Single-input, lane-local shuffle with different per-lane patterns
; (0,3,0,0 low; 0,0,3,0 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1487define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1488; AVX1-LABEL: shuffle_v8i32_03004474:
1489; AVX1:       # BB#0:
1490; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1491; AVX1-NEXT:    retq
1492;
1493; AVX2-LABEL: shuffle_v8i32_03004474:
1494; AVX2:       # BB#0:
1495; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1496; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1497; AVX2-NEXT:    retq
1498  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1499  ret <8 x i32> %shuffle
1500}
1501
; Single-input, lane-local shuffle: low lane uses 1,0,0,0 and the high lane
; splats its first element. AVX1 expects one vpermilps; AVX2 expects vmovdqa
; of the index vector followed by vpermd.
1502define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1503; AVX1-LABEL: shuffle_v8i32_10004444:
1504; AVX1:       # BB#0:
1505; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1506; AVX1-NEXT:    retq
1507;
1508; AVX2-LABEL: shuffle_v8i32_10004444:
1509; AVX2:       # BB#0:
1510; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1511; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1512; AVX2-NEXT:    retq
1513  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1514  ret <8 x i32> %shuffle
1515}
1516
; Single-input, lane-local shuffle with different per-lane patterns
; (2,2,0,0 low; 2,0,0,2 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1517define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1518; AVX1-LABEL: shuffle_v8i32_22006446:
1519; AVX1:       # BB#0:
1520; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1521; AVX1-NEXT:    retq
1522;
1523; AVX2-LABEL: shuffle_v8i32_22006446:
1524; AVX2:       # BB#0:
1525; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1526; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1527; AVX2-NEXT:    retq
1528  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1529  ret <8 x i32> %shuffle
1530}
1531
; Single-input, lane-local shuffle with different per-lane patterns
; (3,3,3,0 low; 3,0,3,0 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1532define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1533; AVX1-LABEL: shuffle_v8i32_33307474:
1534; AVX1:       # BB#0:
1535; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1536; AVX1-NEXT:    retq
1537;
1538; AVX2-LABEL: shuffle_v8i32_33307474:
1539; AVX2:       # BB#0:
1540; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1541; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1542; AVX2-NEXT:    retq
1543  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1544  ret <8 x i32> %shuffle
1545}
1546
; Single-input shuffle that reverses the low four elements and leaves the high
; four in place. AVX1 expects one vpermilps; AVX2 expects vmovdqa of the index
; vector followed by vpermd.
1547define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1548; AVX1-LABEL: shuffle_v8i32_32104567:
1549; AVX1:       # BB#0:
1550; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1551; AVX1-NEXT:    retq
1552;
1553; AVX2-LABEL: shuffle_v8i32_32104567:
1554; AVX2:       # BB#0:
1555; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1556; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1557; AVX2-NEXT:    retq
1558  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1559  ret <8 x i32> %shuffle
1560}
1561
; Single-input, lane-local shuffle with different per-lane patterns
; (0,0,2,3 low; 2,3,0,0 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1562define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1563; AVX1-LABEL: shuffle_v8i32_00236744:
1564; AVX1:       # BB#0:
1565; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1566; AVX1-NEXT:    retq
1567;
1568; AVX2-LABEL: shuffle_v8i32_00236744:
1569; AVX2:       # BB#0:
1570; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1571; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1572; AVX2-NEXT:    retq
1573  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1574  ret <8 x i32> %shuffle
1575}
1576
; Single-input, lane-local shuffle with different per-lane patterns
; (0,0,2,2 low; 2,2,0,0 high). AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1577define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1578; AVX1-LABEL: shuffle_v8i32_00226644:
1579; AVX1:       # BB#0:
1580; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1581; AVX1-NEXT:    retq
1582;
1583; AVX2-LABEL: shuffle_v8i32_00226644:
1584; AVX2:       # BB#0:
1585; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1586; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1587; AVX2-NEXT:    retq
1588  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1589  ret <8 x i32> %shuffle
1590}
1591
; Single-input shuffle that swaps adjacent pairs in the low lane only; the high
; lane is an identity. AVX1 expects one vpermilps; AVX2 expects vmovdqa of the
; index vector followed by vpermd.
1592define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1593; AVX1-LABEL: shuffle_v8i32_10324567:
1594; AVX1:       # BB#0:
1595; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1596; AVX1-NEXT:    retq
1597;
1598; AVX2-LABEL: shuffle_v8i32_10324567:
1599; AVX2:       # BB#0:
1600; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1601; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1602; AVX2-NEXT:    retq
1603  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1604  ret <8 x i32> %shuffle
1605}
1606
; Single-input shuffle that duplicates the odd elements of the low lane and
; keeps the high lane unchanged. AVX1 expects one vpermilps; AVX2 expects
; vmovdqa of the index vector followed by vpermd.
1607define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1608; AVX1-LABEL: shuffle_v8i32_11334567:
1609; AVX1:       # BB#0:
1610; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1611; AVX1-NEXT:    retq
1612;
1613; AVX2-LABEL: shuffle_v8i32_11334567:
1614; AVX2:       # BB#0:
1615; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1616; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1617; AVX2-NEXT:    retq
1618  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1619  ret <8 x i32> %shuffle
1620}
1621
; Single-input shuffle that keeps the low lane unchanged and swaps the first two
; elements of the high lane. AVX1 expects one vpermilps; AVX2 expects vmovdqa
; of the index vector followed by vpermd.
1622define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1623; AVX1-LABEL: shuffle_v8i32_01235467:
1624; AVX1:       # BB#0:
1625; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1626; AVX1-NEXT:    retq
1627;
1628; AVX2-LABEL: shuffle_v8i32_01235467:
1629; AVX2:       # BB#0:
1630; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1631; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1632; AVX2-NEXT:    retq
1633  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1634  ret <8 x i32> %shuffle
1635}
1636
; Single-input shuffle that keeps the low lane unchanged and applies 1,0,2,2
; within the high lane. AVX1 expects one vpermilps; AVX2 expects vmovdqa of the
; index vector followed by vpermd.
1637define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1638; AVX1-LABEL: shuffle_v8i32_01235466:
1639; AVX1:       # BB#0:
1640; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1641; AVX1-NEXT:    retq
1642;
1643; AVX2-LABEL: shuffle_v8i32_01235466:
1644; AVX2:       # BB#0:
1645; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1646; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1647; AVX2-NEXT:    retq
1648  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1649  ret <8 x i32> %shuffle
1650}
1651
; Single-input, lane-local shuffle with undef mask elements ('u' in the name) at
; positions 3 and 5. The expected code carries the undefs through: AVX1 expects
; one vpermilps; AVX2 expects an index vector with undef entries fed to vpermd.
1652define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1653; AVX1-LABEL: shuffle_v8i32_002u6u44:
1654; AVX1:       # BB#0:
1655; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1656; AVX1-NEXT:    retq
1657;
1658; AVX2-LABEL: shuffle_v8i32_002u6u44:
1659; AVX2:       # BB#0:
1660; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1661; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1662; AVX2-NEXT:    retq
1663  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1664  ret <8 x i32> %shuffle
1665}
1666
; Single-input, lane-local shuffle where only the first two elements of each
; lane are defined (0,0 low; 6,6 high). AVX1 expects one vpermilps; AVX2 expects
; an index vector with undef entries fed to vpermd.
1667define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1668; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1669; AVX1:       # BB#0:
1670; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1671; AVX1-NEXT:    retq
1672;
1673; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1674; AVX2:       # BB#0:
1675; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1676; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1677; AVX2-NEXT:    retq
1678  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1679  ret <8 x i32> %shuffle
1680}
1681
; Single-input, lane-local shuffle: pair swap in the low lane, identity on the
; first two high-lane elements, last two undef. AVX1 expects one vpermilps;
; AVX2 expects an index vector with undef entries fed to vpermd.
1682define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1683; AVX1-LABEL: shuffle_v8i32_103245uu:
1684; AVX1:       # BB#0:
1685; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1686; AVX1-NEXT:    retq
1687;
1688; AVX2-LABEL: shuffle_v8i32_103245uu:
1689; AVX2:       # BB#0:
1690; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1691; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1692; AVX2-NEXT:    retq
1693  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1694  ret <8 x i32> %shuffle
1695}
1696
; Single-input, lane-local shuffle: odd-element duplication in the low lane,
; high lane has two undef elements followed by 6,7. AVX1 expects one vpermilps;
; AVX2 expects an index vector with undef entries fed to vpermd.
1697define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1698; AVX1-LABEL: shuffle_v8i32_1133uu67:
1699; AVX1:       # BB#0:
1700; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1701; AVX1-NEXT:    retq
1702;
1703; AVX2-LABEL: shuffle_v8i32_1133uu67:
1704; AVX2:       # BB#0:
1705; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1706; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1707; AVX2-NEXT:    retq
1708  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1709  ret <8 x i32> %shuffle
1710}
1711
; Single-input, lane-local shuffle with four undef mask elements; the defined
; ones are 0 and 3 in the low lane and 5,4 in the high lane. AVX1 expects one
; vpermilps; AVX2 expects an index vector with undef entries fed to vpermd.
1712define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1713; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1714; AVX1:       # BB#0:
1715; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1716; AVX1-NEXT:    retq
1717;
1718; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1719; AVX2:       # BB#0:
1720; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1721; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1722; AVX2-NEXT:    retq
1723  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1724  ret <8 x i32> %shuffle
1725}
1726
; Single-input, lane-local shuffle that is mostly undef: only element 3 (from
; index 3) and elements 6,7 (both from index 6) are defined. AVX1 expects one
; vpermilps; AVX2 expects an index vector with undef entries fed to vpermd.
1727define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1728; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1729; AVX1:       # BB#0:
1730; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1731; AVX1-NEXT:    retq
1732;
1733; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1734; AVX2:       # BB#0:
1735; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1736; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
1737; AVX2-NEXT:    retq
1738  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1739  ret <8 x i32> %shuffle
1740}
1741
; Two-input shuffle (hex digits in the name are mask indices; 8..f select from
; %b) that crosses 128-bit lanes for both inputs. Each input is permuted
; separately and the results are merged with a blend: AVX1 expects five
; instructions ending in vblendps, AVX2 expects four ending in vpblendd.
1742define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1743; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1744; AVX1:       # BB#0:
1745; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1746; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1747; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1748; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1749; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1750; AVX1-NEXT:    retq
1751;
1752; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1753; AVX2:       # BB#0:
1754; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1755; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
1756; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,1,0,3]
1757; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1758; AVX2-NEXT:    retq
1759  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1760  ret <8 x i32> %shuffle
1761}
1762
; Reverses the low four elements of %a and places that result in both halves.
; AVX1 expects a 128-bit vpermilps followed by vinsertf128; AVX2 expects a
; 128-bit vpshufd followed by vpermq duplicating the low half.
1763define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1764; AVX1-LABEL: shuffle_v8i32_32103210:
1765; AVX1:       # BB#0:
1766; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1767; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1768; AVX1-NEXT:    retq
1769;
1770; AVX2-LABEL: shuffle_v8i32_32103210:
1771; AVX2:       # BB#0:
1772; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1773; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1774; AVX2-NEXT:    retq
1775  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1776  ret <8 x i32> %shuffle
1777}
1778
; Reverses the high four elements of %a and places that result in both halves.
; Expected code reverses within each lane, then copies the high 128 bits to
; both halves (vperm2f128 on AVX1, vperm2i128 on AVX2).
1779define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1780; AVX1-LABEL: shuffle_v8i32_76547654:
1781; AVX1:       # BB#0:
1782; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1783; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1784; AVX1-NEXT:    retq
1785;
1786; AVX2-LABEL: shuffle_v8i32_76547654:
1787; AVX2:       # BB#0:
1788; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1789; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1790; AVX2-NEXT:    retq
1791  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1792  ret <8 x i32> %shuffle
1793}
1794
; Full reversal of the eight elements of %a. Expected code reverses within each
; 128-bit lane, then swaps the two lanes (vperm2f128 on AVX1, vperm2i128 on AVX2).
1795define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1796; AVX1-LABEL: shuffle_v8i32_76543210:
1797; AVX1:       # BB#0:
1798; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1799; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1800; AVX1-NEXT:    retq
1801;
1802; AVX2-LABEL: shuffle_v8i32_76543210:
1803; AVX2:       # BB#0:
1804; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1805; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1806; AVX2-NEXT:    retq
1807  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1808  ret <8 x i32> %shuffle
1809}
1810
; Result is the reversed low half of %a followed by the reversed low half of %b
; (indices 11..8). Expected code first inserts %b's low 128 bits into the high
; half of %a, then reverses within each lane.
1811define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1812; AVX1-LABEL: shuffle_v8i32_3210ba98:
1813; AVX1:       # BB#0:
1814; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1815; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1816; AVX1-NEXT:    retq
1817;
1818; AVX2-LABEL: shuffle_v8i32_3210ba98:
1819; AVX2:       # BB#0:
1820; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1821; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1822; AVX2-NEXT:    retq
1823  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1824  ret <8 x i32> %shuffle
1825}
1826
; Result is the reversed low half of %a followed by the reversed high half of %b
; (indices 15..12). Expected code blends %a's low lane with %b's high lane
; (vblendpd on AVX1, vpblendd on AVX2), then reverses within each lane.
1827define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1828; AVX1-LABEL: shuffle_v8i32_3210fedc:
1829; AVX1:       # BB#0:
1830; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1831; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1832; AVX1-NEXT:    retq
1833;
1834; AVX2-LABEL: shuffle_v8i32_3210fedc:
1835; AVX2:       # BB#0:
1836; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1837; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1838; AVX2-NEXT:    retq
1839  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1840  ret <8 x i32> %shuffle
1841}
1842
; Result is the reversed high half of %a followed by the reversed high half of
; %b. Expected code gathers both high lanes with a 128-bit lane permute
; (vperm2f128 / vperm2i128), then reverses within each lane.
1843define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1844; AVX1-LABEL: shuffle_v8i32_7654fedc:
1845; AVX1:       # BB#0:
1846; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1847; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1848; AVX1-NEXT:    retq
1849;
1850; AVX2-LABEL: shuffle_v8i32_7654fedc:
1851; AVX2:       # BB#0:
1852; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1853; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1854; AVX2-NEXT:    retq
1855  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1856  ret <8 x i32> %shuffle
1857}
1858
; Result is the reversed high half of %b followed by the reversed high half of
; %a. Expected code gathers both high lanes with %b first (vperm2f128 /
; vperm2i128), then reverses within each lane.
1859define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1860; AVX1-LABEL: shuffle_v8i32_fedc7654:
1861; AVX1:       # BB#0:
1862; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1863; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1864; AVX1-NEXT:    retq
1865;
1866; AVX2-LABEL: shuffle_v8i32_fedc7654:
1867; AVX2:       # BB#0:
1868; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1869; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1870; AVX2-NEXT:    retq
1871  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1872  ret <8 x i32> %shuffle
1873}
1874
; Result is the reversed low half of %b (indices 11..8) followed by the reversed
; high half of %a. Expected code blends %b's low lane with %a's high lane
; (vblendpd on AVX1, vpblendd on AVX2), then reverses within each lane.
1875define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1876; AVX1-LABEL: shuffle_v8i32_ba987654:
1877; AVX1:       # BB#0:
1878; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1879; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1880; AVX1-NEXT:    retq
1881;
1882; AVX2-LABEL: shuffle_v8i32_ba987654:
1883; AVX2:       # BB#0:
1884; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1885; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1886; AVX2-NEXT:    retq
1887  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1888  ret <8 x i32> %shuffle
1889}
1890
; Result is the reversed low half of %b (indices 11..8) followed by the reversed
; low half of %a (indices 3..0), as the function name spells out. The mask
; previously selected indices 7..4 for the second half, which duplicated the
; ba987654 test and left this pattern untested.
; Expected code inserts %a's low 128 bits into the high half of %b, then
; reverses within each lane (the operand-swapped form of the 3210ba98 test).
; NOTE(review): the assertions below were derived by hand from that symmetry;
; regenerate with utils/update_llc_test_checks.py to confirm them against llc.
1891define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1892; AVX1-LABEL: shuffle_v8i32_ba983210:
1893; AVX1:       # BB#0:
1894; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1895; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1896; AVX1-NEXT:    retq
1897;
1898; AVX2-LABEL: shuffle_v8i32_ba983210:
1899; AVX2:       # BB#0:
1900; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1901; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1902; AVX2-NEXT:    retq
1903  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1904  ret <8 x i32> %shuffle
1905}
1906
; The first shuffle operand is zeroinitializer, so mask index 0 yields zero
; ('z' in the name) and indices 8 and 12 select elements 0 and 4 of %a.
; AVX1 expects a zeroed register (vxorps) combined via vshufps; AVX2 expects a
; single per-lane byte shift (vpslldq) that shifts in zeros.
1907define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1908; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1909; AVX1:       # BB#0:
1910; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
1911; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1912; AVX1-NEXT:    retq
1913;
1914; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1915; AVX2:       # BB#0:
1916; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1917; AVX2-NEXT:    retq
1918  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1919  ret <8 x i32> %shuffle
1920}
1921
; The first shuffle operand is zeroinitializer, so mask index 0 yields zero and
; indices 9, 11, 13, 14, 15 select elements 1, 3, 5, 6, 7 of %a.
; AVX1 expects vxorps plus two vshufps; AVX2 expects a single per-lane byte
; shift (vpsrldq) that shifts zeros into the top element of each lane.
1922define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1923; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1924; AVX1:       # BB#0:
1925; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
1926; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1927; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1928; AVX1-NEXT:    retq
1929;
1930; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1931; AVX2:       # BB#0:
1932; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1933; AVX2-NEXT:    retq
1934  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1935  ret <8 x i32> %shuffle
1936}
1937
; Two-input shuffle that interleaves the low elements of each 128-bit lane of %b
; and %a (undef positions are free), so a single unpack-low is expected:
; vunpcklps on AVX1 and vpunpckldq on AVX2.
; NOTE(review): the fifth character of the function name is 'b' (index 11), but
; the mask uses index 12 at that position, which is what the expected unpack
; (ymm1[4]) corresponds to. The name looks like it should read 80u1c4uu.
1938define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1939; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1940; AVX1:       # BB#0:
1941; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1942; AVX1-NEXT:    retq
1943;
1944; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1945; AVX2:       # BB#0:
1946; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1947; AVX2-NEXT:    retq
1948  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1949  ret <8 x i32> %shuffle
1950}
1951
; Only the high half of the result is defined: a splat of element 1 of %a.
; Expected code splats within the low 128 bits (vpshufd) and then inserts that
; into the high half (vinsertf128 on AVX1, vinserti128 on AVX2).
1952define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
1953; AVX1-LABEL: shuffle_v8i32_uuuu1111:
1954; AVX1:       # BB#0:
1955; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1956; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
1957; AVX1-NEXT:    retq
1958;
1959; AVX2-LABEL: shuffle_v8i32_uuuu1111:
1960; AVX2:       # BB#0:
1961; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1962; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
1963; AVX2-NEXT:    retq
1964  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
1965  ret <8 x i32> %shuffle
1966}
1967
; Only the low half of the result is defined: a splat of element 2 of %a.
; Both run lines share one expectation: a single 128-bit vpshufd, with nothing
; emitted for the undefined high half.
1968define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
1969; ALL-LABEL: shuffle_v8i32_2222uuuu:
1970; ALL:       # BB#0:
1971; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1972; ALL-NEXT:    retq
1973  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
1974  ret <8 x i32> %shuffle
1975}
1976
; Only the low half of the result is defined: elements 2 and 3 of %a interleaved
; with elements 2 and 3 of %b (mask indices 10 and 11). Both run lines share one
; expectation: a single 128-bit vpunpckhdq.
1977define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
1978; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
1979; ALL:       # BB#0:
1980; ALL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1981; ALL-NEXT:    retq
1982  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
1983  ret <8 x i32> %shuffle
1984}
1985
; Splat of element 4 of %a (first element of the high 128-bit lane).
; AVX1 expects an in-lane splat (vpermilps) followed by vperm2f128 copying the
; high lane to both halves; AVX2 expects the high lane to be extracted and then
; broadcast with vbroadcastss.
1986define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
1987; AVX1-LABEL: shuffle_v8i32_44444444:
1988; AVX1:       # BB#0:
1989; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1990; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1991; AVX1-NEXT:    retq
1992;
1993; AVX2-LABEL: shuffle_v8i32_44444444:
1994; AVX2:       # BB#0:
1995; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
1996; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
1997; AVX2-NEXT:    retq
1998  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1999  ret <8 x i32> %shuffle
2000}
2001
; Only the low half of the result is defined: a splat of element 5 of %a, which
; lives in the high 128-bit lane. Expected code extracts the high lane
; (vextractf128 on AVX1, vextracti128 on AVX2) and splats its element 1 with a
; 128-bit vpshufd.
2002define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
2003; AVX1-LABEL: shuffle_v8i32_5555uuuu:
2004; AVX1:       # BB#0:
2005; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2006; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2007; AVX1-NEXT:    retq
2008;
2009; AVX2-LABEL: shuffle_v8i32_5555uuuu:
2010; AVX2:       # BB#0:
2011; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
2012; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2013; AVX2-NEXT:    retq
2014  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
2015  ret <8 x i32> %shuffle
2016}
2017
; Loads a float from %p, inserts it into element 0 of a <4 x float>, and splats
; that element to eight lanes with an all-zero mask. Both run lines share one
; expectation: the load is folded into a single vbroadcastss from memory.
2018define <8 x float> @splat_mem_v8f32_2(float* %p) {
2019; ALL-LABEL: splat_mem_v8f32_2:
2020; ALL:       # BB#0:
2021; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
2022; ALL-NEXT:    retq
2023  %1 = load float, float* %p
2024  %2 = insertelement <4 x float> undef, float %1, i32 0
2025  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
2026  ret <8 x float> %3
2027}
2028
; Splats element 0 of a <4 x float> register argument to an <8 x float> result.
; AVX1 expects a 128-bit vpermilps splat followed by vinsertf128; AVX2 expects
; a single register-source vbroadcastss.
2029define <8 x float> @splat_v8f32(<4 x float> %r) {
2030; AVX1-LABEL: splat_v8f32:
2031; AVX1:       # BB#0:
2032; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
2033; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
2034; AVX1-NEXT:    retq
2035;
2036; AVX2-LABEL: splat_v8f32:
2037; AVX2:       # BB#0:
2038; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
2039; AVX2-NEXT:    retq
2040  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
2041  ret <8 x float> %1
2042}
2043
2044;
2045; Shuffle to logical bit shifts
2046;
2047
; The second shuffle operand is zeroinitializer, so mask index 8 yields zero
; ('z' in the name; 'U' marks undef). Every even result element is zero or undef
; and every odd one is the preceding even element of %a or undef, so AVX2
; expects a 64-bit left shift by 32 (vpsllq). AVX1 expects vxorps, vshufps and
; vpermilps instead.
2048define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
2049; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
2050; AVX1:       # BB#0:
2051; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
2052; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
2053; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
2054; AVX1-NEXT:    retq
2055;
2056; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
2057; AVX2:       # BB#0:
2058; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
2059; AVX2-NEXT:    retq
2060  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
2061  ret <8 x i32> %shuffle
2062}
2063
; The second shuffle operand is zeroinitializer, so mask index 8 yields zero.
; Every even result element is the following odd element of %a or undef and
; every odd one is zero or undef, so AVX2 expects a 64-bit right shift by 32
; (vpsrlq). AVX1 expects vxorps, vshufps and vpermilps instead.
2064define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
2065; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
2066; AVX1:       # BB#0:
2067; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
2068; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2069; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2070; AVX1-NEXT:    retq
2071;
2072; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
2073; AVX2:       # BB#0:
2074; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
2075; AVX2-NEXT:    retq
2076  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
2077  ret <8 x i32> %shuffle
2078}
2079
; Two-input shuffle where each 128-bit lane is the last element of %b's lane
; followed by the first three elements of %a's lane. AVX1 expects two vshufps;
; AVX2 expects a single per-lane vpalignr.
2080define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
2081; AVX1-LABEL: shuffle_v8i32_B012F456:
2082; AVX1:       # BB#0:
2083; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
2084; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
2085; AVX1-NEXT:    retq
2086;
2087; AVX2-LABEL: shuffle_v8i32_B012F456:
2088; AVX2:       # BB#0:
2089; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
2090; AVX2-NEXT:    retq
2091  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
2092  ret <8 x i32> %shuffle
2093}
2094
; Two-input shuffle where each 128-bit lane is the last three elements of %a's
; lane followed by the first element of %b's lane. AVX1 expects two vshufps;
; AVX2 expects a single per-lane vpalignr.
2095define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
2096; AVX1-LABEL: shuffle_v8i32_1238567C:
2097; AVX1:       # BB#0:
2098; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2099; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2100; AVX1-NEXT:    retq
2101;
2102; AVX2-LABEL: shuffle_v8i32_1238567C:
2103; AVX2:       # BB#0:
2104; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
2105; AVX2-NEXT:    retq
2106  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
2107  ret <8 x i32> %shuffle
2108}
2109
; Two-input shuffle where each 128-bit lane is the last three elements of %b's
; lane followed by the first element of %a's lane. AVX1 expects two vshufps;
; AVX2 expects a single per-lane vpalignr.
2110define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
2111; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
2112; AVX1:       # BB#0:
2113; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
2114; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
2115; AVX1-NEXT:    retq
2116;
2117; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
2118; AVX2:       # BB#0:
2119; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2120; AVX2-NEXT:    retq
2121  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2122  ret <8 x i32> %shuffle
2123}
2124
; Two-input shuffle where each 128-bit lane is the last element of %a's lane
; followed by the first three elements of %b's lane. AVX1 expects two vshufps;
; AVX2 expects a single per-lane vpalignr.
2125define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2126; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2127; AVX1:       # BB#0:
2128; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2129; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2130; AVX1-NEXT:    retq
2131;
2132; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2133; AVX2:       # BB#0:
2134; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2135; AVX2-NEXT:    retq
2136  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2137  ret <8 x i32> %shuffle
2138}
2139
; Single-input shuffle that rotates the elements of each 128-bit lane by one
; position (3,0,1,2 in both lanes). AVX1 expects one vpermilps, AVX2 one vpshufd.
2140define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2141; AVX1-LABEL: shuffle_v8i32_30127456:
2142; AVX1:       # BB#0:
2143; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2144; AVX1-NEXT:    retq
2145;
2146; AVX2-LABEL: shuffle_v8i32_30127456:
2147; AVX2:       # BB#0:
2148; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2149; AVX2-NEXT:    retq
2150  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2151  ret <8 x i32> %shuffle
2152}
2153
; Single-input shuffle that rotates the elements of each 128-bit lane by one
; position in the other direction (1,2,3,0 in both lanes). AVX1 expects one
; vpermilps, AVX2 one vpshufd.
2154define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2155; AVX1-LABEL: shuffle_v8i32_12305674:
2156; AVX1:       # BB#0:
2157; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2158; AVX1-NEXT:    retq
2159;
2160; AVX2-LABEL: shuffle_v8i32_12305674:
2161; AVX2:       # BB#0:
2162; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2163; AVX2-NEXT:    retq
2164  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2165  ret <8 x i32> %shuffle
2166}
2167
; Concatenates two loaded <2 x float> values into the low four elements of an
; <8 x float> (upper four undef). Each input is first widened to eight elements
; by its own shuffle, then the two are merged. Both run lines share one
; expectation: a 64-bit vmovq load followed by vmovhpd loading the second pair
; into the upper 64 bits of the xmm register.
2168define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2169; ALL-LABEL: concat_v2f32_1:
2170; ALL:       # BB#0: # %entry
2171; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2172; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2173; ALL-NEXT:    retq
2174entry:
2175  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2176  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2177  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2178  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2179  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2180  ret <8 x float> %tmp76
2181}
2182
; Concatenates two <2 x float> values loaded from memory into the low four
; elements of an <8 x float>, leaving the upper four elements undef. This
; variant uses a single shufflevector that goes directly from the two
; two-element operands to the eight-element result. The assertions below,
; shared by both runs, expect the same vmovq plus vmovhpd sequence as
; concat_v2f32_1.
2183define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2184; ALL-LABEL: concat_v2f32_2:
2185; ALL:       # BB#0: # %entry
2186; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2187; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2188; ALL-NEXT:    retq
2189entry:
2190  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2191  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
; Indices 2 and 3 select elements 0 and 1 of the second operand (%tmp74).
2192  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2193  ret <8 x float> %tmp76
2194}
2195
; Concatenates two <2 x float> values loaded from memory into the low four
; elements of an <8 x float>, leaving the upper four elements undef. This
; variant first concatenates into a <4 x float> and then widens that value to
; <8 x float>. The assertions below, shared by both runs, expect the same vmovq
; plus vmovhpd sequence as concat_v2f32_1 and concat_v2f32_2.
2196define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2197; ALL-LABEL: concat_v2f32_3:
2198; ALL:       # BB#0: # %entry
2199; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
2200; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2201; ALL-NEXT:    retq
2202entry:
2203  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2204  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
; Step 1 - join the two loaded values into one four-element vector.
2205  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Step 2 - widen to eight elements; elements 4-7 are undef.
2206  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2207  ret <8 x float> %res
2208}
2209
; Builds an <8 x i32> whose element 0 is an i32 loaded from %ptr and whose
; elements 1-7 are zero. The loaded scalar is inserted into an undef vector and
; then blended with zeroinitializer. The assertions below, shared by both runs,
; expect a single vmovd from memory whose annotated result already has zeros in
; the remaining elements.
2210define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2211; ALL-LABEL: insert_mem_and_zero_v8i32:
2212; ALL:       # BB#0:
2213; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2214; ALL-NEXT:    retq
2215  %a = load i32, i32* %ptr
2216  %v = insertelement <8 x i32> undef, i32 %a, i32 0
; Index 0 takes the loaded scalar from %v; indices 9-15 take elements 1-7 of
; the zero vector.
2217  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2218  ret <8 x i32> %shuffle
2219}
2220
; Concatenates the low four elements of %a with the high four elements of %b.
; Each half is first extracted as a <4 x i32> and the two halves are then
; joined by a third shufflevector. The assertions below expect the whole
; sequence to become a single blend (vblendpd for the +avx run, vpblendd for
; the +avx2 run).
2221define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
2222; AVX1-LABEL: concat_v8i32_0123CDEF:
2223; AVX1:       # BB#0:
2224; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2225; AVX1-NEXT:    retq
2226;
2227; AVX2-LABEL: concat_v8i32_0123CDEF:
2228; AVX2:       # BB#0:
2229; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2230; AVX2-NEXT:    retq
2231  %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2232  %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2233  %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2234  ret <8 x i32> %shuf
2235}
2236
; Concatenates the high four elements of %a0 with the high four elements of
; %a1, with the concatenation itself performed on <2 x i64> bitcasts of the two
; halves and the result bitcast back to <8 x i32>. The assertions below, shared
; by both runs, expect a single vperm2f128, so the bitcasts are expected not to
; add any instructions.
2237define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
2238; ALL-LABEL: concat_v8i32_4567CDEF_bc:
2239; ALL:       # BB#0:
2240; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2241; ALL-NEXT:    retq
2242  %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; Indices 12-15 select elements 4-7 of the second operand (%a1).
2243  %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
2244  %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
2245  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2246  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2247  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
2248  ret <8 x i32> %shuffle32
2249}
2250
; Floating-point counterpart of concat_v8i32_4567CDEF_bc. The two <8 x float>
; arguments are bitcast to different integer vector types (%f0 to <4 x i64>,
; %f1 to <8 x i32>), their high 128-bit halves are extracted, joined as
; <2 x i64> values, and the result is bitcast back to <8 x float>. The
; assertions below, shared by both runs, expect a single vperm2f128.
2251define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
2252; ALL-LABEL: concat_v8f32_4567CDEF_bc:
2253; ALL:       # BB#0:
2254; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2255; ALL-NEXT:    retq
2256  %a0 = bitcast <8 x float> %f0 to <4 x i64>
2257  %a1 = bitcast <8 x float> %f1 to <8 x i32>
2258  %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
2259  %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; %a0hi is already <2 x i64>, so this bitcast has identical source and
; destination types; it parallels the bitcast of %a1hi on the next line.
2260  %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
2261  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2262  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2263  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
2264  ret <8 x float> %shuffle32
2265}
2266
; Splats an i32 loaded from %ptr across all eight elements of an <8 x i32>.
; The scalar is inserted into element 0 of a four-element zero vector, and a
; shufflevector with an all-zero mask then replicates element 0 into every
; result element. The assertions below, shared by both runs, expect a single
; vbroadcastss that reads its operand directly from memory.
2267define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
2268; ALL-LABEL: insert_dup_mem_v8i32:
2269; ALL:       # BB#0:
2270; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
2271; ALL-NEXT:    retq
2272  %tmp = load i32, i32* %ptr, align 4
2273  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
; A zeroinitializer mask means every result element uses index 0.
2274  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
2275  ret <8 x i32> %tmp2
2276}
2277