; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
;
; Each function name spells its shuffle mask, one hex digit per result lane:
; 0-7 select an element of %a, 8-f select element (digit - 8) of %b, and 'u'
; marks an undef lane. Expected output shared by both runs uses the ALL prefix.

target triple = "x86_64-unknown-unknown"

; Mask <0,0,0,0,0,0,0,0>: splat %a[0] into every result lane; %b is unused.
define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,0,0,0,1,0>: every lane is %a[0] except lane 6, which takes %a[1].
define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000010:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00000010:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,0,0,2,0,0>: every lane is %a[0] except lane 5, which takes %a[2].
define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00000200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00000200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,0,3,0,0,0>: every lane is %a[0] except lane 4, which takes %a[3].
define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00003000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00003000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,4,0,0,0,0>: every lane is %a[0] except lane 3, which takes %a[4]
; (an element of the upper four placed among the lower four).
define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00040000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00040000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,0,5,0,0,0,0,0>: every lane is %a[0] except lane 2, which takes %a[5].
define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00500000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00500000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,6,0,0,0,0,0,0>: every lane is %a[0] except lane 1, which takes %a[6].
define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_06000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_06000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <7,0,0,0,0,0,0,0>: lane 0 takes %a[7]; every other lane is %a[0].
define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_70000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_70000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    movl $7, %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Mask <0,1,0,1,4,5,4,5>: in each four-element group of %a, repeat the first
; pair of elements twice.
define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01014545:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x float> %shuffle
}

; Mask <0,0,1,1,2,2,3,3>: duplicate each of %a[0..3] into two adjacent lanes.
define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00112233:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00112233:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,0,1,1,1,1>: lanes 0-3 are %a[0], lanes 4-7 are %a[1].
define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00001111:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_00001111:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x float> %shuffle
}

; Mask <8,1,10,3,12,5,14,7>: even lanes come from %b and odd lanes from %a,
; with every element staying in its own lane position.
define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_81a3c5e7:
; ALL:       # BB#0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,8,0,8,0,8,0,8>: alternate %a[0] (even lanes) with %b[0] (odd lanes).
define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08080808:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_08080808:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x float> %shuffle
}

; Mask <0,8,0,8,4,12,4,12>: lanes 0-3 alternate %a[0]/%b[0]; lanes 4-7
; alternate %a[4]/%b[4].
define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08084c4c:
; ALL:       # BB#0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x float> %shuffle
}

; Mask <8,8,2,3,12,12,6,7>: lanes 0-3 are %b[0],%b[0],%a[2],%a[3]; lanes 4-7
; repeat that pattern with every index raised by four.
define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_8823cc67:
; ALL:       # BB#0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <9,8,3,2,13,12,7,6>: lanes 0-3 are %b[1],%b[0],%a[3],%a[2]; lanes 4-7
; repeat that pattern with every index raised by four.
define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_9832dc76:
; ALL:       # BB#0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x float> %shuffle
}

; Mask <9,8,1,0,13,12,5,4>: lanes 0-3 are %b[1],%b[0],%a[1],%a[0]; lanes 4-7
; repeat that pattern with every index raised by four.
define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_9810dc54:
; ALL:       # BB#0:
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,8,1,9,4,12,5,13>: in each four-element group, interleave the first
; two elements of %a with the first two elements of %b.
define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08194c5d:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %shuffle
}

; Mask <2,10,3,11,6,14,7,15>: in each four-element group, interleave the last
; two elements of %a with the last two elements of %b.
define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %shuffle
}

; Mask <0,8,1,9,2,10,3,11>: interleave %a[0..3] with %b[0..3] across the whole
; eight-lane result.
define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08192a3b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_08192a3b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x float> %shuffle
}

; Mask <0,8,9,9,1,10,11,11>: result is
; %a[0],%b[0],%b[1],%b[1],%a[1],%b[2],%b[3],%b[3].
define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08991abb:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_08991abb:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x float> %shuffle
}

; Mask <0,9,1,11,2,13,3,15>: even lanes receive %a[0..3] in order; odd lanes
; keep %b's element at the same position (%b[1],%b[3],%b[5],%b[7]).
define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_091b2d3f:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_091b2d3f:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x float> %shuffle
}

; Mask <0,9,10,11,1,13,14,15>: lane 0 is %a[0] and lane 4 is %a[1]; all other
; lanes keep %b's element at the same position.
define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_09ab1def:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_09ab1def:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,1,4,4,4,5>: apply the in-group permutation [0,0,0,1] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00014445:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,0,4,4,6,4>: apply the in-group permutation [0,0,2,0] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00204464:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,3,0,0,4,7,4,4>: apply the in-group permutation [0,3,0,0] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_03004744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <1,0,0,0,5,4,4,4>: apply the in-group permutation [1,0,0,0] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10005444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <2,2,0,0,6,6,4,4>: apply the in-group permutation [2,2,0,0] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_22006644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <3,3,3,0,7,7,7,4>: apply the in-group permutation [3,3,3,0] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_33307774:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; Mask <3,2,1,0,7,6,5,4>: reverse the element order inside each four-element
; group of %a.
define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_32107654:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,3,4,4,6,7>: apply the in-group permutation [0,0,2,3] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00234467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,2,4,4,6,6>: copy each even-indexed element of %a into the odd
; lane that follows it.
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00224466:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; Mask <1,0,3,2,5,4,7,6>: swap the two elements of every adjacent pair of %a.
define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10325476:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x float> %shuffle
}

; Mask <1,1,3,3,5,5,7,7>: copy each odd-indexed element of %a into the even
; lane that precedes it.
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_11335577:
; ALL:       # BB#0:
; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

; Mask <1,0,2,3,5,4,6,7>: apply the in-group permutation [1,0,2,3] to each
; four-element group of %a (swap only the first pair of the group).
define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10235467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <1,0,2,2,5,4,6,6>: apply the in-group permutation [1,0,2,2] to each
; four-element group of %a.
define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10225466:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; Mask <0,0,0,1,5,4,4,4>: in-group permutation that differs per group of %a:
; [0,0,0,1] for elements 0-3 and [1,0,0,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00015444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,0,4,6,4,4>: in-group permutation that differs per group of %a:
; [0,0,2,0] for elements 0-3 and [0,2,0,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00204644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,3,0,0,4,4,7,4>: in-group permutation that differs per group of %a:
; [0,3,0,0] for elements 0-3 and [0,0,3,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_03004474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; Mask <1,0,0,0,4,4,4,4>: lanes 0-3 are %a[1],%a[0],%a[0],%a[0]; lanes 4-7 are
; all %a[4].
define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10004444:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <2,2,0,0,6,4,4,6>: in-group permutation that differs per group of %a:
; [2,2,0,0] for elements 0-3 and [2,0,0,2] for elements 4-7.
define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_22006446:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x float> %shuffle
}

; Mask <3,3,3,0,7,4,7,4>: in-group permutation that differs per group of %a:
; [3,3,3,0] for elements 0-3 and [3,0,3,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_33307474:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x float> %shuffle
}

; Mask <3,2,1,0,4,5,6,7>: reverse %a[0..3]; leave %a[4..7] in place.
define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_32104567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,3,6,7,4,4>: in-group permutation that differs per group of %a:
; [0,0,2,3] for elements 0-3 and [2,3,0,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00236744:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,2,6,6,4,4>: in-group permutation that differs per group of %a:
; [0,0,2,2] for elements 0-3 and [2,2,0,0] for elements 4-7.
define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00226644:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <1,0,3,2,4,5,6,7>: swap adjacent pairs within %a[0..3]; leave %a[4..7]
; in place.
define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_10324567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <1,1,3,3,4,5,6,7>: lanes 0-3 are %a[1],%a[1],%a[3],%a[3]; %a[4..7] stay
; in place.
define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_11334567:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,1,2,3,5,4,6,7>: leave %a[0..3] in place; swap %a[4] and %a[5].
define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01235467:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,1,2,3,5,4,6,6>: leave %a[0..3] in place; lanes 4-7 are
; %a[5],%a[4],%a[6],%a[6].
define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01235466:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; Mask <0,0,2,undef,6,undef,4,4>: single-input, in-group shuffle of %a with
; lanes 3 and 5 left undef.
define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_002u6u44:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x float> %shuffle
}

; Mask <0,0,undef,undef,6,6,undef,undef>: lanes 0-1 are %a[0], lanes 4-5 are
; %a[6]; lanes 2, 3, 6 and 7 are undef.
define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00uu66uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; Mask <1,0,3,2,4,5,undef,undef>: swap adjacent pairs within %a[0..3]; lanes
; 4-5 keep %a[4],%a[5]; lanes 6-7 are undef.
define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_103245uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; Mask <1,1,3,3,undef,undef,6,7>: lanes 0-3 are %a[1],%a[1],%a[3],%a[3]; lanes
; 4-5 are undef; lanes 6-7 keep %a[6],%a[7].
define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_1133uu67:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; Mask <0,undef,undef,3,5,4,undef,undef>: lanes 0 and 3 keep %a[0] and %a[3];
; lanes 4-5 are %a[5],%a[4]; lanes 1, 2, 6 and 7 are undef.
define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_0uu354uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x float> %shuffle
}

; Mask <undef,undef,undef,3,undef,undef,6,6>: lane 3 keeps %a[3] and lanes 6-7
; are both %a[6]; every other lane is undef.
define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_uuu3uu66:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x float> %shuffle
}

; Mask <12,3,4,8,12,13,10,0>: irregular two-input shuffle; result is
; %b[4],%a[3],%a[4],%b[0],%b[4],%b[5],%b[2],%a[0].
define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_c348cda0:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_c348cda0:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
  ret <8 x float> %shuffle
}

; Mask <15,5,1,1,2,3,5,10>: irregular two-input shuffle; result is
; %b[7],%a[5],%a[1],%a[1],%a[2],%a[3],%a[5],%b[2].
define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_f511235a:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_f511235a:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
  ret <8 x float> %shuffle
}

; Mask <3,2,1,0,3,2,1,0>: both halves of the result hold %a[0..3] in reverse
; order.
define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_32103210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_32103210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %shuffle
}

; Mask <7,6,5,4,7,6,5,4>: both halves of the result hold %a[4..7] in reverse
; order.
define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_76547654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_76547654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; Single-input shuffle: full reversal of the eight floats of %a
; (mask 7,6,5,4,3,2,1,0). %b is unused.
define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_76543210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_76543210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %res
}
732
; Two-input shuffle: low half is the low lane of %a reversed, high half is the
; low lane of %b reversed (mask 3,2,1,0,11,10,9,8).
define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_3210ba98:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
  ret <8 x float> %res
}
742
; Two-input shuffle: low half is the low lane of %a reversed, high half is the
; high lane of %b reversed (mask 3,2,1,0,15,14,13,12).
define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_3210fedc:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
  ret <8 x float> %res
}
752
; Two-input shuffle: low half is the high lane of %a reversed, high half is
; the high lane of %b reversed (mask 7,6,5,4,15,14,13,12).
define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_7654fedc:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x float> %res
}
762
; Two-input shuffle: low half is the high lane of %b reversed, high half is
; the high lane of %a reversed (mask 15,14,13,12,7,6,5,4).
define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_fedc7654:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %res
}
772
; Two-input shuffle: the odd-indexed floats of %truc fill the low half and the
; odd-indexed floats of %tchose fill the high half (mask 1,3,5,7,9,11,13,15).
; The function name suggests a bug-tracker regression test (presumably LLVM
; PR21138 -- not verifiable from this file).
define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
; AVX1-LABEL: PR21138:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR21138:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x float> %res
}
795
; Two-input shuffle: low half is the low lane of %b reversed, high half is the
; high lane of %a reversed (mask 11,10,9,8,7,6,5,4).
define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba987654:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %res
}
805
; Two-input shuffle: low half is the low lane of %b reversed, high half is the
; low lane of %a reversed (mask 11,10,9,8,3,2,1,0, i.e. hex digits "ba983210").
;
; NOTE(review): the mask used to read 11,10,9,8,7,6,5,4, which is a verbatim
; duplicate of @shuffle_v8f32_ba987654 and left the pattern named here
; untested. The assertions below were written by hand as the operand-swapped
; mirror of @shuffle_v8f32_3210ba98 (128-bit insert, then in-lane reverse);
; re-run utils/update_llc_test_checks.py to confirm them against llc.
define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba983210:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %shuffle
}
815
; Two-input shuffle with undef slots (mask 8,0,undef,1,12,4,undef,5): per
; 128-bit lane, the first element of %b, the first element of %a, an undef
; slot, and the second element of %a.
define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_80u1c4u5:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
  ret <8 x float> %res
}
824
; Two-input shuffle with one undef slot (mask 10,2,undef,3,14,6,15,7): the
; upper pair of each 128-bit lane of %b interleaved with the upper pair of the
; same lane of %a, with slot 2 left undefined.
define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; ALL-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
  ret <8 x float> %res
}
833
; Single-input shuffle: the low half of the result is undefined and the high
; half is element 1 of %a repeated four times. %b is unused.
define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_uuuu1111:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_uuuu1111:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
  ret <8 x float> %res
}
849
; Single-input shuffle: element 4 of %a (first element of the high lane) is
; splatted to all eight result slots. %b is unused.
define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_44444444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_44444444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %res
}
866
; Single-input shuffle: the low half of the result is element 5 of %a repeated
; four times; the high half is undefined. %b is unused.
define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_5555uuuu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_5555uuuu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}
882
; Single-input i32 shuffle: element 0 of %a splatted to all eight slots.
; %b is unused.
define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
897
; Single-input i32 shuffle: element 0 of %a in every slot except slot 6, which
; takes element 1. %b is unused.
define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000010:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00000010:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <8 x i32> %res
}
914
; Single-input i32 shuffle: element 0 of %a in every slot except slot 5, which
; takes element 2. %b is unused.
define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00000200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %res
}
931
; Single-input i32 shuffle: element 0 of %a in every slot except slot 4, which
; takes element 3. %b is unused.
define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00003000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00003000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
948
; Single-input i32 shuffle: element 0 of %a in every slot except slot 3, which
; takes element 4 (pulled from the high 128-bit lane into the low one).
; %b is unused.
define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00040000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00040000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
966
; Single-input i32 shuffle: element 0 of %a in every slot except slot 2, which
; takes element 5 (pulled from the high 128-bit lane). %b is unused.
define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00500000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00500000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
983
; Single-input i32 shuffle: element 0 of %a in every slot except slot 1, which
; takes element 6 (pulled from the high 128-bit lane). %b is unused.
define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_06000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_06000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
1000
; Single-input i32 shuffle: slot 0 takes element 7 of %a and every other slot
; takes element 0. %b is unused.
define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_70000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_70000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    movl $7, %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %res
}
1018
; Single-input i32 shuffle: within each 128-bit lane of %a, the first pair of
; elements is duplicated into the second pair (mask 0,1,0,1,4,5,4,5).
; %b is unused.
define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01014545:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_01014545:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i32> %res
}
1032
; Single-input i32 shuffle: each of the low four elements of %a appears twice
; in a row (mask 0,0,1,1,2,2,3,3). %b is unused.
define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00112233:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00112233:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i32> %res
}
1049
; Single-input i32 shuffle: element 0 of %a fills the low half and element 1
; fills the high half (mask 0,0,0,0,1,1,1,1). %b is unused.
define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00001111:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00001111:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %res
}
1066
; Two-input i32 blend: even slots come from %b and odd slots from %a, every
; element staying in its original position (mask 8,1,10,3,12,5,14,7).
define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i32> %res
}
1080
; Two-input i32 shuffle: element 0 of %a and element 0 of %b alternate across
; all eight slots (mask 0,8,0,8,0,8,0,8).
define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08080808:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_08080808:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
  ret <8 x i32> %res
}
1100
; Two-input i32 shuffle: within each 128-bit lane, the lane's first element of
; %a and the lane's first element of %b alternate (mask 0,8,0,8,4,12,4,12).
define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08084c4c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_08084c4c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
  ret <8 x i32> %res
}
1117
; Two-input i32 shuffle: within each 128-bit lane, the lane's first element of
; %b twice, then the upper pair of %a unchanged (mask 8,8,2,3,12,12,6,7).
define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_8823cc67:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_8823cc67:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
  ret <8 x i32> %res
}
1132
; Two-input i32 shuffle: within each 128-bit lane, the lower pair of %b
; swapped, then the upper pair of %a swapped (mask 9,8,3,2,13,12,7,6).
define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9832dc76:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9832dc76:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
  ret <8 x i32> %res
}
1147
; Two-input i32 shuffle: within each 128-bit lane, the lower pair of %b
; swapped, then the lower pair of %a swapped (mask 9,8,1,0,13,12,5,4).
define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9810dc54:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9810dc54:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
  ret <8 x i32> %res
}
1163
; Two-input i32 shuffle: within each 128-bit lane, the lower pair of %a is
; interleaved with the lower pair of %b (mask 0,8,1,9,4,12,5,13).
define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08194c5d:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_08194c5d:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x i32> %res
}
1177
; Two-input i32 shuffle: within each 128-bit lane, the upper pair of %a is
; interleaved with the upper pair of %b (mask 2,10,3,11,6,14,7,15).
define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i32> %res
}
1191
; Two-input i32 shuffle: the low four elements of %a interleaved with the low
; four elements of %b across the whole 256-bit result
; (mask 0,8,1,9,2,10,3,11).
define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08192a3b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_08192a3b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i32> %res
}
1210
; Two-input i32 shuffle (mask 0,8,9,9,1,10,11,11): slots 0 and 4 take elements
; 0 and 1 of %a; the remaining slots take elements 0,1,1 and 2,3,3 of %b.
define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08991abb:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_08991abb:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
  ret <8 x i32> %res
}
1232
; Two-input i32 shuffle (mask 0,9,1,11,2,13,3,15): even slots take elements
; 0..3 of %a in order; odd slots keep the odd elements of %b in place.
define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_091b2d3f:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_091b2d3f:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
  ret <8 x i32> %res
}
1250
; Two-input i32 shuffle (mask 0,9,10,11,1,13,14,15): slots 0 and 4 take
; elements 0 and 1 of %a; every other slot keeps the element of %b already at
; that position.
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_09ab1def:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_09ab1def:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
  ret <8 x i32> %res
}
1268
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 0,0,0,1. %b is unused.
define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00014445:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00014445:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
  ret <8 x i32> %res
}
1282
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 0,0,2,0. %b is unused.
define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00204464:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00204464:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
  ret <8 x i32> %res
}
1296
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 0,3,0,0. %b is unused.
define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_03004744:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_03004744:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
  ret <8 x i32> %res
}
1310
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 1,0,0,0. %b is unused.
define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10005444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10005444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i32> %res
}
1324
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 2,2,0,0. %b is unused.
define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_22006644:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_22006644:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i32> %res
}
1338
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 3,3,3,0. %b is unused.
define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_33307774:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_33307774:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
  ret <8 x i32> %res
}
1352
; Single-input in-lane i32 shuffle: each 128-bit lane of %a is reversed
; (lane-relative pattern 3,2,1,0). %b is unused.
define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32107654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_32107654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %res
}
1366
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 0,0,2,3. %b is unused.
define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00234467:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00234467:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
  ret <8 x i32> %res
}
1380
; Single-input in-lane i32 shuffle: every even-indexed element of %a is
; duplicated into the following odd slot (mask 0,0,2,2,4,4,6,6). %b is unused.
define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00224466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00224466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i32> %res
}
1394
; Single-input in-lane i32 shuffle: every adjacent pair of elements of %a is
; swapped (mask 1,0,3,2,5,4,7,6). %b is unused.
define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10325476:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10325476:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i32> %res
}
1408
; Single-input in-lane i32 shuffle: every odd-indexed element of %a is
; duplicated into the preceding even slot (mask 1,1,3,3,5,5,7,7). %b is unused.
define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11335577:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_11335577:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x i32> %res
}
1422
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 1,0,2,3 (first pair swapped, second pair unchanged).
; %b is unused.
define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10235467:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10235467:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %res
}
1436
; Single-input in-lane i32 shuffle: both 128-bit lanes of %a use the
; lane-relative pattern 1,0,2,2. %b is unused.
define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10225466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10225466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; AVX2-NEXT:    retq
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i32> %res
}
1450
; No element of %a crosses a 128-bit lane, but the low lane (0,0,0,1) and the
; high lane (5,4,4,4) use different in-lane patterns. The AVX run expects one
; vpermilps; the AVX2 run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00015444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00015444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i32> %perm
}
1465
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 0,0,2,0; high: 4,6,4,4). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00204644:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00204644:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x i32> %perm
}
1480
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 0,3,0,0; high: 4,4,7,4). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_03004474:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_03004474:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x i32> %perm
}
1495
; Lane-local permute of %a: the low lane becomes 1,0,0,0 and the high lane is
; a splat of element 4. The AVX run expects one vpermilps; the AVX2 run
; expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10004444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10004444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %perm
}
1510
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 2,2,0,0; high: 6,4,4,6). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_22006446:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_22006446:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x i32> %perm
}
1525
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 3,3,3,0; high: 7,4,7,4). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_33307474:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_33307474:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x i32> %perm
}
1540
; Reverses the low 128-bit lane of %a and keeps the high lane in its original
; order. The AVX run expects one vpermilps; the AVX2 run expects a constant
; index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32104567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_32104567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %perm
}
1555
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 0,0,2,3; high: 6,7,4,4). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00236744:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00236744:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x i32> %perm
}
1570
; Lane-local permute of %a with a different pattern per 128-bit lane
; (low: 0,0,2,2; high: 6,6,4,4). The AVX run expects one vpermilps; the AVX2
; run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00226644:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00226644:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i32> %perm
}
1585
; Swaps adjacent pairs in the low 128-bit lane of %a only; the high lane is
; left untouched. The AVX run expects one vpermilps; the AVX2 run expects a
; constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10324567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10324567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %perm
}
1600
; Duplicates the odd elements within the low 128-bit lane of %a only; the high
; lane is left untouched. The AVX run expects one vpermilps; the AVX2 run
; expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11334567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_11334567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %perm
}
1615
; Keeps the low 128-bit lane of %a as-is and swaps elements 4/5 in the high
; lane. The AVX run expects one vpermilps; the AVX2 run expects a constant
; index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235467:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_01235467:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %perm
}
1630
; Keeps the low 128-bit lane of %a as-is; in the high lane swaps elements 4/5
; and duplicates element 6. The AVX run expects one vpermilps; the AVX2 run
; expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_01235466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i32> %perm
}
1645
; Lane-local permute of %a with undef mask slots ('u' in the name) at result
; positions 3 and 5. The expected output keeps those slots undef: one
; vpermilps for AVX, a constant index vector plus vpermd for AVX2.
define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_002u6u44:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_002u6u44:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x i32> %perm
}
1660
; Lane-local permute of %a where the upper two slots of each 128-bit lane are
; undef: element 0 is duplicated in the low lane and element 6 in the high
; lane. Expected: one vpermilps for AVX, constant index vector plus vpermd
; for AVX2.
define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00uu66uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00uu66uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x i32> %perm
}
1675
; Swaps adjacent pairs in the low 128-bit lane of %a, keeps elements 4 and 5,
; and leaves the last two result slots undef. Expected: one vpermilps for AVX,
; constant index vector plus vpermd for AVX2.
define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_103245uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_103245uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x i32> %perm
}
1690
; Duplicates the odd elements in the low 128-bit lane of %a, leaves result
; slots 4 and 5 undef, and keeps elements 6 and 7. Expected: one vpermilps for
; AVX, constant index vector plus vpermd for AVX2.
define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1133uu67:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1133uu67:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x i32> %perm
}
1705
; Sparse lane-local permute of %a: only result slots 0, 3, 4 and 5 are defined
; (0, 3, 5, 4); the remaining slots are undef. Expected: one vpermilps for
; AVX, constant index vector plus vpermd for AVX2.
define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_0uu354uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_0uu354uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %perm
}
1720
; Mostly-undef lane-local permute of %a: only result slots 3, 6 and 7 are
; defined (3, 6, 6). Expected: one vpermilps for AVX, constant index vector
; plus vpermd for AVX2.
define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x i32> %perm
}
1735
; General two-input shuffle that crosses 128-bit lanes in both sources
; (mask indices 8-15 select from %b). Both runs expect each input to be
; permuted separately and the results merged with a final blend.
define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_6caa87e5:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_6caa87e5:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
  ret <8 x i32> %blend
}
1757
; Reverses the low four elements of %a and replicates that reversed quad into
; both halves of the result. The AVX run expects a 128-bit permute followed by
; vinsertf128; the AVX2 run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32103210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_32103210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %perm
}
1773
; Reverses the high four elements of %a and replicates that reversed quad into
; both halves of the result. The AVX run expects extract / 128-bit permute /
; insert; the AVX2 run expects a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76547654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_76547654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %perm
}
1790
; Full reversal of all eight elements of %a. The AVX run expects a 128-bit
; half swap (vperm2f128) followed by an in-lane reversal; the AVX2 run expects
; a constant index vector fed to vpermd.
define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76543210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_76543210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %perm
}
1806
; Result low half = low half of %a reversed; result high half = low half of %b
; reversed (mask indices 8-11). Both runs expect the low 128 bits of %b to be
; inserted above the low 128 bits of %a, followed by one in-lane reversal.
define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_3210ba98:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_3210ba98:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
  ret <8 x i32> %blend
}
1822
; Result low half = low half of %a reversed; result high half = high half of
; %b reversed (mask indices 12-15). Both halves stay in their own 128-bit lane,
; so both runs expect a lane blend followed by one in-lane reversal.
define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_3210fedc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_3210fedc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %blend
}
1838
; Result low half = high half of %a reversed; result high half = high half of
; %b reversed. Both runs expect a 128-bit lane select (vperm2f128 / vperm2i128)
; gathering the two high halves, followed by one in-lane reversal.
define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_7654fedc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_7654fedc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %blend
}
1854
; Result low half = high half of %b reversed; result high half = high half of
; %a reversed. Same lowering shape as the 7654fedc case with the two sources
; swapped in the 128-bit lane select.
define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_fedc7654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_fedc7654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %blend
}
1870
; Result low half = low half of %b reversed; result high half = high half of
; %a reversed. Each half stays in its own 128-bit lane, so both runs expect a
; lane blend (%b low, %a high) followed by one in-lane reversal.
define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba987654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba987654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %blend = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %blend
}
1886
; Result low half = low half of %b reversed (mask indices 11..8); result high
; half = low half of %a reversed (mask indices 3..0), as the name spells out.
; The mask used to be 11,10,9,8,7,6,5,4, which only repeated the ba987654 case
; and left this low-half/low-half combination untested.
; NOTE(review): the assertions below were derived by hand by mirroring the
; shuffle_v8i32_3210ba98 test with the two sources swapped (insert the low
; 128 bits of %a above the low 128 bits of %b, then reverse in-lane). Re-run
; utils/update_llc_test_checks.py to confirm them against the actual output.
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba983210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba983210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}
1902
; The first shuffle operand is an all-zero vector and the second is %a, so
; mask index 0 yields zero ('z') and indices 8 / 12 pick %a[0] / %a[4].
; Per 128-bit lane the result is: zero, undef, undef, first element of %a's
; lane. The AVX2 run expects a single per-lane byte shift left (vpslldq); the
; AVX run expects a zeroed register combined via vshufps.
define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %shifted = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
  ret <8 x i32> %shifted
}
1917
; The first shuffle operand is an all-zero vector and the second is %a, so
; mask index 0 yields zero ('z') and indices 9..15 pick elements of %a.
; Per 128-bit lane the result is elements 1..3 of %a's lane followed by a
; zero. The AVX2 run expects a single per-lane byte shift right (vpsrldq);
; the AVX run expects a zeroed register and two vshufps.
define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
; AVX2-NEXT:    retq
  %shifted = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
  ret <8 x i32> %shifted
}
1933
; Interleaves the low two elements of each 128-bit lane of %b and %a, with
; several result slots left undef. Both runs expect the undef slots to be
; filled in so the whole shuffle matches one unpack-low instruction
; (vunpcklps / vpunpckldq) with %b as the first source.
; NOTE(review): the 'b' in the function name suggests mask index 11, but the
; mask actually uses 12 (which would be 'c'). Index 12 is the value the
; unpack pattern in the assertions needs, so the name looks like a typo.
define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX2-NEXT:    retq
  %unpacked = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %unpacked
}
1947
; Splats element 1 of %a across the high half of the result; the low half is
; undef. The AVX run expects a 128-bit splat inserted into the upper half; the
; AVX2 run expects a broadcast index constant fed to vpermd.
define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_uuuu1111:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_uuuu1111:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %splat = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %splat
}
1963
; Splats element 4 of %a (the first element of the high 128-bit lane) to all
; eight result slots. The AVX run expects extract / 128-bit splat / insert;
; the AVX2 run expects a broadcast index constant fed to vpermd.
define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_44444444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_44444444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %splat = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %splat
}
1980
; Splats element 5 of %a across the low half of the result; the high half is
; undef. The AVX run expects the high lane to be extracted and splatted as a
; 128-bit value with no re-insert; the AVX2 run expects a broadcast index
; constant fed to vpermd.
define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_5555uuuu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_5555uuuu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %splat = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %splat
}
1996
; Loads one float from %p, places it in element 0 of a 4-element vector and
; splats that element to all eight result slots. Both runs share the same
; assertions: the load must fold into a single vbroadcastss from memory.
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %scalar = load float, float* %p
  %seed = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}
2007
; Splats element 0 of a 4-element register argument to all eight result slots.
; The AVX run expects a 128-bit splat followed by vinsertf128; the AVX2 run
; expects a register-to-register vbroadcastss.
define <8 x float> @splat_v8f32(<4 x float> %r) {
; AVX1-LABEL: splat_v8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
  %splat = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}
2022
2023;
2024; Shuffle to logical bit shifts
2025;
2026
; The second shuffle operand is all zeros, so mask index 8 yields zero ('z');
; 'U' marks undef slots. Every 64-bit pair becomes (zero, even element of %a).
; The AVX2 run expects this to be matched as a 64-bit logical shift left by 32
; (vpsllq); the AVX run expects a zeroed register and two vshufps.
define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shifted = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
  ret <8 x i32> %shifted
}
2042
; The second shuffle operand is all zeros, so mask index 8 yields zero ('z');
; 'U' marks undef slots. Every 64-bit pair becomes (odd element of %a, zero or
; undef). The AVX2 run expects this to be matched as a 64-bit logical shift
; right by 32 (vpsrlq); the AVX run expects a zeroed register and two vshufps.
define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shifted = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
  ret <8 x i32> %shifted
}
2058
; Per 128-bit lane: the last element of %b's lane followed by the first three
; elements of %a's lane. The AVX2 run expects this two-input rotation to be
; matched as a single vpalignr; the AVX run expects two vshufps.
define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_B012F456:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_B012F456:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %rotated
}
2073
; Per 128-bit lane: the last three elements of %a's lane followed by the first
; element of %b's lane. The AVX2 run expects this two-input rotation to be
; matched as a single vpalignr; the AVX run expects two vshufps.
define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1238567C:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1238567C:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
  ret <8 x i32> %rotated
}
2088
; Per 128-bit lane: the last three elements of %b's lane followed by the first
; element of %a's lane (the 1238567C pattern with the sources swapped). The
; AVX2 run expects a single vpalignr; the AVX run expects two vshufps.
define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
  ret <8 x i32> %rotated
}
2103
; Per 128-bit lane: the last element of %a's lane followed by the first three
; elements of %b's lane (the B012F456 pattern with the sources swapped). The
; AVX2 run expects a single vpalignr; the AVX run expects two vshufps.
define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_389A7CDE:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_389A7CDE:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
  ret <8 x i32> %rotated
}
2118
; Single-input rotation: each 128-bit lane of %a is rotated by one element so
; its last element comes first. Both runs expect a plain in-lane permute
; (vpermilps / vpshufd).
define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_30127456:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_30127456:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
  ret <8 x i32> %rotated
}
2132
; Single-input rotation: each 128-bit lane of %a is rotated by one element so
; its first element goes last. Both runs expect a plain in-lane permute
; (vpermilps / vpshufd).
define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_12305674:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_12305674:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; AVX2-NEXT:    retq
  %rotated = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
  ret <8 x i32> %rotated
}
2146
; Concatenates two <2 x float> values loaded from memory into the low four
; elements of an <8 x float> (upper four undef), by first widening each value
; to eight elements and then merging the two widened vectors. Both runs share
; the same assertions: a 64-bit load for the first pointer plus a high-half
; load (vmovhpd) from the second pointer.
; The block label must stay "entry" because the assertions match "# %entry".
define <8 x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_1:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %hi.v2 = load <2 x float>, <2 x float>* %tmp65, align 8
  %lo.v2 = load <2 x float>, <2 x float>* %tmp64, align 8
  %lo.v8 = shufflevector <2 x float> %lo.v2, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %hi.v8 = shufflevector <2 x float> %hi.v2, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %cat = shufflevector <8 x float> %lo.v8, <8 x float> %hi.v8, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %cat
}
2161
; Concatenates two <2 x float> values loaded from memory into the low four
; elements of an <8 x float> (upper four undef) with a single widening
; shufflevector. Both runs share the same assertions: a 64-bit load for the
; first pointer plus a high-half load (vmovhpd) from the second pointer.
; The block label must stay "entry" because the assertions match "# %entry".
define <8 x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_2:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %hi.v2 = load <2 x float>, <2 x float>* %tmp65, align 8
  %lo.v2 = load <2 x float>, <2 x float>* %tmp64, align 8
  %cat = shufflevector <2 x float> %lo.v2, <2 x float> %hi.v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %cat
}
2174
; Concatenates two <2 x float> values loaded from memory in two steps: first
; into a <4 x float>, then widened to <8 x float> with the upper four elements
; undef. Both runs share the same assertions: a 64-bit load for the first
; pointer plus a high-half load (vmovhpd) from the second pointer.
; The block label must stay "entry" because the assertions match "# %entry".
define <8 x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_3:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %hi.v2 = load <2 x float>, <2 x float>* %tmp65, align 8
  %lo.v2 = load <2 x float>, <2 x float>* %tmp64, align 8
  %cat.v4 = shufflevector <2 x float> %lo.v2, <2 x float> %hi.v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cat.v8 = shufflevector <4 x float> %cat.v4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %cat.v8
}
2188
; Builds an <8 x i32> whose lane 0 is an i32 loaded from %ptr and whose lanes
; 1-7 are zero: the loaded scalar is inserted into lane 0 of an undef vector,
; and the shuffle takes lane 0 from it and lanes 1-7 (mask indices 9-15) from
; zeroinitializer.
; Both RUN configurations check for a single vmovd load into xmm0.
; NOTE(review): the check only shows the xmm lanes; the upper half of the ymm
; result being zero presumably relies on the VEX-encoded load -- confirm.
2189define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2190; ALL-LABEL: insert_mem_and_zero_v8i32:
2191; ALL:       # BB#0:
2192; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2193; ALL-NEXT:    retq
2194  %a = load i32, i32* %ptr
2195  %v = insertelement <8 x i32> undef, i32 %a, i32 0
2196  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2197  ret <8 x i32> %shuffle
2198}
2199
; Extracts the low four lanes of %a and the high four lanes of %b as separate
; <4 x i32> vectors, then concatenates them, so the result is
; a[0..3] followed by b[4..7].
; Expected codegen is a single blend in both configurations: the -mattr=+avx
; run checks for vblendpd, the -mattr=+avx2 run checks for vpblendd.
2200define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
2201; AVX1-LABEL: concat_v8i32_0123CDEF:
2202; AVX1:       # BB#0:
2203; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2204; AVX1-NEXT:    retq
2205;
2206; AVX2-LABEL: concat_v8i32_0123CDEF:
2207; AVX2:       # BB#0:
2208; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2209; AVX2-NEXT:    retq
; Subvector extracts: low half of %a, high half of %b.
2210  %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2211  %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; Identity mask over both operands == concatenation of the two halves.
2212  %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2213  ret <8 x i32> %shuf
2214}
2215
; Concatenates the high halves of %a0 and %a1, with the concatenation done on
; a <2 x i64> view of each half (bitcast before, bitcast back after).
; Both extracts are two-operand shuffles over (%a0, %a1): indices 4-7 select
; the high half of %a0, indices 12-15 select the high half of %a1.
; Both RUN configurations check for a single vperm2f128 that selects the
; upper 128 bits of each source register.
2216define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
2217; ALL-LABEL: concat_v8i32_4567CDEF_bc:
2218; ALL:       # BB#0:
2219; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2220; ALL-NEXT:    retq
2221  %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2222  %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; Reinterpret each 128-bit half as two i64 lanes before concatenating.
2223  %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
2224  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2225  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2226  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
2227  ret <8 x i32> %shuffle32
2228}
2229
; Float variant of the high-half concatenation, with mixed element views:
; %f0 is viewed as <4 x i64> and %f1 as <8 x i32> before the upper 128 bits of
; each are extracted. Both halves are then concatenated as <2 x i64> vectors
; and the result is bitcast back to <8 x float>.
; Both RUN configurations check for a single vperm2f128 that selects the
; upper 128 bits of each source register.
2230define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
2231; ALL-LABEL: concat_v8f32_4567CDEF_bc:
2232; ALL:       # BB#0:
2233; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2234; ALL-NEXT:    retq
2235  %a0 = bitcast <8 x float> %f0 to <4 x i64>
2236  %a1 = bitcast <8 x float> %f1 to <8 x i32>
; Upper half of each input: i64 lanes 2-3 of %a0, i32 lanes 4-7 of %a1.
2237  %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
2238  %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; %bc0hi is a same-type bitcast (<2 x i64> to <2 x i64>), i.e. a no-op.
2239  %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
2240  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2241  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2242  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
2243  ret <8 x float> %shuffle32
2244}
2245
; Splats an i32 loaded from %ptr across all eight lanes of an <8 x i32>.
; The scalar is inserted into lane 0 of a zero <4 x i32>, and the shuffle mask
; (zeroinitializer, i.e. every index is 0) replicates that lane into the
; 8-lane result.
; Both RUN configurations check for a single vbroadcastss directly from memory
; at (%rdi).
2246define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
2247; ALL-LABEL: insert_dup_mem_v8i32:
2248; ALL:       # BB#0:
2249; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
2250; ALL-NEXT:    retq
2251  %tmp = load i32, i32* %ptr, align 4
2252  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2253  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
2254  ret <8 x i32> %tmp2
2255}
2256