• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7
8target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9target triple = "x86_64-unknown-unknown"
10
; Mask <0,1,0,1,2,3,2,3> repeats word pairs (0,1) and (2,3) of %a; no mask
; index refers to %b. Both the SSE and AVX check groups expect a single
; (v)pshufd with dword pattern [0,0,1,1].
11define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
12; SSE-LABEL: shuffle_v8i16_01012323:
13; SSE:       # BB#0:
14; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
15; SSE-NEXT:    retq
16;
17; AVX-LABEL: shuffle_v8i16_01012323:
18; AVX:       # BB#0:
19; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
20; AVX-NEXT:    retq
21  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
22  ret <8 x i16> %shuffle
23}
; Mask <6,7,4,5,2,3,0,1> reverses the order of the four word pairs of %a.
; Both the SSE and AVX check groups expect a single (v)pshufd with dword
; pattern [3,2,1,0].
24define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
25; SSE-LABEL: shuffle_v8i16_67452301:
26; SSE:       # BB#0:
27; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
28; SSE-NEXT:    retq
29;
30; AVX-LABEL: shuffle_v8i16_67452301:
31; AVX:       # BB#0:
32; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
33; AVX-NEXT:    retq
34  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
35  ret <8 x i16> %shuffle
36}
; Two-input shuffle: mask <4..11> takes words 4-7 of %a followed by words 0-3
; of %b. Expected lowering per check group:
;   - SSE2 run       - a single shufpd of xmm0[1],xmm1[0]
;   - SSSE3 and SSE41 - palignr into xmm1, then a movdqa copy back to xmm0
;   - AVX runs       - a single three-operand vpalignr
37define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
38; SSE2-LABEL: shuffle_v8i16_456789AB:
39; SSE2:       # BB#0:
40; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
41; SSE2-NEXT:    retq
42;
43; SSSE3-LABEL: shuffle_v8i16_456789AB:
44; SSSE3:       # BB#0:
45; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
46; SSSE3-NEXT:    movdqa %xmm1, %xmm0
47; SSSE3-NEXT:    retq
48;
49; SSE41-LABEL: shuffle_v8i16_456789AB:
50; SSE41:       # BB#0:
51; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
52; SSE41-NEXT:    movdqa %xmm1, %xmm0
53; SSE41-NEXT:    retq
54;
55; AVX-LABEL: shuffle_v8i16_456789AB:
56; AVX:       # BB#0:
57; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
58; AVX-NEXT:    retq
59  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
60  ret <8 x i16> %shuffle
61}
62
; All-zero mask: splats word 0 of %a into every lane. The SSE and AVX1 check
; groups expect (v)pshuflw followed by (v)pshufd; the AVX2 group expects a
; single vpbroadcastw.
63define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
64; SSE-LABEL: shuffle_v8i16_00000000:
65; SSE:       # BB#0:
66; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
67; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
68; SSE-NEXT:    retq
69;
70; AVX1-LABEL: shuffle_v8i16_00000000:
71; AVX1:       # BB#0:
72; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
73; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
74; AVX1-NEXT:    retq
75;
76; AVX2-LABEL: shuffle_v8i16_00000000:
77; AVX2:       # BB#0:
78; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
79; AVX2-NEXT:    retq
80  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
81  ret <8 x i16> %shuffle
82}
; Mask <0,0,0,0,4,4,4,4> splats word 0 across the low four lanes and word 4
; across the high four lanes. Both check groups expect (v)pshuflw followed by
; (v)pshufhw.
83define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
84; SSE-LABEL: shuffle_v8i16_00004444:
85; SSE:       # BB#0:
86; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
87; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
88; SSE-NEXT:    retq
89;
90; AVX-LABEL: shuffle_v8i16_00004444:
91; AVX:       # BB#0:
92; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
93; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
94; AVX-NEXT:    retq
95  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
96  ret <8 x i16> %shuffle
97}
; Mask <u,0,u,1,u,2,u,3>: even result lanes are undef, odd lanes take words
; 0-3 of %a. Both check groups expect a single (v)punpcklwd whose printed
; pattern reads xmm0 for every lane ([0,0,1,1,2,2,3,3]).
98define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
99; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
100; SSE:       # BB#0:
101; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
102; SSE-NEXT:    retq
103;
104; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
105; AVX:       # BB#0:
106; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
107; AVX-NEXT:    retq
108  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
109  ret <8 x i16> %shuffle
110}
; Mask <u,4,u,5,u,6,u,7>: even result lanes are undef, odd lanes take words
; 4-7 of %a. Both check groups expect a single (v)punpckhwd whose printed
; pattern reads xmm0 for every lane ([4,4,5,5,6,6,7,7]).
111define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
112; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
113; SSE:       # BB#0:
114; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
115; SSE-NEXT:    retq
116;
117; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
118; AVX:       # BB#0:
119; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
120; AVX-NEXT:    retq
121  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
122  ret <8 x i16> %shuffle
123}
; Mask <3,1,2,0,6,7,4,5>: the low half swaps words 0 and 3, the high half
; swaps its two word pairs. Both check groups expect (v)pshuflw [3,1,2,0,...]
; followed by (v)pshufd [0,1,3,2].
124define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
125; SSE-LABEL: shuffle_v8i16_31206745:
126; SSE:       # BB#0:
127; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
128; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
129; SSE-NEXT:    retq
130;
131; AVX-LABEL: shuffle_v8i16_31206745:
132; AVX:       # BB#0:
133; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
134; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
135; AVX-NEXT:    retq
136  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
137  ret <8 x i16> %shuffle
138}
; Mask <4,4,4,4,0,0,0,0> splats word 4 across the low four lanes and word 0
; across the high four lanes. The SSE2 group expects a pshufd/pshuflw/pshufhw
; sequence; the SSSE3, SSE41 and AVX groups expect a single (v)pshufb.
139define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
140; SSE2-LABEL: shuffle_v8i16_44440000:
141; SSE2:       # BB#0:
142; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
143; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
144; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
145; SSE2-NEXT:    retq
146;
147; SSSE3-LABEL: shuffle_v8i16_44440000:
148; SSSE3:       # BB#0:
149; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
150; SSSE3-NEXT:    retq
151;
152; SSE41-LABEL: shuffle_v8i16_44440000:
153; SSE41:       # BB#0:
154; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
155; SSE41-NEXT:    retq
156;
157; AVX-LABEL: shuffle_v8i16_44440000:
158; AVX:       # BB#0:
159; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
160; AVX-NEXT:    retq
161  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
162  ret <8 x i16> %shuffle
163}
; Mask <2,3,0,1,6,7,4,5> swaps the two word pairs inside each 64-bit half of
; %a. Both check groups expect a single (v)pshufd with dword pattern
; [1,0,3,2].
164define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
165; SSE-LABEL: shuffle_v8i16_23016745:
166; SSE:       # BB#0:
167; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
168; SSE-NEXT:    retq
169;
170; AVX-LABEL: shuffle_v8i16_23016745:
171; AVX:       # BB#0:
172; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
173; AVX-NEXT:    retq
174  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
175  ret <8 x i16> %shuffle
176}
; Mask <2,3,0,2,6,7,4,5>: the low half is a word-level permutation with word 2
; repeated, the high half swaps its two word pairs. Both check groups expect
; (v)pshuflw [2,3,0,2,...] followed by (v)pshufd [0,1,3,2].
177define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
178; SSE-LABEL: shuffle_v8i16_23026745:
179; SSE:       # BB#0:
180; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
181; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
182; SSE-NEXT:    retq
183;
184; AVX-LABEL: shuffle_v8i16_23026745:
185; AVX:       # BB#0:
186; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
187; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
188; AVX-NEXT:    retq
189  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
190  ret <8 x i16> %shuffle
191}
; Mask <2,3,0,1,6,7,4,7>: the low half swaps its two word pairs, the high half
; is a word-level permutation with word 7 repeated. Both check groups expect
; (v)pshufd [1,0,2,3] followed by (v)pshufhw [...,6,7,4,7].
192define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
193; SSE-LABEL: shuffle_v8i16_23016747:
194; SSE:       # BB#0:
195; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
196; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
197; SSE-NEXT:    retq
198;
199; AVX-LABEL: shuffle_v8i16_23016747:
200; AVX:       # BB#0:
201; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
202; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
203; AVX-NEXT:    retq
204  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
205  ret <8 x i16> %shuffle
206}
; Mask <7,5,6,4,3,1,2,0>: the result's low half draws from words 4-7 and its
; high half from words 0-3 of %a. The SSE2 group expects a
; pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
207define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
208; SSE2-LABEL: shuffle_v8i16_75643120:
209; SSE2:       # BB#0:
210; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
211; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
212; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
213; SSE2-NEXT:    retq
214;
215; SSSE3-LABEL: shuffle_v8i16_75643120:
216; SSSE3:       # BB#0:
217; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
218; SSSE3-NEXT:    retq
219;
220; SSE41-LABEL: shuffle_v8i16_75643120:
221; SSE41:       # BB#0:
222; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
223; SSE41-NEXT:    retq
224;
225; AVX-LABEL: shuffle_v8i16_75643120:
226; AVX:       # BB#0:
227; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
228; AVX-NEXT:    retq
229  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
230  ret <8 x i16> %shuffle
231}
232
; Mask <1,0,5,4,5,4,1,0> uses only words 0, 1, 4 and 5 of %a. The SSE2 group
; expects pshufd [0,2,2,0] to gather those words into both halves, followed by
; pshuflw and pshufhw; the SSSE3, SSE41 and AVX groups expect a single
; (v)pshufb.
233define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
234; SSE2-LABEL: shuffle_v8i16_10545410:
235; SSE2:       # BB#0:
236; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
237; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
238; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
239; SSE2-NEXT:    retq
240;
241; SSSE3-LABEL: shuffle_v8i16_10545410:
242; SSSE3:       # BB#0:
243; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
244; SSSE3-NEXT:    retq
245;
246; SSE41-LABEL: shuffle_v8i16_10545410:
247; SSE41:       # BB#0:
248; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
249; SSE41-NEXT:    retq
250;
251; AVX-LABEL: shuffle_v8i16_10545410:
252; AVX:       # BB#0:
253; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
254; AVX-NEXT:    retq
255  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
256  ret <8 x i16> %shuffle
257}
; Mask <5,4,1,0,5,4,1,0> uses only words 0, 1, 4 and 5 of %a and produces the
; same four-word pattern in both halves. The SSE2 group expects a
; pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
258define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
259; SSE2-LABEL: shuffle_v8i16_54105410:
260; SSE2:       # BB#0:
261; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
262; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
263; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
264; SSE2-NEXT:    retq
265;
266; SSSE3-LABEL: shuffle_v8i16_54105410:
267; SSSE3:       # BB#0:
268; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
269; SSSE3-NEXT:    retq
270;
271; SSE41-LABEL: shuffle_v8i16_54105410:
272; SSE41:       # BB#0:
273; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
274; SSE41-NEXT:    retq
275;
276; AVX-LABEL: shuffle_v8i16_54105410:
277; AVX:       # BB#0:
278; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
279; AVX-NEXT:    retq
280  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
281  ret <8 x i16> %shuffle
282}
; Mask <5,4,1,0,1,0,5,4> uses only words 0, 1, 4 and 5 of %a. The SSE2 group
; expects a pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups
; expect a single (v)pshufb.
283define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
284; SSE2-LABEL: shuffle_v8i16_54101054:
285; SSE2:       # BB#0:
286; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
287; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
288; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
289; SSE2-NEXT:    retq
290;
291; SSSE3-LABEL: shuffle_v8i16_54101054:
292; SSSE3:       # BB#0:
293; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
294; SSSE3-NEXT:    retq
295;
296; SSE41-LABEL: shuffle_v8i16_54101054:
297; SSE41:       # BB#0:
298; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
299; SSE41-NEXT:    retq
300;
301; AVX-LABEL: shuffle_v8i16_54101054:
302; AVX:       # BB#0:
303; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
304; AVX-NEXT:    retq
305  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
306  ret <8 x i16> %shuffle
307}
; Mask <0,4,4,0,0,4,4,0> uses only words 0 and 4 of %a and repeats the same
; four-lane pattern in both halves. The SSE2 group expects a
; pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
308define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
309; SSE2-LABEL: shuffle_v8i16_04400440:
310; SSE2:       # BB#0:
311; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
312; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
313; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
314; SSE2-NEXT:    retq
315;
316; SSSE3-LABEL: shuffle_v8i16_04400440:
317; SSSE3:       # BB#0:
318; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
319; SSSE3-NEXT:    retq
320;
321; SSE41-LABEL: shuffle_v8i16_04400440:
322; SSE41:       # BB#0:
323; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
324; SSE41-NEXT:    retq
325;
326; AVX-LABEL: shuffle_v8i16_04400440:
327; AVX:       # BB#0:
328; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
329; AVX-NEXT:    retq
330  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
331  ret <8 x i16> %shuffle
332}
; Mask <4,0,0,4,4,0,0,4> uses only words 0 and 4 of %a and repeats the same
; four-lane pattern in both halves. The SSE2 group expects a
; pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
333define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
334; SSE2-LABEL: shuffle_v8i16_40044004:
335; SSE2:       # BB#0:
336; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
337; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
338; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
339; SSE2-NEXT:    retq
340;
341; SSSE3-LABEL: shuffle_v8i16_40044004:
342; SSSE3:       # BB#0:
343; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
344; SSSE3-NEXT:    retq
345;
346; SSE41-LABEL: shuffle_v8i16_40044004:
347; SSE41:       # BB#0:
348; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
349; SSE41-NEXT:    retq
350;
351; AVX-LABEL: shuffle_v8i16_40044004:
352; AVX:       # BB#0:
353; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
354; AVX-NEXT:    retq
355  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
356  ret <8 x i16> %shuffle
357}
358
; Mask <2,6,4,0,5,1,7,3> is a permutation of all eight words of %a in which
; both result halves mix words from both input halves. The SSE2 group expects
; a five-shuffle pshuflw/pshufhw/pshufd/pshuflw/pshufhw sequence; the SSSE3,
; SSE41 and AVX groups expect a single (v)pshufb.
359define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
360; SSE2-LABEL: shuffle_v8i16_26405173:
361; SSE2:       # BB#0:
362; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
363; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
364; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
365; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
366; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
367; SSE2-NEXT:    retq
368;
369; SSSE3-LABEL: shuffle_v8i16_26405173:
370; SSSE3:       # BB#0:
371; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
372; SSSE3-NEXT:    retq
373;
374; SSE41-LABEL: shuffle_v8i16_26405173:
375; SSE41:       # BB#0:
376; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
377; SSE41-NEXT:    retq
378;
379; AVX-LABEL: shuffle_v8i16_26405173:
380; AVX:       # BB#0:
381; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
382; AVX-NEXT:    retq
383  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
384  ret <8 x i16> %shuffle
385}
; Mask <2,0,6,4,5,1,7,3> is a permutation of all eight words of %a in which
; both result halves mix words from both input halves. The SSE2 group expects
; a five-shuffle pshuflw/pshufhw/pshufd/pshuflw/pshufhw sequence; the SSSE3,
; SSE41 and AVX groups expect a single (v)pshufb.
386define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
387; SSE2-LABEL: shuffle_v8i16_20645173:
388; SSE2:       # BB#0:
389; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
390; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
391; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
392; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
393; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
394; SSE2-NEXT:    retq
395;
396; SSSE3-LABEL: shuffle_v8i16_20645173:
397; SSSE3:       # BB#0:
398; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
399; SSSE3-NEXT:    retq
400;
401; SSE41-LABEL: shuffle_v8i16_20645173:
402; SSE41:       # BB#0:
403; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
404; SSE41-NEXT:    retq
405;
406; AVX-LABEL: shuffle_v8i16_20645173:
407; AVX:       # BB#0:
408; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
409; AVX-NEXT:    retq
410  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
411  ret <8 x i16> %shuffle
412}
; Mask <2,6,4,0,1,3,7,5> is a permutation of all eight words of %a in which
; both result halves mix words from both input halves. The SSE2 group expects
; a four-shuffle pshuflw/pshufhw/pshufd/pshuflw sequence; the SSSE3, SSE41 and
; AVX groups expect a single (v)pshufb.
413define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
414; SSE2-LABEL: shuffle_v8i16_26401375:
415; SSE2:       # BB#0:
416; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
417; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
418; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
419; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
420; SSE2-NEXT:    retq
421;
422; SSSE3-LABEL: shuffle_v8i16_26401375:
423; SSSE3:       # BB#0:
424; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
425; SSSE3-NEXT:    retq
426;
427; SSE41-LABEL: shuffle_v8i16_26401375:
428; SSE41:       # BB#0:
429; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
430; SSE41-NEXT:    retq
431;
432; AVX-LABEL: shuffle_v8i16_26401375:
433; AVX:       # BB#0:
434; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
435; AVX-NEXT:    retq
436  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
437  ret <8 x i16> %shuffle
438}
439
; Mask <6,6,7,5,1,6,4,3> is not a permutation: word 6 of %a is used three
; times while words 0 and 2 are unused. The SSE2 group expects a five-shuffle
; pshuflw/pshufhw/pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX
; groups expect a single (v)pshufb.
440define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
441; SSE2-LABEL: shuffle_v8i16_66751643:
442; SSE2:       # BB#0:
443; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
444; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
445; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
446; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
447; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
448; SSE2-NEXT:    retq
449;
450; SSSE3-LABEL: shuffle_v8i16_66751643:
451; SSSE3:       # BB#0:
452; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
453; SSSE3-NEXT:    retq
454;
455; SSE41-LABEL: shuffle_v8i16_66751643:
456; SSE41:       # BB#0:
457; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
458; SSE41-NEXT:    retq
459;
460; AVX-LABEL: shuffle_v8i16_66751643:
461; AVX:       # BB#0:
462; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
463; AVX-NEXT:    retq
464  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
465  ret <8 x i16> %shuffle
466}
467
; Mask <6,0,5,1,4,7,5,4> reads only %a; words 4 and 5 are each used twice and
; words 2 and 3 are unused. The second shufflevector operand is %b, matching
; the other tests in this file; no mask index reaches it, so the expected
; lowering does not depend on it. The SSE2 group expects a four-shuffle
; pshufhw/pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups
; expect a single (v)pshufb.
468define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
469; SSE2-LABEL: shuffle_v8i16_60514754:
470; SSE2:       # BB#0:
471; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
472; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
473; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
474; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
475; SSE2-NEXT:    retq
476;
477; SSSE3-LABEL: shuffle_v8i16_60514754:
478; SSSE3:       # BB#0:
479; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
480; SSSE3-NEXT:    retq
481;
482; SSE41-LABEL: shuffle_v8i16_60514754:
483; SSE41:       # BB#0:
484; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
485; SSE41-NEXT:    retq
486;
487; AVX-LABEL: shuffle_v8i16_60514754:
488; AVX:       # BB#0:
489; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
490; AVX-NEXT:    retq
491  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
492  ret <8 x i16> %shuffle
493}
494
; Mask <0,0,4,4,4,4,4,4> uses only words 0 and 4 of %a. The SSE2 group expects
; a pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
495define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
496; SSE2-LABEL: shuffle_v8i16_00444444:
497; SSE2:       # BB#0:
498; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
499; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
500; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
501; SSE2-NEXT:    retq
502;
503; SSSE3-LABEL: shuffle_v8i16_00444444:
504; SSSE3:       # BB#0:
505; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
506; SSSE3-NEXT:    retq
507;
508; SSE41-LABEL: shuffle_v8i16_00444444:
509; SSE41:       # BB#0:
510; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
511; SSE41-NEXT:    retq
512;
513; AVX-LABEL: shuffle_v8i16_00444444:
514; AVX:       # BB#0:
515; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
516; AVX-NEXT:    retq
517  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
518  ret <8 x i16> %shuffle
519}
; Mask <4,4,0,0,4,4,4,4> uses only words 0 and 4 of %a. The SSE2 group expects
; a pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
520define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
521; SSE2-LABEL: shuffle_v8i16_44004444:
522; SSE2:       # BB#0:
523; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
524; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
525; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
526; SSE2-NEXT:    retq
527;
528; SSSE3-LABEL: shuffle_v8i16_44004444:
529; SSSE3:       # BB#0:
530; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
531; SSSE3-NEXT:    retq
532;
533; SSE41-LABEL: shuffle_v8i16_44004444:
534; SSE41:       # BB#0:
535; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
536; SSE41-NEXT:    retq
537;
538; AVX-LABEL: shuffle_v8i16_44004444:
539; AVX:       # BB#0:
540; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
541; AVX-NEXT:    retq
542  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
543  ret <8 x i16> %shuffle
544}
; Mask <0,4,4,0,4,4,4,4> uses only words 0 and 4 of %a. The SSE2 group expects
; a pshufd/pshuflw/pshufhw sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
545define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
546; SSE2-LABEL: shuffle_v8i16_04404444:
547; SSE2:       # BB#0:
548; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
549; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
550; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
551; SSE2-NEXT:    retq
552;
553; SSSE3-LABEL: shuffle_v8i16_04404444:
554; SSSE3:       # BB#0:
555; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
556; SSSE3-NEXT:    retq
557;
558; SSE41-LABEL: shuffle_v8i16_04404444:
559; SSE41:       # BB#0:
560; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
561; SSE41-NEXT:    retq
562;
563; AVX-LABEL: shuffle_v8i16_04404444:
564; AVX:       # BB#0:
565; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
566; AVX-NEXT:    retq
567  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
568  ret <8 x i16> %shuffle
569}
; Mask <0,4,4,0,0,0,0,0> uses only words 0 and 4 of %a, with word 0 splatted
; across the high four lanes. The SSE2 group expects a pshufd/pshuflw/pshufhw
; sequence; the SSSE3, SSE41 and AVX groups expect a single (v)pshufb.
570define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
571; SSE2-LABEL: shuffle_v8i16_04400000:
572; SSE2:       # BB#0:
573; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
574; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
575; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
576; SSE2-NEXT:    retq
577;
578; SSSE3-LABEL: shuffle_v8i16_04400000:
579; SSSE3:       # BB#0:
580; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
581; SSSE3-NEXT:    retq
582;
583; SSE41-LABEL: shuffle_v8i16_04400000:
584; SSE41:       # BB#0:
585; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
586; SSE41-NEXT:    retq
587;
588; AVX-LABEL: shuffle_v8i16_04400000:
589; AVX:       # BB#0:
590; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
591; AVX-NEXT:    retq
592  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
593  ret <8 x i16> %shuffle
594}
; Mask <0,4,4,0,4,5,6,7> keeps the high four words of %a in place and builds
; the low four lanes from words 0 and 4. Both the SSE and AVX check groups
; expect (v)pshufd [0,2,2,3] followed by (v)pshuflw [0,2,2,0,...].
595define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
596; SSE-LABEL: shuffle_v8i16_04404567:
597; SSE:       # BB#0:
598; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
599; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
600; SSE-NEXT:    retq
601;
602; AVX-LABEL: shuffle_v8i16_04404567:
603; AVX:       # BB#0:
604; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
605; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
606; AVX-NEXT:    retq
607  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
608  ret <8 x i16> %shuffle
609}
610
; Mask <0,u,4,4,4,4,4,4>: lane 1 is undef, the remaining lanes use words 0 and
; 4 of %a. The SSE2 group expects a pshufd/pshuflw/pshufhw sequence; the
; SSSE3, SSE41 and AVX groups expect a single (v)pshufb in which the undef
; lane is printed as bytes 2,3.
611define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
612; SSE2-LABEL: shuffle_v8i16_0X444444:
613; SSE2:       # BB#0:
614; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
615; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
616; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
617; SSE2-NEXT:    retq
618;
619; SSSE3-LABEL: shuffle_v8i16_0X444444:
620; SSSE3:       # BB#0:
621; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
622; SSSE3-NEXT:    retq
623;
624; SSE41-LABEL: shuffle_v8i16_0X444444:
625; SSE41:       # BB#0:
626; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
627; SSE41-NEXT:    retq
628;
629; AVX-LABEL: shuffle_v8i16_0X444444:
630; AVX:       # BB#0:
631; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
632; AVX-NEXT:    retq
633  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
634  ret <8 x i16> %shuffle
635}
; Mask <4,4,u,0,4,4,4,4>: lane 2 is undef, the remaining lanes use words 0 and
; 4 of %a. The SSE2 group expects a pshufd/pshuflw/pshufhw sequence; the
; SSSE3, SSE41 and AVX groups expect a single (v)pshufb in which the undef
; lane is printed as bytes 8,9.
636define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
637; SSE2-LABEL: shuffle_v8i16_44X04444:
638; SSE2:       # BB#0:
639; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
640; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
641; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
642; SSE2-NEXT:    retq
643;
644; SSSE3-LABEL: shuffle_v8i16_44X04444:
645; SSSE3:       # BB#0:
646; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
647; SSSE3-NEXT:    retq
648;
649; SSE41-LABEL: shuffle_v8i16_44X04444:
650; SSE41:       # BB#0:
651; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
652; SSE41-NEXT:    retq
653;
654; AVX-LABEL: shuffle_v8i16_44X04444:
655; AVX:       # BB#0:
656; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
657; AVX-NEXT:    retq
658  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
659  ret <8 x i16> %shuffle
660}
; Mask <u,4,4,0,4,4,4,4>: lane 0 is undef, the remaining lanes use words 0 and
; 4 of %a. The SSE2 group expects a pshufd/pshuflw/pshufhw sequence; the
; SSSE3, SSE41 and AVX groups expect a single (v)pshufb in which the undef
; lane is printed as bytes 0,1.
661define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
662; SSE2-LABEL: shuffle_v8i16_X4404444:
663; SSE2:       # BB#0:
664; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
665; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
666; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
667; SSE2-NEXT:    retq
668;
669; SSSE3-LABEL: shuffle_v8i16_X4404444:
670; SSSE3:       # BB#0:
671; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
672; SSSE3-NEXT:    retq
673;
674; SSE41-LABEL: shuffle_v8i16_X4404444:
675; SSE41:       # BB#0:
676; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
677; SSE41-NEXT:    retq
678;
679; AVX-LABEL: shuffle_v8i16_X4404444:
680; AVX:       # BB#0:
681; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
682; AVX-NEXT:    retq
683  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
684  ret <8 x i16> %shuffle
685}
686
; Mask <0,1,2,7,u,u,u,u>: only the low four result lanes are defined; lane 3
; takes word 7 from the high half of %a. The SSE2 group expects a
; pshufd/pshufhw/pshufd sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
687define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
688; SSE2-LABEL: shuffle_v8i16_0127XXXX:
689; SSE2:       # BB#0:
690; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
691; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
692; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
693; SSE2-NEXT:    retq
694;
695; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
696; SSSE3:       # BB#0:
697; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
698; SSSE3-NEXT:    retq
699;
700; SSE41-LABEL: shuffle_v8i16_0127XXXX:
701; SSE41:       # BB#0:
702; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
703; SSE41-NEXT:    retq
704;
705; AVX-LABEL: shuffle_v8i16_0127XXXX:
706; AVX:       # BB#0:
707; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
708; AVX-NEXT:    retq
709  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
710  ret <8 x i16> %shuffle
711}
712
; Mask <u,u,u,u,4,5,6,3>: only the high four result lanes are defined; lane 7
; takes word 3 from the low half of %a. The SSE2 group expects a
; pshufd/pshuflw/pshufd sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
713define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
714; SSE2-LABEL: shuffle_v8i16_XXXX4563:
715; SSE2:       # BB#0:
716; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
717; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
718; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
719; SSE2-NEXT:    retq
720;
721; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
722; SSSE3:       # BB#0:
723; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
724; SSSE3-NEXT:    retq
725;
726; SSE41-LABEL: shuffle_v8i16_XXXX4563:
727; SSE41:       # BB#0:
728; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
729; SSE41-NEXT:    retq
730;
731; AVX-LABEL: shuffle_v8i16_XXXX4563:
732; AVX:       # BB#0:
733; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
734; AVX-NEXT:    retq
735  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
736  ret <8 x i16> %shuffle
737}
738
; Mask <4,5,6,3,u,u,u,u>: only the low four result lanes are defined; they
; take words 4, 5, 6 and 3 of %a. The SSE2 group expects a
; pshufd/pshuflw/pshufd sequence; the SSSE3, SSE41 and AVX groups expect a
; single (v)pshufb.
739define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
740; SSE2-LABEL: shuffle_v8i16_4563XXXX:
741; SSE2:       # BB#0:
742; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
743; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
744; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
745; SSE2-NEXT:    retq
746;
747; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
748; SSSE3:       # BB#0:
749; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
750; SSSE3-NEXT:    retq
751;
752; SSE41-LABEL: shuffle_v8i16_4563XXXX:
753; SSE41:       # BB#0:
754; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
755; SSE41-NEXT:    retq
756;
757; AVX-LABEL: shuffle_v8i16_4563XXXX:
758; AVX:       # BB#0:
759; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
760; AVX-NEXT:    retq
761  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
762  ret <8 x i16> %shuffle
763}
764
; Single-source permutation of %a that exchanges elements 3 and 7 and keeps
; every other lane in place. The checks expect pshufd/pshufhw/pshufd on plain
; SSE2 and one pshufb wherever SSSE3 or later is enabled.
765define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
766; SSE2-LABEL: shuffle_v8i16_01274563:
767; SSE2:       # BB#0:
768; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
769; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
770; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
771; SSE2-NEXT:    retq
772;
773; SSSE3-LABEL: shuffle_v8i16_01274563:
774; SSSE3:       # BB#0:
775; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
776; SSSE3-NEXT:    retq
777;
778; SSE41-LABEL: shuffle_v8i16_01274563:
779; SSE41:       # BB#0:
780; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
781; SSE41-NEXT:    retq
782;
783; AVX-LABEL: shuffle_v8i16_01274563:
784; AVX:       # BB#0:
785; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
786; AVX-NEXT:    retq
787  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
788  ret <8 x i16> %shuffle
789}
790
; Single-source permutation of %a producing elements 4, 5, 6, 3, 0, 1, 2, 7
; (the two halves trade places while elements 3 and 7 stay in their lanes).
; The checks expect pshufd/pshuflw/pshufd on plain SSE2 and one pshufb
; wherever SSSE3 or later is enabled.
791define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
792; SSE2-LABEL: shuffle_v8i16_45630127:
793; SSE2:       # BB#0:
794; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
795; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
796; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
797; SSE2-NEXT:    retq
798;
799; SSSE3-LABEL: shuffle_v8i16_45630127:
800; SSSE3:       # BB#0:
801; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
802; SSSE3-NEXT:    retq
803;
804; SSE41-LABEL: shuffle_v8i16_45630127:
805; SSE41:       # BB#0:
806; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
807; SSE41-NEXT:    retq
808;
809; AVX-LABEL: shuffle_v8i16_45630127:
810; AVX:       # BB#0:
811; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
812; AVX-NEXT:    retq
813  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
814  ret <8 x i16> %shuffle
815}
816
; Irregular single-source shuffle of %a (elements 3, 7, 1, 0, 2, 7, 3, 5;
; elements 3 and 7 are each used twice). The checks expect a six-instruction
; pshufhw/pshufd/pshuflw chain on plain SSE2 and one pshufb wherever SSSE3 or
; later is enabled.
817define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
818; SSE2-LABEL: shuffle_v8i16_37102735:
819; SSE2:       # BB#0:
820; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
821; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
822; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
823; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
824; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
825; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
826; SSE2-NEXT:    retq
827;
828; SSSE3-LABEL: shuffle_v8i16_37102735:
829; SSSE3:       # BB#0:
830; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
831; SSSE3-NEXT:    retq
832;
833; SSE41-LABEL: shuffle_v8i16_37102735:
834; SSE41:       # BB#0:
835; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
836; SSE41-NEXT:    retq
837;
838; AVX-LABEL: shuffle_v8i16_37102735:
839; AVX:       # BB#0:
840; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
841; AVX-NEXT:    retq
842  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
843  ret <8 x i16> %shuffle
844}
845
; Two-source shuffle interleaving the low four elements of %a with the low
; four elements of %b (a0, b0, a1, b1, ...). The checks expect a single
; punpcklwd (vpunpcklwd under AVX).
846define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
847; SSE-LABEL: shuffle_v8i16_08192a3b:
848; SSE:       # BB#0:
849; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
850; SSE-NEXT:    retq
851;
852; AVX-LABEL: shuffle_v8i16_08192a3b:
853; AVX:       # BB#0:
854; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
855; AVX-NEXT:    retq
856  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
857  ret <8 x i16> %shuffle
858}
859
; Two-source shuffle interleaving the low four elements of %a with the high
; four elements of %b. The checks expect a pshufd that swaps the 64-bit halves
; of xmm1, followed by punpcklwd.
860define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
861; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
862; SSE:       # BB#0:
863; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
864; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
865; SSE-NEXT:    retq
866;
867; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
868; AVX:       # BB#0:
869; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
870; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
871; AVX-NEXT:    retq
872  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
873  ret <8 x i16> %shuffle
874}
875
; Two-source shuffle interleaving the high four elements of %a with the high
; four elements of %b. The checks expect a single punpckhwd (vpunpckhwd under
; AVX).
876define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
877; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
878; SSE:       # BB#0:
879; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
880; SSE-NEXT:    retq
881;
882; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
883; AVX:       # BB#0:
884; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
885; AVX-NEXT:    retq
886  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
887  ret <8 x i16> %shuffle
888}
889
; Two-source shuffle interleaving the high four elements of %a with the low
; four elements of %b. The checks expect a pshufd that swaps the 64-bit halves
; of xmm0, followed by punpcklwd.
890define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
891; SSE-LABEL: shuffle_v8i16_48596a7b:
892; SSE:       # BB#0:
893; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
894; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
895; SSE-NEXT:    retq
896;
897; AVX-LABEL: shuffle_v8i16_48596a7b:
898; AVX:       # BB#0:
899; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
900; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
901; AVX-NEXT:    retq
902  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
903  ret <8 x i16> %shuffle
904}
905
; Two-source interleave that pairs elements 0-1 of %a and %b in the low half
; of the result and elements 6-7 of %a and %b in the high half. The checks
; expect the same pshufd [0,3,2,3] applied to each input, then punpcklwd.
906define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
907; SSE-LABEL: shuffle_v8i16_08196e7f:
908; SSE:       # BB#0:
909; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
910; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
911; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
912; SSE-NEXT:    retq
913;
914; AVX-LABEL: shuffle_v8i16_08196e7f:
915; AVX:       # BB#0:
916; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
917; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
918; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
919; AVX-NEXT:    retq
920  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
921  ret <8 x i16> %shuffle
922}
923
; Two-source interleave producing a0, b4, a1, b5, a6, b0, a7, b1. The checks
; expect one pshufd per input (xmm1 with [2,0,2,3], xmm0 with [0,3,2,3]) to
; gather the needed dwords into the low half, then punpcklwd.
924define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
925; SSE-LABEL: shuffle_v8i16_0c1d6879:
926; SSE:       # BB#0:
927; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
928; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
929; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
930; SSE-NEXT:    retq
931;
932; AVX-LABEL: shuffle_v8i16_0c1d6879:
933; AVX:       # BB#0:
934; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
935; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
936; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
937; AVX-NEXT:    retq
938  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
939  ret <8 x i16> %shuffle
940}
941
; Two-source shuffle that uses only the low four elements of each input, in
; the order a1, a0, b1, b0, a3, a2, b3, b2. The checks expect punpcklwd to
; interleave the low halves, then pshuflw and pshufhw to reorder each half.
942define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
943; SSE-LABEL: shuffle_v8i16_109832ba:
944; SSE:       # BB#0:
945; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
946; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
947; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
948; SSE-NEXT:    retq
949;
950; AVX-LABEL: shuffle_v8i16_109832ba:
951; AVX:       # BB#0:
952; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
953; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
954; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
955; AVX-NEXT:    retq
956  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
957  ret <8 x i16> %shuffle
958}
959
; Low-half interleave with the operands commuted, so %b supplies the even
; result lanes (b0, a0, b1, a1, ...). The SSE checks expect punpcklwd into xmm1
; plus a movdqa copy into xmm0. The AVX checks expect one three-operand
; vpunpcklwd with no copy.
960define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
961; SSE-LABEL: shuffle_v8i16_8091a2b3:
962; SSE:       # BB#0:
963; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
964; SSE-NEXT:    movdqa %xmm1, %xmm0
965; SSE-NEXT:    retq
966;
967; AVX-LABEL: shuffle_v8i16_8091a2b3:
968; AVX:       # BB#0:
969; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
970; AVX-NEXT:    retq
971  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
972  ret <8 x i16> %shuffle
973}
; High-half interleave with the operands commuted, so %b supplies the even
; result lanes (b4, a4, b5, a5, ...). The SSE checks expect punpckhwd into xmm1
; plus a movdqa copy into xmm0. The AVX checks expect one three-operand
; vpunpckhwd with no copy.
974define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
975; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
976; SSE:       # BB#0:
977; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
978; SSE-NEXT:    movdqa %xmm1, %xmm0
979; SSE-NEXT:    retq
980;
981; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
982; AVX:       # BB#0:
983; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
984; AVX-NEXT:    retq
985  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
986  ret <8 x i16> %shuffle
987}
988
; Two-source shuffle whose low half is a0, a2, a1, a3 and whose high half is
; b4, b6, b5, b7. The checks expect a pshuflw on each input (after a pshufd
; moves the high half of xmm1 down) and a final punpcklqdq to join the halves.
989define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
990; SSE-LABEL: shuffle_v8i16_0213cedf:
991; SSE:       # BB#0:
992; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
993; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
994; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
995; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
996; SSE-NEXT:    retq
997;
998; AVX-LABEL: shuffle_v8i16_0213cedf:
999; AVX:       # BB#0:
1000; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1001; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1002; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1003; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1004; AVX-NEXT:    retq
1005  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1006  ret <8 x i16> %shuffle
1007}
1008
; Two-source shuffle with only the low half defined, as a4, a4, a3, b2.
; Expected lowering per configuration:
;   - plain SSE2 merges the inputs with a constant mask (pand/pandn/por), then
;     applies pshufd and pshuflw
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX use pblendw followed by pshufd and pshuflw
1009define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1010; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1011; SSE2:       # BB#0:
1012; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1013; SSE2-NEXT:    pand %xmm2, %xmm0
1014; SSE2-NEXT:    pandn %xmm1, %xmm2
1015; SSE2-NEXT:    por %xmm0, %xmm2
1016; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1017; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1018; SSE2-NEXT:    retq
1019;
1020; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1021; SSSE3:       # BB#0:
1022; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1023; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1024; SSSE3-NEXT:    por %xmm1, %xmm0
1025; SSSE3-NEXT:    retq
1026;
1027; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1028; SSE41:       # BB#0:
1029; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1030; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1031; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1032; SSE41-NEXT:    retq
1033;
1034; AVX-LABEL: shuffle_v8i16_443aXXXX:
1035; AVX:       # BB#0:
1036; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1037; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1038; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1039; AVX-NEXT:    retq
1040  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1041  ret <8 x i16> %shuffle
1042}
1043
; Two-source shuffle with only the low half defined, as a0, a3, a2, b5.
; Expected lowering per configuration:
;   - plain SSE2 uses movsd to combine the inputs, then a four-instruction
;     pshufd/pshufhw/pshufd/pshuflw chain
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX1 use pblendw followed by one pshufb
;   - AVX2 uses vpblendd in place of the word blend, then vpshufb
1044define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1045; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1046; SSE2:       # BB#0:
1047; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1048; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1049; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1050; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1051; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1052; SSE2-NEXT:    retq
1053;
1054; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1055; SSSE3:       # BB#0:
1056; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1057; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1058; SSSE3-NEXT:    por %xmm1, %xmm0
1059; SSSE3-NEXT:    retq
1060;
1061; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1062; SSE41:       # BB#0:
1063; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1064; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1065; SSE41-NEXT:    retq
1066;
1067; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1068; AVX1:       # BB#0:
1069; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1070; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1071; AVX1-NEXT:    retq
1072;
1073; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1074; AVX2:       # BB#0:
1075; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1076; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1077; AVX2-NEXT:    retq
1078  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1079  ret <8 x i16> %shuffle
1080}
; Shuffle with a single defined lane: result lane 3 is element 5 of %b (mask
; index 13) and every other lane is undef. The checks expect one pshufd of
; xmm1 with pattern [2,2,3,3].
1081define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1082; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1083; SSE:       # BB#0:
1084; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1085; SSE-NEXT:    retq
1086;
1087; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1088; AVX:       # BB#0:
1089; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1090; AVX-NEXT:    retq
1091  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1092  ret <8 x i16> %shuffle
1093}
1094
; Two-source shuffle with only the low half defined, as a0, a1, a2, b5.
; Expected lowering per configuration:
;   - plain SSE2 moves b5 into lane 3 with pshufd and merges via a constant
;     mask (pand/pandn/por)
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX use pshufd on xmm1 followed by pblendw
1095define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1096; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1097; SSE2:       # BB#0:
1098; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1099; SSE2-NEXT:    pand %xmm2, %xmm0
1100; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1101; SSE2-NEXT:    pandn %xmm1, %xmm2
1102; SSE2-NEXT:    por %xmm2, %xmm0
1103; SSE2-NEXT:    retq
1104;
1105; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1106; SSSE3:       # BB#0:
1107; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1108; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1109; SSSE3-NEXT:    por %xmm1, %xmm0
1110; SSSE3-NEXT:    retq
1111;
1112; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1113; SSE41:       # BB#0:
1114; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1115; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1116; SSE41-NEXT:    retq
1117;
1118; AVX-LABEL: shuffle_v8i16_012dXXXX:
1119; AVX:       # BB#0:
1120; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1121; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1122; AVX-NEXT:    retq
1123  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1124  ret <8 x i16> %shuffle
1125}
1126
; Two-source shuffle with only the high half defined, as b4, b5, b6, a3.
; Expected lowering per configuration:
;   - plain SSE2 moves a3 into lane 7 with pshufd, merges via a constant mask
;     (pand/pandn/por) and copies the result into xmm0
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX1 use pshufd on xmm0 followed by pblendw
;   - AVX2 uses vpbroadcastq in place of the vpshufd
1127define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1128; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1129; SSE2:       # BB#0:
1130; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1131; SSE2-NEXT:    pand %xmm2, %xmm1
1132; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1133; SSE2-NEXT:    pandn %xmm0, %xmm2
1134; SSE2-NEXT:    por %xmm1, %xmm2
1135; SSE2-NEXT:    movdqa %xmm2, %xmm0
1136; SSE2-NEXT:    retq
1137;
1138; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1139; SSSE3:       # BB#0:
1140; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1141; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1142; SSSE3-NEXT:    por %xmm1, %xmm0
1143; SSSE3-NEXT:    retq
1144;
1145; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1146; SSE41:       # BB#0:
1147; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1148; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1149; SSE41-NEXT:    retq
1150;
1151; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1152; AVX1:       # BB#0:
1153; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1154; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1155; AVX1-NEXT:    retq
1156;
1157; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
1158; AVX2:       # BB#0:
1159; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
1160; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1161; AVX2-NEXT:    retq
1162  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1163  ret <8 x i16> %shuffle
1164}
1165
; Two-source shuffle with only the low half defined, as b4, b5, b6, a3.
; Expected lowering per configuration:
;   - plain SSE2 swaps the halves of xmm1 with pshufd, merges via a constant
;     mask (pand/pandn/por) and copies the result into xmm0
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX use pshufd on xmm1 followed by pblendw
1166define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1167; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1168; SSE2:       # BB#0:
1169; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1170; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1171; SSE2-NEXT:    pand %xmm2, %xmm1
1172; SSE2-NEXT:    pandn %xmm0, %xmm2
1173; SSE2-NEXT:    por %xmm1, %xmm2
1174; SSE2-NEXT:    movdqa %xmm2, %xmm0
1175; SSE2-NEXT:    retq
1176;
1177; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1178; SSSE3:       # BB#0:
1179; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1180; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1181; SSSE3-NEXT:    por %xmm1, %xmm0
1182; SSSE3-NEXT:    retq
1183;
1184; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1185; SSE41:       # BB#0:
1186; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1187; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1188; SSE41-NEXT:    retq
1189;
1190; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1191; AVX:       # BB#0:
1192; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1193; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1194; AVX-NEXT:    retq
1195  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1196  ret <8 x i16> %shuffle
1197}
1198
; Fully defined two-source shuffle producing a0, a1, a2, b5, b4, b5, b6, a3.
; Expected lowering per configuration:
;   - plain SSE2 uses movsd to combine the inputs, then a chain of seven
;     pshufd/pshuflw/pshufhw instructions
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX1 use pblendw followed by one pshufb
;   - AVX2 uses vpblendd in place of the word blend, then vpshufb
1199define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1200; SSE2-LABEL: shuffle_v8i16_012dcde3:
1201; SSE2:       # BB#0:
1202; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1203; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1204; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1205; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1206; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1207; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1208; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1209; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1210; SSE2-NEXT:    retq
1211;
1212; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1213; SSSE3:       # BB#0:
1214; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1215; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1216; SSSE3-NEXT:    por %xmm1, %xmm0
1217; SSSE3-NEXT:    retq
1218;
1219; SSE41-LABEL: shuffle_v8i16_012dcde3:
1220; SSE41:       # BB#0:
1221; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1222; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1223; SSE41-NEXT:    retq
1224;
1225; AVX1-LABEL: shuffle_v8i16_012dcde3:
1226; AVX1:       # BB#0:
1227; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1228; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1229; AVX1-NEXT:    retq
1230;
1231; AVX2-LABEL: shuffle_v8i16_012dcde3:
1232; AVX2:       # BB#0:
1233; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1234; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1235; AVX2-NEXT:    retq
1236  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1237  ret <8 x i16> %shuffle
1238}
1239
; Lane-preserving blend: every result lane i comes from lane i of one input,
; with lanes 1, 4, 5 and 6 taken from %b and the rest from %a. The checks
; expect a constant-mask andps/andnps/orps merge on plain SSE2 and on SSSE3,
; and a single pblendw once SSE4.1 or AVX is available.
1240define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1241; SSE2-LABEL: shuffle_v8i16_0923cde7:
1242; SSE2:       # BB#0:
1243; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1244; SSE2-NEXT:    andps %xmm2, %xmm0
1245; SSE2-NEXT:    andnps %xmm1, %xmm2
1246; SSE2-NEXT:    orps %xmm2, %xmm0
1247; SSE2-NEXT:    retq
1248;
1249; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1250; SSSE3:       # BB#0:
1251; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1252; SSSE3-NEXT:    andps %xmm2, %xmm0
1253; SSSE3-NEXT:    andnps %xmm1, %xmm2
1254; SSSE3-NEXT:    orps %xmm2, %xmm0
1255; SSSE3-NEXT:    retq
1256;
1257; SSE41-LABEL: shuffle_v8i16_0923cde7:
1258; SSE41:       # BB#0:
1259; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1260; SSE41-NEXT:    retq
1261;
1262; AVX-LABEL: shuffle_v8i16_0923cde7:
1263; AVX:       # BB#0:
1264; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1265; AVX-NEXT:    retq
1266  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1267  ret <8 x i16> %shuffle
1268}
1269
; Partially undef two-source shuffle: lane 3 = a1, lane 5 = a5, lane 6 = a7,
; lane 7 = b1, all other lanes undef.
; Expected lowering per configuration:
;   - plain SSE2 shuffles each input (pshufd on xmm1, pshuflw/pshufhw on xmm0),
;     merges via a constant mask (pand/pandn/por) and copies into xmm0
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX1 use pshufd, pshuflw, pshufhw and a final pblendw
;   - AVX2 uses vpbroadcastd in place of the vpshufd
1270define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1271; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1272; SSE2:       # BB#0:
1273; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1274; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1275; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1276; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1277; SSE2-NEXT:    pand %xmm1, %xmm0
1278; SSE2-NEXT:    pandn %xmm2, %xmm1
1279; SSE2-NEXT:    por %xmm0, %xmm1
1280; SSE2-NEXT:    movdqa %xmm1, %xmm0
1281; SSE2-NEXT:    retq
1282;
1283; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1284; SSSE3:       # BB#0:
1285; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1286; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1287; SSSE3-NEXT:    por %xmm1, %xmm0
1288; SSSE3-NEXT:    retq
1289;
1290; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1291; SSE41:       # BB#0:
1292; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1293; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1294; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1295; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1296; SSE41-NEXT:    retq
1297;
1298; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1299; AVX1:       # BB#0:
1300; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1301; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1302; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1303; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1304; AVX1-NEXT:    retq
1305;
1306; AVX2-LABEL: shuffle_v8i16_XXX1X579:
1307; AVX2:       # BB#0:
1308; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
1309; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1310; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1311; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1312; AVX2-NEXT:    retq
1313  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1314  ret <8 x i16> %shuffle
1315}
1316
; Partially undef two-source shuffle: lane 2 = a4 and lanes 4-6 = b0, b2, b4,
; all other lanes undef.
; Expected lowering per configuration:
;   - plain SSE2 uses pshufd on xmm0, a pshuflw/pshufd/pshufhw chain on xmm1,
;     and movsd to join the two halves
;   - SSSE3 uses one pshufb per input and ors the results
;   - SSE4.1 and AVX1 use pshufb on xmm1, pshufd on xmm0 and a final pblendw
;   - AVX2 uses vpblendd for the final blend
1317define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1318; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1319; SSE2:       # BB#0:
1320; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1321; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1322; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1323; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1324; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1325; SSE2-NEXT:    retq
1326;
1327; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1328; SSSE3:       # BB#0:
1329; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1330; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1331; SSSE3-NEXT:    por %xmm1, %xmm0
1332; SSSE3-NEXT:    retq
1333;
1334; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1335; SSE41:       # BB#0:
1336; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1337; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1338; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1339; SSE41-NEXT:    retq
1340;
1341; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1342; AVX1:       # BB#0:
1343; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1344; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1345; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1346; AVX1-NEXT:    retq
1347;
1348; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1349; AVX2:       # BB#0:
1350; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1351; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1352; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1353; AVX2-NEXT:    retq
1354  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1355  ret <8 x i16> %shuffle
1356}
1357
; Places the scalar %i in lane 0 of an otherwise zero vector. %i is inserted
; at element 0 of an undef vector and picked by mask index 8; all remaining
; lanes come from the zeroinitializer operand. The checks expect a
; zero-extending movzwl followed by movd (vmovd under AVX).
1358define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1359; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1360; SSE:       # BB#0:
1361; SSE-NEXT:    movzwl %di, %eax
1362; SSE-NEXT:    movd %eax, %xmm0
1363; SSE-NEXT:    retq
1364;
1365; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1366; AVX:       # BB#0:
1367; AVX-NEXT:    movzwl %di, %eax
1368; AVX-NEXT:    vmovd %eax, %xmm0
1369; AVX-NEXT:    retq
1370  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1371  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1372  ret <8 x i16> %shuffle
1373}
1374
; Places the scalar %i in lane 1 of an otherwise zero vector. Mask index 8
; picks %i from element 0 of %a; the other indices (2, 3, 7, 6, 5, 4, 3) all
; select lanes of the zeroinitializer operand, so they are zero whatever their
; order. The checks expect pxor to zero xmm0 followed by pinsrw $1.
1375define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1376; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1377; SSE:       # BB#0:
1378; SSE-NEXT:    pxor %xmm0, %xmm0
1379; SSE-NEXT:    pinsrw $1, %edi, %xmm0
1380; SSE-NEXT:    retq
1381;
1382; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1383; AVX:       # BB#0:
1384; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1385; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
1386; AVX-NEXT:    retq
1387  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1388  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1389  ret <8 x i16> %shuffle
1390}
1391
; Places the scalar %i in lane 5 of an otherwise zero vector. Mask index 8
; picks %i from element 0 of %a; every other index is 0, which selects lane 0
; of the zeroinitializer operand. The checks expect pxor to zero xmm0 followed
; by pinsrw $5.
1392define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1393; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1394; SSE:       # BB#0:
1395; SSE-NEXT:    pxor %xmm0, %xmm0
1396; SSE-NEXT:    pinsrw $5, %edi, %xmm0
1397; SSE-NEXT:    retq
1398;
1399; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1400; AVX:       # BB#0:
1401; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1402; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
1403; AVX-NEXT:    retq
1404  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1405  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1406  ret <8 x i16> %shuffle
1407}
1408
; Places the scalar %i in lane 7. Lanes 0, 3 and 6 select from the
; zeroinitializer operand and lanes 1, 2, 4 and 5 are undef. The checks expect
; pxor to zero xmm0 followed by pinsrw $7, so the undef lanes end up zero too.
1409define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1410; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1411; SSE:       # BB#0:
1412; SSE-NEXT:    pxor %xmm0, %xmm0
1413; SSE-NEXT:    pinsrw $7, %edi, %xmm0
1414; SSE-NEXT:    retq
1415;
1416; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1417; AVX:       # BB#0:
1418; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1419; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
1420; AVX-NEXT:    retq
1421  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1422  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1423  ret <8 x i16> %shuffle
1424}
1425
; Places the scalar %i in lane 2 of an otherwise zero vector. Unlike the
; element-0 inserts used elsewhere, %i is inserted at element 3 of %a, so the
; mask selects it with index 11 (8 + 3). The checks expect pxor to zero xmm0
; followed by pinsrw $2.
1426define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1427; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1428; SSE:       # BB#0:
1429; SSE-NEXT:    pxor %xmm0, %xmm0
1430; SSE-NEXT:    pinsrw $2, %edi, %xmm0
1431; SSE-NEXT:    retq
1432;
1433; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1434; AVX:       # BB#0:
1435; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1436; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
1437; AVX-NEXT:    retq
1438  %a = insertelement <8 x i16> undef, i16 %i, i32 3
1439  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1440  ret <8 x i16> %shuffle
1441}
1442
; Two-source shuffle producing b5, b6, b7, a0, a1, a2, a3, a4, i.e. a
; contiguous eight-element window over %b followed by %a. The checks expect
; psrldq/pslldq/por on plain SSE2 and a single palignr wherever SSSE3 or later
; is enabled.
1443define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1444; SSE2-LABEL: shuffle_v8i16_def01234:
1445; SSE2:       # BB#0:
1446; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1447; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1448; SSE2-NEXT:    por %xmm1, %xmm0
1449; SSE2-NEXT:    retq
1450;
1451; SSSE3-LABEL: shuffle_v8i16_def01234:
1452; SSSE3:       # BB#0:
1453; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1454; SSSE3-NEXT:    retq
1455;
1456; SSE41-LABEL: shuffle_v8i16_def01234:
1457; SSE41:       # BB#0:
1458; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1459; SSE41-NEXT:    retq
1460;
1461; AVX-LABEL: shuffle_v8i16_def01234:
1462; AVX:       # BB#0:
1463; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1464; AVX-NEXT:    retq
1465  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1466  ret <8 x i16> %shuffle
1467}
1468
; Partially undef two-source shuffle: lane 1 = b6 and lanes 4-6 = a1, a2, a3.
; The defined lanes fit a contiguous window starting at b5 and running into
; %a. The checks expect psrldq/pslldq/por on plain SSE2 and a single palignr
; wherever SSSE3 or later is enabled.
1469define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1470; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1471; SSE2:       # BB#0:
1472; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1473; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1474; SSE2-NEXT:    por %xmm1, %xmm0
1475; SSE2-NEXT:    retq
1476;
1477; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1478; SSSE3:       # BB#0:
1479; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1480; SSSE3-NEXT:    retq
1481;
1482; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1483; SSE41:       # BB#0:
1484; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1485; SSE41-NEXT:    retq
1486;
1487; AVX-LABEL: shuffle_v8i16_ueuu123u:
1488; AVX:       # BB#0:
1489; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1490; AVX-NEXT:    retq
1491  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1492  ret <8 x i16> %shuffle
1493}
1494
; Single-input rotation: mask <5,6,7,0,1,2,3,4> only references %a (%b is
; unused).
; Expected lowering: SSE2 copies xmm0 into xmm1 and combines psrldq + pslldq
; with por; SSSE3, SSE4.1 and AVX use one (v)palignr of xmm0 with itself.
1495define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1496; SSE2-LABEL: shuffle_v8i16_56701234:
1497; SSE2:       # BB#0:
1498; SSE2-NEXT:    movdqa %xmm0, %xmm1
1499; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1500; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1501; SSE2-NEXT:    por %xmm1, %xmm0
1502; SSE2-NEXT:    retq
1503;
1504; SSSE3-LABEL: shuffle_v8i16_56701234:
1505; SSSE3:       # BB#0:
1506; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1507; SSSE3-NEXT:    retq
1508;
1509; SSE41-LABEL: shuffle_v8i16_56701234:
1510; SSE41:       # BB#0:
1511; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1512; SSE41-NEXT:    retq
1513;
1514; AVX-LABEL: shuffle_v8i16_56701234:
1515; AVX:       # BB#0:
1516; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1517; AVX-NEXT:    retq
1518  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1519  ret <8 x i16> %shuffle
1520}
1521
; Partially undefined single-input rotation: only lane 1 (index 6) and lanes
; 4-6 (indices 1,2,3) are specified, all taken from %a.
; The checks expect the same code as the fully specified <5,6,7,0,1,2,3,4>
; rotation: movdqa + psrldq + pslldq + por on SSE2, one (v)palignr elsewhere.
1522define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1523; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1524; SSE2:       # BB#0:
1525; SSE2-NEXT:    movdqa %xmm0, %xmm1
1526; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1527; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1528; SSE2-NEXT:    por %xmm1, %xmm0
1529; SSE2-NEXT:    retq
1530;
1531; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1532; SSSE3:       # BB#0:
1533; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1534; SSSE3-NEXT:    retq
1535;
1536; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1537; SSE41:       # BB#0:
1538; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1539; SSE41-NEXT:    retq
1540;
1541; AVX-LABEL: shuffle_v8i16_u6uu123u:
1542; AVX:       # BB#0:
1543; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1544; AVX-NEXT:    retq
1545  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1546  ret <8 x i16> %shuffle
1547}
1548
; Only lanes 4-6 are specified (indices 1,2,3 of %a); all other lanes are undef.
; With nothing required in the low lanes, the checks expect a single (v)pslldq
; that moves xmm0 up by six bytes on every tested subtarget.
1549define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1550; SSE-LABEL: shuffle_v8i16_uuuu123u:
1551; SSE:       # BB#0:
1552; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1553; SSE-NEXT:    retq
1554;
1555; AVX-LABEL: shuffle_v8i16_uuuu123u:
1556; AVX:       # BB#0:
1557; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1558; AVX-NEXT:    retq
1559  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1560  ret <8 x i16> %shuffle
1561}
1562
; Two-input rotation: mask <11,12,13,14,15,0,1,2> takes the top five words of
; %b followed by the low three words of %a.
; Expected lowering: psrldq + pslldq + por on SSE2; a single (v)palignr on
; SSSE3, SSE4.1 and AVX.
1563define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1564; SSE2-LABEL: shuffle_v8i16_bcdef012:
1565; SSE2:       # BB#0:
1566; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1567; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1568; SSE2-NEXT:    por %xmm1, %xmm0
1569; SSE2-NEXT:    retq
1570;
1571; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1572; SSSE3:       # BB#0:
1573; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1574; SSSE3-NEXT:    retq
1575;
1576; SSE41-LABEL: shuffle_v8i16_bcdef012:
1577; SSE41:       # BB#0:
1578; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1579; SSE41-NEXT:    retq
1580;
1581; AVX-LABEL: shuffle_v8i16_bcdef012:
1582; AVX:       # BB#0:
1583; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1584; AVX-NEXT:    retq
1585  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1586  ret <8 x i16> %shuffle
1587}
1588
; Partially undefined two-input rotation: lanes 1-3 come from %b (indices
; 12,13,14), lane 6 from %a (index 1); the remaining lanes are undef.
; The checks expect the same sequences as the fully specified
; <11,12,13,14,15,0,1,2> mask: psrldq + pslldq + por on SSE2, one (v)palignr
; elsewhere.
1589define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1590; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1591; SSE2:       # BB#0:
1592; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1593; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1594; SSE2-NEXT:    por %xmm1, %xmm0
1595; SSE2-NEXT:    retq
1596;
1597; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1598; SSSE3:       # BB#0:
1599; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1600; SSSE3-NEXT:    retq
1601;
1602; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1603; SSE41:       # BB#0:
1604; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1605; SSE41-NEXT:    retq
1606;
1607; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1608; AVX:       # BB#0:
1609; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1610; AVX-NEXT:    retq
1611  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1612  ret <8 x i16> %shuffle
1613}
1614
; Single-input rotation: mask <3,4,5,6,7,0,1,2> only references %a (%b is
; unused).
; Expected lowering: SSE2 copies xmm0 into xmm1 and combines psrldq + pslldq
; with por; SSSE3, SSE4.1 and AVX use one (v)palignr of xmm0 with itself.
1615define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1616; SSE2-LABEL: shuffle_v8i16_34567012:
1617; SSE2:       # BB#0:
1618; SSE2-NEXT:    movdqa %xmm0, %xmm1
1619; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1620; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1621; SSE2-NEXT:    por %xmm1, %xmm0
1622; SSE2-NEXT:    retq
1623;
1624; SSSE3-LABEL: shuffle_v8i16_34567012:
1625; SSSE3:       # BB#0:
1626; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1627; SSSE3-NEXT:    retq
1628;
1629; SSE41-LABEL: shuffle_v8i16_34567012:
1630; SSE41:       # BB#0:
1631; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1632; SSE41-NEXT:    retq
1633;
1634; AVX-LABEL: shuffle_v8i16_34567012:
1635; AVX:       # BB#0:
1636; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1637; AVX-NEXT:    retq
1638  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1639  ret <8 x i16> %shuffle
1640}
1641
; Partially undefined single-input rotation: lanes 1-3 take indices 4,5,6 and
; lane 6 takes index 1, all from %a; the remaining lanes are undef.
; The checks expect the same code as the fully specified <3,4,5,6,7,0,1,2>
; rotation: movdqa + psrldq + pslldq + por on SSE2, one (v)palignr elsewhere.
1642define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1643; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1644; SSE2:       # BB#0:
1645; SSE2-NEXT:    movdqa %xmm0, %xmm1
1646; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1647; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1648; SSE2-NEXT:    por %xmm1, %xmm0
1649; SSE2-NEXT:    retq
1650;
1651; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1652; SSSE3:       # BB#0:
1653; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1654; SSSE3-NEXT:    retq
1655;
1656; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1657; SSE41:       # BB#0:
1658; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1659; SSE41-NEXT:    retq
1660;
1661; AVX-LABEL: shuffle_v8i16_u456uu1u:
1662; AVX:       # BB#0:
1663; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1664; AVX-NEXT:    retq
1665  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1666  ret <8 x i16> %shuffle
1667}
1668
; Only lanes 1-3 are specified (indices 4,5,6 of %a); all other lanes are undef.
; With nothing required in the high lanes, the checks expect a single (v)psrldq
; that moves xmm0 down by six bytes on every tested subtarget.
1669define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1670; SSE-LABEL: shuffle_v8i16_u456uuuu:
1671; SSE:       # BB#0:
1672; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1673; SSE-NEXT:    retq
1674;
1675; AVX-LABEL: shuffle_v8i16_u456uuuu:
1676; AVX:       # BB#0:
1677; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1678; AVX-NEXT:    retq
1679  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1680  ret <8 x i16> %shuffle
1681}
1682
; Two-input rotation in the other direction: mask <3,...,10> takes the top five
; words of %a followed by the low three words of %b.
; Expected lowering: psrldq + pslldq + por on SSE2; on SSSE3/SSE4.1 the palignr
; result lands in xmm1 and is copied back to xmm0 with movdqa; AVX needs only
; one vpalignr.
1683define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1684; SSE2-LABEL: shuffle_v8i16_3456789a:
1685; SSE2:       # BB#0:
1686; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1687; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1688; SSE2-NEXT:    por %xmm1, %xmm0
1689; SSE2-NEXT:    retq
1690;
1691; SSSE3-LABEL: shuffle_v8i16_3456789a:
1692; SSSE3:       # BB#0:
1693; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1694; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1695; SSSE3-NEXT:    retq
1696;
1697; SSE41-LABEL: shuffle_v8i16_3456789a:
1698; SSE41:       # BB#0:
1699; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1700; SSE41-NEXT:    movdqa %xmm1, %xmm0
1701; SSE41-NEXT:    retq
1702;
1703; AVX-LABEL: shuffle_v8i16_3456789a:
1704; AVX:       # BB#0:
1705; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1706; AVX-NEXT:    retq
1707  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1708  ret <8 x i16> %shuffle
1709}
1710
; Partially undefined two-input rotation: lanes 1-3 come from %a (indices
; 4,5,6), lane 6 from %b (index 9); the remaining lanes are undef.
; The checks expect the same sequences as the fully specified <3,...,10> mask:
; psrldq + pslldq + por on SSE2, palignr + movdqa on SSSE3/SSE4.1, and a single
; vpalignr on AVX.
1711define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1712; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1713; SSE2:       # BB#0:
1714; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1715; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1716; SSE2-NEXT:    por %xmm1, %xmm0
1717; SSE2-NEXT:    retq
1718;
1719; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1720; SSSE3:       # BB#0:
1721; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1722; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1723; SSSE3-NEXT:    retq
1724;
1725; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1726; SSE41:       # BB#0:
1727; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1728; SSE41-NEXT:    movdqa %xmm1, %xmm0
1729; SSE41-NEXT:    retq
1730;
1731; AVX-LABEL: shuffle_v8i16_u456uu9u:
1732; AVX:       # BB#0:
1733; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1734; AVX-NEXT:    retq
1735  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1736  ret <8 x i16> %shuffle
1737}
1738
; Two-input rotation: mask <5,...,12> takes the top three words of %a followed
; by the low five words of %b.
; Expected lowering: psrldq + pslldq + por on SSE2; palignr into xmm1 followed
; by movdqa on SSSE3/SSE4.1; a single vpalignr on AVX.
1739define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1740; SSE2-LABEL: shuffle_v8i16_56789abc:
1741; SSE2:       # BB#0:
1742; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1743; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1744; SSE2-NEXT:    por %xmm1, %xmm0
1745; SSE2-NEXT:    retq
1746;
1747; SSSE3-LABEL: shuffle_v8i16_56789abc:
1748; SSSE3:       # BB#0:
1749; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1750; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1751; SSSE3-NEXT:    retq
1752;
1753; SSE41-LABEL: shuffle_v8i16_56789abc:
1754; SSE41:       # BB#0:
1755; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1756; SSE41-NEXT:    movdqa %xmm1, %xmm0
1757; SSE41-NEXT:    retq
1758;
1759; AVX-LABEL: shuffle_v8i16_56789abc:
1760; AVX:       # BB#0:
1761; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1762; AVX-NEXT:    retq
1763  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1764  ret <8 x i16> %shuffle
1765}
1766
; Partially undefined two-input rotation: lane 1 comes from %a (index 6) and
; lanes 4-6 from %b (indices 9,10,11); the remaining lanes are undef.
; The checks expect the same sequences as the fully specified <5,...,12> mask:
; psrldq + pslldq + por on SSE2, palignr + movdqa on SSSE3/SSE4.1, and a single
; vpalignr on AVX.
1767define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1768; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1769; SSE2:       # BB#0:
1770; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1771; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1772; SSE2-NEXT:    por %xmm1, %xmm0
1773; SSE2-NEXT:    retq
1774;
1775; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
1776; SSSE3:       # BB#0:
1777; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1778; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1779; SSSE3-NEXT:    retq
1780;
1781; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
1782; SSE41:       # BB#0:
1783; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1784; SSE41-NEXT:    movdqa %xmm1, %xmm0
1785; SSE41-NEXT:    retq
1786;
1787; AVX-LABEL: shuffle_v8i16_u6uu9abu:
1788; AVX:       # BB#0:
1789; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1790; AVX-NEXT:    retq
1791  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
1792  ret <8 x i16> %shuffle
1793}
1794
; Places word 0 of %a in lane 0 and word 1 in lane 4; every other lane is undef
; (the second shuffle operand is zeroinitializer but is never indexed).
; Expected lowering: pshufd + pshufhw on SSE2 and SSSE3; a single (v)pmovzxwq
; on SSE4.1 and AVX.
1795define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
1796; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
1797; SSE2:       # BB#0:
1798; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1799; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1800; SSE2-NEXT:    retq
1801;
1802; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
1803; SSSE3:       # BB#0:
1804; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1805; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1806; SSSE3-NEXT:    retq
1807;
1808; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
1809; SSE41:       # BB#0:
1810; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1811; SSE41-NEXT:    retq
1812;
1813; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
1814; AVX:       # BB#0:
1815; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1816; AVX-NEXT:    retq
1817  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
1818  ret <8 x i16> %shuffle
1819}
1820
; Zero-extends words 0 and 1 of %a to 64 bits each: mask
; <0,9,10,11,1,13,14,15>, where indices >= 8 select lanes of the
; zeroinitializer operand.
; Expected lowering: pxor + punpcklwd + punpckldq on SSE2 and SSSE3; a single
; (v)pmovzxwq on SSE4.1 and AVX.
1821define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
1822; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
1823; SSE2:       # BB#0:
1824; SSE2-NEXT:    pxor %xmm1, %xmm1
1825; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1826; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1827; SSE2-NEXT:    retq
1828;
1829; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
1830; SSSE3:       # BB#0:
1831; SSSE3-NEXT:    pxor %xmm1, %xmm1
1832; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1833; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1834; SSSE3-NEXT:    retq
1835;
1836; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
1837; SSE41:       # BB#0:
1838; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1839; SSE41-NEXT:    retq
1840;
1841; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
1842; AVX:       # BB#0:
1843; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1844; AVX-NEXT:    retq
1845  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1846  ret <8 x i16> %shuffle
1847}
1848
; Spreads words 0-3 of %a into the even lanes; the odd lanes are undef (the
; zeroinitializer operand is never indexed).
; Expected lowering: a self-unpack punpcklwd on SSE2 and SSSE3; a single
; (v)pmovzxwd on SSE4.1 and AVX.
1849define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
1850; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
1851; SSE2:       # BB#0:
1852; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1853; SSE2-NEXT:    retq
1854;
1855; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
1856; SSSE3:       # BB#0:
1857; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1858; SSSE3-NEXT:    retq
1859;
1860; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
1861; SSE41:       # BB#0:
1862; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1863; SSE41-NEXT:    retq
1864;
1865; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
1866; AVX:       # BB#0:
1867; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1868; AVX-NEXT:    retq
1869  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
1870  ret <8 x i16> %shuffle
1871}
1872
; Zero-extends words 0-3 of %a to 32 bits each: mask <0,9,1,11,2,13,3,15>,
; where the odd lanes index into the zeroinitializer operand.
; Expected lowering: pxor + punpcklwd on SSE2 and SSSE3; a single (v)pmovzxwd
; on SSE4.1 and AVX.
1873define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
1874; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
1875; SSE2:       # BB#0:
1876; SSE2-NEXT:    pxor %xmm1, %xmm1
1877; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1878; SSE2-NEXT:    retq
1879;
1880; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
1881; SSSE3:       # BB#0:
1882; SSSE3-NEXT:    pxor %xmm1, %xmm1
1883; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1884; SSSE3-NEXT:    retq
1885;
1886; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
1887; SSE41:       # BB#0:
1888; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1889; SSE41-NEXT:    retq
1890;
1891; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
1892; AVX:       # BB#0:
1893; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1894; AVX-NEXT:    retq
1895  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1896  ret <8 x i16> %shuffle
1897}
1898
1899;
1900; Shuffle to logical bit shifts
1901;
; Mask <8,0,8,2,8,4,8,6>: index 8 selects a zero lane, so each even word of %a
; moves up one lane inside its 32-bit element with zero below it.
; The checks expect a single (v)pslld $16.
1902define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
1903; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
1904; SSE:       # BB#0:
1905; SSE-NEXT:    pslld $16, %xmm0
1906; SSE-NEXT:    retq
1907;
1908; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
1909; AVX:       # BB#0:
1910; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
1911; AVX-NEXT:    retq
1912  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
1913  ret <8 x i16> %shuffle
1914}
1915
; Mask <8,8,8,0,8,8,8,4>: words 0 and 4 of %a move to the top lane of their
; 64-bit half, with zero lanes (index 8) below.
; The checks expect a single (v)psllq $48.
1916define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
1917; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
1918; SSE:       # BB#0:
1919; SSE-NEXT:    psllq $48, %xmm0
1920; SSE-NEXT:    retq
1921;
1922; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
1923; AVX:       # BB#0:
1924; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
1925; AVX-NEXT:    retq
1926  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
1927  ret <8 x i16> %shuffle
1928}
1929
; Mask <8,8,0,1,8,undef,4,undef>: zero lanes (index 8) in the low part of each
; 64-bit half, source words shifted up by two lanes, and two undef lanes.
; The checks expect the undef lanes to be absorbed into a single (v)psllq $32.
1930define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
1931; SSE-LABEL: shuffle_v8i16_zz01zX4X:
1932; SSE:       # BB#0:
1933; SSE-NEXT:    psllq $32, %xmm0
1934; SSE-NEXT:    retq
1935;
1936; AVX-LABEL: shuffle_v8i16_zz01zX4X:
1937; AVX:       # BB#0:
1938; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
1939; AVX-NEXT:    retq
1940  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
1941  ret <8 x i16> %shuffle
1942}
1943
; Mask <8,0,undef,2,8,4,5,6>: a zero lane (index 8) at the bottom of each
; 64-bit half, source words shifted up by one lane, and one undef lane.
; The checks expect a single (v)psllq $16.
1944define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
1945; SSE-LABEL: shuffle_v8i16_z0X2z456:
1946; SSE:       # BB#0:
1947; SSE-NEXT:    psllq $16, %xmm0
1948; SSE-NEXT:    retq
1949;
1950; AVX-LABEL: shuffle_v8i16_z0X2z456:
1951; AVX:       # BB#0:
1952; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
1953; AVX-NEXT:    retq
1954  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
1955  ret <8 x i16> %shuffle
1956}
1957
; Mask <1,8,3,8,undef,8,7,8>: each odd word of %a moves down one lane inside
; its 32-bit element, with a zero lane (index 8) above it; lane 4 is undef.
; The checks expect a single (v)psrld $16.
1958define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
1959; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
1960; SSE:       # BB#0:
1961; SSE-NEXT:    psrld $16, %xmm0
1962; SSE-NEXT:    retq
1963;
1964; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
1965; AVX:       # BB#0:
1966; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
1967; AVX-NEXT:    retq
1968  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
1969  ret <8 x i16> %shuffle
1970}
1971
; Mask <1,undef,3,8,5,6,7,8>: source words shifted down by one lane within each
; 64-bit half, a zero lane (index 8) at the top of each half, lane 1 undef.
; The checks expect a single (v)psrlq $16.
1972define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
1973; SSE-LABEL: shuffle_v8i16_1X3z567z:
1974; SSE:       # BB#0:
1975; SSE-NEXT:    psrlq $16, %xmm0
1976; SSE-NEXT:    retq
1977;
1978; AVX-LABEL: shuffle_v8i16_1X3z567z:
1979; AVX:       # BB#0:
1980; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
1981; AVX-NEXT:    retq
1982  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
1983  ret <8 x i16> %shuffle
1984}
1985
; Mask <2,3,8,8,6,7,8,8>: the upper two words of each 64-bit half of %a move to
; the lower two lanes, with zero lanes (index 8) above.
; The checks expect a single (v)psrlq $32.
1986define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
1987; SSE-LABEL: shuffle_v8i16_23zz67zz:
1988; SSE:       # BB#0:
1989; SSE-NEXT:    psrlq $32, %xmm0
1990; SSE-NEXT:    retq
1991;
1992; AVX-LABEL: shuffle_v8i16_23zz67zz:
1993; AVX:       # BB#0:
1994; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
1995; AVX-NEXT:    retq
1996  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
1997  ret <8 x i16> %shuffle
1998}
1999
; Mask <3,8,undef,undef,undef,8,8,8>: word 3 of %a in lane 0, zero lanes
; (index 8) in lanes 1 and 5-7, and lanes 2-4 undef.
; The checks expect a single (v)psrlq $48.
2000define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2001; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2002; SSE:       # BB#0:
2003; SSE-NEXT:    psrlq $48, %xmm0
2004; SSE-NEXT:    retq
2005;
2006; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2007; AVX:       # BB#0:
2008; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
2009; AVX-NEXT:    retq
2010  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2011  ret <8 x i16> %shuffle
2012}
2013
; Mask <0,1,undef,3,8,8,undef,8>: the low four lanes keep their position in %a
; (lane 2 undef) and the high four lanes are zero (index 8) or undef.
; The checks expect a single (v)movq that keeps the low 64 bits of xmm0 and
; zeroes the rest.
2014define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2015; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2016; SSE:       # BB#0:
2017; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2018; SSE-NEXT:    retq
2019;
2020; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2021; AVX:       # BB#0:
2022; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2023; AVX-NEXT:    retq
2024  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2025  ret <8 x i16> %shuffle
2026}
2027
; Zeroes a single lane: mask <0,9,2,3,4,5,6,7> keeps every word of %a except
; lane 1, which is taken from the zeroinitializer operand.
; Expected lowering: andps with a RIP-relative memory operand on SSE2 and
; SSSE3; pxor + (v)pblendw against a zeroed register on SSE4.1 and AVX.
2028define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2029; SSE2-LABEL: shuffle_v8i16_0z234567:
2030; SSE2:       # BB#0:
2031; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2032; SSE2-NEXT:    retq
2033;
2034; SSSE3-LABEL: shuffle_v8i16_0z234567:
2035; SSSE3:       # BB#0:
2036; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2037; SSSE3-NEXT:    retq
2038;
2039; SSE41-LABEL: shuffle_v8i16_0z234567:
2040; SSE41:       # BB#0:
2041; SSE41-NEXT:    pxor %xmm1, %xmm1
2042; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2043; SSE41-NEXT:    retq
2044;
2045; AVX-LABEL: shuffle_v8i16_0z234567:
2046; AVX:       # BB#0:
2047; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2048; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2049; AVX-NEXT:    retq
2050  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2051  ret <8 x i16> %shuffle
2052}
2053
; Zeroes several lanes in place: mask <0,8,8,8,8,5,8,7> keeps words 0, 5 and 7
; of %a where they are and fills the other lanes with zero (index 8).
; Expected lowering: andps with a RIP-relative memory operand on SSE2 and
; SSSE3; pxor + (v)pblendw against a zeroed register on SSE4.1 and AVX.
2054define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2055; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2056; SSE2:       # BB#0:
2057; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2058; SSE2-NEXT:    retq
2059;
2060; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2061; SSSE3:       # BB#0:
2062; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2063; SSSE3-NEXT:    retq
2064;
2065; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2066; SSE41:       # BB#0:
2067; SSE41-NEXT:    pxor %xmm1, %xmm1
2068; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2069; SSE41-NEXT:    retq
2070;
2071; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2072; AVX:       # BB#0:
2073; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2074; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2075; AVX-NEXT:    retq
2076  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2077  ret <8 x i16> %shuffle
2078}
2079
; Zeroes only the top lane: mask <0,1,2,3,4,5,6,15> keeps words 0-6 of %a in
; place and takes lane 7 from the zeroinitializer operand, as the function name
; states.
; Expected lowering: andps with a RIP-relative memory operand on SSE2 and
; SSSE3; pxor + (v)pblendw against a zeroed register on SSE4.1 and AVX.
; NOTE(review): the mask previously had index 9 in lane 1, which also zeroed
; lane 1 and contradicted the name. The two blend check lines were adjusted by
; hand to match the corrected mask; regenerate them with
; utils/update_llc_test_checks.py to confirm.
2080define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2081; SSE2-LABEL: shuffle_v8i16_0123456z:
2082; SSE2:       # BB#0:
2083; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2084; SSE2-NEXT:    retq
2085;
2086; SSSE3-LABEL: shuffle_v8i16_0123456z:
2087; SSSE3:       # BB#0:
2088; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2089; SSSE3-NEXT:    retq
2090;
2091; SSE41-LABEL: shuffle_v8i16_0123456z:
2092; SSE41:       # BB#0:
2093; SSE41-NEXT:    pxor %xmm1, %xmm1
2094; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2095; SSE41-NEXT:    retq
2096;
2097; AVX-LABEL: shuffle_v8i16_0123456z:
2098; AVX:       # BB#0:
2099; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2100; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2101; AVX-NEXT:    retq
2102  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2103  ret <8 x i16> %shuffle
2104}
2105
; Irregular two-input shuffle with undef lanes: mask
; <15,undef,3,undef,12,12,5,undef> mixes words 3 and 5 of %a with words 4 and 7
; of %b (indices 12 and 15).
; Expected lowering: a byte shift of xmm0 by two bytes, a pshufhw on xmm1, then
; punpckhdq to interleave the high dwords. The SSE form needs a trailing movdqa
; because its result lands in xmm1.
2106define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2107; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2108; SSE:       # BB#0:
2109; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2110; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2111; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2112; SSE-NEXT:    movdqa %xmm1, %xmm0
2113; SSE-NEXT:    retq
2114;
2115; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2116; AVX:       # BB#0:
2117; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2118; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2119; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2120; AVX-NEXT:    retq
2121  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2122  ret <8 x i16> %shuffle
2123}
2124
; Mask <8,0,1,2,3,4,5,undef>: lane 0 is a zero lane (index 8 of the
; zeroinitializer operand), words 0-5 of %a move up by one lane, and the top
; lane is undef.
; The checks expect a single (v)pslldq that shifts xmm0 up by two bytes.
2125define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2126; SSE-LABEL: shuffle_v8i16_8012345u:
2127; SSE:       # BB#0:
2128; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2129; SSE-NEXT:    retq
2130;
2131; AVX-LABEL: shuffle_v8i16_8012345u:
2132; AVX:       # BB#0:
2133; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2134; AVX-NEXT:    retq
2135  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2136
2137  ret <8 x i16> %shuffle
2138}
2139
; Blend written as bitwise and/or on <2 x i64> rather than as a shufflevector.
; The constants keep only the upper 32 bits of %a's second i64 and everything
; except those bits of %b, then OR the two together.
; Expected lowering: two shufps + movaps on SSE2 and SSSE3; one pblendw on
; SSE4.1 and AVX1; one vpblendd on AVX2.
; NOTE(review): the checks show words 0-5 coming from xmm1 (%b) and words 6-7
; from xmm0 (%a), the opposite of how the name "012345ef" reads - confirm
; whether this is intended.
2140define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
2141; SSE2-LABEL: mask_v8i16_012345ef:
2142; SSE2:       # BB#0:
2143; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2144; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2145; SSE2-NEXT:    movaps %xmm1, %xmm0
2146; SSE2-NEXT:    retq
2147;
2148; SSSE3-LABEL: mask_v8i16_012345ef:
2149; SSSE3:       # BB#0:
2150; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2151; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2152; SSSE3-NEXT:    movaps %xmm1, %xmm0
2153; SSSE3-NEXT:    retq
2154;
2155; SSE41-LABEL: mask_v8i16_012345ef:
2156; SSE41:       # BB#0:
2157; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2158; SSE41-NEXT:    retq
2159;
2160; AVX1-LABEL: mask_v8i16_012345ef:
2161; AVX1:       # BB#0:
2162; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2163; AVX1-NEXT:    retq
2164;
2165; AVX2-LABEL: mask_v8i16_012345ef:
2166; AVX2:       # BB#0:
2167; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2168; AVX2-NEXT:    retq
2169  %1 = bitcast <8 x i16> %a to <2 x i64>
2170  %2 = bitcast <8 x i16> %b to <2 x i64>
2171  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
2172  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
2173  %5 = or <2 x i64> %4, %3
2174  %6 = bitcast <2 x i64> %5 to <8 x i16>
2175  ret <8 x i16> %6
2176}
2177
; Splat of a loaded value: loads an i32 from %ptr, inserts it into element 0 of
; a zero <4 x i32>, bitcasts to <8 x i16> and broadcasts word 0 to all lanes
; (zeroinitializer shuffle mask).
; Expected lowering: movd + pshuflw + pshufd on SSE2; movd + pshufb on SSSE3,
; SSE4.1 and AVX1; a single vpbroadcastw straight from memory on AVX2.
2178define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2179; SSE2-LABEL: insert_dup_mem_v8i16_i32:
2180; SSE2:       # BB#0:
2181; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2182; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2183; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2184; SSE2-NEXT:    retq
2185;
2186; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
2187; SSSE3:       # BB#0:
2188; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2189; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2190; SSSE3-NEXT:    retq
2191;
2192; SSE41-LABEL: insert_dup_mem_v8i16_i32:
2193; SSE41:       # BB#0:
2194; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2195; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2196; SSE41-NEXT:    retq
2197;
2198; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2199; AVX1:       # BB#0:
2200; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2201; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2202; AVX1-NEXT:    retq
2203;
2204; AVX2-LABEL: insert_dup_mem_v8i16_i32:
2205; AVX2:       # BB#0:
2206; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
2207; AVX2-NEXT:    retq
2208  %tmp = load i32, i32* %ptr, align 4
2209  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2210  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2211  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
2212  ret <8 x i16> %tmp3
2213}
2214
; Splat of a sign-extended load: loads an i16 from %ptr, sign-extends it to
; i32, inserts it into element 0 of a zero <4 x i32>, bitcasts to <8 x i16> and
; broadcasts word 0 to all lanes.
; Every subtarget is expected to start with movswl + (v)movd. The splat is then
; pshuflw + pshufd on SSE2, pshufb on SSSE3, SSE4.1 and AVX1, and a
; register-source vpbroadcastw on AVX2.
2215define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2216; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
2217; SSE2:       # BB#0:
2218; SSE2-NEXT:    movswl (%rdi), %eax
2219; SSE2-NEXT:    movd %eax, %xmm0
2220; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2221; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2222; SSE2-NEXT:    retq
2223;
2224; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
2225; SSSE3:       # BB#0:
2226; SSSE3-NEXT:    movswl (%rdi), %eax
2227; SSSE3-NEXT:    movd %eax, %xmm0
2228; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2229; SSSE3-NEXT:    retq
2230;
2231; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
2232; SSE41:       # BB#0:
2233; SSE41-NEXT:    movswl (%rdi), %eax
2234; SSE41-NEXT:    movd %eax, %xmm0
2235; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2236; SSE41-NEXT:    retq
2237;
2238; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2239; AVX1:       # BB#0:
2240; AVX1-NEXT:    movswl (%rdi), %eax
2241; AVX1-NEXT:    vmovd %eax, %xmm0
2242; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2243; AVX1-NEXT:    retq
2244;
2245; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2246; AVX2:       # BB#0:
2247; AVX2-NEXT:    movswl (%rdi), %eax
2248; AVX2-NEXT:    vmovd %eax, %xmm0
2249; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2250; AVX2-NEXT:    retq
2251  %tmp = load i16, i16* %ptr, align 2
2252  %tmp1 = sext i16 %tmp to i32
2253  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2254  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2255  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
2256  ret <8 x i16> %tmp4
2257}
2258
; Splat of element 1 of an <8 x i16> produced by loading an i32, inserting it
; into lane 0 of a zeroed <4 x i32> and bitcasting. With the little-endian
; datalayout of this file, element 1 is the upper 16 bits of the loaded i32.
; The AVX2 expectation broadcasts straight from memory at byte offset 2; the
; other run lines load with movd/vmovd and then shuffle word 1 into place.
2259define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2260; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2261; SSE2:       # BB#0:
2262; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2263; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2264; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2265; SSE2-NEXT:    retq
2266;
2267; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
2268; SSSE3:       # BB#0:
2269; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2270; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2271; SSSE3-NEXT:    retq
2272;
2273; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
2274; SSE41:       # BB#0:
2275; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2276; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2277; SSE41-NEXT:    retq
2278;
2279; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2280; AVX1:       # BB#0:
2281; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2282; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2283; AVX1-NEXT:    retq
2284;
2285; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2286; AVX2:       # BB#0:
2287; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2288; AVX2-NEXT:    retq
2289  %tmp = load i32, i32* %ptr, align 4
  ; Lane 0 receives the loaded value; the other three lanes stay zero.
2290  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2291  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  ; Every mask index is 1, so element 1 is replicated into all eight results.
2292  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2293  ret <8 x i16> %tmp3
2294}
2295
; Splat of element 3 of an <8 x i16> produced by loading an i32, inserting it
; into lane 1 of a zeroed <4 x i32> and bitcasting. With the little-endian
; datalayout of this file, element 3 is the upper 16 bits of the loaded i32.
; The expectations still place the loaded value in the low lane (movd/vmovd,
; or a memory broadcast from byte offset 2 under AVX2); only the SSE2 sequence
; has an extra pshufd ahead of the word shuffle.
2296define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2297; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2298; SSE2:       # BB#0:
2299; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2300; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2301; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2302; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2303; SSE2-NEXT:    retq
2304;
2305; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2306; SSSE3:       # BB#0:
2307; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2308; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2309; SSSE3-NEXT:    retq
2310;
2311; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2312; SSE41:       # BB#0:
2313; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2314; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2315; SSE41-NEXT:    retq
2316;
2317; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2318; AVX1:       # BB#0:
2319; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2320; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2321; AVX1-NEXT:    retq
2322;
2323; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2324; AVX2:       # BB#0:
2325; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2326; AVX2-NEXT:    retq
2327  %tmp = load i32, i32* %ptr, align 4
  ; Lane 1 receives the loaded value; lanes 0, 2 and 3 stay zero.
2328  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2329  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  ; Every mask index is 3, so element 3 is replicated into all eight results.
2330  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2331  ret <8 x i16> %tmp3
2332}
2333
; Splat of element 1 of an <8 x i16> produced by loading an i16, sign-extending
; it to i32, inserting that into lane 0 of a zeroed <4 x i32> and bitcasting.
; With the little-endian datalayout of this file, element 1 is the upper 16
; bits of the sign-extended value, so the result replicates the extension bits
; rather than the loaded i16. The AVX2 expectation shifts the scalar right by
; 16 before moving it into the vector register and broadcasting word 0.
2334define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2335; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2336; SSE2:       # BB#0:
2337; SSE2-NEXT:    movswl (%rdi), %eax
2338; SSE2-NEXT:    movd %eax, %xmm0
2339; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2340; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2341; SSE2-NEXT:    retq
2342;
2343; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2344; SSSE3:       # BB#0:
2345; SSSE3-NEXT:    movswl (%rdi), %eax
2346; SSSE3-NEXT:    movd %eax, %xmm0
2347; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2348; SSSE3-NEXT:    retq
2349;
2350; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2351; SSE41:       # BB#0:
2352; SSE41-NEXT:    movswl (%rdi), %eax
2353; SSE41-NEXT:    movd %eax, %xmm0
2354; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2355; SSE41-NEXT:    retq
2356;
2357; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2358; AVX1:       # BB#0:
2359; AVX1-NEXT:    movswl (%rdi), %eax
2360; AVX1-NEXT:    vmovd %eax, %xmm0
2361; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2362; AVX1-NEXT:    retq
2363;
2364; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2365; AVX2:       # BB#0:
2366; AVX2-NEXT:    movswl (%rdi), %eax
2367; AVX2-NEXT:    shrl $16, %eax
2368; AVX2-NEXT:    vmovd %eax, %xmm0
2369; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2370; AVX2-NEXT:    retq
2371  %tmp = load i16, i16* %ptr, align 2
2372  %tmp1 = sext i16 %tmp to i32
  ; Lane 0 receives the sign-extended value; the other three lanes stay zero.
2373  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2374  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  ; Every mask index is 1, so element 1 is replicated into all eight results.
2375  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2376  ret <8 x i16> %tmp4
2377}
2378
; Splat of element 3 of an <8 x i16> produced by loading an i16, sign-extending
; it to i32, inserting that into lane 1 of a zeroed <4 x i32> and bitcasting.
; With the little-endian datalayout of this file, element 3 is the upper 16
; bits of the sign-extended value, so the result replicates the extension bits
; rather than the loaded i16. The expectations still move the scalar into the
; low lane; the SSE2 sequence adds a pshufd ahead of the word shuffle, and the
; AVX2 sequence shifts the scalar right by 16 before broadcasting word 0.
2379define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2380; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2381; SSE2:       # BB#0:
2382; SSE2-NEXT:    movswl (%rdi), %eax
2383; SSE2-NEXT:    movd %eax, %xmm0
2384; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2385; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2386; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2387; SSE2-NEXT:    retq
2388;
2389; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2390; SSSE3:       # BB#0:
2391; SSSE3-NEXT:    movswl (%rdi), %eax
2392; SSSE3-NEXT:    movd %eax, %xmm0
2393; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2394; SSSE3-NEXT:    retq
2395;
2396; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2397; SSE41:       # BB#0:
2398; SSE41-NEXT:    movswl (%rdi), %eax
2399; SSE41-NEXT:    movd %eax, %xmm0
2400; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2401; SSE41-NEXT:    retq
2402;
2403; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2404; AVX1:       # BB#0:
2405; AVX1-NEXT:    movswl (%rdi), %eax
2406; AVX1-NEXT:    vmovd %eax, %xmm0
2407; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2408; AVX1-NEXT:    retq
2409;
2410; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2411; AVX2:       # BB#0:
2412; AVX2-NEXT:    movswl (%rdi), %eax
2413; AVX2-NEXT:    shrl $16, %eax
2414; AVX2-NEXT:    vmovd %eax, %xmm0
2415; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2416; AVX2-NEXT:    retq
2417  %tmp = load i16, i16* %ptr, align 2
2418  %tmp1 = sext i16 %tmp to i32
  ; Lane 1 receives the sign-extended value; lanes 0, 2 and 3 stay zero.
2419  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2420  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  ; Every mask index is 3, so element 3 is replicated into all eight results.
2421  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2422  ret <8 x i16> %tmp4
2423}
2424