; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Codegen tests for <8 x i16> shufflevector on x86-64. The module is compiled
; five times (default features, +ssse3, +sse4.1, +avx, +avx2); each run is
; matched against the shared SSE or AVX prefix plus its own feature prefix.
; Function names encode the shuffle mask, one character per result lane
; (hex digit = source lane, u/X = undef).

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Mask <0,1,0,1,2,3,2,3> reads only %a and duplicates word pairs (0,1) and (2,3).
; Both check groups expect a single (v)pshufd with dword pattern [0,0,1,1].
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}
; Mask <6,7,4,5,2,3,0,1> reads only %a and reverses the order of its four word pairs.
; Both check groups expect a single (v)pshufd with dword pattern [3,2,1,0].
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}
; Two-input shuffle: mask <4..11> takes words 4-7 of %a followed by words 0-3 of %b.
; The SSE2 checks expect shufpd, the SSSE3 and SSE41 checks expect palignr into xmm1
; plus a movdqa copy back to xmm0, and the AVX checks expect one vpalignr.
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

; All-zero mask: splats word 0 of %a into every result lane.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain, the SSSE3, SSE41 and AVX1
; checks expect one (v)pshufb, and the AVX2 checks expect vpbroadcastw.
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00000000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00000000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00000000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i16_00000000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <0,0,0,0,4,4,4,4> splats word 0 of %a across the low half and word 4 across the high half.
; Both check groups expect (v)pshuflw followed by (v)pshufhw.
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00004444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Odd result lanes take words 0..3 of %a; even lanes are undef.
; Both check groups expect a single (v)punpcklwd of xmm0 with itself.
define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}
; Odd result lanes take words 4..7 of %a; even lanes are undef.
; Both check groups expect a single (v)punpckhwd of xmm0 with itself.
define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}
; Mask <3,1,2,0,6,7,4,5> reads only %a: it permutes the low four words and swaps the two high word pairs.
; Both check groups expect (v)pshuflw [3,1,2,0,...] followed by (v)pshufd [0,1,3,2].
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_31206745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Mask <4,4,4,4,0,0,0,0> splats word 4 of %a across the low half and word 0 across the high half.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <2,3,0,1,6,7,4,5> reads only %a and swaps the two word pairs inside each half.
; Both check groups expect a single (v)pshufd with dword pattern [1,0,3,2].
define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Mask <2,3,0,2,6,7,4,5> reads only %a: the low half is not expressible as a dword permute
; (word 2 is used twice), while the high half swaps its two word pairs.
; Both check groups expect (v)pshuflw [2,3,0,2,...] followed by (v)pshufd [0,1,3,2].
define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23026745:
; AVX:       # BB#0:
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Mask <2,3,0,1,6,7,4,7> reads only %a: the low half swaps its word pairs, the high half
; uses word 7 twice.
; Both check groups expect (v)pshufd [1,0,2,3] followed by (v)pshufhw [...,6,7,4,7].
define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016747:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}
; Mask <7,5,6,4,3,1,2,0> reads only %a: the halves trade places and each is permuted.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}

; Mask <1,0,5,4,5,4,1,0> reads only words 0, 1, 4 and 5 of %a.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <5,4,1,0,5,4,1,0> reads only words 0, 1, 4 and 5 of %a; both result halves are identical.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <5,4,1,0,1,0,5,4> reads only words 0, 1, 4 and 5 of %a.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <0,4,4,0,0,4,4,0> reads only words 0 and 4 of %a; both result halves are identical.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <4,0,0,4,4,0,0,4> reads only words 0 and 4 of %a; both result halves are identical.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}

; Mask <2,6,4,0,5,1,7,3> is a full permutation of %a's eight words that mixes both halves.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence; the SSSE3, SSE41 and AVX checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Mask <2,0,6,4,5,1,7,3> is a full permutation of %a's eight words that mixes both halves.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence; the SSSE3, SSE41 and AVX checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Mask <2,6,4,0,1,3,7,5> is a full permutation of %a's eight words that mixes both halves.
; The SSE2 checks expect a four-instruction pshuflw/pshufhw/pshufd/pshuflw sequence;
; the SSSE3, SSE41 and AVX checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}

; Mask <6,6,7,5,1,6,4,3> reads only %a, uses word 6 three times and draws from both halves.
; The SSE2 checks expect a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; sequence; the SSSE3, SSE41 and AVX checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}

; Mask <6,0,5,1,4,7,5,4> on %a. Unlike its neighbours, this test passes undef (not %b)
; as the second shufflevector operand; %b is still declared but never read.
; The SSE2 checks expect a four-instruction pshufhw/pshufd/pshuflw/pshufhw sequence;
; the SSSE3, SSE41 and AVX checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

; Mask <0,0,4,4,4,4,4,4> reads only words 0 and 4 of %a.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <4,4,0,0,4,4,4,4> reads only words 0 and 4 of %a.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <0,4,4,0,4,4,4,4> reads only words 0 and 4 of %a.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <0,4,4,0,0,0,0,0> reads only words 0 and 4 of %a; the high half is a splat of word 0.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Mask <0,4,4,0,4,5,6,7> reads only %a: the high half is left in place, the low half is
; built from words 0 and 4.
; Both check groups expect (v)pshufd [0,2,2,3] followed by (v)pshuflw [0,2,2,0,...].
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404567:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Mask <0,undef,4,4,4,4,4,4> reads only words 0 and 4 of %a; lane 1 is undef.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <4,4,undef,0,4,4,4,4> reads only words 0 and 4 of %a; lane 2 is undef.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Mask <undef,4,4,0,4,4,4,4> reads only words 0 and 4 of %a; lane 0 is undef.
; The SSE2 checks expect a pshufd/pshuflw/pshufhw chain; the SSSE3, SSE41 and AVX
; checks expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

; Mask <0,1,2,7,undef x4>: the low half is words 0,1,2 of %a plus word 7 pulled across
; from the high half; the high half of the result is undef.
; The SSE2 checks expect pshufd/pshufhw/pshufd; the SSSE3, SSE41 and AVX checks
; expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Mask <undef x4,4,5,6,3>: the high half is words 4,5,6 of %a plus word 3 pulled across
; from the low half; the low half of the result is undef.
; The SSE2 checks expect pshufd/pshuflw/pshufd; the SSSE3, SSE41 and AVX checks
; expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

; Mask <4,5,6,3,undef x4>: the low half of the result is words 4,5,6 and 3 of %a; the
; high half is undef.
; The SSE2 checks expect pshufd/pshuflw/pshufd; the SSSE3, SSE41 and AVX checks
; expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Single-input permutation of %a that exchanges elements 3 and 7 and leaves every
; other lane in place (mask 0,1,2,7,4,5,6,3).
; The checks below expect a pshufd/pshufhw/pshufd sequence on the baseline SSE2 run
; and a single (v)pshufb on the SSSE3, SSE4.1 and AVX runs.
775define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
776; SSE2-LABEL: shuffle_v8i16_01274563:
777; SSE2:       # BB#0:
778; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
779; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
780; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
781; SSE2-NEXT:    retq
782;
783; SSSE3-LABEL: shuffle_v8i16_01274563:
784; SSSE3:       # BB#0:
785; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
786; SSSE3-NEXT:    retq
787;
788; SSE41-LABEL: shuffle_v8i16_01274563:
789; SSE41:       # BB#0:
790; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
791; SSE41-NEXT:    retq
792;
793; AVX-LABEL: shuffle_v8i16_01274563:
794; AVX:       # BB#0:
795; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
796; AVX-NEXT:    retq
797  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
798  ret <8 x i16> %shuffle
799}
800
; Single-input permutation of %a with mask 4,5,6,3,0,1,2,7: the two 64-bit halves
; trade places except that elements 3 and 7 stay in lanes 3 and 7.
; The checks below expect a pshufd/pshuflw/pshufd sequence on the baseline SSE2 run
; and a single (v)pshufb on the SSSE3, SSE4.1 and AVX runs.
801define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
802; SSE2-LABEL: shuffle_v8i16_45630127:
803; SSE2:       # BB#0:
804; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
805; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
806; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
807; SSE2-NEXT:    retq
808;
809; SSSE3-LABEL: shuffle_v8i16_45630127:
810; SSSE3:       # BB#0:
811; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
812; SSSE3-NEXT:    retq
813;
814; SSE41-LABEL: shuffle_v8i16_45630127:
815; SSE41:       # BB#0:
816; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
817; SSE41-NEXT:    retq
818;
819; AVX-LABEL: shuffle_v8i16_45630127:
820; AVX:       # BB#0:
821; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
822; AVX-NEXT:    retq
823  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
824  ret <8 x i16> %shuffle
825}
826
; Single-input shuffle of %a with repeated elements (mask 3,7,1,0,2,7,3,5), so it
; is not a permutation: elements 3 and 7 are each used twice, 4 and 6 not at all.
; The checks below expect six pshufhw/pshufd/pshuflw instructions on the baseline
; SSE2 run and a single (v)pshufb on the SSSE3, SSE4.1 and AVX runs.
827define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
828; SSE2-LABEL: shuffle_v8i16_37102735:
829; SSE2:       # BB#0:
830; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
831; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
832; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
833; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
834; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
835; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
836; SSE2-NEXT:    retq
837;
838; SSSE3-LABEL: shuffle_v8i16_37102735:
839; SSSE3:       # BB#0:
840; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
841; SSSE3-NEXT:    retq
842;
843; SSE41-LABEL: shuffle_v8i16_37102735:
844; SSE41:       # BB#0:
845; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
846; SSE41-NEXT:    retq
847;
848; AVX-LABEL: shuffle_v8i16_37102735:
849; AVX:       # BB#0:
850; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
851; AVX-NEXT:    retq
852  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
853  ret <8 x i16> %shuffle
854}
855
; Two-input shuffle interleaving the low four elements of %a and %b
; (a0,b0,a1,b1,a2,b2,a3,b3; mask indices 8-15 select from %b).
; The checks below expect a single (v)punpcklwd on every run.
856define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
857; SSE-LABEL: shuffle_v8i16_08192a3b:
858; SSE:       # BB#0:
859; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
860; SSE-NEXT:    retq
861;
862; AVX-LABEL: shuffle_v8i16_08192a3b:
863; AVX:       # BB#0:
864; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
865; AVX-NEXT:    retq
866  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
867  ret <8 x i16> %shuffle
868}
869
; Two-input shuffle interleaving the low four elements of %a with the high four
; elements of %b (a0,b4,a1,b5,a2,b6,a3,b7).
; The checks below expect a (v)pshufd that swaps the 64-bit halves of xmm1, followed
; by a (v)punpcklwd.
870define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
871; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
872; SSE:       # BB#0:
873; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
874; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
875; SSE-NEXT:    retq
876;
877; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
878; AVX:       # BB#0:
879; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
880; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
881; AVX-NEXT:    retq
882  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
883  ret <8 x i16> %shuffle
884}
885
; Two-input shuffle interleaving the high four elements of %a and %b
; (a4,b4,a5,b5,a6,b6,a7,b7).
; The checks below expect a single (v)punpckhwd on every run.
886define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
887; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
888; SSE:       # BB#0:
889; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
890; SSE-NEXT:    retq
891;
892; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
893; AVX:       # BB#0:
894; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
895; AVX-NEXT:    retq
896  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
897  ret <8 x i16> %shuffle
898}
899
; Two-input shuffle interleaving the high four elements of %a with the low four
; elements of %b (a4,b0,a5,b1,a6,b2,a7,b3).
; The checks below expect a (v)pshufd that swaps the 64-bit halves of xmm0, followed
; by a (v)punpcklwd.
900define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
901; SSE-LABEL: shuffle_v8i16_48596a7b:
902; SSE:       # BB#0:
903; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
904; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
905; SSE-NEXT:    retq
906;
907; AVX-LABEL: shuffle_v8i16_48596a7b:
908; AVX:       # BB#0:
909; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
910; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
911; AVX-NEXT:    retq
912  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
913  ret <8 x i16> %shuffle
914}
915
; Two-input shuffle interleaving elements 0,1 and 6,7 of %a with the same elements
; of %b (a0,b0,a1,b1,a6,b6,a7,b7).
; The checks below expect the same (v)pshufd [0,3,2,3] on each input, which gathers
; dwords 0 and 3 into the low half, followed by a (v)punpcklwd.
916define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
917; SSE-LABEL: shuffle_v8i16_08196e7f:
918; SSE:       # BB#0:
919; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
920; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
921; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
922; SSE-NEXT:    retq
923;
924; AVX-LABEL: shuffle_v8i16_08196e7f:
925; AVX:       # BB#0:
926; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
927; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
928; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
929; AVX-NEXT:    retq
930  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
931  ret <8 x i16> %shuffle
932}
933
; Two-input shuffle producing a0,b4,a1,b5,a6,b0,a7,b1: an interleave where each
; input contributes two non-adjacent element pairs.
; The checks below expect one (v)pshufd per input to bring the needed dwords into
; the low half (xmm1 [2,0,2,3], xmm0 [0,3,2,3]), followed by a (v)punpcklwd.
934define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
935; SSE-LABEL: shuffle_v8i16_0c1d6879:
936; SSE:       # BB#0:
937; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
938; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
939; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
940; SSE-NEXT:    retq
941;
942; AVX-LABEL: shuffle_v8i16_0c1d6879:
943; AVX:       # BB#0:
944; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
945; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
946; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
947; AVX-NEXT:    retq
948  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
949  ret <8 x i16> %shuffle
950}
951
; Two-input shuffle producing a1,a0,b1,b0,a3,a2,b3,b2 from the low four elements
; of each input.
; The checks below expect a (v)punpcklwd interleave followed by a (v)pshuflw and a
; (v)pshufhw that reorder the low and high halves of the interleaved result.
952define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
953; SSE-LABEL: shuffle_v8i16_109832ba:
954; SSE:       # BB#0:
955; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
956; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
957; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
958; SSE-NEXT:    retq
959;
960; AVX-LABEL: shuffle_v8i16_109832ba:
961; AVX:       # BB#0:
962; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
963; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
964; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
965; AVX-NEXT:    retq
966  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
967  ret <8 x i16> %shuffle
968}
969
; Two-input shuffle interleaving the low four elements with %b first
; (b0,a0,b1,a1,b2,a2,b3,a3), i.e. the commuted form of the 08192a3b interleave.
; The SSE checks expect a punpcklwd into xmm1 plus a movdqa copy back to xmm0; the
; AVX checks expect a single vpunpcklwd that writes xmm0 directly.
970define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
971; SSE-LABEL: shuffle_v8i16_8091a2b3:
972; SSE:       # BB#0:
973; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
974; SSE-NEXT:    movdqa %xmm1, %xmm0
975; SSE-NEXT:    retq
976;
977; AVX-LABEL: shuffle_v8i16_8091a2b3:
978; AVX:       # BB#0:
979; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
980; AVX-NEXT:    retq
981  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
982  ret <8 x i16> %shuffle
983}
; Two-input shuffle interleaving the high four elements with %b first
; (b4,a4,b5,a5,b6,a6,b7,a7).
; The SSE checks expect a punpckhwd into xmm1 plus a movdqa copy back to xmm0; the
; AVX checks expect a single vpunpckhwd that writes xmm0 directly.
984define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
985; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
986; SSE:       # BB#0:
987; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
988; SSE-NEXT:    movdqa %xmm1, %xmm0
989; SSE-NEXT:    retq
990;
991; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
992; AVX:       # BB#0:
993; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
994; AVX-NEXT:    retq
995  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
996  ret <8 x i16> %shuffle
997}
998
; Two-input shuffle whose low half is a0,a2,a1,a3 and whose high half is
; b4,b6,b5,b7 (mask indices 12,14,13,15).
; The checks below expect a (v)pshuflw on xmm0, a (v)pshufd plus (v)pshuflw on xmm1,
; and a final (v)punpcklqdq joining the two low halves.
999define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1000; SSE-LABEL: shuffle_v8i16_0213cedf:
1001; SSE:       # BB#0:
1002; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1003; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1004; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1005; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1006; SSE-NEXT:    retq
1007;
1008; AVX-LABEL: shuffle_v8i16_0213cedf:
1009; AVX:       # BB#0:
1010; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1011; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1012; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1013; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1014; AVX-NEXT:    retq
1015  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1016  ret <8 x i16> %shuffle
1017}
1018
; Two-input shuffle whose lanes 0-3 are a4,a4,a3,b2 (mask index 10 is element 2 of
; %b); lanes 4-7 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: constant-mask pand/pandn/por blend, then pshufd + pshuflw
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX runs: (v)pblendw, then (v)pshufd + (v)pshuflw
1019define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1020; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1021; SSE2:       # BB#0:
1022; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1023; SSE2-NEXT:    pand %xmm2, %xmm0
1024; SSE2-NEXT:    pandn %xmm1, %xmm2
1025; SSE2-NEXT:    por %xmm0, %xmm2
1026; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1027; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1028; SSE2-NEXT:    retq
1029;
1030; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1031; SSSE3:       # BB#0:
1032; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1033; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1034; SSSE3-NEXT:    por %xmm1, %xmm0
1035; SSSE3-NEXT:    retq
1036;
1037; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1038; SSE41:       # BB#0:
1039; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1040; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1041; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1042; SSE41-NEXT:    retq
1043;
1044; AVX-LABEL: shuffle_v8i16_443aXXXX:
1045; AVX:       # BB#0:
1046; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1047; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1048; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1049; AVX-NEXT:    retq
1050  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1051  ret <8 x i16> %shuffle
1052}
1053
; Two-input shuffle whose lanes 0-3 are a0,a3,a2,b5 (mask index 13 is element 5 of
; %b); lanes 4-7 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: movsd merge of the two inputs, then four pshufd/pshufhw/
;     pshuflw shuffles
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX1 runs: (v)pblendw followed by (v)pshufb
;   - AVX2 run: vpblendd (dword blend) followed by vpshufb
1054define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1055; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1056; SSE2:       # BB#0:
1057; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1058; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1059; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1060; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1061; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1062; SSE2-NEXT:    retq
1063;
1064; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1065; SSSE3:       # BB#0:
1066; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1067; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1068; SSSE3-NEXT:    por %xmm1, %xmm0
1069; SSSE3-NEXT:    retq
1070;
1071; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1072; SSE41:       # BB#0:
1073; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1074; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1075; SSE41-NEXT:    retq
1076;
1077; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1078; AVX1:       # BB#0:
1079; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1080; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1081; AVX1-NEXT:    retq
1082;
1083; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1084; AVX2:       # BB#0:
1085; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1086; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1087; AVX2-NEXT:    retq
1088  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1089  ret <8 x i16> %shuffle
1090}
; Shuffle with a single defined lane: lane 3 takes element 5 of %b (mask index 13);
; every other lane is undef.
; The checks below expect a single (v)pshufd reading xmm1 on every run.
1091define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1092; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1093; SSE:       # BB#0:
1094; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1095; SSE-NEXT:    retq
1096;
1097; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1098; AVX:       # BB#0:
1099; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1100; AVX-NEXT:    retq
1101  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1102  ret <8 x i16> %shuffle
1103}
1104
; Two-input shuffle whose lanes 0-3 are a0,a1,a2,b5 (mask index 13 is element 5 of
; %b); lanes 4-7 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: pshufd on xmm1 plus a constant-mask pand/pandn/por blend
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX runs: (v)pshufd on xmm1 followed by (v)pblendw
1105define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1106; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1107; SSE2:       # BB#0:
1108; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1109; SSE2-NEXT:    pand %xmm2, %xmm0
1110; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1111; SSE2-NEXT:    pandn %xmm1, %xmm2
1112; SSE2-NEXT:    por %xmm2, %xmm0
1113; SSE2-NEXT:    retq
1114;
1115; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1116; SSSE3:       # BB#0:
1117; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1118; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1119; SSSE3-NEXT:    por %xmm1, %xmm0
1120; SSSE3-NEXT:    retq
1121;
1122; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1123; SSE41:       # BB#0:
1124; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1125; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1126; SSE41-NEXT:    retq
1127;
1128; AVX-LABEL: shuffle_v8i16_012dXXXX:
1129; AVX:       # BB#0:
1130; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1131; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1132; AVX-NEXT:    retq
1133  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1134  ret <8 x i16> %shuffle
1135}
1136
; Two-input shuffle whose lanes 4-7 are b4,b5,b6,a3 (mask indices 12,13,14 select
; from %b); lanes 0-3 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: pshufd on xmm0 plus a constant-mask pand/pandn/por blend,
;     then a movdqa to place the result in xmm0
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX1 runs: (v)pshufd on xmm0 followed by (v)pblendw
;   - AVX2 run: vpbroadcastq in place of the vpshufd, followed by vpblendw
1137define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1138; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1139; SSE2:       # BB#0:
1140; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1141; SSE2-NEXT:    pand %xmm2, %xmm1
1142; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1143; SSE2-NEXT:    pandn %xmm0, %xmm2
1144; SSE2-NEXT:    por %xmm1, %xmm2
1145; SSE2-NEXT:    movdqa %xmm2, %xmm0
1146; SSE2-NEXT:    retq
1147;
1148; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1149; SSSE3:       # BB#0:
1150; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1151; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1152; SSSE3-NEXT:    por %xmm1, %xmm0
1153; SSSE3-NEXT:    retq
1154;
1155; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1156; SSE41:       # BB#0:
1157; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1158; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1159; SSE41-NEXT:    retq
1160;
1161; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1162; AVX1:       # BB#0:
1163; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1164; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1165; AVX1-NEXT:    retq
1166;
1167; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
1168; AVX2:       # BB#0:
1169; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
1170; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1171; AVX2-NEXT:    retq
1172  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1173  ret <8 x i16> %shuffle
1174}
1175
; Two-input shuffle whose lanes 0-3 are b4,b5,b6,a3 (mask indices 12,13,14 select
; from %b); lanes 4-7 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: pshufd on xmm1 plus a constant-mask pand/pandn/por blend,
;     then a movdqa to place the result in xmm0
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX runs: (v)pshufd on xmm1 followed by (v)pblendw
1176define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1177; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1178; SSE2:       # BB#0:
1179; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1180; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1181; SSE2-NEXT:    pand %xmm2, %xmm1
1182; SSE2-NEXT:    pandn %xmm0, %xmm2
1183; SSE2-NEXT:    por %xmm1, %xmm2
1184; SSE2-NEXT:    movdqa %xmm2, %xmm0
1185; SSE2-NEXT:    retq
1186;
1187; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1188; SSSE3:       # BB#0:
1189; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1190; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1191; SSSE3-NEXT:    por %xmm1, %xmm0
1192; SSSE3-NEXT:    retq
1193;
1194; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1195; SSE41:       # BB#0:
1196; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1197; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1198; SSE41-NEXT:    retq
1199;
1200; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1201; AVX:       # BB#0:
1202; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1203; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1204; AVX-NEXT:    retq
1205  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1206  ret <8 x i16> %shuffle
1207}
1208
; Two-input shuffle producing a0,a1,a2,b5,b4,b5,b6,a3 (mask 0,1,2,13,12,13,14,3):
; the low half is mostly %a with one lane from %b, the high half the reverse.
; Expected lowering per the checks below:
;   - baseline SSE2 run: movsd merge of the inputs, then seven pshufd/pshuflw/
;     pshufhw shuffles
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX1 runs: (v)pblendw followed by (v)pshufb
;   - AVX2 run: vpblendd (dword blend) followed by vpshufb
1209define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1210; SSE2-LABEL: shuffle_v8i16_012dcde3:
1211; SSE2:       # BB#0:
1212; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1213; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1214; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1215; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1216; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1217; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1218; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1219; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1220; SSE2-NEXT:    retq
1221;
1222; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1223; SSSE3:       # BB#0:
1224; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1225; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1226; SSSE3-NEXT:    por %xmm1, %xmm0
1227; SSSE3-NEXT:    retq
1228;
1229; SSE41-LABEL: shuffle_v8i16_012dcde3:
1230; SSE41:       # BB#0:
1231; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1232; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1233; SSE41-NEXT:    retq
1234;
1235; AVX1-LABEL: shuffle_v8i16_012dcde3:
1236; AVX1:       # BB#0:
1237; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1238; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1239; AVX1-NEXT:    retq
1240;
1241; AVX2-LABEL: shuffle_v8i16_012dcde3:
1242; AVX2:       # BB#0:
1243; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1244; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1245; AVX2-NEXT:    retq
1246  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1247  ret <8 x i16> %shuffle
1248}
1249
; Pure blend: every result lane i takes element i of either %a or %b. Lanes 1, 4, 5
; and 6 come from %b (mask indices 9, 12, 13, 14); lanes 0, 2, 3 and 7 come from %a.
; The baseline SSE2 and SSSE3 checks expect a constant-mask andps/andnps/orps
; blend; the SSE4.1 and AVX checks expect a single (v)pblendw.
1250define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1251; SSE2-LABEL: shuffle_v8i16_0923cde7:
1252; SSE2:       # BB#0:
1253; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1254; SSE2-NEXT:    andps %xmm2, %xmm0
1255; SSE2-NEXT:    andnps %xmm1, %xmm2
1256; SSE2-NEXT:    orps %xmm2, %xmm0
1257; SSE2-NEXT:    retq
1258;
1259; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1260; SSSE3:       # BB#0:
1261; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1262; SSSE3-NEXT:    andps %xmm2, %xmm0
1263; SSSE3-NEXT:    andnps %xmm1, %xmm2
1264; SSSE3-NEXT:    orps %xmm2, %xmm0
1265; SSSE3-NEXT:    retq
1266;
1267; SSE41-LABEL: shuffle_v8i16_0923cde7:
1268; SSE41:       # BB#0:
1269; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1270; SSE41-NEXT:    retq
1271;
1272; AVX-LABEL: shuffle_v8i16_0923cde7:
1273; AVX:       # BB#0:
1274; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1275; AVX-NEXT:    retq
1276  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1277  ret <8 x i16> %shuffle
1278}
1279
; Sparse two-input shuffle: lane 3 = a1, lane 5 = a5, lane 6 = a7, lane 7 = b1
; (mask index 9); lanes 0, 1, 2 and 4 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: pshufd on xmm1, pshuflw + pshufhw on xmm0, then a
;     constant-mask pand/pandn/por blend and a movdqa into xmm0
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX1 runs: (v)pshufd, (v)pshuflw, (v)pshufhw, then (v)pblendw
;   - AVX2 run: as AVX1 but with vpbroadcastd in place of the vpshufd on xmm1
1280define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1281; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1282; SSE2:       # BB#0:
1283; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1284; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1285; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1286; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1287; SSE2-NEXT:    pand %xmm1, %xmm0
1288; SSE2-NEXT:    pandn %xmm2, %xmm1
1289; SSE2-NEXT:    por %xmm0, %xmm1
1290; SSE2-NEXT:    movdqa %xmm1, %xmm0
1291; SSE2-NEXT:    retq
1292;
1293; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1294; SSSE3:       # BB#0:
1295; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1296; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1297; SSSE3-NEXT:    por %xmm1, %xmm0
1298; SSSE3-NEXT:    retq
1299;
1300; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1301; SSE41:       # BB#0:
1302; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1303; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1304; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1305; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1306; SSE41-NEXT:    retq
1307;
1308; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1309; AVX1:       # BB#0:
1310; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1311; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1312; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1313; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1314; AVX1-NEXT:    retq
1315;
1316; AVX2-LABEL: shuffle_v8i16_XXX1X579:
1317; AVX2:       # BB#0:
1318; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
1319; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1320; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1321; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1322; AVX2-NEXT:    retq
1323  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1324  ret <8 x i16> %shuffle
1325}
1326
; Sparse two-input shuffle: lane 2 = a4, lanes 4-6 = b0,b2,b4 (mask indices 8, 10,
; 12); lanes 0, 1, 3 and 7 are undef.
; Expected lowering per the checks below:
;   - baseline SSE2 run: pshufd on xmm0, pshuflw/pshufd/pshufhw on xmm1, then a
;     movsd merging the low half of one with the high half of the other
;   - SSSE3 run: one pshufb per input, combined with por
;   - SSE4.1 and AVX1 runs: (v)pshufb on xmm1, (v)pshufd on xmm0, then (v)pblendw
;   - AVX2 run: as AVX1 but with vpblendd (dword blend) as the final blend
1327define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1328; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1329; SSE2:       # BB#0:
1330; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1331; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1332; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1333; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1334; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1335; SSE2-NEXT:    retq
1336;
1337; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1338; SSSE3:       # BB#0:
1339; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1340; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1341; SSSE3-NEXT:    por %xmm1, %xmm0
1342; SSSE3-NEXT:    retq
1343;
1344; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1345; SSE41:       # BB#0:
1346; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1347; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1348; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1349; SSE41-NEXT:    retq
1350;
1351; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1352; AVX1:       # BB#0:
1353; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1354; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1355; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1356; AVX1-NEXT:    retq
1357;
1358; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1359; AVX2:       # BB#0:
1360; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1361; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1362; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1363; AVX2-NEXT:    retq
1364  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1365  ret <8 x i16> %shuffle
1366}
1367
; Scalar-into-zero-vector test: %i is inserted into lane 0 of an otherwise undef
; vector %a, then shuffled against zeroinitializer so that result lane 0 is %i
; (mask index 8) and lanes 1-7 come from the zero vector.
; The checks below expect a movzwl of %di followed by a (v)movd into xmm0, with no
; explicit zeroing or insert instruction.
1368define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1369; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1370; SSE:       # BB#0:
1371; SSE-NEXT:    movzwl %di, %eax
1372; SSE-NEXT:    movd %eax, %xmm0
1373; SSE-NEXT:    retq
1374;
1375; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1376; AVX:       # BB#0:
1377; AVX-NEXT:    movzwl %di, %eax
1378; AVX-NEXT:    vmovd %eax, %xmm0
1379; AVX-NEXT:    retq
1380  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1381  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1382  ret <8 x i16> %shuffle
1383}
1384
; Scalar-into-zero-vector test: result lane 1 is %i (mask index 8 = lane 0 of %a);
; every other lane indexes the zeroinitializer operand. Those indices are
; deliberately not in order (2,3,7,6,5,4,3), but all of them read zero.
; The checks below expect a (v)pxor to zero xmm0 followed by (v)pinsrw $1.
1385define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1386; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1387; SSE:       # BB#0:
1388; SSE-NEXT:    pxor %xmm0, %xmm0
1389; SSE-NEXT:    pinsrw $1, %edi, %xmm0
1390; SSE-NEXT:    retq
1391;
1392; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1393; AVX:       # BB#0:
1394; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1395; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
1396; AVX-NEXT:    retq
1397  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1398  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1399  ret <8 x i16> %shuffle
1400}
1401
; Scalar-into-zero-vector test: result lane 5 is %i (mask index 8 = lane 0 of %a);
; every other lane uses mask index 0, i.e. lane 0 of the zeroinitializer operand.
; The checks below expect a (v)pxor to zero xmm0 followed by (v)pinsrw $5.
1402define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1403; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1404; SSE:       # BB#0:
1405; SSE-NEXT:    pxor %xmm0, %xmm0
1406; SSE-NEXT:    pinsrw $5, %edi, %xmm0
1407; SSE-NEXT:    retq
1408;
1409; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1410; AVX:       # BB#0:
1411; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1412; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
1413; AVX-NEXT:    retq
1414  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1415  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1416  ret <8 x i16> %shuffle
1417}
1418
; Scalar-into-zero-vector test with undef lanes mixed in: lanes 0, 3 and 6 read
; the zeroinitializer operand, lanes 1, 2, 4 and 5 are undef, and lane 7 is %i
; (mask index 8 = lane 0 of %a).
; The checks below expect a (v)pxor to zero xmm0 followed by (v)pinsrw $7.
1419define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1420; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1421; SSE:       # BB#0:
1422; SSE-NEXT:    pxor %xmm0, %xmm0
1423; SSE-NEXT:    pinsrw $7, %edi, %xmm0
1424; SSE-NEXT:    retq
1425;
1426; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1427; AVX:       # BB#0:
1428; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1429; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
1430; AVX-NEXT:    retq
1431  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1432  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1433  ret <8 x i16> %shuffle
1434}
1435
; Scalar-into-zero-vector test where the scalar does not start in lane 0: %i is
; inserted into lane 3 of %a, and mask index 11 (8 + 3) moves that lane into
; result lane 2. Every other lane reads the zeroinitializer operand.
; The checks below expect a (v)pxor to zero xmm0 followed by (v)pinsrw $2.
1436define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1437; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1438; SSE:       # BB#0:
1439; SSE-NEXT:    pxor %xmm0, %xmm0
1440; SSE-NEXT:    pinsrw $2, %edi, %xmm0
1441; SSE-NEXT:    retq
1442;
1443; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1444; AVX:       # BB#0:
1445; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1446; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
1447; AVX-NEXT:    retq
1448  %a = insertelement <8 x i16> undef, i16 %i, i32 3
1449  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1450  ret <8 x i16> %shuffle
1451}
1452
; Two-input shuffle producing b5,b6,b7,a0,a1,a2,a3,a4 (mask 13,14,15,0,1,2,3,4):
; a contiguous eight-element window across the two inputs.
; The baseline SSE2 checks expect a psrldq on xmm1, a pslldq on xmm0 and a por to
; combine them; the SSSE3, SSE4.1 and AVX checks expect a single (v)palignr.
1453define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1454; SSE2-LABEL: shuffle_v8i16_def01234:
1455; SSE2:       # BB#0:
1456; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1457; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1458; SSE2-NEXT:    por %xmm1, %xmm0
1459; SSE2-NEXT:    retq
1460;
1461; SSSE3-LABEL: shuffle_v8i16_def01234:
1462; SSSE3:       # BB#0:
1463; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1464; SSSE3-NEXT:    retq
1465;
1466; SSE41-LABEL: shuffle_v8i16_def01234:
1467; SSE41:       # BB#0:
1468; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1469; SSE41-NEXT:    retq
1470;
1471; AVX-LABEL: shuffle_v8i16_def01234:
1472; AVX:       # BB#0:
1473; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1474; AVX-NEXT:    retq
1475  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1476  ret <8 x i16> %shuffle
1477}
1478
; Sparse two-input shuffle: lane 1 = b6 (mask index 14) and lanes 4-6 = a1,a2,a3;
; lanes 0, 2, 3 and 7 are undef. The defined lanes are consistent with the
; b5,b6,b7,a0,a1,a2,a3,a4 window, so the undef lanes can be filled to match it.
; The baseline SSE2 checks expect a psrldq on xmm1, a pslldq on xmm0 and a por to
; combine them; the SSSE3, SSE4.1 and AVX checks expect a single (v)palignr.
1479define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1480; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1481; SSE2:       # BB#0:
1482; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1483; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1484; SSE2-NEXT:    por %xmm1, %xmm0
1485; SSE2-NEXT:    retq
1486;
1487; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1488; SSSE3:       # BB#0:
1489; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1490; SSSE3-NEXT:    retq
1491;
1492; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1493; SSE41:       # BB#0:
1494; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1495; SSE41-NEXT:    retq
1496;
1497; AVX-LABEL: shuffle_v8i16_ueuu123u:
1498; AVX:       # BB#0:
1499; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1500; AVX-NEXT:    retq
1501  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1502  ret <8 x i16> %shuffle
1503}
1504
; Single-input rotation: every mask index is below 8, so only %a is read.
; With SSSE3 or later the checks expect (v)palignr of xmm0 with itself; plain
; SSE2 copies xmm0 into xmm1 and uses psrldq + pslldq + por.
1505define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1506; SSE2-LABEL: shuffle_v8i16_56701234:
1507; SSE2:       # BB#0:
1508; SSE2-NEXT:    movdqa %xmm0, %xmm1
1509; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1510; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1511; SSE2-NEXT:    por %xmm1, %xmm0
1512; SSE2-NEXT:    retq
1513;
1514; SSSE3-LABEL: shuffle_v8i16_56701234:
1515; SSSE3:       # BB#0:
1516; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1517; SSSE3-NEXT:    retq
1518;
1519; SSE41-LABEL: shuffle_v8i16_56701234:
1520; SSE41:       # BB#0:
1521; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1522; SSE41-NEXT:    retq
1523;
1524; AVX-LABEL: shuffle_v8i16_56701234:
1525; AVX:       # BB#0:
1526; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1527; AVX-NEXT:    retq
1528  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1529  ret <8 x i16> %shuffle
1530}
1531
; Sparse variant of the 56701234 single-input rotation: only lane 1 (mask 6)
; and lanes 4-6 (mask 1-3) are defined, all taken from %a. The checks expect
; the same sequences as for the fully specified rotation.
1532define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1533; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1534; SSE2:       # BB#0:
1535; SSE2-NEXT:    movdqa %xmm0, %xmm1
1536; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1537; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1538; SSE2-NEXT:    por %xmm1, %xmm0
1539; SSE2-NEXT:    retq
1540;
1541; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1542; SSSE3:       # BB#0:
1543; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1544; SSSE3-NEXT:    retq
1545;
1546; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1547; SSE41:       # BB#0:
1548; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1549; SSE41-NEXT:    retq
1550;
1551; AVX-LABEL: shuffle_v8i16_u6uu123u:
1552; AVX:       # BB#0:
1553; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1554; AVX-NEXT:    retq
1555  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1556  ret <8 x i16> %shuffle
1557}
1558
; Only lanes 4-6 are defined (mask 1-3, all from %a), so a whole-register byte
; shift is enough: the checks expect a single (v)pslldq by 6 bytes.
1559define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1560; SSE-LABEL: shuffle_v8i16_uuuu123u:
1561; SSE:       # BB#0:
1562; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1563; SSE-NEXT:    retq
1564;
1565; AVX-LABEL: shuffle_v8i16_uuuu123u:
1566; AVX:       # BB#0:
1567; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1568; AVX-NEXT:    retq
1569  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1570  ret <8 x i16> %shuffle
1571}
1572
; Two-input byte rotation: lanes 3-7 of %b (mask 11-15) followed by lanes 0-2
; of %a. With SSSE3 or later the checks expect a single (v)palignr; plain SSE2
; falls back to psrldq + pslldq + por.
1573define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1574; SSE2-LABEL: shuffle_v8i16_bcdef012:
1575; SSE2:       # BB#0:
1576; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1577; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1578; SSE2-NEXT:    por %xmm1, %xmm0
1579; SSE2-NEXT:    retq
1580;
1581; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1582; SSSE3:       # BB#0:
1583; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1584; SSSE3-NEXT:    retq
1585;
1586; SSE41-LABEL: shuffle_v8i16_bcdef012:
1587; SSE41:       # BB#0:
1588; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1589; SSE41-NEXT:    retq
1590;
1591; AVX-LABEL: shuffle_v8i16_bcdef012:
1592; AVX:       # BB#0:
1593; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1594; AVX-NEXT:    retq
1595  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1596  ret <8 x i16> %shuffle
1597}
1598
; Sparse variant of the bcdef012 rotation: only lanes 1-3 (mask 12-14, from
; %b) and lane 6 (mask 1, from %a) are defined. The checks expect the same
; sequences as for the fully specified mask 11,12,13,14,15,0,1,2.
1599define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1600; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1601; SSE2:       # BB#0:
1602; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1603; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1604; SSE2-NEXT:    por %xmm1, %xmm0
1605; SSE2-NEXT:    retq
1606;
1607; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1608; SSSE3:       # BB#0:
1609; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1610; SSSE3-NEXT:    retq
1611;
1612; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1613; SSE41:       # BB#0:
1614; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1615; SSE41-NEXT:    retq
1616;
1617; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1618; AVX:       # BB#0:
1619; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1620; AVX-NEXT:    retq
1621  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1622  ret <8 x i16> %shuffle
1623}
1624
; Single-input rotation: every mask index is below 8, so only %a is read.
; With SSSE3 or later the checks expect (v)palignr of xmm0 with itself; plain
; SSE2 copies xmm0 into xmm1 and uses psrldq + pslldq + por.
1625define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1626; SSE2-LABEL: shuffle_v8i16_34567012:
1627; SSE2:       # BB#0:
1628; SSE2-NEXT:    movdqa %xmm0, %xmm1
1629; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1630; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1631; SSE2-NEXT:    por %xmm1, %xmm0
1632; SSE2-NEXT:    retq
1633;
1634; SSSE3-LABEL: shuffle_v8i16_34567012:
1635; SSSE3:       # BB#0:
1636; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1637; SSSE3-NEXT:    retq
1638;
1639; SSE41-LABEL: shuffle_v8i16_34567012:
1640; SSE41:       # BB#0:
1641; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1642; SSE41-NEXT:    retq
1643;
1644; AVX-LABEL: shuffle_v8i16_34567012:
1645; AVX:       # BB#0:
1646; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1647; AVX-NEXT:    retq
1648  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1649  ret <8 x i16> %shuffle
1650}
1651
; Sparse variant of the 34567012 single-input rotation: only lanes 1-3 (mask
; 4-6) and lane 6 (mask 1) are defined, all taken from %a. The checks expect
; the same sequences as for the fully specified rotation.
1652define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1653; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1654; SSE2:       # BB#0:
1655; SSE2-NEXT:    movdqa %xmm0, %xmm1
1656; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1657; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1658; SSE2-NEXT:    por %xmm1, %xmm0
1659; SSE2-NEXT:    retq
1660;
1661; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1662; SSSE3:       # BB#0:
1663; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1664; SSSE3-NEXT:    retq
1665;
1666; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1667; SSE41:       # BB#0:
1668; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1669; SSE41-NEXT:    retq
1670;
1671; AVX-LABEL: shuffle_v8i16_u456uu1u:
1672; AVX:       # BB#0:
1673; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1674; AVX-NEXT:    retq
1675  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1676  ret <8 x i16> %shuffle
1677}
1678
; Only lanes 1-3 are defined (mask 4-6, all from %a), so a whole-register byte
; shift is enough: the checks expect a single (v)psrldq by 6 bytes.
1679define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1680; SSE-LABEL: shuffle_v8i16_u456uuuu:
1681; SSE:       # BB#0:
1682; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1683; SSE-NEXT:    retq
1684;
1685; AVX-LABEL: shuffle_v8i16_u456uuuu:
1686; AVX:       # BB#0:
1687; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1688; AVX-NEXT:    retq
1689  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1690  ret <8 x i16> %shuffle
1691}
1692
; Two-input byte rotation in the other direction: lanes 3-7 of %a followed by
; lanes 0-2 of %b (mask 8-10). For SSSE3 and SSE4.1 the palignr result lands
; in xmm1 and is copied back to xmm0; the three-operand AVX form writes xmm0
; directly. Plain SSE2 uses psrldq + pslldq + por.
1693define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1694; SSE2-LABEL: shuffle_v8i16_3456789a:
1695; SSE2:       # BB#0:
1696; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1697; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1698; SSE2-NEXT:    por %xmm1, %xmm0
1699; SSE2-NEXT:    retq
1700;
1701; SSSE3-LABEL: shuffle_v8i16_3456789a:
1702; SSSE3:       # BB#0:
1703; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1704; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1705; SSSE3-NEXT:    retq
1706;
1707; SSE41-LABEL: shuffle_v8i16_3456789a:
1708; SSE41:       # BB#0:
1709; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1710; SSE41-NEXT:    movdqa %xmm1, %xmm0
1711; SSE41-NEXT:    retq
1712;
1713; AVX-LABEL: shuffle_v8i16_3456789a:
1714; AVX:       # BB#0:
1715; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1716; AVX-NEXT:    retq
1717  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1718  ret <8 x i16> %shuffle
1719}
1720
; Sparse variant of the 3456789a rotation: only lanes 1-3 (mask 4-6, from %a)
; and lane 6 (mask 9, from %b) are defined. The checks expect the same
; sequences as for the fully specified mask 3,4,5,6,7,8,9,10.
1721define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1722; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1723; SSE2:       # BB#0:
1724; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1725; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1726; SSE2-NEXT:    por %xmm1, %xmm0
1727; SSE2-NEXT:    retq
1728;
1729; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1730; SSSE3:       # BB#0:
1731; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1732; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1733; SSSE3-NEXT:    retq
1734;
1735; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1736; SSE41:       # BB#0:
1737; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1738; SSE41-NEXT:    movdqa %xmm1, %xmm0
1739; SSE41-NEXT:    retq
1740;
1741; AVX-LABEL: shuffle_v8i16_u456uu9u:
1742; AVX:       # BB#0:
1743; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1744; AVX-NEXT:    retq
1745  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1746  ret <8 x i16> %shuffle
1747}
1748
; Two-input byte rotation: lanes 5-7 of %a followed by lanes 0-4 of %b (mask
; 8-12). For SSSE3 and SSE4.1 the palignr result lands in xmm1 and is copied
; back to xmm0; the three-operand AVX form writes xmm0 directly. Plain SSE2
; uses psrldq + pslldq + por.
1749define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1750; SSE2-LABEL: shuffle_v8i16_56789abc:
1751; SSE2:       # BB#0:
1752; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1753; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1754; SSE2-NEXT:    por %xmm1, %xmm0
1755; SSE2-NEXT:    retq
1756;
1757; SSSE3-LABEL: shuffle_v8i16_56789abc:
1758; SSSE3:       # BB#0:
1759; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1760; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1761; SSSE3-NEXT:    retq
1762;
1763; SSE41-LABEL: shuffle_v8i16_56789abc:
1764; SSE41:       # BB#0:
1765; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1766; SSE41-NEXT:    movdqa %xmm1, %xmm0
1767; SSE41-NEXT:    retq
1768;
1769; AVX-LABEL: shuffle_v8i16_56789abc:
1770; AVX:       # BB#0:
1771; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1772; AVX-NEXT:    retq
1773  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1774  ret <8 x i16> %shuffle
1775}
1776
; Sparse variant of the 56789abc rotation: only lane 1 (mask 6, from %a) and
; lanes 4-6 (mask 9-11, from %b) are defined. The checks expect the same
; sequences as for the fully specified mask 5,6,7,8,9,10,11,12.
1777define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1778; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1779; SSE2:       # BB#0:
1780; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1781; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1782; SSE2-NEXT:    por %xmm1, %xmm0
1783; SSE2-NEXT:    retq
1784;
1785; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
1786; SSSE3:       # BB#0:
1787; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1788; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1789; SSSE3-NEXT:    retq
1790;
1791; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
1792; SSE41:       # BB#0:
1793; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1794; SSE41-NEXT:    movdqa %xmm1, %xmm0
1795; SSE41-NEXT:    retq
1796;
1797; AVX-LABEL: shuffle_v8i16_u6uu9abu:
1798; AVX:       # BB#0:
1799; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1800; AVX-NEXT:    retq
1801  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
1802  ret <8 x i16> %shuffle
1803}
1804
; Lanes 0 and 4 take lanes 0 and 1 of %a; every other lane is undef. SSE4.1
; and AVX are expected to use a single (v)pmovzxwq, while SSE2 and SSSE3 use
; pshufd + pshufhw.
1805define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
1806; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
1807; SSE2:       # BB#0:
1808; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1809; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1810; SSE2-NEXT:    retq
1811;
1812; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
1813; SSSE3:       # BB#0:
1814; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1815; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1816; SSSE3-NEXT:    retq
1817;
1818; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
1819; SSE41:       # BB#0:
1820; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1821; SSE41-NEXT:    retq
1822;
1823; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
1824; AVX:       # BB#0:
1825; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1826; AVX-NEXT:    retq
1827  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
1828  ret <8 x i16> %shuffle
1829}
1830
; Lanes 0 and 4 take lanes 0 and 1 of %a; the remaining lanes read the zero
; vector (mask 9-11 and 13-15). SSE4.1 and AVX are expected to use a single
; (v)pmovzxwq, while SSE2 and SSSE3 interleave with a zeroed register via
; punpcklwd + punpckldq.
1831define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
1832; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
1833; SSE2:       # BB#0:
1834; SSE2-NEXT:    pxor %xmm1, %xmm1
1835; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1836; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1837; SSE2-NEXT:    retq
1838;
1839; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
1840; SSSE3:       # BB#0:
1841; SSSE3-NEXT:    pxor %xmm1, %xmm1
1842; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1843; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1844; SSSE3-NEXT:    retq
1845;
1846; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
1847; SSE41:       # BB#0:
1848; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1849; SSE41-NEXT:    retq
1850;
1851; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
1852; AVX:       # BB#0:
1853; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1854; AVX-NEXT:    retq
1855  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1856  ret <8 x i16> %shuffle
1857}
1858
; Even lanes take lanes 0-3 of %a; odd lanes are undef. SSE2 and SSSE3 are
; expected to unpack xmm0 with itself (punpcklwd), while SSE4.1 and AVX use a
; single (v)pmovzxwd.
1859define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
1860; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
1861; SSE2:       # BB#0:
1862; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1863; SSE2-NEXT:    retq
1864;
1865; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
1866; SSSE3:       # BB#0:
1867; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1868; SSSE3-NEXT:    retq
1869;
1870; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
1871; SSE41:       # BB#0:
1872; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1873; SSE41-NEXT:    retq
1874;
1875; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
1876; AVX:       # BB#0:
1877; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1878; AVX-NEXT:    retq
1879  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
1880  ret <8 x i16> %shuffle
1881}
1882
; Even lanes take lanes 0-3 of %a; odd lanes read the zero vector (mask 9, 11,
; 13, 15). SSE2 and SSSE3 are expected to interleave with a zeroed register
; via punpcklwd, while SSE4.1 and AVX use a single (v)pmovzxwd.
1883define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
1884; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
1885; SSE2:       # BB#0:
1886; SSE2-NEXT:    pxor %xmm1, %xmm1
1887; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1888; SSE2-NEXT:    retq
1889;
1890; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
1891; SSSE3:       # BB#0:
1892; SSSE3-NEXT:    pxor %xmm1, %xmm1
1893; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1894; SSSE3-NEXT:    retq
1895;
1896; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
1897; SSE41:       # BB#0:
1898; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1899; SSE41-NEXT:    retq
1900;
1901; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
1902; AVX:       # BB#0:
1903; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1904; AVX-NEXT:    retq
1905  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1906  ret <8 x i16> %shuffle
1907}
1908
1909;
1910; Shuffle to logical bit shifts
1911;
; Even lanes read the zero vector (mask 8) and each odd lane takes the even
; lane of %a just below it, i.e. every 32-bit element is shifted left by 16
; bits. The checks expect a single (v)pslld $16.
1912define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
1913; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
1914; SSE:       # BB#0:
1915; SSE-NEXT:    pslld $16, %xmm0
1916; SSE-NEXT:    retq
1917;
1918; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
1919; AVX:       # BB#0:
1920; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
1921; AVX-NEXT:    retq
1922  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
1923  ret <8 x i16> %shuffle
1924}
1925
; In each 64-bit half the lowest lane of %a moves to the top lane and the
; other three lanes read the zero vector (mask 8). The checks expect a single
; (v)psllq $48.
1926define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
1927; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
1928; SSE:       # BB#0:
1929; SSE-NEXT:    psllq $48, %xmm0
1930; SSE-NEXT:    retq
1931;
1932; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
1933; AVX:       # BB#0:
1934; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
1935; AVX-NEXT:    retq
1936  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
1937  ret <8 x i16> %shuffle
1938}
1939
; Lanes 0, 1 and 4 read the zero vector (mask 8), lanes 2, 3 and 6 take lanes
; 0, 1 and 4 of %a, and lanes 5 and 7 are undef ("X" in the name). The undef
; lanes let the mask match a 64-bit left shift; the checks expect a single
; (v)psllq $32.
1940define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
1941; SSE-LABEL: shuffle_v8i16_zz01zX4X:
1942; SSE:       # BB#0:
1943; SSE-NEXT:    psllq $32, %xmm0
1944; SSE-NEXT:    retq
1945;
1946; AVX-LABEL: shuffle_v8i16_zz01zX4X:
1947; AVX:       # BB#0:
1948; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
1949; AVX-NEXT:    retq
1950  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
1951  ret <8 x i16> %shuffle
1952}
1953
; Lanes 0 and 4 read the zero vector (mask 8); the defined remaining lanes
; each take the lane of %a one position below, and lane 2 is undef ("X" in
; the name). The checks expect a single (v)psllq $16.
1954define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
1955; SSE-LABEL: shuffle_v8i16_z0X2z456:
1956; SSE:       # BB#0:
1957; SSE-NEXT:    psllq $16, %xmm0
1958; SSE-NEXT:    retq
1959;
1960; AVX-LABEL: shuffle_v8i16_z0X2z456:
1961; AVX:       # BB#0:
1962; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
1963; AVX-NEXT:    retq
1964  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
1965  ret <8 x i16> %shuffle
1966}
1967
; Odd lanes read the zero vector (mask 8); even lanes take the odd lane of %a
; just above them, with lane 4 undef ("X" in the name). That is a 16-bit right
; shift of every 32-bit element; the checks expect a single (v)psrld $16.
1968define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
1969; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
1970; SSE:       # BB#0:
1971; SSE-NEXT:    psrld $16, %xmm0
1972; SSE-NEXT:    retq
1973;
1974; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
1975; AVX:       # BB#0:
1976; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
1977; AVX-NEXT:    retq
1978  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
1979  ret <8 x i16> %shuffle
1980}
1981
; Lanes 3 and 7 read the zero vector (mask 8); the defined remaining lanes
; each take the lane of %a one position above, and lane 1 is undef ("X" in
; the name). The checks expect a single (v)psrlq $16.
1982define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
1983; SSE-LABEL: shuffle_v8i16_1X3z567z:
1984; SSE:       # BB#0:
1985; SSE-NEXT:    psrlq $16, %xmm0
1986; SSE-NEXT:    retq
1987;
1988; AVX-LABEL: shuffle_v8i16_1X3z567z:
1989; AVX:       # BB#0:
1990; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
1991; AVX-NEXT:    retq
1992  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
1993  ret <8 x i16> %shuffle
1994}
1995
; In each 64-bit half the upper two lanes of %a move to the lower two lanes
; and the upper two lanes read the zero vector (mask 8). The checks expect a
; single (v)psrlq $32.
1996define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
1997; SSE-LABEL: shuffle_v8i16_23zz67zz:
1998; SSE:       # BB#0:
1999; SSE-NEXT:    psrlq $32, %xmm0
2000; SSE-NEXT:    retq
2001;
2002; AVX-LABEL: shuffle_v8i16_23zz67zz:
2003; AVX:       # BB#0:
2004; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
2005; AVX-NEXT:    retq
2006  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2007  ret <8 x i16> %shuffle
2008}
2009
; Lane 0 takes lane 3 of %a, lanes 1 and 5-7 read the zero vector (mask 8),
; and lanes 2-4 are undef ("X" in the name). The undef lanes let the mask
; match a 64-bit right shift; the checks expect a single (v)psrlq $48.
2010define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2011; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2012; SSE:       # BB#0:
2013; SSE-NEXT:    psrlq $48, %xmm0
2014; SSE-NEXT:    retq
2015;
2016; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2017; AVX:       # BB#0:
2018; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
2019; AVX-NEXT:    retq
2020  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2021  ret <8 x i16> %shuffle
2022}
2023
; The low four lanes keep %a in place (lane 2 undef) and the high four lanes
; read the zero vector or are undef. The checks expect a single (v)movq that
; keeps the low 64 bits and zeroes the upper half.
2024define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2025; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2026; SSE:       # BB#0:
2027; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2028; SSE-NEXT:    retq
2029;
2030; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2031; AVX:       # BB#0:
2032; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2033; AVX-NEXT:    retq
2034  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2035  ret <8 x i16> %shuffle
2036}
2037
; Keeps %a in place except lane 1, which reads the zero vector (mask 9).
; SSE2 and SSSE3 are expected to AND with a RIP-relative memory operand;
; SSE4.1 and AVX zero a register and blend with (v)pblendw.
2038define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2039; SSE2-LABEL: shuffle_v8i16_0z234567:
2040; SSE2:       # BB#0:
2041; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2042; SSE2-NEXT:    retq
2043;
2044; SSSE3-LABEL: shuffle_v8i16_0z234567:
2045; SSSE3:       # BB#0:
2046; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2047; SSSE3-NEXT:    retq
2048;
2049; SSE41-LABEL: shuffle_v8i16_0z234567:
2050; SSE41:       # BB#0:
2051; SSE41-NEXT:    pxor %xmm1, %xmm1
2052; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2053; SSE41-NEXT:    retq
2054;
2055; AVX-LABEL: shuffle_v8i16_0z234567:
2056; AVX:       # BB#0:
2057; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2058; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2059; AVX-NEXT:    retq
2060  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2061  ret <8 x i16> %shuffle
2062}
2063
; Keeps lanes 0, 5 and 7 of %a in place; lanes 1-4 and 6 read the zero vector
; (mask 8). SSE2 and SSSE3 are expected to AND with a RIP-relative memory
; operand; SSE4.1 and AVX zero a register and blend with (v)pblendw.
2064define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2065; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2066; SSE2:       # BB#0:
2067; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2068; SSE2-NEXT:    retq
2069;
2070; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2071; SSSE3:       # BB#0:
2072; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2073; SSSE3-NEXT:    retq
2074;
2075; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2076; SSE41:       # BB#0:
2077; SSE41-NEXT:    pxor %xmm1, %xmm1
2078; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2079; SSE41-NEXT:    retq
2080;
2081; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2082; AVX:       # BB#0:
2083; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2084; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2085; AVX-NEXT:    retq
2086  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2087  ret <8 x i16> %shuffle
2088}
2089
; Keeps lanes 0-6 of %a in place and takes only lane 7 from the zero vector
; (mask 15), as the function name states. The mask previously read lane 1
; from the zero vector as well (index 9), which made this a near-duplicate of
; the 0z234567 case and left "zero only the top lane" untested.
; SSE2 and SSSE3 are expected to AND with a RIP-relative memory operand;
; SSE4.1 and AVX zero a register and blend only lane 7 with (v)pblendw.
; The check lines were adjusted by hand to match the corrected mask; rerun
; utils/update_llc_test_checks.py to confirm them.
2090define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2091; SSE2-LABEL: shuffle_v8i16_0123456z:
2092; SSE2:       # BB#0:
2093; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2094; SSE2-NEXT:    retq
2095;
2096; SSSE3-LABEL: shuffle_v8i16_0123456z:
2097; SSSE3:       # BB#0:
2098; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2099; SSSE3-NEXT:    retq
2100;
2101; SSE41-LABEL: shuffle_v8i16_0123456z:
2102; SSE41:       # BB#0:
2103; SSE41-NEXT:    pxor %xmm1, %xmm1
2104; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2105; SSE41-NEXT:    retq
2106;
2107; AVX-LABEL: shuffle_v8i16_0123456z:
2108; AVX:       # BB#0:
2109; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2110; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2111; AVX-NEXT:    retq
2112  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2113  ret <8 x i16> %shuffle
2114}
2115
; Two-input shuffle with undef lanes: lanes 0, 4 and 5 come from %b (mask 15,
; 12, 12) and lanes 2 and 6 from %a (mask 3, 5). The checks expect a byte
; shift of xmm0, a pshufhw of xmm1 and a punpckhdq to combine them; the
; two-operand SSE form needs a final movdqa to return the result in xmm0.
2116define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2117; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2118; SSE:       # BB#0:
2119; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2120; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2121; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2122; SSE-NEXT:    movdqa %xmm1, %xmm0
2123; SSE-NEXT:    retq
2124;
2125; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2126; AVX:       # BB#0:
2127; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2128; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2129; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2130; AVX-NEXT:    retq
2131  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2132  ret <8 x i16> %shuffle
2133}
2134
; Lane 0 reads the zero vector (mask 8), lanes 1-6 take lanes 0-5 of %a and
; lane 7 is undef. The checks expect a single (v)pslldq by 2 bytes.
2135define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2136; SSE-LABEL: shuffle_v8i16_8012345u:
2137; SSE:       # BB#0:
2138; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2139; SSE-NEXT:    retq
2140;
2141; AVX-LABEL: shuffle_v8i16_8012345u:
2142; AVX:       # BB#0:
2143; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2144; AVX-NEXT:    retq
2145  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2146
2147  ret <8 x i16> %shuffle
2148}
2149
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; bitcasts to <8 x i16> and splats i16 lane 0 to all eight lanes.
; AVX2 is expected to fold the load into vpbroadcastw (%rdi). The other
; configurations load with (v)movd and then splat with pshufb (SSSE3, SSE4.1,
; AVX1) or with pshufd + pshuflw + pshufhw (SSE2).
2150define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2151; SSE2-LABEL: insert_dup_mem_v8i16_i32:
2152; SSE2:       # BB#0:
2153; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2154; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2155; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2156; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2157; SSE2-NEXT:    retq
2158;
2159; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
2160; SSSE3:       # BB#0:
2161; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2162; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2163; SSSE3-NEXT:    retq
2164;
2165; SSE41-LABEL: insert_dup_mem_v8i16_i32:
2166; SSE41:       # BB#0:
2167; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2168; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2169; SSE41-NEXT:    retq
2170;
2171; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2172; AVX1:       # BB#0:
2173; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2174; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2175; AVX1-NEXT:    retq
2176;
2177; AVX2-LABEL: insert_dup_mem_v8i16_i32:
2178; AVX2:       # BB#0:
2179; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
2180; AVX2-NEXT:    retq
2181  %tmp = load i32, i32* %ptr, align 4
2182  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2183  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2184  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
2185  ret <8 x i16> %tmp3
2186}
2187
; Loads an i16 from %ptr, sign-extends it to i32, inserts it into element 0 of
; a zero <4 x i32>, bitcasts to <8 x i16> and splats i16 lane 0.
; Every configuration is expected to load with movswl and move the value into
; xmm0 with (v)movd. The splat then uses vpbroadcastw from the register on
; AVX2, pshufb on SSSE3, SSE4.1 and AVX1, and pshufd + pshuflw + pshufhw on
; SSE2.
2188define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2189; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
2190; SSE2:       # BB#0:
2191; SSE2-NEXT:    movswl (%rdi), %eax
2192; SSE2-NEXT:    movd %eax, %xmm0
2193; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2194; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2195; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2196; SSE2-NEXT:    retq
2197;
2198; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
2199; SSSE3:       # BB#0:
2200; SSSE3-NEXT:    movswl (%rdi), %eax
2201; SSSE3-NEXT:    movd %eax, %xmm0
2202; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2203; SSSE3-NEXT:    retq
2204;
2205; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
2206; SSE41:       # BB#0:
2207; SSE41-NEXT:    movswl (%rdi), %eax
2208; SSE41-NEXT:    movd %eax, %xmm0
2209; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2210; SSE41-NEXT:    retq
2211;
2212; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2213; AVX1:       # BB#0:
2214; AVX1-NEXT:    movswl (%rdi), %eax
2215; AVX1-NEXT:    vmovd %eax, %xmm0
2216; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2217; AVX1-NEXT:    retq
2218;
2219; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2220; AVX2:       # BB#0:
2221; AVX2-NEXT:    movswl (%rdi), %eax
2222; AVX2-NEXT:    vmovd %eax, %xmm0
2223; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2224; AVX2-NEXT:    retq
2225  %tmp = load i16, i16* %ptr, align 2
2226  %tmp1 = sext i16 %tmp to i32
2227  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2228  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2229  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
2230  ret <8 x i16> %tmp4
2231}
2232
; Loads an i32 from %ptr, inserts it into element 0 of a zero <4 x i32>,
; bitcasts to <8 x i16> and splats i16 lane 1 (the splat mask is all ones).
; AVX2 is expected to fold the load into vpbroadcastw 2(%rdi), addressing the
; lane at byte offset 2. The other configurations load with (v)movd and then
; splat bytes 2-3 with pshufb (SSSE3, SSE4.1, AVX1) or use pshufd + pshuflw +
; pshufhw (SSE2).
2233define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2234; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2235; SSE2:       # BB#0:
2236; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2237; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2238; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2239; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2240; SSE2-NEXT:    retq
2241;
2242; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
2243; SSSE3:       # BB#0:
2244; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2245; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2246; SSSE3-NEXT:    retq
2247;
2248; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
2249; SSE41:       # BB#0:
2250; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2251; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2252; SSE41-NEXT:    retq
2253;
2254; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2255; AVX1:       # BB#0:
2256; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2257; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2258; AVX1-NEXT:    retq
2259;
2260; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2261; AVX2:       # BB#0:
2262; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2263; AVX2-NEXT:    retq
2264  %tmp = load i32, i32* %ptr, align 4
2265  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2266  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2267  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2268  ret <8 x i16> %tmp3
2269}
2270
; Splat of i16 element 3 taken from a vector built around a single i32 load.
; The i32 at %ptr is inserted into lane 1 of a zeroed <4 x i32>, the vector is
; reinterpreted as <8 x i16>, and element 3 is broadcast to all eight lanes.
; With the little-endian datalayout declared at the top of this file, element 3
; is the upper 16 bits of i32 lane 1, i.e. bytes 2-3 at %ptr. The SSSE3,
; SSE4.1 and AVX1 expectations therefore load into lane 0 and use the byte
; 2,3 pshufb mask, and the AVX2 run expects one vpbroadcastw from offset 2.
; Only the SSE2 sequence first moves the loaded dword into lane 1 via pshufd
; before splatting words 3 and 7.
2271define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2272; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2273; SSE2:       # BB#0:
2274; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2275; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
2276; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2277; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
2278; SSE2-NEXT:    retq
2279;
2280; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2281; SSSE3:       # BB#0:
2282; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2283; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2284; SSSE3-NEXT:    retq
2285;
2286; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2287; SSE41:       # BB#0:
2288; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2289; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2290; SSE41-NEXT:    retq
2291;
2292; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2293; AVX1:       # BB#0:
2294; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2295; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2296; AVX1-NEXT:    retq
2297;
2298; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2299; AVX2:       # BB#0:
2300; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2301; AVX2-NEXT:    retq
2302  %tmp = load i32, i32* %ptr, align 4
2303  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2304  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> ; i16 elements 2-3 alias i32 lane 1
2305  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat of i16 element 3
2306  ret <8 x i16> %tmp3
2307}
2308
; Splat of i16 element 1 taken from a vector built around a sign-extended
; i16 load. The i16 at %ptr is sign-extended to i32, inserted into lane 0 of a
; zeroed <4 x i32>, reinterpreted as <8 x i16>, and element 1 is broadcast to
; all eight lanes. With the little-endian datalayout declared at the top of
; this file, element 1 is the upper half of the sign-extended value, so the
; result depends only on the sign of the loaded i16.
; Every run expects the extension to be done in a GPR (movswl) rather than
; folded into a vector load; the AVX2 run additionally shifts the upper half
; down with shrl before broadcasting word 0.
2309define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2310; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2311; SSE2:       # BB#0:
2312; SSE2-NEXT:    movswl (%rdi), %eax
2313; SSE2-NEXT:    movd %eax, %xmm0
2314; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2315; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2316; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2317; SSE2-NEXT:    retq
2318;
2319; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2320; SSSE3:       # BB#0:
2321; SSSE3-NEXT:    movswl (%rdi), %eax
2322; SSSE3-NEXT:    movd %eax, %xmm0
2323; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2324; SSSE3-NEXT:    retq
2325;
2326; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2327; SSE41:       # BB#0:
2328; SSE41-NEXT:    movswl (%rdi), %eax
2329; SSE41-NEXT:    movd %eax, %xmm0
2330; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2331; SSE41-NEXT:    retq
2332;
2333; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2334; AVX1:       # BB#0:
2335; AVX1-NEXT:    movswl (%rdi), %eax
2336; AVX1-NEXT:    vmovd %eax, %xmm0
2337; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2338; AVX1-NEXT:    retq
2339;
2340; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2341; AVX2:       # BB#0:
2342; AVX2-NEXT:    movswl (%rdi), %eax
2343; AVX2-NEXT:    shrl $16, %eax
2344; AVX2-NEXT:    vmovd %eax, %xmm0
2345; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2346; AVX2-NEXT:    retq
2347  %tmp = load i16, i16* %ptr, align 2
2348  %tmp1 = sext i16 %tmp to i32 ; upper 16 bits become copies of the sign bit
2349  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2350  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> ; i16 elements 0-1 alias i32 lane 0
2351  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat of i16 element 1
2352  ret <8 x i16> %tmp4
2353}
2354
; Splat of i16 element 3 taken from a vector built around a sign-extended
; i16 load. The i16 at %ptr is sign-extended to i32, inserted into lane 1 of a
; zeroed <4 x i32>, reinterpreted as <8 x i16>, and element 3 is broadcast to
; all eight lanes. With the little-endian datalayout declared at the top of
; this file, element 3 is the upper half of i32 lane 1, i.e. the upper half of
; the sign-extended value, so the result depends only on the sign of the
; loaded i16.
; The SSSE3, SSE4.1, AVX1 and AVX2 expectations place the value in lane 0 and
; pick its upper half (byte 2,3 pshufb mask, or shrl plus vpbroadcastw); only
; the SSE2 sequence first moves the dword into lane 1 via pshufd.
2355define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2356; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2357; SSE2:       # BB#0:
2358; SSE2-NEXT:    movswl (%rdi), %eax
2359; SSE2-NEXT:    movd %eax, %xmm0
2360; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
2361; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2362; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
2363; SSE2-NEXT:    retq
2364;
2365; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2366; SSSE3:       # BB#0:
2367; SSSE3-NEXT:    movswl (%rdi), %eax
2368; SSSE3-NEXT:    movd %eax, %xmm0
2369; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2370; SSSE3-NEXT:    retq
2371;
2372; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2373; SSE41:       # BB#0:
2374; SSE41-NEXT:    movswl (%rdi), %eax
2375; SSE41-NEXT:    movd %eax, %xmm0
2376; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2377; SSE41-NEXT:    retq
2378;
2379; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2380; AVX1:       # BB#0:
2381; AVX1-NEXT:    movswl (%rdi), %eax
2382; AVX1-NEXT:    vmovd %eax, %xmm0
2383; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2384; AVX1-NEXT:    retq
2385;
2386; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2387; AVX2:       # BB#0:
2388; AVX2-NEXT:    movswl (%rdi), %eax
2389; AVX2-NEXT:    shrl $16, %eax
2390; AVX2-NEXT:    vmovd %eax, %xmm0
2391; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2392; AVX2-NEXT:    retq
2393  %tmp = load i16, i16* %ptr, align 2
2394  %tmp1 = sext i16 %tmp to i32 ; upper 16 bits become copies of the sign bit
2395  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2396  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> ; i16 elements 2-3 alias i32 lane 1
2397  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; splat of i16 element 3
2398  ret <8 x i16> %tmp4
2399}
2400