; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
; Single-input shuffle (the mask only references %a): result = a[0,1,0,1,2,3,2,3],
; i.e. each of the two lowest adjacent i16 pairs is duplicated.
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the four adjacent i16 pairs are emitted in
; reverse order (result = a[6,7,4,5,2,3,0,1]).
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}
; Two-input shuffle: the upper four elements of %a followed by the lower four
; elements of %b (mask indices 8-11 select from the second operand).
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}
; Splat: every result lane is element 0 of %a (%b is not referenced by the mask).
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00000000:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the low four lanes are element 0 and the high
; four lanes are element 4 (a per-half splat).
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_00004444:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a with undef even lanes: the odd lanes take
; elements 0, 1, 2, 3 in order.
define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a with undef even lanes: the odd lanes take
; elements 4, 5, 6, 7 in order.
define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: lanes 0 and 3 of the low half are exchanged, and
; the two i16 pairs of the high half are swapped (result = a[3,1,2,0,6,7,4,5]).
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_31206745:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the low four lanes are element 4 and the high
; four lanes are element 0.
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: within each half, the two adjacent i16 pairs are
; swapped (result = a[2,3,0,1,6,7,4,5]).
define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: low half is a[2,3,0,2] (element 2 used twice,
; element 1 unused); the two i16 pairs of the high half are swapped.
define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_23026745:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the two i16 pairs of the low half are swapped;
; the high half is a[6,7,4,7] (element 7 used twice, element 5 unused).
define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_23016747:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the two halves trade places and, within each
; half, the first and last lanes are exchanged (result = a[7,5,6,4,3,1,2,0]).
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0, 1, 4 and 5:
; result = a[1,0,5,4,5,4,1,0].
define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: both halves of the result are a[5,4,1,0]
; (only elements 0, 1, 4 and 5 are read).
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0, 1, 4 and 5:
; result = a[5,4,1,0,1,0,5,4].
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: both halves of
; the result are a[0,4,4,0].
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: both halves of
; the result are a[4,0,0,4].
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: a full permutation of the eight lanes with no
; repeats (result = a[2,6,4,0,5,1,7,3]); every result half mixes elements from
; both source halves.
define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: a full permutation of the eight lanes with no
; repeats (result = a[2,0,6,4,5,1,7,3]); every result half mixes elements from
; both source halves.
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: a full permutation of the eight lanes with no
; repeats (result = a[2,6,4,0,1,3,7,5]); every result half mixes elements from
; both source halves.
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a with repeated and unused lanes:
; result = a[6,6,7,5,1,6,4,3] (element 6 is used three times; elements 0 and 2
; are never read).
define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a whose second shufflevector operand is undef
; (the %b parameter is unused): result = a[6,0,5,1,4,7,5,4]. Elements 4 and 5
; are each used twice; elements 2 and 3 are never read.
define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: lanes 0-1 are
; element 0 and lanes 2-7 are element 4.
define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: lanes 2-3 are
; element 0 and every other lane is element 4.
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: lanes 0 and 3
; are element 0 and every other lane is element 4.
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a that reads only elements 0 and 4: lanes 1 and 2
; are element 4 and every other lane is element 0.
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a: the low half is a[0,4,4,0] and the high half
; passes through unchanged (a[4,5,6,7]).
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_04404567:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}
; Single-input shuffle of %a with one undef lane: lane 0 is element 0, lane 1
; is undef, and lanes 2-7 are element 4.
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-source shuffle of %a, mask <4,4,u,0,4,4,4,4> (u = undef lane).
; The baseline SSE2 run expects a pshufd + pshuflw + pshufhw chain; the runs
; with SSSE3 or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Single-source shuffle of %a, mask <u,4,4,0,4,4,4,4> (u = undef lane).
; The baseline SSE2 run expects a pshufd + pshuflw + pshufhw chain; the runs
; with SSSE3 or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
796
; Single-source shuffle of %a, mask <0,1,2,7,u,u,u,u>; only the low four lanes
; are defined and lane 3 pulls element 7 across the 64-bit halves.
; The baseline SSE2 run expects pshufd + pshufhw + pshufd; the runs with SSSE3
; or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
822
; Single-source shuffle of %a, mask <u,u,u,u,4,5,6,3>; only the high four lanes
; are defined and lane 7 pulls element 3 across the 64-bit halves.
; The baseline SSE2 run expects pshufd + pshuflw + pshufd; the runs with SSSE3
; or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}
848
; Single-source shuffle of %a, mask <4,5,6,3,u,u,u,u>; the defined low four
; lanes take three elements from the high half of %a plus element 3.
; The baseline SSE2 run expects pshufd + pshuflw + pshufd; the runs with SSSE3
; or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
874
; Single-source shuffle of %a, mask <0,1,2,7,4,5,6,3>; identity except that
; elements 3 and 7 swap places across the 64-bit halves.
; The baseline SSE2 run expects pshufd + pshufhw + pshufd; the runs with SSSE3
; or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}
900
; Single-source shuffle of %a, mask <4,5,6,3,0,1,2,7>; the two 64-bit halves
; trade places except for elements 3 and 7, which stay in their lanes.
; The baseline SSE2 run expects pshufd + pshuflw + pshufd; the runs with SSSE3
; or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}
926
; Single-source shuffle of %a with the irregular mask <3,7,1,0,2,7,3,5>.
; The baseline SSE2 run expects a six-instruction pshufhw/pshufd/pshuflw chain;
; the runs with SSSE3 or newer expect one (v)pshufb.
define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_37102735:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_37102735:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
  ret <8 x i16> %shuffle
}
955
; Two-source shuffle, mask <0,8,1,9,2,10,3,11>; interleaves the low four
; elements of %a with the low four elements of %b.
; Every run expects a single (v)punpcklwd.
define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08192a3b:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_08192a3b:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %shuffle
}
969
; Two-source shuffle, mask <0,12,1,13,2,14,3,15>; interleaves the low four
; elements of %a with the high four elements of %b.
; Every run expects a (v)pshufd that swaps the halves of xmm1, followed by a
; (v)punpcklwd.
define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
  ret <8 x i16> %shuffle
}
985
; Two-source shuffle, mask <4,12,5,13,6,14,7,15>; interleaves the high four
; elements of %a with the high four elements of %b.
; Every run expects a single (v)punpckhwd.
define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}
999
; Two-source shuffle, mask <4,8,5,9,6,10,7,11>; interleaves the high four
; elements of %a with the low four elements of %b.
; Every run expects a (v)pshufd that swaps the halves of xmm0, followed by a
; (v)punpcklwd.
define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_48596a7b:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_48596a7b:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
  ret <8 x i16> %shuffle
}
1015
; Two-source shuffle, mask <0,8,1,9,6,14,7,15>; interleaves elements 0,1,6,7
; of %a with the same-numbered elements of %b.
; Every run expects a (v)pshufd [0,3,2,3] on each input followed by a
; (v)punpcklwd.
define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08196e7f:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_08196e7f:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}
1033
; Two-source shuffle, mask <0,12,1,13,6,8,7,9>; interleaves elements 0,1,6,7
; of %a with elements 4,5,0,1 of %b.
; Every run expects one (v)pshufd per input ([2,0,2,3] on xmm1, [0,3,2,3] on
; xmm0) followed by a (v)punpcklwd.
define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d6879:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0c1d6879:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
  ret <8 x i16> %shuffle
}
1051
; Two-source shuffle, mask <1,0,9,8,3,2,11,10>; uses only the low four
; elements of each input.
; Every run expects a (v)punpcklwd first. The runs without
; +fast-variable-shuffle then expect (v)pshuflw + (v)pshufhw, while the
; +fast-variable-shuffle runs expect a single vpshufb instead.
define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_109832ba:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_109832ba:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
  ret <8 x i16> %shuffle
}
1095
; Two-source shuffle, mask <8,0,9,1,10,2,11,3>; interleaves the low halves
; with %b's element first in each pair.
; Every run expects a (v)punpcklwd with xmm1 as the first source. The
; two-operand SSE form leaves the result in xmm1, so those runs also expect a
; movdqa into xmm0.
define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_8091a2b3:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_8091a2b3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
  ret <8 x i16> %shuffle
}
; Two-source shuffle, mask <12,4,13,5,14,6,15,7>; interleaves the high halves
; with %b's element first in each pair.
; Every run expects a (v)punpckhwd with xmm1 as the first source. The
; two-operand SSE form leaves the result in xmm1, so those runs also expect a
; movdqa into xmm0.
define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <8 x i16> %shuffle
}
1124
; Two-source shuffle, mask <0,2,1,3,12,14,13,15>; the low four lanes are a
; permutation of %a's low half and the high four lanes are the same
; permutation of %b's high half.
; Each run expects one word shuffle per input plus a combine step; the combine
; instruction differs by feature set (movsd, pblendw, vpblendd or vpunpcklqdq).
define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0213cedf:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0213cedf:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0213cedf:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_0213cedf:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX2-FAST-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-FAST-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX512VL-FAST-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX512VL-FAST-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
  ret <8 x i16> %shuffle
}
1185
; Two-source shuffle, mask <4,4,3,10,u,u,u,u>; three defined lanes come from
; %a and lane 3 takes element 2 of %b (index 10). The high four lanes are undef.
; Expected lowerings: a constant-mask pand/pandn/por merge then pshufd+pshuflw
; on plain SSE2, two pshufb plus por with SSSE3, and a (v)pblendw followed by
; either (v)pshufd+(v)pshuflw or one vpshufb on the remaining runs.
define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_443aXXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_443aXXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_443aXXXX:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
1246
; Two-source shuffle, mask <0,3,2,13,u,u,u,u>; three defined lanes come from
; %a and lane 3 takes element 5 of %b (index 13). The high four lanes are undef.
; Expected lowerings: a movsd/shufps merge plus three word/dword shuffles on
; plain SSE2, two pshufb plus por with SSSE3, and a half-register blend
; (pblendw or vpblendd) followed by one (v)pshufb from SSE4.1 onward.
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_032dXXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_032dXXXX:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
; Shuffle with a single defined lane: mask <u,u,u,13,u,u,u,u>, i.e. lane 3
; takes element 5 of %b and every other lane is undef.
; Every run expects one dword shuffle of xmm1 with pattern [2,2,3,3]
; (pshufd for the SSE runs, vpermilps for the AVX runs).
define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
1297
; Two-source shuffle, mask <0,1,2,13,u,u,u,u>; lanes 0-2 are %a in place and
; lane 3 takes element 5 of %b (index 13). The high four lanes are undef.
; Expected lowerings: pshufd of xmm1 merged through a constant-mask
; pand/pandn/por on plain SSE2, two pshufb plus por with SSSE3, and
; (v)pshufd + (v)pblendw from SSE4.1 onward.
define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dXXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_012dXXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_012dXXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
1329
; Two-source shuffle, mask <u,u,u,u,12,13,14,3>; lanes 4-6 are %b in place
; and lane 7 takes element 3 of %a. The low four lanes are undef.
; Expected lowerings: pshufd of xmm0 merged through a constant-mask
; pand/pandn/por on plain SSE2, two pshufb plus por with SSSE3, and a
; low-half splat of xmm0 (pshufd, or vpbroadcastq with AVX2 and newer)
; followed by (v)pblendw on the remaining runs.
define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2OR512VL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
  ret <8 x i16> %shuffle
}
1368
; Two-source shuffle, mask <12,13,14,3,u,u,u,u>; lanes 0-2 take elements 4-6
; of %b and lane 3 keeps element 3 of %a. The high four lanes are undef.
; Expected lowerings: a half-swapping pshufd of xmm1 merged through a
; constant-mask pand/pandn/por on plain SSE2, two pshufb plus por with SSSE3,
; and (v)pshufd + (v)pblendw from SSE4.1 onward.
define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_cde3XXXX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
1401
; Two-source shuffle, mask <0,1,2,13,12,13,14,3>; lanes 0-2 and 7 come from
; the low half of %a, lanes 3-6 from the high half of %b.
; Expected lowerings: a movsd/shufps merge plus six word/dword shuffles on
; plain SSE2, two pshufb plus por with SSSE3, and a half-register blend
; (pblendw or vpblendd) followed by one (v)pshufb from SSE4.1 onward.
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_012dcde3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_012dcde3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_012dcde3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
  ret <8 x i16> %shuffle
}
1442
; Per-lane blend with no element movement: lanes 1,4,5,6 take the same-numbered
; element of %b (mask indices 9,12,13,14); every other lane keeps %a.
; The SSE2 and SSSE3 checks expect a constant-mask and/andn/or sequence; the
; SSE4.1 and AVX checks expect a single (v)pblendw.
1443define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1444; SSE2-LABEL: shuffle_v8i16_0923cde7:
1445; SSE2:       # %bb.0:
1446; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1447; SSE2-NEXT:    andps %xmm2, %xmm0
1448; SSE2-NEXT:    andnps %xmm1, %xmm2
1449; SSE2-NEXT:    orps %xmm2, %xmm0
1450; SSE2-NEXT:    retq
1451;
1452; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1453; SSSE3:       # %bb.0:
1454; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1455; SSSE3-NEXT:    andps %xmm2, %xmm0
1456; SSSE3-NEXT:    andnps %xmm1, %xmm2
1457; SSSE3-NEXT:    orps %xmm2, %xmm0
1458; SSSE3-NEXT:    retq
1459;
1460; SSE41-LABEL: shuffle_v8i16_0923cde7:
1461; SSE41:       # %bb.0:
1462; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1463; SSE41-NEXT:    retq
1464;
1465; AVX-LABEL: shuffle_v8i16_0923cde7:
1466; AVX:       # %bb.0:
1467; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1468; AVX-NEXT:    retq
1469  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1470  ret <8 x i16> %shuffle
1471}
1472
; Mostly-undef mask: only lanes 3,5,6 (elements 1,5,7 of %a) and lane 7 (mask
; index 9, i.e. element 1 of %b) are defined.
; The -SLOW and -FAST check prefixes diverge here: the runs built with
; +fast-variable-shuffle expect one vpshufb in place of the vpshuflw+vpshufhw pair.
1473define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1474; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1475; SSE2:       # %bb.0:
1476; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1477; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1478; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1479; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1480; SSE2-NEXT:    pand %xmm1, %xmm0
1481; SSE2-NEXT:    pandn %xmm2, %xmm1
1482; SSE2-NEXT:    por %xmm0, %xmm1
1483; SSE2-NEXT:    movdqa %xmm1, %xmm0
1484; SSE2-NEXT:    retq
1485;
1486; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1487; SSSE3:       # %bb.0:
1488; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1489; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1490; SSSE3-NEXT:    por %xmm1, %xmm0
1491; SSSE3-NEXT:    retq
1492;
1493; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1494; SSE41:       # %bb.0:
1495; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1496; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1497; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1498; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1499; SSE41-NEXT:    retq
1500;
1501; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1502; AVX1:       # %bb.0:
1503; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1504; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1505; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1506; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1507; AVX1-NEXT:    retq
1508;
1509; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1510; AVX2-SLOW:       # %bb.0:
1511; AVX2-SLOW-NEXT:    vpbroadcastd %xmm1, %xmm1
1512; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1513; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1514; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1515; AVX2-SLOW-NEXT:    retq
1516;
1517; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1518; AVX2-FAST:       # %bb.0:
1519; AVX2-FAST-NEXT:    vpbroadcastd %xmm1, %xmm1
1520; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1521; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1522; AVX2-FAST-NEXT:    retq
1523;
1524; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1525; AVX512VL-SLOW:       # %bb.0:
1526; AVX512VL-SLOW-NEXT:    vpbroadcastd %xmm1, %xmm1
1527; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1528; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1529; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1530; AVX512VL-SLOW-NEXT:    retq
1531;
1532; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
1533; AVX512VL-FAST:       # %bb.0:
1534; AVX512VL-FAST-NEXT:    vpbroadcastd %xmm1, %xmm1
1535; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1536; AVX512VL-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1537; AVX512VL-FAST-NEXT:    retq
1538  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1539  ret <8 x i16> %shuffle
1540}
1541
; Lane 2 is element 4 of %a; lanes 4-6 are the even elements 0,2,4 of %b (mask
; indices 8,10,12); every remaining lane is undef.
1542define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1543; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1544; SSE2:       # %bb.0:
1545; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1546; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1547; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1548; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
1549; SSE2-NEXT:    retq
1550;
1551; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1552; SSSE3:       # %bb.0:
1553; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1554; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1555; SSSE3-NEXT:    por %xmm1, %xmm0
1556; SSSE3-NEXT:    retq
1557;
1558; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1559; SSE41:       # %bb.0:
1560; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1561; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1562; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1563; SSE41-NEXT:    retq
1564;
1565; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1566; AVX1:       # %bb.0:
1567; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1568; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1569; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1570; AVX1-NEXT:    retq
1571;
1572; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX:
1573; AVX2OR512VL:       # %bb.0:
1574; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1575; AVX2OR512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1576; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1577; AVX2OR512VL-NEXT:    retq
1578  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1579  ret <8 x i16> %shuffle
1580}
1581
; Scalar %i is inserted into element 0 of %a and the mask places it (index 8)
; in lane 0; lanes 1-7 read the zeroinitializer first operand.
; The checks expect a zero-extending scalar move (movzwl + movd/vmovd) and no
; vector shuffle instruction.
1582define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1583; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1584; SSE:       # %bb.0:
1585; SSE-NEXT:    movzwl %di, %eax
1586; SSE-NEXT:    movd %eax, %xmm0
1587; SSE-NEXT:    retq
1588;
1589; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1590; AVX:       # %bb.0:
1591; AVX-NEXT:    movzwl %di, %eax
1592; AVX-NEXT:    vmovd %eax, %xmm0
1593; AVX-NEXT:    retq
1594  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1595  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1596  ret <8 x i16> %shuffle
1597}
1598
; Only lane 1 selects %i (mask index 8). Every other mask index (2,3,7,6,5,4,3)
; is below 8 and so reads the all-zero first operand; their exact values do not
; change the result. The checks expect a zeroed register plus pinsrw into word 1.
1599define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1600; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1601; SSE:       # %bb.0:
1602; SSE-NEXT:    pxor %xmm0, %xmm0
1603; SSE-NEXT:    pinsrw $1, %edi, %xmm0
1604; SSE-NEXT:    retq
1605;
1606; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1607; AVX:       # %bb.0:
1608; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1609; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
1610; AVX-NEXT:    retq
1611  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1612  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1613  ret <8 x i16> %shuffle
1614}
1615
; Lane 5 selects %i (mask index 8); all other lanes use index 0 of the
; zeroinitializer first operand. The checks expect a zeroed register plus
; pinsrw into word 5.
1616define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1617; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1618; SSE:       # %bb.0:
1619; SSE-NEXT:    pxor %xmm0, %xmm0
1620; SSE-NEXT:    pinsrw $5, %edi, %xmm0
1621; SSE-NEXT:    retq
1622;
1623; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1624; AVX:       # %bb.0:
1625; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1626; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
1627; AVX-NEXT:    retq
1628  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1629  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1630  ret <8 x i16> %shuffle
1631}
1632
; Lane 7 selects %i (mask index 8); lanes 0,3,6 read the zeroinitializer first
; operand and lanes 1,2,4,5 are undef. The checks expect a zeroed register plus
; pinsrw into word 7.
1633define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1634; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1635; SSE:       # %bb.0:
1636; SSE-NEXT:    pxor %xmm0, %xmm0
1637; SSE-NEXT:    pinsrw $7, %edi, %xmm0
1638; SSE-NEXT:    retq
1639;
1640; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1641; AVX:       # %bb.0:
1642; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1643; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
1644; AVX-NEXT:    retq
1645  %a = insertelement <8 x i16> undef, i16 %i, i32 0
1646  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1647  ret <8 x i16> %shuffle
1648}
1649
; Here %i is inserted into element 3 of %a, not element 0. Mask index 11 (hex B,
; i.e. element 3 of the second operand) moves it to lane 2; the other lanes read
; the zeroinitializer first operand. The checks expect pinsrw into word 2.
1650define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1651; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1652; SSE:       # %bb.0:
1653; SSE-NEXT:    pxor %xmm0, %xmm0
1654; SSE-NEXT:    pinsrw $2, %edi, %xmm0
1655; SSE-NEXT:    retq
1656;
1657; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1658; AVX:       # %bb.0:
1659; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
1660; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
1661; AVX-NEXT:    retq
1662  %a = insertelement <8 x i16> undef, i16 %i, i32 3
1663  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1664  ret <8 x i16> %shuffle
1665}
1666
; Contiguous 8-element window: the last three elements of %b (mask indices
; 13,14,15) followed by elements 0-4 of %a.
; The SSSE3, SSE4.1 and AVX checks expect a single (v)palignr; the SSE2 checks
; expect psrldq + pslldq + por.
1667define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1668; SSE2-LABEL: shuffle_v8i16_def01234:
1669; SSE2:       # %bb.0:
1670; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1671; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1672; SSE2-NEXT:    por %xmm1, %xmm0
1673; SSE2-NEXT:    retq
1674;
1675; SSSE3-LABEL: shuffle_v8i16_def01234:
1676; SSSE3:       # %bb.0:
1677; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1678; SSSE3-NEXT:    retq
1679;
1680; SSE41-LABEL: shuffle_v8i16_def01234:
1681; SSE41:       # %bb.0:
1682; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1683; SSE41-NEXT:    retq
1684;
1685; AVX-LABEL: shuffle_v8i16_def01234:
1686; AVX:       # %bb.0:
1687; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1688; AVX-NEXT:    retq
1689  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1690  ret <8 x i16> %shuffle
1691}
1692
; Partially-undef mask: only lane 1 (mask index 14, element 6 of %b) and lanes
; 4-6 (elements 1,2,3 of %a) are defined. The defined lanes fit the window
; 13,14,15,0,1,2,3,4, and the checks expect the matching (v)palignr, or the
; psrldq + pslldq + por sequence on plain SSE2.
1693define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1694; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1695; SSE2:       # %bb.0:
1696; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1697; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1698; SSE2-NEXT:    por %xmm1, %xmm0
1699; SSE2-NEXT:    retq
1700;
1701; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1702; SSSE3:       # %bb.0:
1703; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1704; SSSE3-NEXT:    retq
1705;
1706; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1707; SSE41:       # %bb.0:
1708; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1709; SSE41-NEXT:    retq
1710;
1711; AVX-LABEL: shuffle_v8i16_ueuu123u:
1712; AVX:       # %bb.0:
1713; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1714; AVX-NEXT:    retq
1715  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1716  ret <8 x i16> %shuffle
1717}
1718
; Single-input rotation: every mask index is below 8, so only %a is read and %b
; is unused. Elements 5,6,7 move to the front, followed by elements 0-4.
; The SSSE3 and later checks expect (v)palignr with xmm0 as both sources; the
; SSE2 checks need a register copy before the two byte shifts and the por.
1719define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1720; SSE2-LABEL: shuffle_v8i16_56701234:
1721; SSE2:       # %bb.0:
1722; SSE2-NEXT:    movdqa %xmm0, %xmm1
1723; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1724; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1725; SSE2-NEXT:    por %xmm1, %xmm0
1726; SSE2-NEXT:    retq
1727;
1728; SSSE3-LABEL: shuffle_v8i16_56701234:
1729; SSSE3:       # %bb.0:
1730; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1731; SSSE3-NEXT:    retq
1732;
1733; SSE41-LABEL: shuffle_v8i16_56701234:
1734; SSE41:       # %bb.0:
1735; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1736; SSE41-NEXT:    retq
1737;
1738; AVX-LABEL: shuffle_v8i16_56701234:
1739; AVX:       # %bb.0:
1740; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1741; AVX-NEXT:    retq
1742  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1743  ret <8 x i16> %shuffle
1744}
1745
; Partially-undef single-input mask: only lane 1 (element 6 of %a) and lanes
; 4-6 (elements 1,2,3 of %a) are defined. The defined lanes fit the rotation
; 5,6,7,0,1,2,3,4, and the checks expect the corresponding rotation code.
1746define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1747; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1748; SSE2:       # %bb.0:
1749; SSE2-NEXT:    movdqa %xmm0, %xmm1
1750; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1751; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1752; SSE2-NEXT:    por %xmm1, %xmm0
1753; SSE2-NEXT:    retq
1754;
1755; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1756; SSSE3:       # %bb.0:
1757; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1758; SSSE3-NEXT:    retq
1759;
1760; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1761; SSE41:       # %bb.0:
1762; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1763; SSE41-NEXT:    retq
1764;
1765; AVX-LABEL: shuffle_v8i16_u6uu123u:
1766; AVX:       # %bb.0:
1767; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1768; AVX-NEXT:    retq
1769  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1770  ret <8 x i16> %shuffle
1771}
1772
; Only lanes 4-6 are defined (elements 1,2,3 of %a). With nothing required in
; the low lanes, the checks expect a single 6-byte left byte shift
; ((v)pslldq) and no second shift or por.
1773define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1774; SSE-LABEL: shuffle_v8i16_uuuu123u:
1775; SSE:       # %bb.0:
1776; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1777; SSE-NEXT:    retq
1778;
1779; AVX-LABEL: shuffle_v8i16_uuuu123u:
1780; AVX:       # %bb.0:
1781; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1782; AVX-NEXT:    retq
1783  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1784  ret <8 x i16> %shuffle
1785}
1786
; Contiguous 8-element window: elements 3-7 of %b (mask indices 11-15) followed
; by elements 0-2 of %a.
; The SSSE3 and later checks expect a single (v)palignr; the SSE2 checks expect
; psrldq + pslldq + por.
1787define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1788; SSE2-LABEL: shuffle_v8i16_bcdef012:
1789; SSE2:       # %bb.0:
1790; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1791; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1792; SSE2-NEXT:    por %xmm1, %xmm0
1793; SSE2-NEXT:    retq
1794;
1795; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1796; SSSE3:       # %bb.0:
1797; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1798; SSSE3-NEXT:    retq
1799;
1800; SSE41-LABEL: shuffle_v8i16_bcdef012:
1801; SSE41:       # %bb.0:
1802; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1803; SSE41-NEXT:    retq
1804;
1805; AVX-LABEL: shuffle_v8i16_bcdef012:
1806; AVX:       # %bb.0:
1807; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1808; AVX-NEXT:    retq
1809  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1810  ret <8 x i16> %shuffle
1811}
1812
; Partially-undef mask: lanes 1-3 are elements 4,5,6 of %b (mask indices
; 12,13,14) and lane 6 is element 1 of %a; the rest are undef. The defined
; lanes fit the window 11,12,13,14,15,0,1,2, and the checks expect the
; corresponding (v)palignr or SSE2 shift-and-or code.
1813define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1814; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1815; SSE2:       # %bb.0:
1816; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1817; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1818; SSE2-NEXT:    por %xmm1, %xmm0
1819; SSE2-NEXT:    retq
1820;
1821; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1822; SSSE3:       # %bb.0:
1823; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1824; SSSE3-NEXT:    retq
1825;
1826; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1827; SSE41:       # %bb.0:
1828; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1829; SSE41-NEXT:    retq
1830;
1831; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1832; AVX:       # %bb.0:
1833; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1834; AVX-NEXT:    retq
1835  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1836  ret <8 x i16> %shuffle
1837}
1838
; Single-input rotation: every mask index is below 8, so only %a is read.
; Elements 3-7 come first, followed by elements 0-2.
; The SSSE3 and later checks expect (v)palignr with xmm0 as both sources; the
; SSE2 checks need a register copy before the two byte shifts and the por.
1839define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1840; SSE2-LABEL: shuffle_v8i16_34567012:
1841; SSE2:       # %bb.0:
1842; SSE2-NEXT:    movdqa %xmm0, %xmm1
1843; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1844; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1845; SSE2-NEXT:    por %xmm1, %xmm0
1846; SSE2-NEXT:    retq
1847;
1848; SSSE3-LABEL: shuffle_v8i16_34567012:
1849; SSSE3:       # %bb.0:
1850; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1851; SSSE3-NEXT:    retq
1852;
1853; SSE41-LABEL: shuffle_v8i16_34567012:
1854; SSE41:       # %bb.0:
1855; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1856; SSE41-NEXT:    retq
1857;
1858; AVX-LABEL: shuffle_v8i16_34567012:
1859; AVX:       # %bb.0:
1860; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1861; AVX-NEXT:    retq
1862  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1863  ret <8 x i16> %shuffle
1864}
1865
; Partially-undef single-input mask: lanes 1-3 are elements 4,5,6 of %a and
; lane 6 is element 1 of %a; the rest are undef. The defined lanes fit the
; rotation 3,4,5,6,7,0,1,2, and the checks expect the corresponding rotation
; code.
1866define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1867; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1868; SSE2:       # %bb.0:
1869; SSE2-NEXT:    movdqa %xmm0, %xmm1
1870; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1871; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1872; SSE2-NEXT:    por %xmm1, %xmm0
1873; SSE2-NEXT:    retq
1874;
1875; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1876; SSSE3:       # %bb.0:
1877; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1878; SSSE3-NEXT:    retq
1879;
1880; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1881; SSE41:       # %bb.0:
1882; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1883; SSE41-NEXT:    retq
1884;
1885; AVX-LABEL: shuffle_v8i16_u456uu1u:
1886; AVX:       # %bb.0:
1887; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1888; AVX-NEXT:    retq
1889  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1890  ret <8 x i16> %shuffle
1891}
1892
; Only lanes 1-3 are defined (elements 4,5,6 of %a). With nothing required in
; the high lanes, the checks expect a single 6-byte right byte shift
; ((v)psrldq).
1893define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1894; SSE-LABEL: shuffle_v8i16_u456uuuu:
1895; SSE:       # %bb.0:
1896; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1897; SSE-NEXT:    retq
1898;
1899; AVX-LABEL: shuffle_v8i16_u456uuuu:
1900; AVX:       # %bb.0:
1901; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1902; AVX-NEXT:    retq
1903  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1904  ret <8 x i16> %shuffle
1905}
1906
; Contiguous 8-element window: elements 3-7 of %a followed by elements 0-2 of
; %b (mask indices 8,9,10).
; In the SSSE3 and SSE4.1 checks palignr writes xmm1, so a movdqa back into
; xmm0 follows; the AVX checks write xmm0 directly and have no copy.
1907define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1908; SSE2-LABEL: shuffle_v8i16_3456789a:
1909; SSE2:       # %bb.0:
1910; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1911; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1912; SSE2-NEXT:    por %xmm1, %xmm0
1913; SSE2-NEXT:    retq
1914;
1915; SSSE3-LABEL: shuffle_v8i16_3456789a:
1916; SSSE3:       # %bb.0:
1917; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1918; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1919; SSSE3-NEXT:    retq
1920;
1921; SSE41-LABEL: shuffle_v8i16_3456789a:
1922; SSE41:       # %bb.0:
1923; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1924; SSE41-NEXT:    movdqa %xmm1, %xmm0
1925; SSE41-NEXT:    retq
1926;
1927; AVX-LABEL: shuffle_v8i16_3456789a:
1928; AVX:       # %bb.0:
1929; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1930; AVX-NEXT:    retq
1931  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1932  ret <8 x i16> %shuffle
1933}
1934
; Partially-undef mask: lanes 1-3 are elements 4,5,6 of %a and lane 6 is mask
; index 9 (element 1 of %b); the rest are undef. The defined lanes fit the
; window 3,4,5,6,7,8,9,10, and the checks expect the corresponding (v)palignr
; or SSE2 shift-and-or code.
1935define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1936; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1937; SSE2:       # %bb.0:
1938; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1939; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1940; SSE2-NEXT:    por %xmm1, %xmm0
1941; SSE2-NEXT:    retq
1942;
1943; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1944; SSSE3:       # %bb.0:
1945; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1946; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1947; SSSE3-NEXT:    retq
1948;
1949; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1950; SSE41:       # %bb.0:
1951; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1952; SSE41-NEXT:    movdqa %xmm1, %xmm0
1953; SSE41-NEXT:    retq
1954;
1955; AVX-LABEL: shuffle_v8i16_u456uu9u:
1956; AVX:       # %bb.0:
1957; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1958; AVX-NEXT:    retq
1959  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1960  ret <8 x i16> %shuffle
1961}
1962
; Contiguous 8-element window: elements 5-7 of %a followed by elements 0-4 of
; %b (mask indices 8-12).
; In the SSSE3 and SSE4.1 checks palignr writes xmm1, so a movdqa back into
; xmm0 follows; the AVX checks write xmm0 directly and have no copy.
1963define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1964; SSE2-LABEL: shuffle_v8i16_56789abc:
1965; SSE2:       # %bb.0:
1966; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1967; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1968; SSE2-NEXT:    por %xmm1, %xmm0
1969; SSE2-NEXT:    retq
1970;
1971; SSSE3-LABEL: shuffle_v8i16_56789abc:
1972; SSSE3:       # %bb.0:
1973; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1974; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1975; SSSE3-NEXT:    retq
1976;
1977; SSE41-LABEL: shuffle_v8i16_56789abc:
1978; SSE41:       # %bb.0:
1979; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1980; SSE41-NEXT:    movdqa %xmm1, %xmm0
1981; SSE41-NEXT:    retq
1982;
1983; AVX-LABEL: shuffle_v8i16_56789abc:
1984; AVX:       # %bb.0:
1985; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1986; AVX-NEXT:    retq
1987  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1988  ret <8 x i16> %shuffle
1989}
1990
; Partially-undef mask: lane 1 is element 6 of %a and lanes 4-6 are mask
; indices 9,10,11 (elements 1,2,3 of %b); the rest are undef. The defined lanes
; fit the window 5,6,7,8,9,10,11,12, and the checks expect the corresponding
; (v)palignr or SSE2 shift-and-or code.
1991define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1992; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1993; SSE2:       # %bb.0:
1994; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1995; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1996; SSE2-NEXT:    por %xmm1, %xmm0
1997; SSE2-NEXT:    retq
1998;
1999; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
2000; SSSE3:       # %bb.0:
2001; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2002; SSSE3-NEXT:    movdqa %xmm1, %xmm0
2003; SSSE3-NEXT:    retq
2004;
2005; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
2006; SSE41:       # %bb.0:
2007; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2008; SSE41-NEXT:    movdqa %xmm1, %xmm0
2009; SSE41-NEXT:    retq
2010;
2011; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2012; AVX:       # %bb.0:
2013; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2014; AVX-NEXT:    retq
2015  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2016  ret <8 x i16> %shuffle
2017}
2018
; Lanes 0 and 4 take elements 0 and 1 of %a; all other lanes are undef.
; The SSE4.1 and AVX checks expect a single (v)pmovzxwq (which fills the undef
; lanes with zero); the SSE2 and SSSE3 checks expect pshufd + pshufhw.
2019define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2020; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2021; SSE2:       # %bb.0:
2022; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2023; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2024; SSE2-NEXT:    retq
2025;
2026; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2027; SSSE3:       # %bb.0:
2028; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2029; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2030; SSSE3-NEXT:    retq
2031;
2032; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2033; SSE41:       # %bb.0:
2034; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2035; SSE41-NEXT:    retq
2036;
2037; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2038; AVX:       # %bb.0:
2039; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2040; AVX-NEXT:    retq
2041  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2042  ret <8 x i16> %shuffle
2043}
2044
; Lanes 0 and 4 take elements 0 and 1 of %a; the remaining lanes use mask
; indices 9-11 and 13-15, which read the zeroinitializer second operand.
; This is a 16-bit to 64-bit zero extension of the two low elements: the SSE4.1
; and AVX checks expect (v)pmovzxwq, the older ones pxor plus two unpacks.
2045define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2046; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2047; SSE2:       # %bb.0:
2048; SSE2-NEXT:    pxor %xmm1, %xmm1
2049; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2050; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2051; SSE2-NEXT:    retq
2052;
2053; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2054; SSSE3:       # %bb.0:
2055; SSSE3-NEXT:    pxor %xmm1, %xmm1
2056; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2057; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2058; SSSE3-NEXT:    retq
2059;
2060; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2061; SSE41:       # %bb.0:
2062; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2063; SSE41-NEXT:    retq
2064;
2065; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2066; AVX:       # %bb.0:
2067; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2068; AVX-NEXT:    retq
2069  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2070  ret <8 x i16> %shuffle
2071}
2072
; Even lanes take elements 0-3 of %a; odd lanes are undef.
; The SSE2 and SSSE3 checks expect punpcklwd of xmm0 with itself; the SSE4.1
; and AVX checks expect (v)pmovzxwd (which fills the undef lanes with zero).
2073define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2074; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2075; SSE2:       # %bb.0:
2076; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2077; SSE2-NEXT:    retq
2078;
2079; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2080; SSSE3:       # %bb.0:
2081; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2082; SSSE3-NEXT:    retq
2083;
2084; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2085; SSE41:       # %bb.0:
2086; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2087; SSE41-NEXT:    retq
2088;
2089; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2090; AVX:       # %bb.0:
2091; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2092; AVX-NEXT:    retq
2093  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2094  ret <8 x i16> %shuffle
2095}
2096
; Even lanes take elements 0-3 of %a; odd lanes use mask indices 9,11,13,15,
; which read the zeroinitializer second operand.
; This is a 16-bit to 32-bit zero extension of the low four elements: the
; SSE4.1 and AVX checks expect (v)pmovzxwd, the older ones pxor + punpcklwd.
2097define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2098; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2099; SSE2:       # %bb.0:
2100; SSE2-NEXT:    pxor %xmm1, %xmm1
2101; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2102; SSE2-NEXT:    retq
2103;
2104; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2105; SSSE3:       # %bb.0:
2106; SSSE3-NEXT:    pxor %xmm1, %xmm1
2107; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2108; SSSE3-NEXT:    retq
2109;
2110; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2111; SSE41:       # %bb.0:
2112; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2113; SSE41-NEXT:    retq
2114;
2115; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2116; AVX:       # %bb.0:
2117; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2118; AVX-NEXT:    retq
2119  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2120  ret <8 x i16> %shuffle
2121}
2122
; Single-input mask (all indices below 8): the pattern 0,1,1,0 of %a is
; repeated in both 64-bit halves of the result.
; Most checks expect (v)pshuflw followed by a duplicate of the low 64 bits
; ((v)pshufd, or vpbroadcastq under the AVX2-SLOW prefix); the runs built with
; +fast-variable-shuffle expect a single vpshufb.
2123define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2124; SSE-LABEL: shuffle_v8i16_01100110:
2125; SSE:       # %bb.0:
2126; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2127; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2128; SSE-NEXT:    retq
2129;
2130; AVX1-LABEL: shuffle_v8i16_01100110:
2131; AVX1:       # %bb.0:
2132; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2133; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2134; AVX1-NEXT:    retq
2135;
2136; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
2137; AVX2-SLOW:       # %bb.0:
2138; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2139; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
2140; AVX2-SLOW-NEXT:    retq
2141;
2142; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
2143; AVX2-FAST:       # %bb.0:
2144; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2145; AVX2-FAST-NEXT:    retq
2146;
2147; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
2148; AVX512VL-SLOW:       # %bb.0:
2149; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2150; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2151; AVX512VL-SLOW-NEXT:    retq
2152;
2153; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
2154; AVX512VL-FAST:       # %bb.0:
2155; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2156; AVX512VL-FAST-NEXT:    retq
2157  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2158  ret <8 x i16> %shuffle
2159}
2160
; Single-input mask 0,1,u,0,u,1,1,0: lanes 2 and 4 are undef, and the defined
; lanes fit the fully-defined pattern 0,1,1,0,0,1,1,0.
; The checks expect (v)pshuflw plus a duplicate of the low 64 bits, or a single
; vpshufb in the runs built with +fast-variable-shuffle.
2161define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2162; SSE-LABEL: shuffle_v8i16_01u0u110:
2163; SSE:       # %bb.0:
2164; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2165; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2166; SSE-NEXT:    retq
2167;
2168; AVX1-LABEL: shuffle_v8i16_01u0u110:
2169; AVX1:       # %bb.0:
2170; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2171; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2172; AVX1-NEXT:    retq
2173;
2174; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
2175; AVX2-SLOW:       # %bb.0:
2176; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2177; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
2178; AVX2-SLOW-NEXT:    retq
2179;
2180; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
2181; AVX2-FAST:       # %bb.0:
2182; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2183; AVX2-FAST-NEXT:    retq
2184;
2185; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
2186; AVX512VL-SLOW:       # %bb.0:
2187; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2188; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2189; AVX512VL-SLOW-NEXT:    retq
2190;
2191; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
2192; AVX512VL-FAST:       # %bb.0:
2193; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2194; AVX512VL-FAST-NEXT:    retq
2195  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2196  ret <8 x i16> %shuffle
2197}
2198
; Single-input shuffle with mask <4,6,7,u,u,6,7,5>: every defined lane reads
; one of the high four words of %a. The expected code permutes the high words
; into the order 4,6,7,5 and then copies that 64-bit half into both halves,
; which also satisfies the two undef lanes.
2199define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2200; SSE-LABEL: shuffle_v8i16_467uu675:
2201; SSE:       # %bb.0:
2202; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2203; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2204; SSE-NEXT:    retq
2205;
2206; AVX1-LABEL: shuffle_v8i16_467uu675:
2207; AVX1:       # %bb.0:
2208; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2209; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2210; AVX1-NEXT:    retq
2211;
2212; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
2213; AVX2-SLOW:       # %bb.0:
2214; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2215; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2216; AVX2-SLOW-NEXT:    retq
2217;
2218; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
2219; AVX2-FAST:       # %bb.0:
2220; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2221; AVX2-FAST-NEXT:    retq
2222;
2223; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
2224; AVX512VL-SLOW:       # %bb.0:
2225; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2226; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2227; AVX512VL-SLOW-NEXT:    retq
2228;
2229; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
2230; AVX512VL-FAST:       # %bb.0:
2231; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
2232; AVX512VL-FAST-NEXT:    retq
2233  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2234  ret <8 x i16> %shuffle
2235}
2236
2237;
2238; Shuffle to logical bit shifts
2239;
; Mask <8,0,8,2,8,4,8,6>: index 8 selects a lane of the zeroinitializer
; operand, so each even word of %a moves up one word position and the word
; below it becomes zero. Expected to lower to a single 32-bit-lane left shift
; by 16 bits.
2240define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2241; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2242; SSE:       # %bb.0:
2243; SSE-NEXT:    pslld $16, %xmm0
2244; SSE-NEXT:    retq
2245;
2246; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2247; AVX:       # %bb.0:
2248; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
2249; AVX-NEXT:    retq
2250  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2251  ret <8 x i16> %shuffle
2252}
2253
; Mask <8,8,8,0,8,8,8,4>: words 0 and 4 of %a land in the top word of their
; 64-bit half and every other lane is zero (index 8 selects from the
; zeroinitializer operand). Expected to lower to a single 64-bit-lane left
; shift by 48 bits.
2254define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2255; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2256; SSE:       # %bb.0:
2257; SSE-NEXT:    psllq $48, %xmm0
2258; SSE-NEXT:    retq
2259;
2260; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2261; AVX:       # %bb.0:
2262; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
2263; AVX-NEXT:    retq
2264  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2265  ret <8 x i16> %shuffle
2266}
2267
; Mask <8,8,0,1,8,u,4,u>: zero lanes (index 8) followed by the low words of
; each 64-bit half of %a, with result lanes 5 and 7 left undef. Expected to
; lower to a single 64-bit-lane left shift by 32 bits; the undef lanes accept
; whatever that shift produces.
2268define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2269; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2270; SSE:       # %bb.0:
2271; SSE-NEXT:    psllq $32, %xmm0
2272; SSE-NEXT:    retq
2273;
2274; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2275; AVX:       # %bb.0:
2276; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
2277; AVX-NEXT:    retq
2278  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2279  ret <8 x i16> %shuffle
2280}
2281
; Mask <8,0,u,2,8,4,5,6>: each 64-bit half of %a moves up by one word with a
; zero lane (index 8) inserted at the bottom; result lane 2 is undef.
; Expected to lower to a single 64-bit-lane left shift by 16 bits.
2282define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2283; SSE-LABEL: shuffle_v8i16_z0X2z456:
2284; SSE:       # %bb.0:
2285; SSE-NEXT:    psllq $16, %xmm0
2286; SSE-NEXT:    retq
2287;
2288; AVX-LABEL: shuffle_v8i16_z0X2z456:
2289; AVX:       # %bb.0:
2290; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
2291; AVX-NEXT:    retq
2292  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2293  ret <8 x i16> %shuffle
2294}
2295
; Mask <1,8,3,8,u,8,7,8>: each odd word of %a moves down one word position and
; the word above it becomes zero (index 8 selects from the zeroinitializer
; operand); result lane 4 is undef. Expected to lower to a single 32-bit-lane
; logical right shift by 16 bits.
2296define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2297; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2298; SSE:       # %bb.0:
2299; SSE-NEXT:    psrld $16, %xmm0
2300; SSE-NEXT:    retq
2301;
2302; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2303; AVX:       # %bb.0:
2304; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
2305; AVX-NEXT:    retq
2306  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2307  ret <8 x i16> %shuffle
2308}
2309
; Mask <1,u,3,8,5,6,7,8>: each 64-bit half of %a moves down by one word with a
; zero lane (index 8) at the top; result lane 1 is undef. Expected to lower to
; a single 64-bit-lane logical right shift by 16 bits.
2310define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2311; SSE-LABEL: shuffle_v8i16_1X3z567z:
2312; SSE:       # %bb.0:
2313; SSE-NEXT:    psrlq $16, %xmm0
2314; SSE-NEXT:    retq
2315;
2316; AVX-LABEL: shuffle_v8i16_1X3z567z:
2317; AVX:       # %bb.0:
2318; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
2319; AVX-NEXT:    retq
2320  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2321  ret <8 x i16> %shuffle
2322}
2323
; Mask <2,3,8,8,6,7,8,8>: the upper two words of each 64-bit half of %a move
; to the bottom of that half and the upper two lanes become zero (index 8
; selects from the zeroinitializer operand). Expected to lower to a single
; 64-bit-lane logical right shift by 32 bits.
2324define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2325; SSE-LABEL: shuffle_v8i16_23zz67zz:
2326; SSE:       # %bb.0:
2327; SSE-NEXT:    psrlq $32, %xmm0
2328; SSE-NEXT:    retq
2329;
2330; AVX-LABEL: shuffle_v8i16_23zz67zz:
2331; AVX:       # %bb.0:
2332; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
2333; AVX-NEXT:    retq
2334  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2335  ret <8 x i16> %shuffle
2336}
2337
; Mask <3,8,u,u,u,8,8,8>: word 3 of %a goes to lane 0, lanes 1 and 5..7 are
; zero (index 8 selects from the zeroinitializer operand), and lanes 2..4 are
; undef. Expected to lower to a single 64-bit-lane logical right shift by
; 48 bits, with the undef lanes taking whatever that shift produces.
2338define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2339; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2340; SSE:       # %bb.0:
2341; SSE-NEXT:    psrlq $48, %xmm0
2342; SSE-NEXT:    retq
2343;
2344; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2345; AVX:       # %bb.0:
2346; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
2347; AVX-NEXT:    retq
2348  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2349  ret <8 x i16> %shuffle
2350}
2351
; Mask <0,1,u,3,8,8,u,8>: the low four lanes keep words 0..3 of %a (lane 2 is
; undef) and the high four lanes are zero or undef (index 8 selects from the
; zeroinitializer operand). Expected to lower to a single movq that keeps the
; low 64 bits and zeroes the rest.
2352define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2353; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2354; SSE:       # %bb.0:
2355; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
2356; SSE-NEXT:    retq
2357;
2358; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2359; AVX:       # %bb.0:
2360; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2361; AVX-NEXT:    retq
2362  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2363  ret <8 x i16> %shuffle
2364}
2365
; Mask <0,9,2,3,4,5,6,7>: identity on %a except lane 1, which takes a lane of
; the zeroinitializer operand (index 9). The assertions expect an andps with a
; RIP-relative memory operand (presumably a constant-pool mask) on the runs
; without SSE4.1, and a blend against a zeroed register on the others.
2366define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2367; SSE2-LABEL: shuffle_v8i16_0z234567:
2368; SSE2:       # %bb.0:
2369; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2370; SSE2-NEXT:    retq
2371;
2372; SSSE3-LABEL: shuffle_v8i16_0z234567:
2373; SSSE3:       # %bb.0:
2374; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2375; SSSE3-NEXT:    retq
2376;
2377; SSE41-LABEL: shuffle_v8i16_0z234567:
2378; SSE41:       # %bb.0:
2379; SSE41-NEXT:    pxor %xmm1, %xmm1
2380; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2381; SSE41-NEXT:    retq
2382;
2383; AVX-LABEL: shuffle_v8i16_0z234567:
2384; AVX:       # %bb.0:
2385; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2386; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2387; AVX-NEXT:    retq
2388  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2389  ret <8 x i16> %shuffle
2390}
2391
; Mask <0,8,8,8,8,5,8,7>: keeps words 0, 5 and 7 of %a in place and zeroes the
; other five lanes (index 8 selects from the zeroinitializer operand). The
; assertions expect an andps with a RIP-relative memory operand (presumably a
; constant-pool mask) on the runs without SSE4.1, and a blend against a zeroed
; register on the others.
2392define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2393; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2394; SSE2:       # %bb.0:
2395; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2396; SSE2-NEXT:    retq
2397;
2398; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2399; SSSE3:       # %bb.0:
2400; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2401; SSSE3-NEXT:    retq
2402;
2403; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2404; SSE41:       # %bb.0:
2405; SSE41-NEXT:    pxor %xmm1, %xmm1
2406; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2407; SSE41-NEXT:    retq
2408;
2409; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2410; AVX:       # %bb.0:
2411; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2412; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2413; AVX-NEXT:    retq
2414  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2415  ret <8 x i16> %shuffle
2416}
2417
; Keeps words 0..6 of %a in place and zeroes only word 7 (mask index 15
; selects a lane of the zeroinitializer operand), matching the "0123456z"
; suffix of the function name.
; NOTE(review): the blend assertions below were adjusted by hand for this
; mask; re-run utils/update_llc_test_checks.py to confirm them.
2418define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2419; SSE2-LABEL: shuffle_v8i16_0123456z:
2420; SSE2:       # %bb.0:
2421; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
2422; SSE2-NEXT:    retq
2423;
2424; SSSE3-LABEL: shuffle_v8i16_0123456z:
2425; SSSE3:       # %bb.0:
2426; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
2427; SSSE3-NEXT:    retq
2428;
2429; SSE41-LABEL: shuffle_v8i16_0123456z:
2430; SSE41:       # %bb.0:
2431; SSE41-NEXT:    pxor %xmm1, %xmm1
2432; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2433; SSE41-NEXT:    retq
2434;
2435; AVX-LABEL: shuffle_v8i16_0123456z:
2436; AVX:       # %bb.0:
2437; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2438; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2439; AVX-NEXT:    retq
2440  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2441  ret <8 x i16> %shuffle
2442}
2443
; Two-input shuffle with mask <15,u,3,u,12,12,5,u>: lanes 0, 4 and 5 read
; words 7, 4 and 4 of %b (indices 15, 12, 12), lanes 2 and 6 read words 3 and
; 5 of %a, and the remaining lanes are undef. The expected code byte-shifts
; %a, permutes the high words of %b, and interleaves the high dwords of the
; two.
2444define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2445; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2446; SSE:       # %bb.0:
2447; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2448; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2449; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2450; SSE-NEXT:    movdqa %xmm1, %xmm0
2451; SSE-NEXT:    retq
2452;
2453; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2454; AVX:       # %bb.0:
2455; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2456; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2457; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2458; AVX-NEXT:    retq
2459  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2460  ret <8 x i16> %shuffle
2461}
2462
; Mask <8,0,1,2,3,4,5,u>: a zero lane (index 8 selects from the
; zeroinitializer operand) followed by words 0..5 of %a, with the last lane
; undef. Expected to lower to a whole-register byte shift that inserts two
; zero bytes at the bottom.
2463define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2464; SSE-LABEL: shuffle_v8i16_8012345u:
2465; SSE:       # %bb.0:
2466; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2467; SSE-NEXT:    retq
2468;
2469; AVX-LABEL: shuffle_v8i16_8012345u:
2470; AVX:       # %bb.0:
2471; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2472; AVX-NEXT:    retq
2473  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2474
2475  ret <8 x i16> %shuffle
2476}
2477
; Blend written as and/or on <2 x i64> bitcasts instead of a shufflevector.
; %3 keeps only the upper 32 bits of the second i64 of %a (-4294967296 is
; 0xFFFFFFFF00000000); %4 keeps the first i64 and the lower 32 bits of the
; second i64 of %b. The assertions expect this to be recognised as a dword
; blend (three dwords from %b, the top dword from %a) where SSE4.1 blends are
; available, and a pair of shufps otherwise.
; NOTE(review): the "012345ef" suffix reads as words 0..5 from the first
; operand and words 14,15 from the second, but the IR takes words 0..5 from %b
; and words 6,7 from %a -- confirm the naming is intentional.
2478define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
2479; SSE2-LABEL: mask_v8i16_012345ef:
2480; SSE2:       # %bb.0:
2481; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2482; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2483; SSE2-NEXT:    movaps %xmm1, %xmm0
2484; SSE2-NEXT:    retq
2485;
2486; SSSE3-LABEL: mask_v8i16_012345ef:
2487; SSSE3:       # %bb.0:
2488; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2489; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2490; SSSE3-NEXT:    movaps %xmm1, %xmm0
2491; SSSE3-NEXT:    retq
2492;
2493; SSE41-LABEL: mask_v8i16_012345ef:
2494; SSE41:       # %bb.0:
2495; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2496; SSE41-NEXT:    retq
2497;
2498; AVX-LABEL: mask_v8i16_012345ef:
2499; AVX:       # %bb.0:
2500; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2501; AVX-NEXT:    retq
2502  %1 = bitcast <8 x i16> %a to <2 x i64>
2503  %2 = bitcast <8 x i16> %b to <2 x i64>
2504  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
2505  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
2506  %5 = or <2 x i64> %4, %3
2507  %6 = bitcast <2 x i64> %5 to <8 x i16>
2508  ret <8 x i16> %6
2509}
2510
; Loads an i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>, views
; the vector as <8 x i16>, and splats word 0 to all eight lanes. On the AVX2
; and AVX512VL runs the whole sequence is expected to fold into a single
; vpbroadcastw straight from memory.
2511define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2512; SSE-LABEL: insert_dup_mem_v8i16_i32:
2513; SSE:       # %bb.0:
2514; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2515; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2516; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2517; SSE-NEXT:    retq
2518;
2519; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2520; AVX1:       # %bb.0:
2521; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2522; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2523; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2524; AVX1-NEXT:    retq
2525;
2526; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
2527; AVX2OR512VL:       # %bb.0:
2528; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %xmm0
2529; AVX2OR512VL-NEXT:    retq
2530  %tmp = load i32, i32* %ptr, align 4
2531  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2532  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2533  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
2534  ret <8 x i16> %tmp3
2535}
2536
; Loads an i16 from %ptr, sign-extends it to i32, inserts it into lane 0 of a
; zero <4 x i32>, views the vector as <8 x i16>, and splats word 0 to all
; eight lanes. Every run is expected to keep the sign-extending scalar load
; (movswl) and then broadcast from a register rather than from memory.
2537define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2538; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
2539; SSE:       # %bb.0:
2540; SSE-NEXT:    movswl (%rdi), %eax
2541; SSE-NEXT:    movd %eax, %xmm0
2542; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2543; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2544; SSE-NEXT:    retq
2545;
2546; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2547; AVX1:       # %bb.0:
2548; AVX1-NEXT:    movswl (%rdi), %eax
2549; AVX1-NEXT:    vmovd %eax, %xmm0
2550; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
2551; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2552; AVX1-NEXT:    retq
2553;
2554; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2555; AVX2:       # %bb.0:
2556; AVX2-NEXT:    movswl (%rdi), %eax
2557; AVX2-NEXT:    vmovd %eax, %xmm0
2558; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2559; AVX2-NEXT:    retq
2560;
2561; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
2562; AVX512VL:       # %bb.0:
2563; AVX512VL-NEXT:    movswl (%rdi), %eax
2564; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
2565; AVX512VL-NEXT:    retq
2566  %tmp = load i16, i16* %ptr, align 2
2567  %tmp1 = sext i16 %tmp to i32
2568  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2569  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2570  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
2571  ret <8 x i16> %tmp4
2572}
2573
; Loads an i32 from %ptr, inserts it into lane 0 of a zero <4 x i32>, views
; the vector as <8 x i16>, and splats word 1 to all eight lanes. On the AVX2
; and AVX512VL runs this is expected to fold into a vpbroadcastw from memory
; at byte offset 2.
2574define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2575; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
2576; SSE:       # %bb.0:
2577; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2578; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2579; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2580; SSE-NEXT:    retq
2581;
2582; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2583; AVX1:       # %bb.0:
2584; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2585; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2586; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2587; AVX1-NEXT:    retq
2588;
2589; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
2590; AVX2OR512VL:       # %bb.0:
2591; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2592; AVX2OR512VL-NEXT:    retq
2593  %tmp = load i32, i32* %ptr, align 4
2594  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2595  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2596  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2597  ret <8 x i16> %tmp3
2598}
2599
; Loads an i32 from %ptr, inserts it into lane 1 of a zero <4 x i32>, views
; the vector as <8 x i16>, and splats word 3 to all eight lanes. Word 3 is
; the second i16 of the inserted dword, so the AVX2 and AVX512VL runs are
; expected to fold this into a vpbroadcastw from memory at byte offset 2.
2600define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2601; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2602; SSE2:       # %bb.0:
2603; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2604; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2605; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2606; SSE2-NEXT:    retq
2607;
2608; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2609; SSSE3:       # %bb.0:
2610; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2611; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2612; SSSE3-NEXT:    retq
2613;
2614; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2615; SSE41:       # %bb.0:
2616; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2617; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2618; SSE41-NEXT:    retq
2619;
2620; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2621; AVX1:       # %bb.0:
2622; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2623; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2624; AVX1-NEXT:    retq
2625;
2626; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
2627; AVX2OR512VL:       # %bb.0:
2628; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
2629; AVX2OR512VL-NEXT:    retq
2630  %tmp = load i32, i32* %ptr, align 4
2631  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2632  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2633  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2634  ret <8 x i16> %tmp3
2635}
2636
; Loads an i16 from %ptr, sign-extends it to i32, inserts it into lane 0 of a
; zero <4 x i32>, views the vector as <8 x i16>, and splats word 1 (the upper
; half of the sign-extended value) to all eight lanes. The AVX2 and AVX512VL
; runs are expected to extract that half with a scalar shrl by 16 before
; broadcasting.
2637define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2638; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2639; SSE:       # %bb.0:
2640; SSE-NEXT:    movswl (%rdi), %eax
2641; SSE-NEXT:    movd %eax, %xmm0
2642; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2643; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2644; SSE-NEXT:    retq
2645;
2646; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2647; AVX1:       # %bb.0:
2648; AVX1-NEXT:    movswl (%rdi), %eax
2649; AVX1-NEXT:    vmovd %eax, %xmm0
2650; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
2651; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2652; AVX1-NEXT:    retq
2653;
2654; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2655; AVX2:       # %bb.0:
2656; AVX2-NEXT:    movswl (%rdi), %eax
2657; AVX2-NEXT:    shrl $16, %eax
2658; AVX2-NEXT:    vmovd %eax, %xmm0
2659; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2660; AVX2-NEXT:    retq
2661;
2662; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2663; AVX512VL:       # %bb.0:
2664; AVX512VL-NEXT:    movswl (%rdi), %eax
2665; AVX512VL-NEXT:    shrl $16, %eax
2666; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
2667; AVX512VL-NEXT:    retq
2668  %tmp = load i16, i16* %ptr, align 2
2669  %tmp1 = sext i16 %tmp to i32
2670  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2671  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2672  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2673  ret <8 x i16> %tmp4
2674}
2675
; Loads an i16 from %ptr, sign-extends it to i32, inserts it into lane 1 of a
; zero <4 x i32>, views the vector as <8 x i16>, and splats word 3 (the upper
; half of the inserted dword) to all eight lanes. The AVX2 and AVX512VL runs
; are expected to extract that half with a scalar shrl by 16 before
; broadcasting; the other runs shuffle word 1 of the register loaded by movd.
2676define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2677; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2678; SSE2:       # %bb.0:
2679; SSE2-NEXT:    movswl (%rdi), %eax
2680; SSE2-NEXT:    movd %eax, %xmm0
2681; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
2682; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2683; SSE2-NEXT:    retq
2684;
2685; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2686; SSSE3:       # %bb.0:
2687; SSSE3-NEXT:    movswl (%rdi), %eax
2688; SSSE3-NEXT:    movd %eax, %xmm0
2689; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2690; SSSE3-NEXT:    retq
2691;
2692; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2693; SSE41:       # %bb.0:
2694; SSE41-NEXT:    movswl (%rdi), %eax
2695; SSE41-NEXT:    movd %eax, %xmm0
2696; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2697; SSE41-NEXT:    retq
2698;
2699; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2700; AVX1:       # %bb.0:
2701; AVX1-NEXT:    movswl (%rdi), %eax
2702; AVX1-NEXT:    vmovd %eax, %xmm0
2703; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2704; AVX1-NEXT:    retq
2705;
2706; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2707; AVX2:       # %bb.0:
2708; AVX2-NEXT:    movswl (%rdi), %eax
2709; AVX2-NEXT:    shrl $16, %eax
2710; AVX2-NEXT:    vmovd %eax, %xmm0
2711; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
2712; AVX2-NEXT:    retq
2713;
2714; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2715; AVX512VL:       # %bb.0:
2716; AVX512VL-NEXT:    movswl (%rdi), %eax
2717; AVX512VL-NEXT:    shrl $16, %eax
2718; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
2719; AVX512VL-NEXT:    retq
2720  %tmp = load i16, i16* %ptr, align 2
2721  %tmp1 = sext i16 %tmp to i32
2722  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2723  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2724  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2725  ret <8 x i16> %tmp4
2726}
2727