• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2
5
6;
7; EXTRQI
8;
9
10; A length of zero is equivalent to a bit length of 64.
; extrqi with length 0 and index 0: every RUN line expects the call to fold
; away completely, leaving only the return.
11define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
12; ALL-LABEL: extrqi_len0_idx0:
13; ALL:       # %bb.0:
14; ALL-NEXT:    retq
15  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
16  ret <2 x i64> %1
17}
18
; extrqi with length 8 and index 16: expected to remain an extrq whose decoded
; shuffle moves byte 2 to byte 0, zeroes bytes 1-7 and leaves the upper eight
; bytes undefined.
19define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
20; ALL-LABEL: extrqi_len8_idx16:
21; ALL:       # %bb.0:
22; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
23; ALL-NEXT:    retq
24  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
25  ret <2 x i64> %1
26}
27
28; If the length + index exceeds the bottom 64 bits the result is undefined.
; extrqi with length 32 and index 48 (32 + 48 > 64): the extrq is still
; expected to be emitted, and its decoded result is entirely undefined ('u').
29define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
30; ALL-LABEL: extrqi_len32_idx48:
31; ALL:       # %bb.0:
32; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
33; ALL-NEXT:    retq
34  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
35  ret <2 x i64> %1
36}
37
; <16 x i8> shuffle: byte 0 of %a0, then three zero bytes, remaining lanes
; undef. The SSE3 and SSSE3 runs expect a single extrq; the AVX run expects
; vpmovzxbq instead.
38define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
39; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu:
40; AMD10H:       # %bb.0:
41; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
42; AMD10H-NEXT:    retq
43;
44; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
45; BTVER1:       # %bb.0:
46; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
47; BTVER1-NEXT:    retq
48;
49; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
50; BTVER2:       # %bb.0:
51; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
52; BTVER2-NEXT:    retq
53  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
54  ret <16 x i8> %s
55}
56
; <16 x i8> shuffle placing bytes 0 and 1 of %a0 at the bottom of each 64-bit
; half with all other bytes zero. Expected: two extrq joined by punpcklqdq
; (AMD10H), a single pshufb (BTVER1), a single vpmovzxbq (BTVER2).
57define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
58; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz:
59; AMD10H:       # %bb.0:
60; AMD10H-NEXT:    movdqa %xmm0, %xmm1
61; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
62; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
63; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
64; AMD10H-NEXT:    retq
65;
66; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
67; BTVER1:       # %bb.0:
68; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
69; BTVER1-NEXT:    retq
70;
71; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
72; BTVER2:       # %bb.0:
73; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
74; BTVER2-NEXT:    retq
75  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
76  ret <16 x i8> %s
77}
78
; <16 x i8> shuffle placing bytes 2 and 3 of %a0 at the bottom of each 64-bit
; half with all other bytes zero. Expected: two extrq joined by punpcklqdq
; (AMD10H), a single pshufb (BTVER1), vpsrld $16 followed by vpmovzxbq (BTVER2).
79define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
80; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz:
81; AMD10H:       # %bb.0:
82; AMD10H-NEXT:    movdqa %xmm0, %xmm1
83; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
84; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
85; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
86; AMD10H-NEXT:    retq
87;
88; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
89; BTVER1:       # %bb.0:
90; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
91; BTVER1-NEXT:    retq
92;
93; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
94; BTVER2:       # %bb.0:
95; BTVER2-NEXT:    vpsrld $16, %xmm0, %xmm0
96; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
97; BTVER2-NEXT:    retq
98  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
99  ret <16 x i8> %s
100}
101
; <16 x i8> shuffle: bytes 0-1 of %a0, then two zero bytes, remaining lanes
; undef. The SSE3 and SSSE3 runs expect a single extrq; the AVX run expects
; vpmovzxwq instead.
102define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
103; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu:
104; AMD10H:       # %bb.0:
105; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
106; AMD10H-NEXT:    retq
107;
108; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
109; BTVER1:       # %bb.0:
110; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
111; BTVER1-NEXT:    retq
112;
113; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
114; BTVER2:       # %bb.0:
115; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
116; BTVER2-NEXT:    retq
117  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
118  ret <16 x i8> %s
119}
120
; <16 x i8> shuffle placing bytes 0-1 and bytes 2-3 of %a0 at the bottom of
; each 64-bit half with all other bytes zero. Expected: two extrq joined by
; punpcklqdq (AMD10H), a single pshufb (BTVER1), a single vpmovzxwq (BTVER2).
121define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
122; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz:
123; AMD10H:       # %bb.0:
124; AMD10H-NEXT:    movdqa %xmm0, %xmm1
125; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
126; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
127; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
128; AMD10H-NEXT:    retq
129;
130; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
131; BTVER1:       # %bb.0:
132; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
133; BTVER1-NEXT:    retq
134;
135; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
136; BTVER2:       # %bb.0:
137; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
138; BTVER2-NEXT:    retq
139  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
140  ret <16 x i8> %s
141}
142
; <16 x i8> shuffle: byte 1 of %a0, then three zero bytes, remaining lanes
; undef. All three runs expect a single extrq.
143define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
144; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
145; ALL:       # %bb.0:
146; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
147; ALL-NEXT:    retq
148  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
149  ret <16 x i8> %s
150}
151
; <8 x i16> shuffle: element 1 of %a0, then three zero elements, upper half
; undef. All three runs expect a single extrq selecting bytes 2-3.
152define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
153; ALL-LABEL: shuf_1zzzuuuu:
154; ALL:       # %bb.0:
155; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
156; ALL-NEXT:    retq
157  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
158  ret <8 x i16> %s
159}
160
; <8 x i16> shuffle: elements 1-2 of %a0, then two zero elements, upper half
; undef. All three runs expect a single extrq selecting bytes 2-5.
161define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
162; ALL-LABEL: shuf_12zzuuuu:
163; ALL:       # %bb.0:
164; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
165; ALL-NEXT:    retq
166  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
167  ret <8 x i16> %s
168}
169
; <8 x i16> shuffle: elements 0-2 of %a0, then one zero element, upper half
; undef. The SSE3 and SSSE3 runs expect a single extrq; the AVX run expects
; vpxor + vpblendw instead.
170define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
171; AMD10H-LABEL: shuf_012zuuuu:
172; AMD10H:       # %bb.0:
173; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
174; AMD10H-NEXT:    retq
175;
176; BTVER1-LABEL: shuf_012zuuuu:
177; BTVER1:       # %bb.0:
178; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
179; BTVER1-NEXT:    retq
180;
181; BTVER2-LABEL: shuf_012zuuuu:
182; BTVER2:       # %bb.0:
183; BTVER2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
184; BTVER2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
185; BTVER2-NEXT:    retq
186  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
187  ret <8 x i16> %s
188}
189
; <8 x i16> shuffle placing elements 0 and 1 of %a0 at the bottom of each
; 64-bit half with all other elements zero. Expected: two extrq joined by
; punpcklqdq (AMD10H), a single pshufb (BTVER1), a single vpmovzxwq (BTVER2).
190define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
191; AMD10H-LABEL: shuf_0zzz1zzz:
192; AMD10H:       # %bb.0:
193; AMD10H-NEXT:    movdqa %xmm0, %xmm1
194; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
195; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
196; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
197; AMD10H-NEXT:    retq
198;
199; BTVER1-LABEL: shuf_0zzz1zzz:
200; BTVER1:       # %bb.0:
201; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
202; BTVER1-NEXT:    retq
203;
204; BTVER2-LABEL: shuf_0zzz1zzz:
205; BTVER2:       # %bb.0:
206; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
207; BTVER2-NEXT:    retq
208  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
209  ret <8 x i16> %s
210}
211
; <4 x i32> shuffle interleaving elements 0 and 1 of %a0 with zero. No run
; expects extrq here: xorps + unpcklps (AMD10H, BTVER1) or vpmovzxdq (BTVER2).
212define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
213; AMD10H-LABEL: shuf_0z1z:
214; AMD10H:       # %bb.0:
215; AMD10H-NEXT:    xorps %xmm1, %xmm1
216; AMD10H-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
217; AMD10H-NEXT:    retq
218;
219; BTVER1-LABEL: shuf_0z1z:
220; BTVER1:       # %bb.0:
221; BTVER1-NEXT:    xorps %xmm1, %xmm1
222; BTVER1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
223; BTVER1-NEXT:    retq
224;
225; BTVER2-LABEL: shuf_0z1z:
226; BTVER2:       # %bb.0:
227; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
228; BTVER2-NEXT:    retq
229  %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
230  ret <4 x i32> %s
231}
232
233;
234; INSERTQI
235;
236
237; A length of zero is equivalent to a bit length of 64.
; insertqi with length 0 and index 0: no insertq is expected; every run expects
; just a register copy of xmm1 into xmm0 (movaps, or vmovaps with AVX).
238define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
239; AMD10H-LABEL: insertqi_len0_idx0:
240; AMD10H:       # %bb.0:
241; AMD10H-NEXT:    movaps %xmm1, %xmm0
242; AMD10H-NEXT:    retq
243;
244; BTVER1-LABEL: insertqi_len0_idx0:
245; BTVER1:       # %bb.0:
246; BTVER1-NEXT:    movaps %xmm1, %xmm0
247; BTVER1-NEXT:    retq
248;
249; BTVER2-LABEL: insertqi_len0_idx0:
250; BTVER2:       # %bb.0:
251; BTVER2-NEXT:    vmovaps %xmm1, %xmm0
252; BTVER2-NEXT:    retq
253  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
254  ret <2 x i64> %1
255}
256
; insertqi with length 8 and index 16: expected to remain an insertq whose
; decoded shuffle replaces byte 2 of xmm0 with byte 0 of xmm1 and leaves the
; upper eight bytes undefined.
257define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
258; ALL-LABEL: insertqi_len8_idx16:
259; ALL:       # %bb.0:
260; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
261; ALL-NEXT:    retq
262  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
263  ret <2 x i64> %1
264}
265
266; If the length + index exceeds the bottom 64 bits the result is undefined.
; insertqi with length 32 and index 48 (32 + 48 > 64): the insertq is still
; expected to be emitted, and its decoded result is entirely undefined ('u').
267define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
268; ALL-LABEL: insertqi_len32_idx48:
269; ALL:       # %bb.0:
270; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
271; ALL-NEXT:    retq
272  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
273  ret <2 x i64> %1
274}
275
; <16 x i8> shuffle using only %a0 (mask 0,0,2,3, rest undef): byte 1 is
; overwritten with a copy of byte 0. All three runs expect a single insertq.
276define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
277; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
278; ALL:       # %bb.0:
279; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
280; ALL-NEXT:    retq
281  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
282  ret <16 x i8> %s
283}
284
; <16 x i8> two-input shuffle (mask 0,16,2,3, rest undef): byte 1 comes from
; byte 0 of %a1. All three runs expect a single insertq.
285define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
286; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
287; ALL:       # %bb.0:
288; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
289; ALL-NEXT:    retq
290  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
291  ret <16 x i8> %s
292}
293
; <16 x i8> two-input shuffle (mask 16,1,2,3, rest undef): byte 0 comes from
; byte 0 of %a1. All three runs expect a single insertq.
294define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
295; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
296; ALL:       # %bb.0:
297; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
298; ALL-NEXT:    retq
299  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
300  ret <16 x i8> %s
301}
302
; <8 x i16> two-input shuffle (mask 0,8,2,3, upper half undef): element 1 comes
; from element 0 of %a1. All three runs expect a single insertq.
303define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
304; ALL-LABEL: shuf_0823uuuu:
305; ALL:       # %bb.0:
306; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
307; ALL-NEXT:    retq
308  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
309  ret <8 x i16> %s
310}
311
; <8 x i16> two-input shuffle (mask 0,1,8,3, upper half undef): element 2 comes
; from element 0 of %a1. All three runs expect a single insertq.
312define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
313; ALL-LABEL: shuf_0183uuuu:
314; ALL:       # %bb.0:
315; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
316; ALL-NEXT:    retq
317  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
318  ret <8 x i16> %s
319}
320
; <8 x i16> two-input shuffle (mask 0,1,2,8, upper half undef): element 3 comes
; from element 0 of %a1. All three runs expect a single insertq.
321define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
322; ALL-LABEL: shuf_0128uuuu:
323; ALL:       # %bb.0:
324; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
325; ALL-NEXT:    retq
326  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
327  ret <8 x i16> %s
328}
329
; <8 x i16> two-input shuffle (mask 0,8,9,3, upper half undef): elements 1-2
; come from elements 0-1 of %a1. All three runs expect a single insertq.
330define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
331; ALL-LABEL: shuf_0893uuuu:
332; ALL:       # %bb.0:
333; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
334; ALL-NEXT:    retq
335  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
336  ret <8 x i16> %s
337}
338
; <8 x i16> two-input shuffle (mask 0,8,9,10, upper half undef): elements 1-3
; come from elements 0-2 of %a1. All three runs expect a single insertq.
339define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
340; ALL-LABEL: shuf_089Auuuu:
341; ALL:       # %bb.0:
342; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
343; ALL-NEXT:    retq
344  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
345  ret <8 x i16> %s
346}
347
; <8 x i16> two-input shuffle (mask 0,8,9,undef, upper half undef): elements
; 1-2 come from elements 0-1 of %a1. All three runs expect a single insertq;
; in its decoded form the undef element 3 keeps xmm0's bytes 6-7.
348define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
349; ALL-LABEL: shuf_089uuuuu:
350; ALL:       # %bb.0:
351; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
352; ALL-NEXT:    retq
353  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
354  ret <8 x i16> %s
355}
356
357;
358; Special Cases
359;
360
361; Out of range.
; <16 x i8> two-input shuffle selecting byte 8 of %a and byte 2 of %b (mask
; index 18), remaining lanes undef. No run expects extrq or insertq; each
; expects a sequence of ordinary SSE/AVX shuffles, shifts or unpacks.
362define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
363; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
364; AMD10H:       # %bb.0:
365; AMD10H-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
366; AMD10H-NEXT:    andpd {{.*}}(%rip), %xmm0
367; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
368; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
369; AMD10H-NEXT:    packuswb %xmm0, %xmm0
370; AMD10H-NEXT:    retq
371;
372; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
373; BTVER1:       # %bb.0:
374; BTVER1-NEXT:    psrld $16, %xmm1
375; BTVER1-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
376; BTVER1-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
377; BTVER1-NEXT:    retq
378;
379; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
380; BTVER2:       # %bb.0:
381; BTVER2-NEXT:    vpsrld $16, %xmm1, %xmm1
382; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
383; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
384; BTVER2-NEXT:    retq
385  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
386  ret <16 x i8> %1
387}
388
; <16 x i8> shuffle with mask undef,0,5 and all remaining lanes undef (no zero
; lanes are selected). No run expects extrq: punpcklbw + pshufd + pshuflw
; (AMD10H), a single pshufb (BTVER1), a single vpshufb (BTVER2).
389define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
390; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
391; AMD10H:       # %bb.0:
392; AMD10H-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
393; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
394; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
395; AMD10H-NEXT:    retq
396;
397; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
398; BTVER1:       # %bb.0:
399; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
400; BTVER1-NEXT:    retq
401;
402; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
403; BTVER2:       # %bb.0:
404; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
405; BTVER2-NEXT:    retq
406  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
407  ret <16 x i8> %1
408}
409
; <16 x i8> shuffle with mask undef,zero,4,zero and all remaining lanes undef.
; No run expects extrq: pslldq + psrldq + pslldq (AMD10H), a single pshufb
; (BTVER1), a single vpshufb (BTVER2).
410define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
411; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
412; AMD10H:       # %bb.0:
413; AMD10H-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
414; AMD10H-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
415; AMD10H-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
416; AMD10H-NEXT:    retq
417;
418; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
419; BTVER1:       # %bb.0:
420; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
421; BTVER1-NEXT:    retq
422;
423; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
424; BTVER2:       # %bb.0:
425; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
426; BTVER2-NEXT:    retq
427  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
428  ret <16 x i8> %1
429}
430
; <16 x i8> shuffle with mask undef,undef,4,zero and all remaining lanes undef.
; All three runs expect a single extrq selecting bytes 2-4, so the two leading
; undef lanes receive bytes 2 and 3.
431define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
432; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
433; ALL:       # %bb.0:
434; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
435; ALL-NEXT:    retq
436  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
437  ret <16 x i8> %1
438}
439
; SSE4A intrinsics called directly by the extrqi_* / insertqi_* tests. Going by
; those test names and their call operands, the two trailing i8 arguments are
; the bit length followed by the bit index.
440declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
441declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
442