1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE2-32
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE2-64
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE41-32
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE41-64
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32 --check-prefix=AVX1-32
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX1-64
8; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-32 --check-prefix=AVX2-32
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX2-64
10
; Build a <2 x double> from two scalar double arguments: %a0 goes into lane 0
; and %a1 into lane 1 of an initially undef vector.
; Expected codegen per the assertions below:
;   - i686 runs (SSE and AVX) fold the whole build into a single
;     movups/vmovups load from the stack.
;   - x86_64 runs (SSE and AVX) combine xmm0 and xmm1 with a single
;     movlhps/vmovlhps.
11define <2 x double> @test_buildvector_v2f64(double %a0, double %a1) {
12; SSE-32-LABEL: test_buildvector_v2f64:
13; SSE-32:       # %bb.0:
14; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
15; SSE-32-NEXT:    retl
16;
17; SSE-64-LABEL: test_buildvector_v2f64:
18; SSE-64:       # %bb.0:
19; SSE-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
20; SSE-64-NEXT:    retq
21;
22; AVX-32-LABEL: test_buildvector_v2f64:
23; AVX-32:       # %bb.0:
24; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
25; AVX-32-NEXT:    retl
26;
27; AVX-64-LABEL: test_buildvector_v2f64:
28; AVX-64:       # %bb.0:
29; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
30; AVX-64-NEXT:    retq
31  %ins0 = insertelement <2 x double> undef, double %a0, i32 0
32  %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
33  ret <2 x double> %ins1
34}
35
; Build a <4 x float> from four scalar float arguments, inserting %a0..%a3
; into lanes 0..3 of an initially undef vector.
; Expected codegen per the assertions below:
;   - i686 runs (SSE and AVX) use a single movups/vmovups load from the stack.
;   - x86_64 with SSE2 only pairs the inputs with two unpcklps and joins the
;     halves with movlhps.
;   - x86_64 with SSE4.1 or AVX uses a chain of three insertps/vinsertps, one
;     per lane 1..3, accumulating into xmm0.
36define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, float %a3) {
37; SSE-32-LABEL: test_buildvector_v4f32:
38; SSE-32:       # %bb.0:
39; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
40; SSE-32-NEXT:    retl
41;
42; SSE2-64-LABEL: test_buildvector_v4f32:
43; SSE2-64:       # %bb.0:
44; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
45; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
46; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
47; SSE2-64-NEXT:    retq
48;
49; SSE41-64-LABEL: test_buildvector_v4f32:
50; SSE41-64:       # %bb.0:
51; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
52; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
53; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
54; SSE41-64-NEXT:    retq
55;
56; AVX-32-LABEL: test_buildvector_v4f32:
57; AVX-32:       # %bb.0:
58; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
59; AVX-32-NEXT:    retl
60;
61; AVX-64-LABEL: test_buildvector_v4f32:
62; AVX-64:       # %bb.0:
63; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
64; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
65; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
66; AVX-64-NEXT:    retq
67  %ins0 = insertelement <4 x float> undef, float %a0, i32 0
68  %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1
69  %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2
70  %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3
71  ret <4 x float> %ins3
72}
73
; Build a <2 x i64> from two scalar i64 arguments: %a0 goes into lane 0 and
; %a1 into lane 1 of an initially undef vector.
; Expected codegen per the assertions below:
;   - i686 runs (SSE and AVX) use a single movups/vmovups load from the stack.
;   - x86_64 runs move %rdi and %rsi into two xmm registers with movq/vmovq
;     and interleave them with punpcklqdq/vpunpcklqdq. The SSE and AVX
;     sequences assign the temporaries to xmm0/xmm1 the opposite way round,
;     but both end with the %rdi value in lane 0 and the %rsi value in lane 1.
74define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
75; SSE-32-LABEL: test_buildvector_v2i64:
76; SSE-32:       # %bb.0:
77; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
78; SSE-32-NEXT:    retl
79;
80; SSE-64-LABEL: test_buildvector_v2i64:
81; SSE-64:       # %bb.0:
82; SSE-64-NEXT:    movq %rsi, %xmm1
83; SSE-64-NEXT:    movq %rdi, %xmm0
84; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
85; SSE-64-NEXT:    retq
86;
87; AVX-32-LABEL: test_buildvector_v2i64:
88; AVX-32:       # %bb.0:
89; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
90; AVX-32-NEXT:    retl
91;
92; AVX-64-LABEL: test_buildvector_v2i64:
93; AVX-64:       # %bb.0:
94; AVX-64-NEXT:    vmovq %rsi, %xmm0
95; AVX-64-NEXT:    vmovq %rdi, %xmm1
96; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
97; AVX-64-NEXT:    retq
98  %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0
99  %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1
100  ret <2 x i64> %ins1
101}
102
; Build a <4 x i32> from four scalar i32 arguments, inserting %f0..%f3 into
; lanes 0..3 of an initially undef vector.
; Expected codegen per the assertions below:
;   - i686 runs (SSE and AVX) use a single movups/vmovups load from the stack.
;   - x86_64 with SSE2 only moves each GPR into its own xmm register with movd
;     and merges them with two punpckldq followed by one punpcklqdq.
;   - x86_64 with SSE4.1 or AVX starts with movd/vmovd of %edi and inserts the
;     remaining three GPRs with pinsrd/vpinsrd at immediates 1, 2 and 3.
103define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
104; SSE-32-LABEL: test_buildvector_v4i32:
105; SSE-32:       # %bb.0:
106; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
107; SSE-32-NEXT:    retl
108;
109; SSE2-64-LABEL: test_buildvector_v4i32:
110; SSE2-64:       # %bb.0:
111; SSE2-64-NEXT:    movd %ecx, %xmm0
112; SSE2-64-NEXT:    movd %edx, %xmm1
113; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
114; SSE2-64-NEXT:    movd %esi, %xmm2
115; SSE2-64-NEXT:    movd %edi, %xmm0
116; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
117; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
118; SSE2-64-NEXT:    retq
119;
120; SSE41-64-LABEL: test_buildvector_v4i32:
121; SSE41-64:       # %bb.0:
122; SSE41-64-NEXT:    movd %edi, %xmm0
123; SSE41-64-NEXT:    pinsrd $1, %esi, %xmm0
124; SSE41-64-NEXT:    pinsrd $2, %edx, %xmm0
125; SSE41-64-NEXT:    pinsrd $3, %ecx, %xmm0
126; SSE41-64-NEXT:    retq
127;
128; AVX-32-LABEL: test_buildvector_v4i32:
129; AVX-32:       # %bb.0:
130; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
131; AVX-32-NEXT:    retl
132;
133; AVX-64-LABEL: test_buildvector_v4i32:
134; AVX-64:       # %bb.0:
135; AVX-64-NEXT:    vmovd %edi, %xmm0
136; AVX-64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
137; AVX-64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
138; AVX-64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
139; AVX-64-NEXT:    retq
140  %ins0 = insertelement <4 x i32> undef, i32 %f0, i32 0
141  %ins1 = insertelement <4 x i32> %ins0, i32 %f1, i32 1
142  %ins2 = insertelement <4 x i32> %ins1, i32 %f2, i32 2
143  %ins3 = insertelement <4 x i32> %ins2, i32 %f3, i32 3
144  ret <4 x i32> %ins3
145}
146
; Build an <8 x i16> from eight scalar i16 arguments, inserting %a0..%a7 into
; lanes 0..7 of an initially undef vector.
; Expected codegen per the assertions below:
;   - SSE2-only runs (i686 and x86_64) materialise every element with its own
;     movd and merge them with a tree of four punpcklwd, two punpckldq and one
;     punpcklqdq. On i686 all eight movd read memory; on x86_64 two read
;     memory and six read GPRs (%r9d, %r8d, %ecx, %edx, %esi, %edi).
;   - SSE4.1 and AVX runs start with movd/vmovd for lane 0 and insert lanes
;     1..7 with pinsrw/vpinsrw. On i686 every insert reads a stack slot; on
;     x86_64 lanes 1..5 come from GPRs and lanes 6..7 from stack slots.
147define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
148; SSE2-32-LABEL: test_buildvector_v8i16:
149; SSE2-32:       # %bb.0:
150; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
151; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
152; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
153; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
154; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
155; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
156; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
157; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
158; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
159; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
160; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
161; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
162; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
163; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
164; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
165; SSE2-32-NEXT:    retl
166;
167; SSE2-64-LABEL: test_buildvector_v8i16:
168; SSE2-64:       # %bb.0:
169; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
170; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
171; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
172; SSE2-64-NEXT:    movd %r9d, %xmm0
173; SSE2-64-NEXT:    movd %r8d, %xmm2
174; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
175; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
176; SSE2-64-NEXT:    movd %ecx, %xmm0
177; SSE2-64-NEXT:    movd %edx, %xmm1
178; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
179; SSE2-64-NEXT:    movd %esi, %xmm3
180; SSE2-64-NEXT:    movd %edi, %xmm0
181; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
182; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
183; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
184; SSE2-64-NEXT:    retq
185;
186; SSE41-32-LABEL: test_buildvector_v8i16:
187; SSE41-32:       # %bb.0:
188; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
189; SSE41-32-NEXT:    pinsrw $1, {{[0-9]+}}(%esp), %xmm0
190; SSE41-32-NEXT:    pinsrw $2, {{[0-9]+}}(%esp), %xmm0
191; SSE41-32-NEXT:    pinsrw $3, {{[0-9]+}}(%esp), %xmm0
192; SSE41-32-NEXT:    pinsrw $4, {{[0-9]+}}(%esp), %xmm0
193; SSE41-32-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
194; SSE41-32-NEXT:    pinsrw $6, {{[0-9]+}}(%esp), %xmm0
195; SSE41-32-NEXT:    pinsrw $7, {{[0-9]+}}(%esp), %xmm0
196; SSE41-32-NEXT:    retl
197;
198; SSE41-64-LABEL: test_buildvector_v8i16:
199; SSE41-64:       # %bb.0:
200; SSE41-64-NEXT:    movd %edi, %xmm0
201; SSE41-64-NEXT:    pinsrw $1, %esi, %xmm0
202; SSE41-64-NEXT:    pinsrw $2, %edx, %xmm0
203; SSE41-64-NEXT:    pinsrw $3, %ecx, %xmm0
204; SSE41-64-NEXT:    pinsrw $4, %r8d, %xmm0
205; SSE41-64-NEXT:    pinsrw $5, %r9d, %xmm0
206; SSE41-64-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
207; SSE41-64-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
208; SSE41-64-NEXT:    retq
209;
210; AVX-32-LABEL: test_buildvector_v8i16:
211; AVX-32:       # %bb.0:
212; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
213; AVX-32-NEXT:    vpinsrw $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
214; AVX-32-NEXT:    vpinsrw $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
215; AVX-32-NEXT:    vpinsrw $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
216; AVX-32-NEXT:    vpinsrw $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
217; AVX-32-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
218; AVX-32-NEXT:    vpinsrw $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
219; AVX-32-NEXT:    vpinsrw $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
220; AVX-32-NEXT:    retl
221;
222; AVX-64-LABEL: test_buildvector_v8i16:
223; AVX-64:       # %bb.0:
224; AVX-64-NEXT:    vmovd %edi, %xmm0
225; AVX-64-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0
226; AVX-64-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0
227; AVX-64-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
228; AVX-64-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0
229; AVX-64-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0
230; AVX-64-NEXT:    vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
231; AVX-64-NEXT:    vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
232; AVX-64-NEXT:    retq
233  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
234  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
235  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
236  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
237  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
238  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
239  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
240  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
241  ret <8 x i16> %ins7
242}
243
; Build a <16 x i8> from sixteen scalar i8 arguments, inserting %a0..%a15
; into lanes 0..15 of an initially undef vector.
; Expected codegen per the assertions below:
;   - SSE2-only runs (i686 and x86_64) materialise every element with its own
;     movd and merge them with a tree of eight punpcklbw, four punpcklwd, two
;     punpckldq and one punpcklqdq. On i686 all sixteen movd read memory; on
;     x86_64 ten read memory and six read GPRs (%r9d, %r8d, %ecx, %edx, %esi,
;     %edi).
;   - SSE4.1 and AVX runs start with movd/vmovd for lane 0 and insert lanes
;     1..15 with pinsrb/vpinsrb. On i686 every insert reads a stack slot; on
;     x86_64 lanes 1..5 come from GPRs and lanes 6..15 from stack slots.
244define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
245; SSE2-32-LABEL: test_buildvector_v16i8:
246; SSE2-32:       # %bb.0:
247; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
248; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
249; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
250; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
251; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
252; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
253; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
254; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
255; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
256; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
257; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
258; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
259; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
260; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
261; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
262; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
263; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
264; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
265; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
266; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
267; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
268; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
269; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
270; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
271; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
272; SSE2-32-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
273; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
274; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
275; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
276; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
277; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
278; SSE2-32-NEXT:    retl
279;
280; SSE2-64-LABEL: test_buildvector_v16i8:
281; SSE2-64:       # %bb.0:
282; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
283; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
284; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
285; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
286; SSE2-64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
287; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
288; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
289; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
290; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
291; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
292; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
293; SSE2-64-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
294; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
295; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
296; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
297; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
298; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
299; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
300; SSE2-64-NEXT:    movd %r9d, %xmm0
301; SSE2-64-NEXT:    movd %r8d, %xmm2
302; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
303; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
304; SSE2-64-NEXT:    movd %ecx, %xmm0
305; SSE2-64-NEXT:    movd %edx, %xmm1
306; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
307; SSE2-64-NEXT:    movd %esi, %xmm4
308; SSE2-64-NEXT:    movd %edi, %xmm0
309; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
310; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
311; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
312; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
313; SSE2-64-NEXT:    retq
314;
315; SSE41-32-LABEL: test_buildvector_v16i8:
316; SSE41-32:       # %bb.0:
317; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
318; SSE41-32-NEXT:    pinsrb $1, {{[0-9]+}}(%esp), %xmm0
319; SSE41-32-NEXT:    pinsrb $2, {{[0-9]+}}(%esp), %xmm0
320; SSE41-32-NEXT:    pinsrb $3, {{[0-9]+}}(%esp), %xmm0
321; SSE41-32-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
322; SSE41-32-NEXT:    pinsrb $5, {{[0-9]+}}(%esp), %xmm0
323; SSE41-32-NEXT:    pinsrb $6, {{[0-9]+}}(%esp), %xmm0
324; SSE41-32-NEXT:    pinsrb $7, {{[0-9]+}}(%esp), %xmm0
325; SSE41-32-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
326; SSE41-32-NEXT:    pinsrb $9, {{[0-9]+}}(%esp), %xmm0
327; SSE41-32-NEXT:    pinsrb $10, {{[0-9]+}}(%esp), %xmm0
328; SSE41-32-NEXT:    pinsrb $11, {{[0-9]+}}(%esp), %xmm0
329; SSE41-32-NEXT:    pinsrb $12, {{[0-9]+}}(%esp), %xmm0
330; SSE41-32-NEXT:    pinsrb $13, {{[0-9]+}}(%esp), %xmm0
331; SSE41-32-NEXT:    pinsrb $14, {{[0-9]+}}(%esp), %xmm0
332; SSE41-32-NEXT:    pinsrb $15, {{[0-9]+}}(%esp), %xmm0
333; SSE41-32-NEXT:    retl
334;
335; SSE41-64-LABEL: test_buildvector_v16i8:
336; SSE41-64:       # %bb.0:
337; SSE41-64-NEXT:    movd %edi, %xmm0
338; SSE41-64-NEXT:    pinsrb $1, %esi, %xmm0
339; SSE41-64-NEXT:    pinsrb $2, %edx, %xmm0
340; SSE41-64-NEXT:    pinsrb $3, %ecx, %xmm0
341; SSE41-64-NEXT:    pinsrb $4, %r8d, %xmm0
342; SSE41-64-NEXT:    pinsrb $5, %r9d, %xmm0
343; SSE41-64-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
344; SSE41-64-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
345; SSE41-64-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
346; SSE41-64-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
347; SSE41-64-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
348; SSE41-64-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
349; SSE41-64-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
350; SSE41-64-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
351; SSE41-64-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
352; SSE41-64-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
353; SSE41-64-NEXT:    retq
354;
355; AVX-32-LABEL: test_buildvector_v16i8:
356; AVX-32:       # %bb.0:
357; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
358; AVX-32-NEXT:    vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
359; AVX-32-NEXT:    vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
360; AVX-32-NEXT:    vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
361; AVX-32-NEXT:    vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
362; AVX-32-NEXT:    vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
363; AVX-32-NEXT:    vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
364; AVX-32-NEXT:    vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
365; AVX-32-NEXT:    vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
366; AVX-32-NEXT:    vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0
367; AVX-32-NEXT:    vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0
368; AVX-32-NEXT:    vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0
369; AVX-32-NEXT:    vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0
370; AVX-32-NEXT:    vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0
371; AVX-32-NEXT:    vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0
372; AVX-32-NEXT:    vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm0
373; AVX-32-NEXT:    retl
374;
375; AVX-64-LABEL: test_buildvector_v16i8:
376; AVX-64:       # %bb.0:
377; AVX-64-NEXT:    vmovd %edi, %xmm0
378; AVX-64-NEXT:    vpinsrb $1, %esi, %xmm0, %xmm0
379; AVX-64-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
380; AVX-64-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
381; AVX-64-NEXT:    vpinsrb $4, %r8d, %xmm0, %xmm0
382; AVX-64-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
383; AVX-64-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
384; AVX-64-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
385; AVX-64-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
386; AVX-64-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
387; AVX-64-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
388; AVX-64-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
389; AVX-64-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
390; AVX-64-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
391; AVX-64-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
392; AVX-64-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
393; AVX-64-NEXT:    retq
394  %ins0  = insertelement <16 x i8> undef,  i8 %a0,  i32 0
395  %ins1  = insertelement <16 x i8> %ins0,  i8 %a1,  i32 1
396  %ins2  = insertelement <16 x i8> %ins1,  i8 %a2,  i32 2
397  %ins3  = insertelement <16 x i8> %ins2,  i8 %a3,  i32 3
398  %ins4  = insertelement <16 x i8> %ins3,  i8 %a4,  i32 4
399  %ins5  = insertelement <16 x i8> %ins4,  i8 %a5,  i32 5
400  %ins6  = insertelement <16 x i8> %ins5,  i8 %a6,  i32 6
401  %ins7  = insertelement <16 x i8> %ins6,  i8 %a7,  i32 7
402  %ins8  = insertelement <16 x i8> %ins7,  i8 %a8,  i32 8
403  %ins9  = insertelement <16 x i8> %ins8,  i8 %a9,  i32 9
404  %ins10 = insertelement <16 x i8> %ins9,  i8 %a10, i32 10
405  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
406  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
407  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
408  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
409  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
410  ret <16 x i8> %ins15
411}
412
413; PR30780
414
; Splat a sign-extended i8 across a <4 x i32>: %in is sign-extended to i32,
; inserted into lane 0 of an undef vector, and then broadcast to all four
; lanes by a shufflevector whose mask is all zeros.
; Expected codegen per the assertions below:
;   - Every run performs the extension as a scalar movsbl into %eax (from the
;     stack on i686, from %dil on x86_64) followed by movd/vmovd into xmm0.
;   - SSE and AVX1 runs broadcast with pshufd/vpshufd selecting lane 0 four
;     times; AVX2 runs use vpbroadcastd instead.
415define <4 x i32> @test_buildvector_v4i32_splat_sext_i8(i8 %in) {
416; SSE-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
417; SSE-32:       # %bb.0:
418; SSE-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
419; SSE-32-NEXT:    movd %eax, %xmm0
420; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
421; SSE-32-NEXT:    retl
422;
423; SSE-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
424; SSE-64:       # %bb.0:
425; SSE-64-NEXT:    movsbl %dil, %eax
426; SSE-64-NEXT:    movd %eax, %xmm0
427; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
428; SSE-64-NEXT:    retq
429;
430; AVX1-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
431; AVX1-32:       # %bb.0:
432; AVX1-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
433; AVX1-32-NEXT:    vmovd %eax, %xmm0
434; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
435; AVX1-32-NEXT:    retl
436;
437; AVX1-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
438; AVX1-64:       # %bb.0:
439; AVX1-64-NEXT:    movsbl %dil, %eax
440; AVX1-64-NEXT:    vmovd %eax, %xmm0
441; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
442; AVX1-64-NEXT:    retq
443;
444; AVX2-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
445; AVX2-32:       # %bb.0:
446; AVX2-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
447; AVX2-32-NEXT:    vmovd %eax, %xmm0
448; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
449; AVX2-32-NEXT:    retl
450;
451; AVX2-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
452; AVX2-64:       # %bb.0:
453; AVX2-64-NEXT:    movsbl %dil, %eax
454; AVX2-64-NEXT:    vmovd %eax, %xmm0
455; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
456; AVX2-64-NEXT:    retq
457  %ext = sext i8 %in to i32
458  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
459  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
460  ret <4 x i32> %splat
461}
462
; Splat a zero-extended i8 across a <4 x i32>: %in is zero-extended to i32,
; inserted into lane 0 of an undef vector, and then broadcast to all four
; lanes by a shufflevector whose mask is all zeros.
; Expected codegen per the assertions below:
;   - Every run performs the extension as a scalar movzbl into %eax (from the
;     stack on i686, from %dil on x86_64) followed by movd/vmovd into xmm0.
;   - SSE and AVX1 runs broadcast with pshufd/vpshufd selecting lane 0 four
;     times; AVX2 runs use vpbroadcastd instead.
463define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) {
464; SSE-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
465; SSE-32:       # %bb.0:
466; SSE-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
467; SSE-32-NEXT:    movd %eax, %xmm0
468; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
469; SSE-32-NEXT:    retl
470;
471; SSE-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
472; SSE-64:       # %bb.0:
473; SSE-64-NEXT:    movzbl %dil, %eax
474; SSE-64-NEXT:    movd %eax, %xmm0
475; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
476; SSE-64-NEXT:    retq
477;
478; AVX1-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
479; AVX1-32:       # %bb.0:
480; AVX1-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
481; AVX1-32-NEXT:    vmovd %eax, %xmm0
482; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
483; AVX1-32-NEXT:    retl
484;
485; AVX1-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
486; AVX1-64:       # %bb.0:
487; AVX1-64-NEXT:    movzbl %dil, %eax
488; AVX1-64-NEXT:    vmovd %eax, %xmm0
489; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
490; AVX1-64-NEXT:    retq
491;
492; AVX2-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
493; AVX2-32:       # %bb.0:
494; AVX2-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
495; AVX2-32-NEXT:    vmovd %eax, %xmm0
496; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
497; AVX2-32-NEXT:    retl
498;
499; AVX2-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
500; AVX2-64:       # %bb.0:
501; AVX2-64-NEXT:    movzbl %dil, %eax
502; AVX2-64-NEXT:    vmovd %eax, %xmm0
503; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
504; AVX2-64-NEXT:    retq
505  %ext = zext i8 %in to i32
506  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
507  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
508  ret <4 x i32> %splat
509}
510
511; PR37502 - https://bugs.llvm.org/show_bug.cgi?id=37502
512; Don't use a series of insertps when movddup will do.
513
; Build the <4 x float> vector <x, y, x, y>: the (x, y) pair placed in lanes
; 0..1 is repeated in lanes 2..3, so the upper 64 bits duplicate the lower
; 64 bits.
; Expected codegen per the assertions below:
;   - SSE2-only runs form the pair once (movsd load on i686, unpcklps of
;     xmm0/xmm1 on x86_64) and duplicate the low half with movlhps.
;   - SSE4.1 and AVX runs on i686 use a single movddup/vmovddup from memory.
;   - SSE4.1 and AVX runs on x86_64 form the pair with one insertps/vinsertps
;     and duplicate it with movddup/vmovddup, rather than chaining one insert
;     per lane.
514define <4 x float> @PR37502(float %x, float %y) {
515; SSE2-32-LABEL: PR37502:
516; SSE2-32:       # %bb.0:
517; SSE2-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
518; SSE2-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
519; SSE2-32-NEXT:    retl
520;
521; SSE2-64-LABEL: PR37502:
522; SSE2-64:       # %bb.0:
523; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
524; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
525; SSE2-64-NEXT:    retq
526;
527; SSE41-32-LABEL: PR37502:
528; SSE41-32:       # %bb.0:
529; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
530; SSE41-32-NEXT:    retl
531;
532; SSE41-64-LABEL: PR37502:
533; SSE41-64:       # %bb.0:
534; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
535; SSE41-64-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
536; SSE41-64-NEXT:    retq
537;
538; AVX-32-LABEL: PR37502:
539; AVX-32:       # %bb.0:
540; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
541; AVX-32-NEXT:    retl
542;
543; AVX-64-LABEL: PR37502:
544; AVX-64:       # %bb.0:
545; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
546; AVX-64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
547; AVX-64-NEXT:    retq
548  %i0 = insertelement <4 x float> undef, float %x, i32 0
549  %i1 = insertelement <4 x float> %i0, float %y, i32 1
550  %i2 = insertelement <4 x float> %i1, float %x, i32 2
551  %i3 = insertelement <4 x float> %i2, float %y, i32 3
552  ret <4 x float> %i3
553}
554
555