; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST

; Inserting 0.0 into lane 0 of a <2 x double> should lower to a zeroed
; register + merge (movsd on SSE, blendps on SSE4.1/AVX), not a scalar insert.
define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}
43
; Two zero-insertions into lanes 1 and 2 of a <4 x double>. On SSE the low
; half is zeroed via movq and the high half via a zero merge; AVX folds both
; into a single 256-bit blend against a zero register.
define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}
82
; Integer variant of insert_v2f64_z1: inserting i64 0 into lane 0 should use
; the same zero + merge lowering (movsd/blendps) as the FP case.
define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}
116
; Zero lane 2 of a <4 x i64>: on SSE only the high 128-bit half (xmm1) needs
; touching; AVX uses one 256-bit blend against a zero register.
define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i64_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}
150
; Zero lane 2 of a <4 x float>. Pre-SSE4.1 targets need a two-shufps sequence
; to splice in the zero; SSE4.1/AVX lower to a single blendps.
define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}
187
; Zero lanes 0 and 6 of a <8 x float>; both inserts should share one zero
; register. AVX collapses the pair into a single 256-bit blend.
define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}
229
; Integer twin of insert_v4f32_01z3: zeroing lane 2 of a <4 x i32> should
; produce identical codegen to the float case (shufps pair, or blendps).
define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}
266
; Integer twin of insert_v8f32_z12345z7 (zero lanes 0 and 6 of <8 x i32>).
; Note: pre-SSE4.1 currently rematerializes the zero register (second xorps)
; instead of reusing it, unlike the float version.
define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}
311
; Zero lanes 0 and 6 of a <8 x i16>. Pre-SSE4.1 uses pinsrw with a zeroed GPR;
; SSE4.1/AVX fold both inserts into one pblendw against a zero vector.
define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}
349
; Zero lanes 0, 6, and 15 of a <16 x i16>. With enough zeroed lanes, AVX
; lowers the whole pattern to a single masked AND with a constant-pool mask
; rather than per-lane inserts/blends.
define <16 x i16> @insert_v16i16_z12345z789ABCDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i16_z12345z789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}
391
; Zero lanes 0 and 15 of a <16 x i8>. No byte-insert exists pre-SSE4.1, so
; SSE2/SSE3/SSSE3 use a constant-pool AND mask; SSE4.1/AVX1/AVX2-SLOW prefer
; pinsrb, while AVX2 with fast-variable-shuffle goes back to the AND mask.
define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    xorl %eax, %eax
; AVX2-SLOW-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX2-SLOW-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}
437
; Zero lanes 0, 15, 30, and 31 of a <32 x i8>. Exercises mixed lowering per
; 128-bit half: AND mask or pinsrb for the low half, and a pblendw covering
; the adjacent top two bytes of the high half, recombined via vinsert[if]128.
define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    xorl %eax, %eax
; AVX2-SLOW-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-SLOW-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-SLOW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX2-FAST-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}
502
; PR41512: shuffling two vectors that each have a scalar inserted at lane 0
; (with a known-zero lane 1) should become two movd (which implicitly zero the
; upper bits) plus a punpcklqdq — no explicit zeroing or blends.
define <4 x i32> @PR41512(i32 %x, i32 %y) {
; SSE-LABEL: PR41512:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %edi, %xmm0
; AVX-NEXT:    vmovd %esi, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}
522
; PR41512, 256-bit i64 variant: two movq loads of the scalars (upper lanes
; are undef/zero) and, on AVX, a single vinsert[if]128 to combine the halves.
define <4 x i64> @PR41512_v4i64(i64 %x, i64 %y) {
; SSE-LABEL: PR41512_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    movq %rsi, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR41512_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vmovq %rsi, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR41512_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vmovq %rsi, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %ins1 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %x, i32 0
  %ins2 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %y, i32 0
  %r = shufflevector <4 x i64> %ins1, <4 x i64> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %r
}
548
; PR41512, float variant inserting into zeroinitializer (not undef): each
; scalar must land in lane 0 with the remaining lanes zeroed (movss merge or
; blendps), then the two 128-bit halves are combined on AVX.
define <8 x float> @PR41512_v8f32(float %x, float %y) {
; SSE2-LABEL: PR41512_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    xorps %xmm3, %xmm3
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT:    movaps %xmm3, %xmm0
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: PR41512_v8f32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    xorps %xmm3, %xmm3
; SSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE3-NEXT:    movaps %xmm3, %xmm0
; SSE3-NEXT:    movaps %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: PR41512_v8f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    xorps %xmm3, %xmm3
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSSE3-NEXT:    movaps %xmm3, %xmm0
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: PR41512_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR41512_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
  %ins2 = insertelement <8 x float> zeroinitializer, float %y, i32 0
  %r = shufflevector <8 x float> %ins1, <8 x float> %ins2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %r
}
599
; PR41512 with scalars loaded from memory: the loads should fold into movss
; (zero-extending into the vector) followed by one movlhps to concatenate.
define <4 x i32> @PR41512_loads(i32* %p1, i32* %p2) {
; SSE-LABEL: PR41512_loads:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512_loads:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %x = load i32, i32* %p1
  %y = load i32, i32* %p2
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}
621