; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

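; Check that inserting zero elements into vectors is lowered to cheap
; xor/move/blend sequences at each of the SSE and AVX feature levels above.

; Zero element 0 of a <2 x double>.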
define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

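; Zero elements 1 and 2 of a <4 x double>.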
define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # BB#0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorpd %xmm2, %xmm2
; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

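; Zero element 0 of a <2 x i64>.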
define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

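; Zero element 2 of a <4 x i64>.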
define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

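; Zero element 2 of a <4 x float>.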
define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

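; Zero elements 0 and 6 of an <8 x float>.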
define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

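; Zero element 2 of a <4 x i32>.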
define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

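; Zero elements 0 and 6 of an <8 x i32>.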
define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_z12345z7:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_z12345z7:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

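; Zero elements 0 and 6 of an <8 x i16>.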
define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

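; Zero elements 0, 6, and 15 of a <16 x i16>.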
define <16 x i16> @insert_v16i16_z12345z789ABZDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

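; Zero elements 0 and 15 of a <16 x i8>.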
define <16 x i8> @insert_v16i8_z123456789ABZDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_z123456789ABZDEz:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

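; Zero elements 0, 15, 30, and 31 of a <32 x i8>.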
define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    pandn %xmm4, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    por %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pshufb {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1:       # BB#0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2:       # BB#0:
; AVX2-NEXT:    xorl %eax, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}