; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL

; Insert all-ones (i64 -1) into element 0 of <2 x i64>; expects a blend
; with an all-ones vector (or a movlpd from a constant pool pre-SSE4.1).
define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_x1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_x1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_x1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_x1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_x1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_x1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v2i64_x1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 -1, i32 0
  ret <2 x i64> %1
}

; Insert all-ones (i64 -1) into element 2 of <4 x i64>; on SSE this only
; touches the high 128-bit half (xmm1), on AVX it is a ymm blend.
define <4 x i64> @insert_v4i64_01x3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i64_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 -1, i32 2
  ret <4 x i64> %1
}

; Insert all-ones (i32 -1) into element 2 of <4 x i32>; pre-SSE4.1 needs a
; movd + two shufps, SSE4.1+ can use a single-element blend.
define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i32_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 -1, i32 2
  ret <4 x i32> %1
}

; Insert all-ones (i32 -1) into elements 0 and 6 of <8 x i32>; the two
; inserts should fold into a single blend against an all-ones vector on AVX.
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_x12345x7:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_x12345x7:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v8i32_x12345x7:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 6
  ret <8 x i32> %2
}

; Insert all-ones (i16 -1) into elements 0 and 6 of <8 x i16>; SSE2 uses two
; pinsrw, SSE4.1/AVX fold both inserts into one pblendw with all-ones.
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 -1, i32 0
  %2 = insertelement <8 x i16> %1, i16 -1, i32 6
  ret <8 x i16> %2
}

; Insert all-ones (i16 -1) into elements 0, 6 and 15 of <16 x i16>; covers
; inserts that straddle the 128-bit lane boundary (AVX2 falls back to
; vpblendvb with byte masks, AVX512VL uses masked vmovdqu16).
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    movw $1, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $64, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $-32768, %ax # imm = 0x8000
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 -1, i32 0
  %2 = insertelement <16 x i16> %1, i16 -1, i32 6
  %3 = insertelement <16 x i16> %2, i16 -1, i32 15
  ret <16 x i16> %3
}

; Insert all-ones (i8 -1) into elements 0 and 15 of <16 x i8>; pre-SSE4.1
; there is no byte insert, so SSE2/SSE3 mask-and-or and SSSE3 uses pshufb;
; SSE4.1/AVX use pinsrb.
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $255, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 -1, i32 0
  %2 = insertelement <16 x i8> %1, i8 -1, i32 15
  ret <16 x i8> %2
}

; Insert all-ones (i8 -1) into elements 0, 15, 30 and 31 of <32 x i8>;
; exercises byte inserts in both 128-bit halves of a ymm (AVX extracts the
; high xmm, pinsrb's into it, and reinserts it).
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    por %xmm4, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    por %xmm4, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl $255, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $255, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $255, %eax
; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 -1, i32 0
  %2 = insertelement <32 x i8> %1, i8 -1, i32 15
  %3 = insertelement <32 x i8> %2, i8 -1, i32 30
  %4 = insertelement <32 x i8> %3, i8 -1, i32 31
  ret <32 x i8> %4
}
