; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL

; Insert all-ones (i64 -1) into lane 0 of a <2 x i64>; expect a blend with an
; all-ones vector (pcmpeqd idiom) on SSE41+/AVX, a constant-pool movlps before.
define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_x1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_x1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_x1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_x1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_x1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_x1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v2i64_x1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 -1, i32 0
  ret <2 x i64> %1
}

; Insert all-ones (i64 -1) into lane 2 of a <4 x i64>; the upper 128-bit half
; is the one modified, so SSE targets touch only xmm1.
define <4 x i64> @insert_v4i64_01x3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i64_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 -1, i32 2
  ret <4 x i64> %1
}

; Insert all-ones (i32 -1) into lane 2 of a <4 x i32>; pre-SSE41 targets have
; no blend, so they build the constant in a GPR and shuffle it into place.
define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i32_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 -1, i32 2
  ret <4 x i32> %1
}

; Insert all-ones into lanes 0 and 6 of a <8 x i32>; both inserts should fold
; into a single ymm blend on AVX targets.
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_x12345x7:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_x12345x7:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v8i32_x12345x7:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 6
  ret <8 x i32> %2
}

; Insert all-ones into lanes 0 and 6 of a <8 x i16>; pre-SSE41 uses pinsrw,
; SSE41+/AVX fold both inserts into one pblendw against an all-ones register.
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 -1, i32 0
  %2 = insertelement <8 x i16> %1, i16 -1, i32 6
  ret <8 x i16> %2
}

; Insert all-ones into lanes 0, 6 and 15 of a <16 x i16> (inserts span both
; 128-bit halves); AVX512 targets can use a single vpermt2w.
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,1,2,3,4,5,38,7,8,9,10,11,12,13,14,47]
; AVX512F-NEXT:    vpermt2w %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,1,2,3,4,5,22,7,8,9,10,11,12,13,14,31]
; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 -1, i32 0
  %2 = insertelement <16 x i16> %1, i16 -1, i32 6
  %3 = insertelement <16 x i16> %2, i16 -1, i32 15
  ret <16 x i16> %3
}

; Insert all-ones (i8 -1) into lanes 0 and 15 of a <16 x i8>; SSE41+/AVX have
; pinsrb, older targets fall back to mask/shuffle sequences.
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero
; SSSE3-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $255, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 -1, i32 0
  %2 = insertelement <16 x i8> %1, i8 -1, i32 15
  ret <16 x i8> %2
}

; Insert all-ones (i8 -1) into lanes 0, 15, 30 and 31 of a <32 x i8>; AVX
; targets extract/insert the upper 128-bit half around pinsrb.
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    por %xmm4, %xmm0
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    por %xmm3, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    por %xmm4, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero
; SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSSE3-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSSE3-NEXT:    por %xmm0, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[u]
; SSSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    por %xmm0, %xmm1
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl $255, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $255, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $255, %eax
; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 -1, i32 0
  %2 = insertelement <32 x i8> %1, i8 -1, i32 15
  %3 = insertelement <32 x i8> %2, i8 -1, i32 30
  %4 = insertelement <32 x i8> %3, i8 -1, i32 31
  ret <32 x i8> %4
}
