• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
2; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s
3
; Inserts a float loaded from %br into lane 1 and the scalar %y into lane 14
; of a <16 x float>; the shared checks only pin the insert mnemonics.
4;CHECK-LABEL: test1:
5;CHECK: vinsertps
6;CHECK: vinsertf32x4
7;CHECK: ret
8define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
9  %from_mem = load float, float* %br
10  %with_lane1 = insertelement <16 x float> %x, float %from_mem, i32 1
11  %with_lane14 = insertelement <16 x float> %with_lane1, float %y, i32 14
12  ret <16 x float> %with_lane14
13}
14
; Inserts a double loaded from %br into lane 1 and the scalar %y into lane 6
; of an <8 x double>; full instruction sequences are pinned for both cpu runs.
15define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
16; KNL-LABEL: test2:
17; KNL:       ## BB#0:
18; KNL-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
19; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
20; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
21; KNL-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
22; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
23; KNL-NEXT:    retq
24;
25; SKX-LABEL: test2:
26; SKX:       ## BB#0:
27; SKX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
28; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
29; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
30; SKX-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
31; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
32; SKX-NEXT:    retq
33  %from_mem = load double, double* %br
34  %with_lane1 = insertelement <8 x double> %x, double %from_mem, i32 1
35  %with_lane6 = insertelement <8 x double> %with_lane1, double %y, i32 6
36  ret <8 x double> %with_lane6
37}
38
; Copies lane 4 of a <16 x float> into lane 1 of the same vector.
39;CHECK-LABEL: test3:
40;CHECK: vextractf32x4 $1
41;CHECK: vinsertf32x4 $0
42;CHECK: ret
43define <16 x float> @test3(<16 x float> %x) nounwind {
44  %lane4 = extractelement <16 x float> %x, i32 4
45  %moved = insertelement <16 x float> %x, float %lane4, i32 1
46  ret <16 x float> %moved
47}
48
; Copies lane 4 of an <8 x i64> into lane 1 of the same vector.
49define <8 x i64> @test4(<8 x i64> %x) nounwind {
50; KNL-LABEL: test4:
51; KNL:       ## BB#0:
52; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
53; KNL-NEXT:    vmovq %xmm1, %rax
54; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
55; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
56; KNL-NEXT:    retq
57;
58; SKX-LABEL: test4:
59; SKX:       ## BB#0:
60; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
61; SKX-NEXT:    vmovq %xmm1, %rax
62; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
63; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
64; SKX-NEXT:    retq
65  %lane4 = extractelement <8 x i64> %x, i32 4
66  %moved = insertelement <8 x i64> %x, i64 %lane4, i32 1
67  ret <8 x i64> %moved
68}
69
; Extracts lane 3 of a <4 x float> and returns its bit pattern as an i32.
70;CHECK-LABEL: test5:
71;CHECK: vextractps
72;CHECK: ret
73define i32 @test5(<4 x float> %x) nounwind {
74  %lane3 = extractelement <4 x float> %x, i32 3
75  %bits = bitcast float %lane3 to i32
76  ret i32 %bits
77}
78
; Extracts lane 3 of a <4 x float> and stores it to %out; the check expects the
; extract to write straight to memory.
79;CHECK-LABEL: test6:
80;CHECK: vextractps {{.*}}, (%rdi)
81;CHECK: ret
82define void @test6(<4 x float> %x, float* %out) nounwind {
83  %lane3 = extractelement <4 x float> %x, i32 3
84  store float %lane3, float* %out, align 4
85  ret void
86}
87
; Extracts a lane selected by the runtime index %ind from a <16 x float>.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
88;CHECK-LABEL: test7:
89;CHECK: vmovd
90;CHECK: vpermps %zmm
91;CHECK: ret
92define float @test7(<16 x float> %x, i32 %ind) nounwind {
93  %e = extractelement <16 x float> %x, i32 %ind
94  ret float %e
95}
96
; Extracts a lane selected by the runtime index %ind from an <8 x double>.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
97;CHECK-LABEL: test8:
98;CHECK: vmovq
99;CHECK: vpermpd %zmm
100;CHECK: ret
101define double @test8(<8 x double> %x, i32 %ind) nounwind {
102  %e = extractelement <8 x double> %x, i32 %ind
103  ret double %e
104}
105
; Extracts a lane selected by the runtime index %ind from an <8 x float>.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
106;CHECK-LABEL: test9:
107;CHECK: vmovd
108;CHECK: vpermps %ymm
109;CHECK: ret
110define float @test9(<8 x float> %x, i32 %ind) nounwind {
111  %e = extractelement <8 x float> %x, i32 %ind
112  ret float %e
113}
114
; Extracts a lane selected by the runtime index %ind from a <16 x i32> and
; returns it in a general-purpose register.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
115;CHECK-LABEL: test10:
116;CHECK: vmovd
117;CHECK: vpermd %zmm
118;CHECK: vmovd  %xmm0, %eax
119;CHECK: ret
120define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
121  %e = extractelement <16 x i32> %x, i32 %ind
122  ret i32 %e
123}
124
; Branches on bit 4 of a 16-lane unsigned less-than compare mask: returns %b when
; the bit is set, otherwise %b + %a. Two returns are expected, one per path.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
125;CHECK-LABEL: test11:
126;CHECK: vpcmpltud
127;CHECK: kshiftlw $11
128;CHECK: kshiftrw $15
129;CHECK: testb
130;CHECK: je
131;CHECK: ret
132;CHECK: ret
133define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
134  %cmp_res = icmp ult <16 x i32> %a, %b
135  %ia = extractelement <16 x i1> %cmp_res, i32 4
136  br i1 %ia, label %A, label %B
137  A:
138    ret <16 x i32>%b
139  B:
140   %c = add <16 x i32>%b, %a
141   ret <16 x i32>%c
142}
143
; Selects between %a1 and %b1 based on bit 0 of a signed less-than compare of two
; <16 x i64> vectors.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
144;CHECK-LABEL: test12:
145;CHECK: vpcmpgtq
146;CHECK: kshiftlw $15
147;CHECK: kshiftrw $15
148;CHECK: testb
149;CHECK: ret
150
151define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
152
153  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
154  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
155  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
156  ret i64 %res
157}
158
; Inserts the result of a scalar unsigned compare into lane 0 of a constant
; <16 x i1> mask (lane 1 false, all other lanes true) and returns it as an i16.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
159;CHECK-LABEL: test13:
160;CHECK: cmpl    %esi, %edi
161;CHECK: setb    %al
162;CHECK: andl    $1, %eax
163;CHECK: kmovw   %eax, %k0
164;CHECK: movw    $-4
165;CHECK: korw
166define i16 @test13(i32 %a, i32 %b) {
167  %cmp_res = icmp ult i32 %a, %b
168  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
169  %res = bitcast <16 x i1> %maskv to i16
170  ret i16 %res
171}
172
; Selects between %a1 and %b1 based on bit 4 of a signed less-than compare of two
; <8 x i64> vectors; the two cpu runs expect different mask shift widths.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
173;CHECK-LABEL: test14:
174;CHECK: vpcmpgtq
175;KNL: kshiftlw $11
176;KNL: kshiftrw $15
177;KNL: testb
178;SKX: kshiftlb $3
179;SKX: kshiftrb $7
180;SKX: testb
181;CHECK: ret
182
183define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
184
185  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
186  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
187  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
188  ret i64 %res
189}
190
; Loads an i1, inserts it into lane 10 of an undef <16 x i1> and returns the mask
; as an i16.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
191;CHECK-LABEL: test15:
192;CHECK: movb (%rdi), %al
193;CHECK: andb $1, %al
194;CHECK: movw    $-1, %ax
195;CHECK: cmovew
196define i16 @test15(i1 *%addr) {
197  %x = load i1 , i1 * %addr, align 1
198  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
199  %x2 = bitcast <16 x i1>%x1 to i16
200  ret i16 %x2
201}
202
; Loads an i1 and inserts it into lane 10 of the <16 x i1> mask obtained from the
; i16 argument %a, then returns the updated mask as an i16.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
203;CHECK-LABEL: test16:
204;CHECK: movb (%rdi), %al
205;CHECK: andw $1, %ax
206;CHECK: kmovw
207;CHECK: kshiftlw        $10
208;CHECK: korw
209;CHECK: ret
210define i16 @test16(i1 *%addr, i16 %a) {
211  %x = load i1 , i1 * %addr, align 128
212  %a1 = bitcast i16 %a to <16 x i1>
213  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
214  %x2 = bitcast <16 x i1>%x1 to i16
215  ret i16 %x2
216}
217
; Loads an i1 and inserts it into lane 4 of the <8 x i1> mask obtained from the
; i8 argument %a, then returns the updated mask as an i8.
; The label pattern ends with a colon so it anchors on the function's label
; definition rather than on any line that merely mentions the symbol.
218;CHECK-LABEL: test17:
219;KNL: movb (%rdi), %al
220;KNL: andw $1, %ax
221;KNL: kshiftlw $4
222;KNL: korw
223;SKX: kshiftlb $4
224;SKX: korb
225;CHECK: ret
226define i8 @test17(i1 *%addr, i8 %a) {
227  %x = load i1 , i1 * %addr, align 128
228  %a1 = bitcast i8 %a to <8 x i1>
229  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
230  %x2 = bitcast <8 x i1>%x1 to i8
231  ret i8 %x2
232}
233
; Returns lane 1 of an <8 x i64> and stores lane 3 to %dst.
; Only the skx run has assertions in this function.
234define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
235; SKX-LABEL: extract_v8i64:
236; SKX:       ## BB#0:
237; SKX-NEXT:    vpextrq $1, %xmm0, %rax
238; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
239; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
240; SKX-NEXT:    retq
241  %lane1 = extractelement <8 x i64> %x, i32 1
242  %lane3 = extractelement <8 x i64> %x, i32 3
243  store i64 %lane3, i64* %dst, align 1
244  ret i64 %lane1
245}
246
; Returns lane 1 of a <4 x i64> and stores lane 3 to %dst.
; Only the skx run has assertions in this function.
247define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
248; SKX-LABEL: extract_v4i64:
249; SKX:       ## BB#0:
250; SKX-NEXT:    vpextrq $1, %xmm0, %rax
251; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
252; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
253; SKX-NEXT:    retq
254  %lane1 = extractelement <4 x i64> %x, i32 1
255  %lane3 = extractelement <4 x i64> %x, i32 3
256  store i64 %lane3, i64* %dst, align 1
257  ret i64 %lane1
258}
259
; Returns lane 0 of a <2 x i64> and stores lane 1 to %dst.
; Only the skx run has assertions in this function.
260define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
261; SKX-LABEL: extract_v2i64:
262; SKX:       ## BB#0:
263; SKX-NEXT:    vmovq %xmm0, %rax
264; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
265; SKX-NEXT:    retq
266  %lane0 = extractelement <2 x i64> %x, i32 0
267  %lane1 = extractelement <2 x i64> %x, i32 1
268  store i64 %lane1, i64* %dst, align 1
269  ret i64 %lane0
270}
271
; Returns lane 1 of a <16 x i32> and stores lane 5 to %dst.
; Only the skx run has assertions in this function.
272define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
273; SKX-LABEL: extract_v16i32:
274; SKX:       ## BB#0:
275; SKX-NEXT:    vpextrd $1, %xmm0, %eax
276; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
277; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
278; SKX-NEXT:    retq
279  %lane1 = extractelement <16 x i32> %x, i32 1
280  %lane5 = extractelement <16 x i32> %x, i32 5
281  store i32 %lane5, i32* %dst, align 1
282  ret i32 %lane1
283}
284
; Returns lane 1 of an <8 x i32> and stores lane 5 to %dst.
; Only the skx run has assertions in this function.
285define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
286; SKX-LABEL: extract_v8i32:
287; SKX:       ## BB#0:
288; SKX-NEXT:    vpextrd $1, %xmm0, %eax
289; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
290; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
291; SKX-NEXT:    retq
292  %lane1 = extractelement <8 x i32> %x, i32 1
293  %lane5 = extractelement <8 x i32> %x, i32 5
294  store i32 %lane5, i32* %dst, align 1
295  ret i32 %lane1
296}
297
; Returns lane 1 of a <4 x i32> and stores lane 3 to %dst.
; Only the skx run has assertions in this function.
298define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
299; SKX-LABEL: extract_v4i32:
300; SKX:       ## BB#0:
301; SKX-NEXT:    vpextrd $1, %xmm0, %eax
302; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
303; SKX-NEXT:    retq
304  %lane1 = extractelement <4 x i32> %x, i32 1
305  %lane3 = extractelement <4 x i32> %x, i32 3
306  store i32 %lane3, i32* %dst, align 1
307  ret i32 %lane1
308}
309
; Returns lane 1 of a <32 x i16> and stores lane 9 to %dst.
; Only the skx run has assertions in this function.
310define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
311; SKX-LABEL: extract_v32i16:
312; SKX:       ## BB#0:
313; SKX-NEXT:    vpextrw $1, %xmm0, %eax
314; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
315; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
316; SKX-NEXT:    retq
317  %lane1 = extractelement <32 x i16> %x, i32 1
318  %lane9 = extractelement <32 x i16> %x, i32 9
319  store i16 %lane9, i16* %dst, align 1
320  ret i16 %lane1
321}
322
; Returns lane 1 of a <16 x i16> and stores lane 9 to %dst.
; Only the skx run has assertions in this function.
323define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
324; SKX-LABEL: extract_v16i16:
325; SKX:       ## BB#0:
326; SKX-NEXT:    vpextrw $1, %xmm0, %eax
327; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
328; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
329; SKX-NEXT:    retq
330  %lane1 = extractelement <16 x i16> %x, i32 1
331  %lane9 = extractelement <16 x i16> %x, i32 9
332  store i16 %lane9, i16* %dst, align 1
333  ret i16 %lane1
334}
335
; Returns lane 1 of an <8 x i16> and stores lane 3 to %dst.
; Only the skx run has assertions in this function.
336define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
337; SKX-LABEL: extract_v8i16:
338; SKX:       ## BB#0:
339; SKX-NEXT:    vpextrw $1, %xmm0, %eax
340; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
341; SKX-NEXT:    retq
342  %lane1 = extractelement <8 x i16> %x, i32 1
343  %lane3 = extractelement <8 x i16> %x, i32 3
344  store i16 %lane3, i16* %dst, align 1
345  ret i16 %lane1
346}
347
; Returns lane 1 of a <64 x i8> and stores lane 17 to %dst.
; Only the skx run has assertions in this function.
348define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
349; SKX-LABEL: extract_v64i8:
350; SKX:       ## BB#0:
351; SKX-NEXT:    vpextrb $1, %xmm0, %eax
352; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
353; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
354; SKX-NEXT:    retq
355  %lane1 = extractelement <64 x i8> %x, i32 1
356  %lane17 = extractelement <64 x i8> %x, i32 17
357  store i8 %lane17, i8* %dst, align 1
358  ret i8 %lane1
359}
360
; Returns lane 1 of a <32 x i8> and stores lane 17 to %dst.
; Only the skx run has assertions in this function.
361define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
362; SKX-LABEL: extract_v32i8:
363; SKX:       ## BB#0:
364; SKX-NEXT:    vpextrb $1, %xmm0, %eax
365; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
366; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
367; SKX-NEXT:    retq
368  %lane1 = extractelement <32 x i8> %x, i32 1
369  %lane17 = extractelement <32 x i8> %x, i32 17
370  store i8 %lane17, i8* %dst, align 1
371  ret i8 %lane1
372}
373
; Returns lane 1 of a <16 x i8> and stores lane 3 to %dst.
; Only the skx run has assertions in this function.
374define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
375; SKX-LABEL: extract_v16i8:
376; SKX:       ## BB#0:
377; SKX-NEXT:    vpextrb $1, %xmm0, %eax
378; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
379; SKX-NEXT:    retq
380  %lane1 = extractelement <16 x i8> %x, i32 1
381  %lane3 = extractelement <16 x i8> %x, i32 3
382  store i8 %lane3, i8* %dst, align 1
383  ret i8 %lane1
384}
385
; Inserts an i64 loaded from %ptr into lane 1 and %y into lane 3 of an <8 x i64>.
; Only the skx run has assertions in this function.
386define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
387; SKX-LABEL: insert_v8i64:
388; SKX:       ## BB#0:
389; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
390; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
391; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
392; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
393; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
394; SKX-NEXT:    retq
395  %loaded = load i64, i64* %ptr
396  %step1 = insertelement <8 x i64> %x, i64 %loaded, i32 1
397  %step2 = insertelement <8 x i64> %step1, i64 %y, i32 3
398  ret <8 x i64> %step2
399}
400
; Inserts an i64 loaded from %ptr into lane 1 and %y into lane 3 of a <4 x i64>.
; Only the skx run has assertions in this function.
401define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
402; SKX-LABEL: insert_v4i64:
403; SKX:       ## BB#0:
404; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
405; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
406; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
407; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
408; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
409; SKX-NEXT:    retq
410  %loaded = load i64, i64* %ptr
411  %step1 = insertelement <4 x i64> %x, i64 %loaded, i32 1
412  %step2 = insertelement <4 x i64> %step1, i64 %y, i32 3
413  ret <4 x i64> %step2
414}
415
; Inserts an i64 loaded from %ptr into lane 1 and %y into lane 0 of a <2 x i64>.
; The second insert previously used index 3, which is out of range for a
; two-element vector and leaves the result undefined, so the test pinned
; whatever the backend happened to emit for an invalid input.
; NOTE(review): the expected assembly below was not regenerated with llc; the
; two inserts are matched order-insensitively. Regenerate the assertions with
; the update script and confirm before committing.
416define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
417; SKX-LABEL: insert_v2i64:
418; SKX:       ## BB#0:
419; SKX-DAG:     vpinsrq $1, (%rsi), %xmm0, %xmm0
420; SKX-DAG:     vpinsrq $0, %rdi, %xmm0, %xmm0
421; SKX:         retq
422  %val = load i64, i64* %ptr
423  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
424  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
425  ret <2 x i64> %r2
426}
427
; Inserts an i32 loaded from %ptr into lane 1 and %y into lane 5 of a <16 x i32>.
; Only the skx run has assertions in this function.
428define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
429; SKX-LABEL: insert_v16i32:
430; SKX:       ## BB#0:
431; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
432; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
433; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
434; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
435; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
436; SKX-NEXT:    retq
437  %loaded = load i32, i32* %ptr
438  %step1 = insertelement <16 x i32> %x, i32 %loaded, i32 1
439  %step2 = insertelement <16 x i32> %step1, i32 %y, i32 5
440  ret <16 x i32> %step2
441}
442
; Inserts an i32 loaded from %ptr into lane 1 and %y into lane 5 of an <8 x i32>.
443define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
444; KNL-LABEL: insert_v8i32:
445; KNL:       ## BB#0:
446; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
447; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
448; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
449; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
450; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
451; KNL-NEXT:    retq
452;
453; SKX-LABEL: insert_v8i32:
454; SKX:       ## BB#0:
455; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
456; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
457; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
458; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
459; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
460; SKX-NEXT:    retq
461  %loaded = load i32, i32* %ptr
462  %step1 = insertelement <8 x i32> %x, i32 %loaded, i32 1
463  %step2 = insertelement <8 x i32> %step1, i32 %y, i32 5
464  ret <8 x i32> %step2
465}
466
; Inserts an i32 loaded from %ptr into lane 1 and %y into lane 3 of a <4 x i32>.
467define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
468; KNL-LABEL: insert_v4i32:
469; KNL:       ## BB#0:
470; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
471; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
472; KNL-NEXT:    retq
473;
474; SKX-LABEL: insert_v4i32:
475; SKX:       ## BB#0:
476; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
477; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
478; SKX-NEXT:    retq
479  %loaded = load i32, i32* %ptr
480  %step1 = insertelement <4 x i32> %x, i32 %loaded, i32 1
481  %step2 = insertelement <4 x i32> %step1, i32 %y, i32 3
482  ret <4 x i32> %step2
483}
484
; Inserts an i16 loaded from %ptr into lane 1 and %y into lane 9 of a <32 x i16>.
485define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
486; KNL-LABEL: insert_v32i16:
487; KNL:       ## BB#0:
488; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
489; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
490; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
491; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
492; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
493; KNL-NEXT:    retq
494;
495; SKX-LABEL: insert_v32i16:
496; SKX:       ## BB#0:
497; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
498; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
499; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
500; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
501; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
502; SKX-NEXT:    retq
503  %loaded = load i16, i16* %ptr
504  %step1 = insertelement <32 x i16> %x, i16 %loaded, i32 1
505  %step2 = insertelement <32 x i16> %step1, i16 %y, i32 9
506  ret <32 x i16> %step2
507}
508
; Inserts an i16 loaded from %ptr into lane 1 and %y into lane 9 of a <16 x i16>.
509define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
510; KNL-LABEL: insert_v16i16:
511; KNL:       ## BB#0:
512; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
513; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
514; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
515; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
516; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
517; KNL-NEXT:    retq
518;
519; SKX-LABEL: insert_v16i16:
520; SKX:       ## BB#0:
521; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
522; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
523; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
524; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
525; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
526; SKX-NEXT:    retq
527  %loaded = load i16, i16* %ptr
528  %step1 = insertelement <16 x i16> %x, i16 %loaded, i32 1
529  %step2 = insertelement <16 x i16> %step1, i16 %y, i32 9
530  ret <16 x i16> %step2
531}
532
; Inserts an i16 loaded from %ptr into lane 1 and %y into lane 5 of an <8 x i16>.
533define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
534; KNL-LABEL: insert_v8i16:
535; KNL:       ## BB#0:
536; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
537; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
538; KNL-NEXT:    retq
539;
540; SKX-LABEL: insert_v8i16:
541; SKX:       ## BB#0:
542; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
543; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
544; SKX-NEXT:    retq
545  %loaded = load i16, i16* %ptr
546  %step1 = insertelement <8 x i16> %x, i16 %loaded, i32 1
547  %step2 = insertelement <8 x i16> %step1, i16 %y, i32 5
548  ret <8 x i16> %step2
549}
550
; Inserts an i8 loaded from %ptr into lane 1 and %y into lane 50 of a <64 x i8>.
551define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
552; KNL-LABEL: insert_v64i8:
553; KNL:       ## BB#0:
554; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
555; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
556; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
557; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
558; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
559; KNL-NEXT:    retq
560;
561; SKX-LABEL: insert_v64i8:
562; SKX:       ## BB#0:
563; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
564; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
565; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
566; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
567; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
568; SKX-NEXT:    retq
569  %loaded = load i8, i8* %ptr
570  %step1 = insertelement <64 x i8> %x, i8 %loaded, i32 1
571  %step2 = insertelement <64 x i8> %step1, i8 %y, i32 50
572  ret <64 x i8> %step2
573}
574
; Inserts an i8 loaded from %ptr into lane 1 and %y into lane 17 of a <32 x i8>.
; Only the skx run has assertions in this function.
575define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
576; SKX-LABEL: insert_v32i8:
577; SKX:       ## BB#0:
578; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
579; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
580; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
581; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
582; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
583; SKX-NEXT:    retq
584  %loaded = load i8, i8* %ptr
585  %step1 = insertelement <32 x i8> %x, i8 %loaded, i32 1
586  %step2 = insertelement <32 x i8> %step1, i8 %y, i32 17
587  ret <32 x i8> %step2
588}
589
; Inserts an i8 loaded from %ptr into lane 3 and %y into lane 10 of a <16 x i8>.
590define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
591; KNL-LABEL: insert_v16i8:
592; KNL:       ## BB#0:
593; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
594; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
595; KNL-NEXT:    retq
596;
597; SKX-LABEL: insert_v16i8:
598; SKX:       ## BB#0:
599; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
600; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
601; SKX-NEXT:    retq
602  %loaded = load i8, i8* %ptr
603  %step1 = insertelement <16 x i8> %x, i8 %loaded, i32 3
604  %step2 = insertelement <16 x i8> %step1, i8 %y, i32 10
605  ret <16 x i8> %step2
606}
607
; Inserts %y into lane 1 of an <8 x i64>, i.e. within the lowest 128 bits.
608define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
609; KNL-LABEL: test_insert_128_v8i64:
610; KNL:       ## BB#0:
611; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
612; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
613; KNL-NEXT:    retq
614;
615; SKX-LABEL: test_insert_128_v8i64:
616; SKX:       ## BB#0:
617; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
618; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
619; SKX-NEXT:    retq
620  %updated = insertelement <8 x i64> %x, i64 %y, i32 1
621  ret <8 x i64> %updated
622}
623
; Inserts %y into lane 1 of a <16 x i32>, i.e. within the lowest 128 bits.
624define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
625; KNL-LABEL: test_insert_128_v16i32:
626; KNL:       ## BB#0:
627; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
628; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
629; KNL-NEXT:    retq
630;
631; SKX-LABEL: test_insert_128_v16i32:
632; SKX:       ## BB#0:
633; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
634; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
635; SKX-NEXT:    retq
636  %updated = insertelement <16 x i32> %x, i32 %y, i32 1
637  ret <16 x i32> %updated
638}
639
; Inserts %y into lane 1 of an <8 x double>, i.e. within the lowest 128 bits.
640define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
641; KNL-LABEL: test_insert_128_v8f64:
642; KNL:       ## BB#0:
643; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
644; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
645; KNL-NEXT:    retq
646;
647; SKX-LABEL: test_insert_128_v8f64:
648; SKX:       ## BB#0:
649; SKX-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm1
650; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
651; SKX-NEXT:    retq
652  %updated = insertelement <8 x double> %x, double %y, i32 1
653  ret <8 x double> %updated
654}
655
; Inserts %y into lane 1 of a <16 x float>, i.e. within the lowest 128 bits.
656define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
657; KNL-LABEL: test_insert_128_v16f32:
658; KNL:       ## BB#0:
659; KNL-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
660; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
661; KNL-NEXT:    retq
662;
663; SKX-LABEL: test_insert_128_v16f32:
664; SKX:       ## BB#0:
665; SKX-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
666; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
667; SKX-NEXT:    retq
668  %updated = insertelement <16 x float> %x, float %y, i32 1
669  ret <16 x float> %updated
670}
671
; Inserts %y into lane 10 of a <16 x i16>, i.e. within the upper 128 bits.
672define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
673; KNL-LABEL: test_insert_128_v16i16:
674; KNL:       ## BB#0:
675; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
676; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
677; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
678; KNL-NEXT:    retq
679;
680; SKX-LABEL: test_insert_128_v16i16:
681; SKX:       ## BB#0:
682; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
683; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
684; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
685; SKX-NEXT:    retq
686  %updated = insertelement <16 x i16> %x, i16 %y, i32 10
687  ret <16 x i16> %updated
688}
689
; Inserts %y into lane 20 of a <32 x i8>, i.e. within the upper 128 bits.
690define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
691; KNL-LABEL: test_insert_128_v32i8:
692; KNL:       ## BB#0:
693; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
694; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
695; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
696; KNL-NEXT:    retq
697;
698; SKX-LABEL: test_insert_128_v32i8:
699; SKX:       ## BB#0:
700; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
701; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
702; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
703; SKX-NEXT:    retq
704  %updated = insertelement <32 x i8> %x, i8 %y, i32 20
705  ret <32 x i8> %updated
706}
707