; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s

; Codegen tests for insertelement / extractelement on x86-64 with AVX-512.
; The file is compiled twice, once with -mcpu=knl and once with -mcpu=skx.
; Directives using the shared prefix are verified against both outputs;
; directives using the per-cpu prefixes are verified against one output only.
; Several functions below carry expectations for the skx output only.

; Insert a loaded float into lane 1 and a scalar argument into lane 14 of a
; <16 x float>.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %rrr = load float, float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Insert a loaded double into lane 1 and a scalar argument into lane 6 of a
; <8 x double>.
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %rrr = load double, double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copy lane 4 of a <16 x float> into lane 1 of the same vector.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4 $1
;CHECK: vinsertf32x4 $0
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copy lane 4 of a <8 x i64> into lane 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; KNL-NEXT:    vmovq %xmm1, %rax
; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
; SKX-NEXT:    vmovq %xmm1, %rax
; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extract lane 3 of a <4 x float> and return its bits as an i32.
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extract lane 3 of a <4 x float> and store it through %out.
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; Extract from a <16 x float> using a run-time lane index.
;CHECK-LABEL: test7:
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extract from a <8 x double> using a run-time lane index.
;CHECK-LABEL: test8:
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extract from a <8 x float> using a run-time lane index.
;CHECK-LABEL: test9:
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extract from a <16 x i32> using a run-time lane index.
;CHECK-LABEL: test10:
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branch on lane 4 of a <16 x i1> compare result.
;CHECK-LABEL: test11:
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32>%b
B:
  %c = add <16 x i32>%b, %a
  ret <16 x i32>%c
}

; Select between two scalars on lane 0 of a <16 x i1> compare result.
;CHECK-LABEL: test12:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: ret

define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a scalar compare result into lane 0 of a constant <16 x i1> and
; return the mask as an i16.
;CHECK-LABEL: test13:
;CHECK: cmpl %esi, %edi
;CHECK: setb %al
;CHECK: andl $1, %eax
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Select between two scalars on lane 4 of a <8 x i1> compare result; the
; expected mask shifts differ between the two cpus.
;CHECK-LABEL: test14:
;CHECK: vpcmpgtq
;KNL: kshiftlw $11
;KNL: kshiftrw $15
;KNL: testb
;SKX: kshiftlb $3
;SKX: kshiftrb $7
;SKX: testb
;CHECK: ret

define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a loaded i1 into lane 10 of an undef <16 x i1> and return the mask
; as an i16.
;CHECK-LABEL: test15:
;CHECK: movb (%rdi), %al
;CHECK: andb $1, %al
;CHECK: movw $-1, %ax
;CHECK: cmovew
define i16 @test15(i1 *%addr) {
  %x = load i1 , i1 * %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 into lane 10 of a <16 x i1> obtained from an i16.
;CHECK-LABEL: test16:
;CHECK: movb (%rdi), %al
;CHECK: andw $1, %ax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 into lane 4 of a <8 x i1> obtained from an i8.
;CHECK-LABEL: test17:
;KNL: movb (%rdi), %al
;KNL: andw $1, %ax
;KNL: kshiftlw $4
;KNL: korw
;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; Return lane 1 and store lane 3 of a <8 x i64>.
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Return lane 1 and store lane 3 of a <4 x i64>.
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Return lane 0 and store lane 1 of a <2 x i64>.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Return lane 1 and store lane 5 of a <16 x i32>.
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Return lane 1 and store lane 5 of a <8 x i32>.
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Return lane 1 and store lane 3 of a <4 x i32>.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Return lane 1 and store lane 9 of a <32 x i16>.
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Return lane 1 and store lane 9 of a <16 x i16>.
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Return lane 1 and store lane 3 of a <8 x i16>.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Return lane 1 and store lane 17 of a <64 x i8>.
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Return lane 1 and store lane 17 of a <32 x i8>.
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Return lane 1 and store lane 3 of a <16 x i8>.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Insert a loaded value into lane 1 and %y into lane 3 of a <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

; Insert a loaded value into lane 1 and %y into lane 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; Insert a loaded value into lane 1 of a <2 x i64>, then insert %y.
; FIXME: the second insertelement uses lane index 3, which is out of range
; for a two-element vector, so its result is not a defined value and the
; "$3" immediate expected below only records what llc happens to emit.
; Switch the IR to a valid lane (0 or 1) and regenerate the expected assembly.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
  ret <2 x i64> %r2
}

; Insert a loaded value into lane 1 and %y into lane 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; SKX-LABEL: insert_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

; Insert a loaded value into lane 1 and %y into lane 5 of a <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

; Insert a loaded value into lane 1 and %y into lane 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; Insert a loaded value into lane 1 and %y into lane 9 of a <32 x i16>.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

; Insert a loaded value into lane 1 and %y into lane 9 of a <16 x i16>.
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
  ret <16 x i16> %r2
}

; Insert a loaded value into lane 1 and %y into lane 5 of a <8 x i16>.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

; Insert a loaded value into lane 1 and %y into lane 50 of a <64 x i8>.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
  ret <64 x i8> %r2
}

; Insert a loaded value into lane 1 and %y into lane 17 of a <32 x i8>.
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; SKX-LABEL: insert_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
  ret <32 x i8> %r2
}

; Insert a loaded value into lane 3 and %y into lane 10 of a <16 x i8>.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

; Insert %y into lane 1 of a <8 x i64>.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
; KNL-LABEL: test_insert_128_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %r
}

; Insert %y into lane 1 of a <16 x i32>.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
; KNL-LABEL: test_insert_128_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %r
}

; Insert %y into lane 1 of a <8 x double>.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; KNL-LABEL: test_insert_128_v8f64:
; KNL:       ## BB#0:
; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm1
; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %r
}

; Insert %y into lane 1 of a <16 x float>.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %r
}

; Insert %y into lane 10 of a <16 x i16>.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %r
}

; Insert %y into lane 20 of a <32 x i8>.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
; KNL-LABEL: test_insert_128_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %r
}