; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; Codegen tests for insertelement / extractelement on 128-, 256- and 512-bit
; vectors and on <N x i1> mask vectors. Each function is checked twice, once
; per llc invocation above (-mcpu=knl and -mcpu=skx).

; Inserts a loaded float into lane 1 and the scalar %y into lane 14 of a <16 x float>.
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test1:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; SKX-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %rrr = load float, float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Inserts a loaded double into lane 1 and the scalar %y into lane 6 of an <8 x double>.
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %rrr = load double, double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copies lane 4 of a <16 x float> into lane 1 of the same vector.
define <16 x float> @test3(<16 x float> %x) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copies lane 4 of an <8 x i64> into lane 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; KNL-NEXT:    vmovq %xmm1, %rax
; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
; SKX-NEXT:    vmovq %xmm1, %rax
; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extracts lane 3 of a <4 x float> and returns its bit pattern as an i32.
define i32 @test5(<4 x float> %x) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, %eax
; SKX-NEXT:    retq
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extracts lane 3 of a <4 x float> and stores it through %out.
define void @test6(<4 x float> %x, float* %out) nounwind {
; KNL-LABEL: test6:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; Extracts a lane selected by the run-time index %ind from a <16 x float>.
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extracts a lane selected by the run-time index %ind from an <8 x double>.
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    movslq %edi, %rax
; KNL-NEXT:    vmovq %rax, %xmm1
; KNL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    movslq %edi, %rax
; SKX-NEXT:    vmovq %rax, %xmm1
; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extracts a lane selected by the run-time index %ind from an <8 x float>.
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; SKX-NEXT:    retq
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extracts a lane selected by the run-time index %ind from a <16 x i32>.
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vmovd %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vmovd %xmm0, %eax
; SKX-NEXT:    retq
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branches on lane 4 of the <16 x i1> result of an unsigned-less-than compare.
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB10_2
; KNL-NEXT:  ## BB#1: ## %A
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
; KNL-NEXT:  LBB10_2: ## %B
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftlw $11, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    je LBB10_2
; SKX-NEXT:  ## BB#1: ## %A
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB10_2: ## %B
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32>%b
B:
  %c = add <16 x i32>%b, %a
  ret <16 x i32>%c
}

; Selects between %a1 and %b1 on lane 0 of a <16 x i64> signed-less-than compare.
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; KNL-NEXT:    kunpckbw %k0, %k1, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; SKX-NEXT:    kunpckbw %k0, %k1, %k0
; SKX-NEXT:    kshiftlw $15, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Inserts a scalar compare result into lane 0 of a constant <16 x i1> and returns it as i16.
define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    movw $-4, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    movw $-4, %ax
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Selects between %a1 and %b1 on lane 4 of an <8 x i64> signed-less-than compare.
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT:    kshiftlb $3, %k0, %k0
; SKX-NEXT:    kshiftrb $7, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Inserts a loaded i1 into lane 10 of an undef <16 x i1> and returns it as i16.
define i16 @test15(i1 *%addr) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    movb (%rdi), %al
; KNL-NEXT:    xorl %ecx, %ecx
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    movw $-1, %ax
; KNL-NEXT:    cmovew %cx, %ax
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    movb (%rdi), %al
; SKX-NEXT:    xorl %ecx, %ecx
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    movw $-1, %ax
; SKX-NEXT:    cmovew %cx, %ax
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Inserts a loaded i1 into lane 10 of the <16 x i1> obtained by bitcasting %a.
define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $10, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlw $10, %k0, %k0
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Inserts a loaded i1 into lane 4 of the <8 x i1> obtained by bitcasting %a.
define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $4, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovb %esi, %k1
; SKX-NEXT:    kshiftlb $4, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; Returns lane 1 of an <8 x i64> and stores lane 3 through %dst.
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Returns lane 1 of a <4 x i64> and stores lane 3 through %dst.
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Returns lane 0 of a <2 x i64> and stores lane 1 through %dst.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v2i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovq %xmm0, %rax
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Returns lane 1 of a <16 x i32> and stores lane 5 through %dst.
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of an <8 x i32> and stores lane 5 through %dst.
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of a <4 x i32> and stores lane 3 through %dst.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vpextrd $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of a <32 x i16> and stores lane 9 through %dst.
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of a <16 x i16> and stores lane 9 through %dst.
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of an <8 x i16> and stores lane 3 through %dst.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vpextrw $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of a <64 x i8> and stores lane 17 through %dst.
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Returns lane 1 of a <32 x i8> and stores lane 17 through %dst.
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Returns lane 1 of a <16 x i8> and stores lane 3 through %dst.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vpextrb $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Inserts a loaded i64 into lane 1 and %y into lane 3 of an <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

; Inserts a loaded i64 into lane 1 and %y into lane 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; Inserts a loaded i64 into lane 1 of a <2 x i64>, then inserts %y at index 3.
; NOTE(review): index 3 is out of range for a 2-lane vector, and LangRef does
; not give an out-of-range insertelement a defined result, so the `vpinsrq $3`
; assertions below pin incidental codegen. If the index is corrected (lane 0
; looks intended - confirm), regenerate the assertions with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v2i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
  ret <2 x i64> %r2
}

; Inserts a loaded i32 into lane 1 and %y into lane 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

; Inserts a loaded i32 into lane 1 and %y into lane 5 of an <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

; Inserts a loaded i32 into lane 1 and %y into lane 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; Inserts a loaded i16 into lane 1 and %y into lane 9 of a <32 x i16>.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

; Inserts a loaded i16 into lane 1 and %y into lane 9 of a <16 x i16>.
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
  ret <16 x i16> %r2
}

; Inserts a loaded i16 into lane 1 and %y into lane 5 of an <8 x i16>.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

; Inserts a loaded i8 into lane 1 and %y into lane 50 of a <64 x i8>.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
  ret <64 x i8> %r2
}

; Inserts a loaded i8 into lane 1 and %y into lane 17 of a <32 x i8>.
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
  ret <32 x i8> %r2
}

; Inserts a loaded i8 into lane 3 and %y into lane 10 of a <16 x i8>.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

; Inserts %y into lane 1 of an <8 x i64>.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
; KNL-LABEL: test_insert_128_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %r
}

; Inserts %y into lane 1 of a <16 x i32>.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
; KNL-LABEL: test_insert_128_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %r
}

; Inserts %y into lane 1 of an <8 x double>.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; KNL-LABEL: test_insert_128_v8f64:
; KNL:       ## BB#0:
; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %r
}

; Inserts %y into lane 1 of a <16 x float>.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %r
}

; Inserts %y into lane 10 of a <16 x i16>.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %r
}

; Inserts %y into lane 20 of a <32 x i8>.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
; KNL-LABEL: test_insert_128_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %r
}