1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=KNL %s 3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_ONLY %s 4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+avx512vbmi | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_VBMI %s 5 6define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { 7; CHECK-LABEL: test1: 8; CHECK: ## %bb.0: 9; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3] 10; CHECK-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2 11; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0 12; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 13; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 14; CHECK-NEXT: retq 15 %rrr = load float, float* %br 16 %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1 17 %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14 18 ret <16 x float> %rrr3 19} 20 21define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { 22; CHECK-LABEL: test2: 23; CHECK: ## %bb.0: 24; CHECK-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0] 25; CHECK-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2 26; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0 27; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 28; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 29; CHECK-NEXT: retq 30 %rrr = load double, double* %br 31 %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1 32 %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6 33 ret <8 x double> %rrr3 34} 35 36define <16 x float> @test3(<16 x float> %x) nounwind { 37; CHECK-LABEL: test3: 38; CHECK: ## %bb.0: 39; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 40; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] 41; 
CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 42; CHECK-NEXT: retq 43 %eee = extractelement <16 x float> %x, i32 4 44 %rrr2 = insertelement <16 x float> %x, float %eee, i32 1 45 ret <16 x float> %rrr2 46} 47 48define <8 x i64> @test4(<8 x i64> %x) nounwind { 49; CHECK-LABEL: test4: 50; CHECK: ## %bb.0: 51; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 52; CHECK-NEXT: vmovq %xmm1, %rax 53; CHECK-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 54; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 55; CHECK-NEXT: retq 56 %eee = extractelement <8 x i64> %x, i32 4 57 %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1 58 ret <8 x i64> %rrr2 59} 60 61define i32 @test5(<4 x float> %x) nounwind { 62; CHECK-LABEL: test5: 63; CHECK: ## %bb.0: 64; CHECK-NEXT: vextractps $3, %xmm0, %eax 65; CHECK-NEXT: retq 66 %ef = extractelement <4 x float> %x, i32 3 67 %ei = bitcast float %ef to i32 68 ret i32 %ei 69} 70 71define void @test6(<4 x float> %x, float* %out) nounwind { 72; CHECK-LABEL: test6: 73; CHECK: ## %bb.0: 74; CHECK-NEXT: vextractps $3, %xmm0, (%rdi) 75; CHECK-NEXT: retq 76 %ef = extractelement <4 x float> %x, i32 3 77 store float %ef, float* %out, align 4 78 ret void 79} 80 81define float @test7(<16 x float> %x, i32 %ind) nounwind { 82; CHECK-LABEL: test7: 83; CHECK: ## %bb.0: 84; CHECK-NEXT: pushq %rbp 85; CHECK-NEXT: movq %rsp, %rbp 86; CHECK-NEXT: andq $-64, %rsp 87; CHECK-NEXT: subq $128, %rsp 88; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 89; CHECK-NEXT: vmovaps %zmm0, (%rsp) 90; CHECK-NEXT: andl $15, %edi 91; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 92; CHECK-NEXT: movq %rbp, %rsp 93; CHECK-NEXT: popq %rbp 94; CHECK-NEXT: vzeroupper 95; CHECK-NEXT: retq 96 %e = extractelement <16 x float> %x, i32 %ind 97 ret float %e 98} 99 100define double @test8(<8 x double> %x, i32 %ind) nounwind { 101; CHECK-LABEL: test8: 102; CHECK: ## %bb.0: 103; CHECK-NEXT: pushq %rbp 104; CHECK-NEXT: movq %rsp, %rbp 105; CHECK-NEXT: andq $-64, %rsp 106; CHECK-NEXT: subq $128, 
%rsp 107; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 108; CHECK-NEXT: vmovaps %zmm0, (%rsp) 109; CHECK-NEXT: andl $7, %edi 110; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 111; CHECK-NEXT: movq %rbp, %rsp 112; CHECK-NEXT: popq %rbp 113; CHECK-NEXT: vzeroupper 114; CHECK-NEXT: retq 115 %e = extractelement <8 x double> %x, i32 %ind 116 ret double %e 117} 118 119define float @test9(<8 x float> %x, i32 %ind) nounwind { 120; CHECK-LABEL: test9: 121; CHECK: ## %bb.0: 122; CHECK-NEXT: pushq %rbp 123; CHECK-NEXT: movq %rsp, %rbp 124; CHECK-NEXT: andq $-32, %rsp 125; CHECK-NEXT: subq $64, %rsp 126; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 127; CHECK-NEXT: vmovaps %ymm0, (%rsp) 128; CHECK-NEXT: andl $7, %edi 129; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 130; CHECK-NEXT: movq %rbp, %rsp 131; CHECK-NEXT: popq %rbp 132; CHECK-NEXT: vzeroupper 133; CHECK-NEXT: retq 134 %e = extractelement <8 x float> %x, i32 %ind 135 ret float %e 136} 137 138define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { 139; CHECK-LABEL: test10: 140; CHECK: ## %bb.0: 141; CHECK-NEXT: pushq %rbp 142; CHECK-NEXT: movq %rsp, %rbp 143; CHECK-NEXT: andq $-64, %rsp 144; CHECK-NEXT: subq $128, %rsp 145; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 146; CHECK-NEXT: vmovaps %zmm0, (%rsp) 147; CHECK-NEXT: andl $15, %edi 148; CHECK-NEXT: movl (%rsp,%rdi,4), %eax 149; CHECK-NEXT: movq %rbp, %rsp 150; CHECK-NEXT: popq %rbp 151; CHECK-NEXT: vzeroupper 152; CHECK-NEXT: retq 153 %e = extractelement <16 x i32> %x, i32 %ind 154 ret i32 %e 155} 156 157define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { 158; KNL-LABEL: test11: 159; KNL: ## %bb.0: 160; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 161; KNL-NEXT: kshiftrw $4, %k0, %k0 162; KNL-NEXT: kmovw %k0, %eax 163; KNL-NEXT: testb $1, %al 164; KNL-NEXT: je LBB10_2 165; KNL-NEXT: ## %bb.1: ## %A 166; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 167; KNL-NEXT: retq 168; KNL-NEXT: LBB10_2: ## %B 169; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0 170; 
KNL-NEXT: retq 171; 172; SKX-LABEL: test11: 173; SKX: ## %bb.0: 174; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 175; SKX-NEXT: kshiftrw $4, %k0, %k0 176; SKX-NEXT: kmovd %k0, %eax 177; SKX-NEXT: testb $1, %al 178; SKX-NEXT: je LBB10_2 179; SKX-NEXT: ## %bb.1: ## %A 180; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 181; SKX-NEXT: retq 182; SKX-NEXT: LBB10_2: ## %B 183; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0 184; SKX-NEXT: retq 185 %cmp_res = icmp ult <16 x i32> %a, %b 186 %ia = extractelement <16 x i1> %cmp_res, i32 4 187 br i1 %ia, label %A, label %B 188 A: 189 ret <16 x i32>%b 190 B: 191 %c = add <16 x i32>%b, %a 192 ret <16 x i32>%c 193} 194 195define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { 196; KNL-LABEL: test12: 197; KNL: ## %bb.0: 198; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 199; KNL-NEXT: kmovw %k0, %eax 200; KNL-NEXT: testb $1, %al 201; KNL-NEXT: cmoveq %rsi, %rdi 202; KNL-NEXT: movq %rdi, %rax 203; KNL-NEXT: vzeroupper 204; KNL-NEXT: retq 205; 206; SKX-LABEL: test12: 207; SKX: ## %bb.0: 208; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 209; SKX-NEXT: kmovd %k0, %eax 210; SKX-NEXT: testb $1, %al 211; SKX-NEXT: cmoveq %rsi, %rdi 212; SKX-NEXT: movq %rdi, %rax 213; SKX-NEXT: vzeroupper 214; SKX-NEXT: retq 215 %cmpvector_func.i = icmp slt <16 x i64> %a, %b 216 %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0 217 %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1 218 ret i64 %res 219} 220 221define i16 @test13(i32 %a, i32 %b) { 222; KNL-LABEL: test13: 223; KNL: ## %bb.0: 224; KNL-NEXT: cmpl %esi, %edi 225; KNL-NEXT: setb %al 226; KNL-NEXT: movw $-4, %cx 227; KNL-NEXT: kmovw %ecx, %k0 228; KNL-NEXT: kshiftrw $1, %k0, %k0 229; KNL-NEXT: kshiftlw $1, %k0, %k0 230; KNL-NEXT: andl $1, %eax 231; KNL-NEXT: kmovw %eax, %k1 232; KNL-NEXT: korw %k1, %k0, %k0 233; KNL-NEXT: kmovw %k0, %eax 234; KNL-NEXT: ## kill: def $ax killed $ax killed $eax 235; KNL-NEXT: retq 236; 237; SKX-LABEL: test13: 238; SKX: ## %bb.0: 239; SKX-NEXT: cmpl %esi, %edi 240; 
SKX-NEXT: setb %al 241; SKX-NEXT: movw $-4, %cx 242; SKX-NEXT: kmovd %ecx, %k0 243; SKX-NEXT: kshiftrw $1, %k0, %k0 244; SKX-NEXT: kshiftlw $1, %k0, %k0 245; SKX-NEXT: andl $1, %eax 246; SKX-NEXT: kmovw %eax, %k1 247; SKX-NEXT: korw %k1, %k0, %k0 248; SKX-NEXT: kmovd %k0, %eax 249; SKX-NEXT: ## kill: def $ax killed $ax killed $eax 250; SKX-NEXT: retq 251 %cmp_res = icmp ult i32 %a, %b 252 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0 253 %res = bitcast <16 x i1> %maskv to i16 254 ret i16 %res 255} 256 257define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { 258; KNL-LABEL: test14: 259; KNL: ## %bb.0: 260; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 261; KNL-NEXT: kshiftrw $4, %k0, %k0 262; KNL-NEXT: kmovw %k0, %eax 263; KNL-NEXT: testb $1, %al 264; KNL-NEXT: cmoveq %rsi, %rdi 265; KNL-NEXT: movq %rdi, %rax 266; KNL-NEXT: vzeroupper 267; KNL-NEXT: retq 268; 269; SKX-LABEL: test14: 270; SKX: ## %bb.0: 271; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 272; SKX-NEXT: kshiftrw $4, %k0, %k0 273; SKX-NEXT: kmovd %k0, %eax 274; SKX-NEXT: testb $1, %al 275; SKX-NEXT: cmoveq %rsi, %rdi 276; SKX-NEXT: movq %rdi, %rax 277; SKX-NEXT: vzeroupper 278; SKX-NEXT: retq 279 %cmpvector_func.i = icmp slt <8 x i64> %a, %b 280 %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4 281 %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1 282 ret i64 %res 283} 284 285define i16 @test15(i1 *%addr) { 286; CHECK-LABEL: test15: 287; CHECK: ## %bb.0: 288; CHECK-NEXT: xorl %ecx, %ecx 289; CHECK-NEXT: cmpb $0, (%rdi) 290; CHECK-NEXT: movl $65535, %eax ## imm = 0xFFFF 291; CHECK-NEXT: cmovel %ecx, %eax 292; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 293; CHECK-NEXT: retq 294 %x = load i1 , i1 * %addr, align 1 295 %x1 = insertelement <16 x i1> undef, i1 %x, i32 10 296 %x2 = bitcast <16 x i1>%x1 to i16 297 ret i16 %x2 
298} 299 300define i16 @test16(i1 *%addr, i16 %a) { 301; KNL-LABEL: test16: 302; KNL: ## %bb.0: 303; KNL-NEXT: movb (%rdi), %al 304; KNL-NEXT: kmovw %esi, %k0 305; KNL-NEXT: kmovw %eax, %k1 306; KNL-NEXT: kshiftrw $10, %k0, %k2 307; KNL-NEXT: kxorw %k1, %k2, %k1 308; KNL-NEXT: kshiftlw $15, %k1, %k1 309; KNL-NEXT: kshiftrw $5, %k1, %k1 310; KNL-NEXT: kxorw %k1, %k0, %k0 311; KNL-NEXT: kmovw %k0, %eax 312; KNL-NEXT: ## kill: def $ax killed $ax killed $eax 313; KNL-NEXT: retq 314; 315; SKX-LABEL: test16: 316; SKX: ## %bb.0: 317; SKX-NEXT: kmovb (%rdi), %k0 318; SKX-NEXT: kmovd %esi, %k1 319; SKX-NEXT: kshiftrw $10, %k1, %k2 320; SKX-NEXT: kxorw %k0, %k2, %k0 321; SKX-NEXT: kshiftlw $15, %k0, %k0 322; SKX-NEXT: kshiftrw $5, %k0, %k0 323; SKX-NEXT: kxorw %k0, %k1, %k0 324; SKX-NEXT: kmovd %k0, %eax 325; SKX-NEXT: ## kill: def $ax killed $ax killed $eax 326; SKX-NEXT: retq 327 %x = load i1 , i1 * %addr, align 128 328 %a1 = bitcast i16 %a to <16 x i1> 329 %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10 330 %x2 = bitcast <16 x i1>%x1 to i16 331 ret i16 %x2 332} 333 334define i8 @test17(i1 *%addr, i8 %a) { 335; KNL-LABEL: test17: 336; KNL: ## %bb.0: 337; KNL-NEXT: movb (%rdi), %al 338; KNL-NEXT: kmovw %esi, %k0 339; KNL-NEXT: kmovw %eax, %k1 340; KNL-NEXT: kshiftrw $4, %k0, %k2 341; KNL-NEXT: kxorw %k1, %k2, %k1 342; KNL-NEXT: kshiftlw $15, %k1, %k1 343; KNL-NEXT: kshiftrw $11, %k1, %k1 344; KNL-NEXT: kxorw %k1, %k0, %k0 345; KNL-NEXT: kmovw %k0, %eax 346; KNL-NEXT: ## kill: def $al killed $al killed $eax 347; KNL-NEXT: retq 348; 349; SKX-LABEL: test17: 350; SKX: ## %bb.0: 351; SKX-NEXT: kmovb (%rdi), %k0 352; SKX-NEXT: kmovd %esi, %k1 353; SKX-NEXT: kshiftrb $4, %k1, %k2 354; SKX-NEXT: kxorb %k0, %k2, %k0 355; SKX-NEXT: kshiftlb $7, %k0, %k0 356; SKX-NEXT: kshiftrb $3, %k0, %k0 357; SKX-NEXT: kxorb %k0, %k1, %k0 358; SKX-NEXT: kmovd %k0, %eax 359; SKX-NEXT: ## kill: def $al killed $al killed $eax 360; SKX-NEXT: retq 361 %x = load i1 , i1 * %addr, align 128 362 %a1 = 
bitcast i8 %a to <8 x i1> 363 %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4 364 %x2 = bitcast <8 x i1>%x1 to i8 365 ret i8 %x2 366} 367 368define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) { 369; CHECK-LABEL: extract_v8i64: 370; CHECK: ## %bb.0: 371; CHECK-NEXT: vpextrq $1, %xmm0, %rax 372; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 373; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi) 374; CHECK-NEXT: vzeroupper 375; CHECK-NEXT: retq 376 %r1 = extractelement <8 x i64> %x, i32 1 377 %r2 = extractelement <8 x i64> %x, i32 3 378 store i64 %r2, i64* %dst, align 1 379 ret i64 %r1 380} 381 382define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) { 383; CHECK-LABEL: extract_v4i64: 384; CHECK: ## %bb.0: 385; CHECK-NEXT: vpextrq $1, %xmm0, %rax 386; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 387; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi) 388; CHECK-NEXT: vzeroupper 389; CHECK-NEXT: retq 390 %r1 = extractelement <4 x i64> %x, i32 1 391 %r2 = extractelement <4 x i64> %x, i32 3 392 store i64 %r2, i64* %dst, align 1 393 ret i64 %r1 394} 395 396define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) { 397; CHECK-LABEL: extract_v2i64: 398; CHECK: ## %bb.0: 399; CHECK-NEXT: vmovq %xmm0, %rax 400; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi) 401; CHECK-NEXT: retq 402 %r1 = extractelement <2 x i64> %x, i32 0 403 %r2 = extractelement <2 x i64> %x, i32 1 404 store i64 %r2, i64* %dst, align 1 405 ret i64 %r1 406} 407 408define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) { 409; CHECK-LABEL: extract_v16i32: 410; CHECK: ## %bb.0: 411; CHECK-NEXT: vextractps $1, %xmm0, %eax 412; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 413; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) 414; CHECK-NEXT: vzeroupper 415; CHECK-NEXT: retq 416 %r1 = extractelement <16 x i32> %x, i32 1 417 %r2 = extractelement <16 x i32> %x, i32 5 418 store i32 %r2, i32* %dst, align 1 419 ret i32 %r1 420} 421 422define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) { 423; CHECK-LABEL: extract_v8i32: 424; CHECK: ## %bb.0: 425; CHECK-NEXT: vextractps $1, %xmm0, 
%eax 426; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 427; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) 428; CHECK-NEXT: vzeroupper 429; CHECK-NEXT: retq 430 %r1 = extractelement <8 x i32> %x, i32 1 431 %r2 = extractelement <8 x i32> %x, i32 5 432 store i32 %r2, i32* %dst, align 1 433 ret i32 %r1 434} 435 436define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) { 437; CHECK-LABEL: extract_v4i32: 438; CHECK: ## %bb.0: 439; CHECK-NEXT: vextractps $1, %xmm0, %eax 440; CHECK-NEXT: vextractps $3, %xmm0, (%rdi) 441; CHECK-NEXT: retq 442 %r1 = extractelement <4 x i32> %x, i32 1 443 %r2 = extractelement <4 x i32> %x, i32 3 444 store i32 %r2, i32* %dst, align 1 445 ret i32 %r1 446} 447 448define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) { 449; CHECK-LABEL: extract_v32i16: 450; CHECK: ## %bb.0: 451; CHECK-NEXT: vpextrw $1, %xmm0, %eax 452; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 453; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi) 454; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 455; CHECK-NEXT: vzeroupper 456; CHECK-NEXT: retq 457 %r1 = extractelement <32 x i16> %x, i32 1 458 %r2 = extractelement <32 x i16> %x, i32 9 459 store i16 %r2, i16* %dst, align 1 460 ret i16 %r1 461} 462 463define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) { 464; CHECK-LABEL: extract_v16i16: 465; CHECK: ## %bb.0: 466; CHECK-NEXT: vpextrw $1, %xmm0, %eax 467; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 468; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi) 469; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 470; CHECK-NEXT: vzeroupper 471; CHECK-NEXT: retq 472 %r1 = extractelement <16 x i16> %x, i32 1 473 %r2 = extractelement <16 x i16> %x, i32 9 474 store i16 %r2, i16* %dst, align 1 475 ret i16 %r1 476} 477 478define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) { 479; CHECK-LABEL: extract_v8i16: 480; CHECK: ## %bb.0: 481; CHECK-NEXT: vpextrw $1, %xmm0, %eax 482; CHECK-NEXT: vpextrw $3, %xmm0, (%rdi) 483; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 484; CHECK-NEXT: retq 485 %r1 = extractelement <8 x i16> 
%x, i32 1 486 %r2 = extractelement <8 x i16> %x, i32 3 487 store i16 %r2, i16* %dst, align 1 488 ret i16 %r1 489} 490 491define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) { 492; CHECK-LABEL: extract_v64i8: 493; CHECK: ## %bb.0: 494; CHECK-NEXT: vpextrb $1, %xmm0, %eax 495; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 496; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi) 497; CHECK-NEXT: ## kill: def $al killed $al killed $eax 498; CHECK-NEXT: vzeroupper 499; CHECK-NEXT: retq 500 %r1 = extractelement <64 x i8> %x, i32 1 501 %r2 = extractelement <64 x i8> %x, i32 17 502 store i8 %r2, i8* %dst, align 1 503 ret i8 %r1 504} 505 506define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) { 507; CHECK-LABEL: extract_v32i8: 508; CHECK: ## %bb.0: 509; CHECK-NEXT: vpextrb $1, %xmm0, %eax 510; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 511; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi) 512; CHECK-NEXT: ## kill: def $al killed $al killed $eax 513; CHECK-NEXT: vzeroupper 514; CHECK-NEXT: retq 515 %r1 = extractelement <32 x i8> %x, i32 1 516 %r2 = extractelement <32 x i8> %x, i32 17 517 store i8 %r2, i8* %dst, align 1 518 ret i8 %r1 519} 520 521define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) { 522; CHECK-LABEL: extract_v16i8: 523; CHECK: ## %bb.0: 524; CHECK-NEXT: vpextrb $1, %xmm0, %eax 525; CHECK-NEXT: vpextrb $3, %xmm0, (%rdi) 526; CHECK-NEXT: ## kill: def $al killed $al killed $eax 527; CHECK-NEXT: retq 528 %r1 = extractelement <16 x i8> %x, i32 1 529 %r2 = extractelement <16 x i8> %x, i32 3 530 store i8 %r2, i8* %dst, align 1 531 ret i8 %r1 532} 533 534define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) { 535; CHECK-LABEL: insert_v8i64: 536; CHECK: ## %bb.0: 537; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1 538; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 539; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 540; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 541; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0 542; CHECK-NEXT: retq 543 %val = load i64, i64* %ptr 544 %r1 = insertelement <8 x i64> 
%x, i64 %val, i32 1 545 %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3 546 ret <8 x i64> %r2 547} 548 549define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) { 550; CHECK-LABEL: insert_v4i64: 551; CHECK: ## %bb.0: 552; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1 553; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] 554; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 555; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 556; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 557; CHECK-NEXT: retq 558 %val = load i64, i64* %ptr 559 %r1 = insertelement <4 x i64> %x, i64 %val, i32 1 560 %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3 561 ret <4 x i64> %r2 562} 563 564define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) { 565; CHECK-LABEL: insert_v2i64: 566; CHECK: ## %bb.0: 567; CHECK-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0 568; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0 569; CHECK-NEXT: retq 570 %val = load i64, i64* %ptr 571 %r1 = insertelement <2 x i64> %x, i64 %val, i32 1 572 %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0 573 ret <2 x i64> %r2 574} 575 576define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) { 577; CHECK-LABEL: insert_v16i32: 578; CHECK: ## %bb.0: 579; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1 580; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 581; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 582; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 583; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0 584; CHECK-NEXT: retq 585 %val = load i32, i32* %ptr 586 %r1 = insertelement <16 x i32> %x, i32 %val, i32 1 587 %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5 588 ret <16 x i32> %r2 589} 590 591define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) { 592; CHECK-LABEL: insert_v8i32: 593; CHECK: ## %bb.0: 594; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1 595; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] 596; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 597; CHECK-NEXT: vpinsrd $1, %edi, 
%xmm0, %xmm0 598; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 599; CHECK-NEXT: retq 600 %val = load i32, i32* %ptr 601 %r1 = insertelement <8 x i32> %x, i32 %val, i32 1 602 %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5 603 ret <8 x i32> %r2 604} 605 606define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) { 607; CHECK-LABEL: insert_v4i32: 608; CHECK: ## %bb.0: 609; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm0 610; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 611; CHECK-NEXT: retq 612 %val = load i32, i32* %ptr 613 %r1 = insertelement <4 x i32> %x, i32 %val, i32 1 614 %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3 615 ret <4 x i32> %r2 616} 617 618define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) { 619; KNL-LABEL: insert_v32i16: 620; KNL: ## %bb.0: 621; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm2 622; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7] 623; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 624; KNL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 625; KNL-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 626; KNL-NEXT: retq 627; 628; SKX-LABEL: insert_v32i16: 629; SKX: ## %bb.0: 630; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1 631; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 632; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 633; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 634; SKX-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0 635; SKX-NEXT: retq 636 %val = load i16, i16* %ptr 637 %r1 = insertelement <32 x i16> %x, i16 %val, i32 1 638 %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9 639 ret <32 x i16> %r2 640} 641 642define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) { 643; CHECK-LABEL: insert_v16i16: 644; CHECK: ## %bb.0: 645; CHECK-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1 646; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] 647; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 648; CHECK-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 649; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 650; CHECK-NEXT: retq 651 %val = load i16, 
i16* %ptr 652 %r1 = insertelement <16 x i16> %x, i16 %val, i32 1 653 %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9 654 ret <16 x i16> %r2 655} 656 657define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) { 658; CHECK-LABEL: insert_v8i16: 659; CHECK: ## %bb.0: 660; CHECK-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm0 661; CHECK-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 662; CHECK-NEXT: retq 663 %val = load i16, i16* %ptr 664 %r1 = insertelement <8 x i16> %x, i16 %val, i32 1 665 %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5 666 ret <8 x i16> %r2 667} 668 669define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) { 670; KNL-LABEL: insert_v64i8: 671; KNL: ## %bb.0: 672; KNL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm2 673; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] 674; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 675; KNL-NEXT: vpinsrb $2, %edi, %xmm2, %xmm2 676; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 677; KNL-NEXT: retq 678; 679; SKX-LABEL: insert_v64i8: 680; SKX: ## %bb.0: 681; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1 682; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 683; SKX-NEXT: vextracti32x4 $3, %zmm0, %xmm0 684; SKX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 685; SKX-NEXT: vinserti32x4 $3, %xmm0, %zmm1, %zmm0 686; SKX-NEXT: retq 687 %val = load i8, i8* %ptr 688 %r1 = insertelement <64 x i8> %x, i8 %val, i32 1 689 %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50 690 ret <64 x i8> %r2 691} 692 693define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) { 694; CHECK-LABEL: insert_v32i8: 695; CHECK: ## %bb.0: 696; CHECK-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1 697; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] 698; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 699; CHECK-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 700; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 701; CHECK-NEXT: retq 702 %val = load i8, i8* %ptr 703 %r1 = insertelement <32 x i8> %x, i8 %val, i32 1 704 %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17 705 ret 
<32 x i8> %r2 706} 707 708define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) { 709; CHECK-LABEL: insert_v16i8: 710; CHECK: ## %bb.0: 711; CHECK-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 712; CHECK-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 713; CHECK-NEXT: retq 714 %val = load i8, i8* %ptr 715 %r1 = insertelement <16 x i8> %x, i8 %val, i32 3 716 %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10 717 ret <16 x i8> %r2 718} 719 720define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) { 721; CHECK-LABEL: test_insert_128_v8i64: 722; CHECK: ## %bb.0: 723; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1 724; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 725; CHECK-NEXT: retq 726 %r = insertelement <8 x i64> %x, i64 %y, i32 1 727 ret <8 x i64> %r 728} 729 730define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) { 731; CHECK-LABEL: test_insert_128_v16i32: 732; CHECK: ## %bb.0: 733; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1 734; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 735; CHECK-NEXT: retq 736 %r = insertelement <16 x i32> %x, i32 %y, i32 1 737 ret <16 x i32> %r 738} 739 740define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) { 741; CHECK-LABEL: test_insert_128_v8f64: 742; CHECK: ## %bb.0: 743; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0] 744; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 745; CHECK-NEXT: retq 746 %r = insertelement <8 x double> %x, double %y, i32 1 747 ret <8 x double> %r 748} 749 750define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) { 751; CHECK-LABEL: test_insert_128_v16f32: 752; CHECK: ## %bb.0: 753; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] 754; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 755; CHECK-NEXT: retq 756 %r = insertelement <16 x float> %x, float %y, i32 1 757 ret <16 x float> %r 758} 759 760define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) { 761; CHECK-LABEL: test_insert_128_v16i16: 762; CHECK: ## %bb.0: 763; CHECK-NEXT: 
vextracti128 $1, %ymm0, %xmm1 764; CHECK-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1 765; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 766; CHECK-NEXT: retq 767 %r = insertelement <16 x i16> %x, i16 %y, i32 10 768 ret <16 x i16> %r 769} 770 771define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) { 772; CHECK-LABEL: test_insert_128_v32i8: 773; CHECK: ## %bb.0: 774; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 775; CHECK-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 776; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 777; CHECK-NEXT: retq 778 %r = insertelement <32 x i8> %x, i8 %y, i32 20 779 ret <32 x i8> %r 780} 781 782define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) { 783; KNL-LABEL: test_insertelement_v32i1: 784; KNL: ## %bb.0: 785; KNL-NEXT: cmpl %esi, %edi 786; KNL-NEXT: setb %al 787; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 788; KNL-NEXT: kmovw %k0, %ecx 789; KNL-NEXT: shll $16, %ecx 790; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 791; KNL-NEXT: kshiftrw $4, %k0, %k1 792; KNL-NEXT: kmovw %eax, %k2 793; KNL-NEXT: kxorw %k2, %k1, %k1 794; KNL-NEXT: kshiftlw $15, %k1, %k1 795; KNL-NEXT: kshiftrw $11, %k1, %k1 796; KNL-NEXT: kxorw %k1, %k0, %k0 797; KNL-NEXT: kmovw %k0, %eax 798; KNL-NEXT: orl %ecx, %eax 799; KNL-NEXT: vzeroupper 800; KNL-NEXT: retq 801; 802; SKX-LABEL: test_insertelement_v32i1: 803; SKX: ## %bb.0: 804; SKX-NEXT: cmpl %esi, %edi 805; SKX-NEXT: setb %al 806; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0 807; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1 808; SKX-NEXT: kunpckwd %k0, %k1, %k0 809; SKX-NEXT: kshiftrd $4, %k0, %k1 810; SKX-NEXT: kmovd %eax, %k2 811; SKX-NEXT: kxord %k2, %k1, %k1 812; SKX-NEXT: kshiftld $31, %k1, %k1 813; SKX-NEXT: kshiftrd $27, %k1, %k1 814; SKX-NEXT: kxord %k1, %k0, %k0 815; SKX-NEXT: kmovd %k0, %eax 816; SKX-NEXT: vzeroupper 817; SKX-NEXT: retq 818 %cmp_res_i1 = icmp ult i32 %a, %b 819 %cmp_cmp_vec = icmp ult <32 x i32> %x, %y 820 %maskv = insertelement <32 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 4 821 %res = 
bitcast <32 x i1> %maskv to i32 822 ret i32 %res 823} 824 825define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) { 826; KNL-LABEL: test_iinsertelement_v4i1: 827; KNL: ## %bb.0: 828; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 829; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 830; KNL-NEXT: cmpl %esi, %edi 831; KNL-NEXT: setb %al 832; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 833; KNL-NEXT: kshiftrw $2, %k0, %k1 834; KNL-NEXT: kmovw %eax, %k2 835; KNL-NEXT: kxorw %k2, %k1, %k1 836; KNL-NEXT: kshiftlw $15, %k1, %k1 837; KNL-NEXT: kshiftrw $13, %k1, %k1 838; KNL-NEXT: kxorw %k1, %k0, %k0 839; KNL-NEXT: kmovw %k0, %eax 840; KNL-NEXT: ## kill: def $al killed $al killed $eax 841; KNL-NEXT: vzeroupper 842; KNL-NEXT: retq 843; 844; SKX-LABEL: test_iinsertelement_v4i1: 845; SKX: ## %bb.0: 846; SKX-NEXT: cmpl %esi, %edi 847; SKX-NEXT: setb %al 848; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 849; SKX-NEXT: kshiftrb $2, %k0, %k1 850; SKX-NEXT: kmovd %eax, %k2 851; SKX-NEXT: kxorb %k2, %k1, %k1 852; SKX-NEXT: kshiftlb $7, %k1, %k1 853; SKX-NEXT: kshiftrb $5, %k1, %k1 854; SKX-NEXT: kxorb %k1, %k0, %k0 855; SKX-NEXT: kmovd %k0, %eax 856; SKX-NEXT: ## kill: def $al killed $al killed $eax 857; SKX-NEXT: retq 858 %cmp_res_i1 = icmp ult i32 %a, %b 859 %cmp_cmp_vec = icmp ult <4 x i32> %x, %y 860 %maskv = insertelement <4 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 2 861 %res0 = shufflevector <4 x i1> %maskv, <4 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 862 %res = bitcast <8 x i1> %res0 to i8 863 ret i8 %res 864} 865 866define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) { 867; KNL-LABEL: test_iinsertelement_v2i1: 868; KNL: ## %bb.0: 869; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 870; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 871; KNL-NEXT: cmpl %esi, %edi 872; KNL-NEXT: setb %al 873; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 874; KNL-NEXT: kshiftlw $15, %k0, %k0 875; 
KNL-NEXT: kshiftrw $15, %k0, %k0 876; KNL-NEXT: kmovw %eax, %k1 877; KNL-NEXT: kshiftlw $1, %k1, %k1 878; KNL-NEXT: korw %k1, %k0, %k0 879; KNL-NEXT: kmovw %k0, %eax 880; KNL-NEXT: ## kill: def $al killed $al killed $eax 881; KNL-NEXT: vzeroupper 882; KNL-NEXT: retq 883; 884; SKX-LABEL: test_iinsertelement_v2i1: 885; SKX: ## %bb.0: 886; SKX-NEXT: cmpl %esi, %edi 887; SKX-NEXT: setb %al 888; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 889; SKX-NEXT: kshiftlb $7, %k0, %k0 890; SKX-NEXT: kshiftrb $7, %k0, %k0 891; SKX-NEXT: kmovd %eax, %k1 892; SKX-NEXT: kshiftlb $1, %k1, %k1 893; SKX-NEXT: korb %k1, %k0, %k0 894; SKX-NEXT: kmovd %k0, %eax 895; SKX-NEXT: ## kill: def $al killed $al killed $eax 896; SKX-NEXT: retq 897 %cmp_res_i1 = icmp ult i32 %a, %b 898 %cmp_cmp_vec = icmp ult <2 x i64> %x, %y 899 %maskv = insertelement <2 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 1 900 %res0 = shufflevector <2 x i1> %maskv, <2 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 901 %res = bitcast <8 x i1> %res0 to i8 902 ret i8 %res 903} 904 905define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) { 906; KNL-LABEL: test_extractelement_v2i1: 907; KNL: ## %bb.0: 908; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 909; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 910; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 911; KNL-NEXT: kmovw %k0, %eax 912; KNL-NEXT: andb $1, %al 913; KNL-NEXT: movb $4, %cl 914; KNL-NEXT: subb %al, %cl 915; KNL-NEXT: movzbl %cl, %eax 916; KNL-NEXT: vzeroupper 917; KNL-NEXT: retq 918; 919; SKX-LABEL: test_extractelement_v2i1: 920; SKX: ## %bb.0: 921; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 922; SKX-NEXT: kmovd %k0, %eax 923; SKX-NEXT: andb $1, %al 924; SKX-NEXT: movb $4, %cl 925; SKX-NEXT: subb %al, %cl 926; SKX-NEXT: movzbl %cl, %eax 927; SKX-NEXT: retq 928 %t1 = icmp ugt <2 x i64> %a, %b 929 %t2 = extractelement <2 x i1> %t1, i32 0 930 %res = select i1 %t2, i8 3, i8 4 931 ret i8 %res 932} 933 934define zeroext i8 
@extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) {
; KNL-LABEL: extractelement_v2i1_alt:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: movb $4, %cl
; KNL-NEXT: subb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: extractelement_v2i1_alt:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: movb $4, %cl
; SKX-NEXT: subb %al, %cl
; SKX-NEXT: movzbl %cl, %eax
; SKX-NEXT: retq
  %t1 = icmp ugt <2 x i64> %a, %b
  %t2 = extractelement <2 x i1> %t1, i32 0
  %sext = sext i1 %t2 to i8
  %res = add i8 %sext, 4
  ret i8 %res
}

; Extract constant lane 3 (the last lane) of a <4 x i1> unsigned-greater-than
; compare mask and zero-extend it to i8.
define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) {
; KNL-LABEL: test_extractelement_v4i1:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_v4i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrw $3, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
  %t1 = icmp ugt <4 x i32> %a, %b
  %t2 = extractelement <4 x i1> %t1, i32 3
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Extract constant lane 2 of the <32 x i1> result of an unsigned-greater-than
; compare over <32 x i8> and zero-extend it to i8.
define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) {
; KNL-LABEL: test_extractelement_v32i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
;
KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_v32i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: kshiftrd $2, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <32 x i8> %a, %b
  %t2 = extractelement <32 x i1> %t1, i32 2
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Extract constant lane 63 (the last lane) of a <64 x i1> unsigned-greater-than
; compare mask and use it as the condition of a select between i8 3 and i8 4.
define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: test_extractelement_v64i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpminub %ymm3, %ymm1, %ymm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: movb $4, %cl
; KNL-NEXT: subb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_v64i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: movb $4, %cl
; SKX-NEXT: subb %al, %cl
; SKX-NEXT: movzbl %cl, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <64 x i8> %a, %b
  %t2 = extractelement <64 x i1> %t1, i32 63
  %res = select i1 %t2, i8 3, i8 4
  ret i8 %res
}

; Alternate form of the test above: instead of a select it computes
; sext(lane 63) + 4, which is 3 when the lane is set and 4 otherwise.
define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: extractelement_v64i1_alt:
; KNL:
## %bb.0:
; KNL-NEXT: vpminub %ymm3, %ymm1, %ymm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: movb $4, %cl
; KNL-NEXT: subb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: extractelement_v64i1_alt:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: movb $4, %cl
; SKX-NEXT: subb %al, %cl
; SKX-NEXT: movzbl %cl, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <64 x i8> %a, %b
  %t2 = extractelement <64 x i1> %t1, i32 63
  %sext = sext i1 %t2 to i8
  %res = add i8 %sext, 4
  ret i8 %res
}

; Extract the element of a <2 x i64> vector selected by the run-time value
; %index. The expected code stores the vector to the stack, masks the index
; with 1 and reloads the element.
define i64 @test_extractelement_variable_v2i64(<2 x i64> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v2i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movq -24(%rsp,%rdi,8), %rax
; CHECK-NEXT: retq
  %t2 = extractelement <2 x i64> %t1, i32 %index
  ret i64 %t2
}

; Extract the element of a <4 x i64> vector selected by the run-time value
; %index.
define i64 @test_extractelement_variable_v4i64(<4 x i64> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v4i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def $edi killed $edi def
$rdi 1114; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1115; CHECK-NEXT: andl $3, %edi 1116; CHECK-NEXT: movq (%rsp,%rdi,8), %rax 1117; CHECK-NEXT: movq %rbp, %rsp 1118; CHECK-NEXT: popq %rbp 1119; CHECK-NEXT: vzeroupper 1120; CHECK-NEXT: retq 1121 %t2 = extractelement <4 x i64> %t1, i32 %index 1122 ret i64 %t2 1123} 1124 1125define i64 @test_extractelement_variable_v8i64(<8 x i64> %t1, i32 %index) { 1126; CHECK-LABEL: test_extractelement_variable_v8i64: 1127; CHECK: ## %bb.0: 1128; CHECK-NEXT: pushq %rbp 1129; CHECK-NEXT: .cfi_def_cfa_offset 16 1130; CHECK-NEXT: .cfi_offset %rbp, -16 1131; CHECK-NEXT: movq %rsp, %rbp 1132; CHECK-NEXT: .cfi_def_cfa_register %rbp 1133; CHECK-NEXT: andq $-64, %rsp 1134; CHECK-NEXT: subq $128, %rsp 1135; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1136; CHECK-NEXT: vmovaps %zmm0, (%rsp) 1137; CHECK-NEXT: andl $7, %edi 1138; CHECK-NEXT: movq (%rsp,%rdi,8), %rax 1139; CHECK-NEXT: movq %rbp, %rsp 1140; CHECK-NEXT: popq %rbp 1141; CHECK-NEXT: vzeroupper 1142; CHECK-NEXT: retq 1143 %t2 = extractelement <8 x i64> %t1, i32 %index 1144 ret i64 %t2 1145} 1146 1147define double @test_extractelement_variable_v2f64(<2 x double> %t1, i32 %index) { 1148; CHECK-LABEL: test_extractelement_variable_v2f64: 1149; CHECK: ## %bb.0: 1150; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1151; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1152; CHECK-NEXT: andl $1, %edi 1153; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1154; CHECK-NEXT: retq 1155 %t2 = extractelement <2 x double> %t1, i32 %index 1156 ret double %t2 1157} 1158 1159define double @test_extractelement_variable_v4f64(<4 x double> %t1, i32 %index) { 1160; CHECK-LABEL: test_extractelement_variable_v4f64: 1161; CHECK: ## %bb.0: 1162; CHECK-NEXT: pushq %rbp 1163; CHECK-NEXT: .cfi_def_cfa_offset 16 1164; CHECK-NEXT: .cfi_offset %rbp, -16 1165; CHECK-NEXT: movq %rsp, %rbp 1166; CHECK-NEXT: .cfi_def_cfa_register %rbp 1167; CHECK-NEXT: andq $-32, %rsp 1168; CHECK-NEXT: subq $64, %rsp 1169; CHECK-NEXT: 
## kill: def $edi killed $edi def $rdi 1170; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1171; CHECK-NEXT: andl $3, %edi 1172; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1173; CHECK-NEXT: movq %rbp, %rsp 1174; CHECK-NEXT: popq %rbp 1175; CHECK-NEXT: vzeroupper 1176; CHECK-NEXT: retq 1177 %t2 = extractelement <4 x double> %t1, i32 %index 1178 ret double %t2 1179} 1180 1181define double @test_extractelement_variable_v8f64(<8 x double> %t1, i32 %index) { 1182; CHECK-LABEL: test_extractelement_variable_v8f64: 1183; CHECK: ## %bb.0: 1184; CHECK-NEXT: pushq %rbp 1185; CHECK-NEXT: .cfi_def_cfa_offset 16 1186; CHECK-NEXT: .cfi_offset %rbp, -16 1187; CHECK-NEXT: movq %rsp, %rbp 1188; CHECK-NEXT: .cfi_def_cfa_register %rbp 1189; CHECK-NEXT: andq $-64, %rsp 1190; CHECK-NEXT: subq $128, %rsp 1191; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1192; CHECK-NEXT: vmovaps %zmm0, (%rsp) 1193; CHECK-NEXT: andl $7, %edi 1194; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1195; CHECK-NEXT: movq %rbp, %rsp 1196; CHECK-NEXT: popq %rbp 1197; CHECK-NEXT: vzeroupper 1198; CHECK-NEXT: retq 1199 %t2 = extractelement <8 x double> %t1, i32 %index 1200 ret double %t2 1201} 1202 1203define i32 @test_extractelement_variable_v4i32(<4 x i32> %t1, i32 %index) { 1204; CHECK-LABEL: test_extractelement_variable_v4i32: 1205; CHECK: ## %bb.0: 1206; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1207; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1208; CHECK-NEXT: andl $3, %edi 1209; CHECK-NEXT: movl -24(%rsp,%rdi,4), %eax 1210; CHECK-NEXT: retq 1211 %t2 = extractelement <4 x i32> %t1, i32 %index 1212 ret i32 %t2 1213} 1214 1215define i32 @test_extractelement_variable_v8i32(<8 x i32> %t1, i32 %index) { 1216; CHECK-LABEL: test_extractelement_variable_v8i32: 1217; CHECK: ## %bb.0: 1218; CHECK-NEXT: pushq %rbp 1219; CHECK-NEXT: .cfi_def_cfa_offset 16 1220; CHECK-NEXT: .cfi_offset %rbp, -16 1221; CHECK-NEXT: movq %rsp, %rbp 1222; CHECK-NEXT: .cfi_def_cfa_register %rbp 1223; CHECK-NEXT: andq $-32, %rsp 
1224; CHECK-NEXT: subq $64, %rsp 1225; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1226; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1227; CHECK-NEXT: andl $7, %edi 1228; CHECK-NEXT: movl (%rsp,%rdi,4), %eax 1229; CHECK-NEXT: movq %rbp, %rsp 1230; CHECK-NEXT: popq %rbp 1231; CHECK-NEXT: vzeroupper 1232; CHECK-NEXT: retq 1233 %t2 = extractelement <8 x i32> %t1, i32 %index 1234 ret i32 %t2 1235} 1236 1237define i32 @test_extractelement_variable_v16i32(<16 x i32> %t1, i32 %index) { 1238; CHECK-LABEL: test_extractelement_variable_v16i32: 1239; CHECK: ## %bb.0: 1240; CHECK-NEXT: pushq %rbp 1241; CHECK-NEXT: .cfi_def_cfa_offset 16 1242; CHECK-NEXT: .cfi_offset %rbp, -16 1243; CHECK-NEXT: movq %rsp, %rbp 1244; CHECK-NEXT: .cfi_def_cfa_register %rbp 1245; CHECK-NEXT: andq $-64, %rsp 1246; CHECK-NEXT: subq $128, %rsp 1247; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1248; CHECK-NEXT: vmovaps %zmm0, (%rsp) 1249; CHECK-NEXT: andl $15, %edi 1250; CHECK-NEXT: movl (%rsp,%rdi,4), %eax 1251; CHECK-NEXT: movq %rbp, %rsp 1252; CHECK-NEXT: popq %rbp 1253; CHECK-NEXT: vzeroupper 1254; CHECK-NEXT: retq 1255 %t2 = extractelement <16 x i32> %t1, i32 %index 1256 ret i32 %t2 1257} 1258 1259define float @test_extractelement_variable_v4f32(<4 x float> %t1, i32 %index) { 1260; CHECK-LABEL: test_extractelement_variable_v4f32: 1261; CHECK: ## %bb.0: 1262; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1263; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1264; CHECK-NEXT: andl $3, %edi 1265; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1266; CHECK-NEXT: retq 1267 %t2 = extractelement <4 x float> %t1, i32 %index 1268 ret float %t2 1269} 1270 1271define float @test_extractelement_variable_v8f32(<8 x float> %t1, i32 %index) { 1272; CHECK-LABEL: test_extractelement_variable_v8f32: 1273; CHECK: ## %bb.0: 1274; CHECK-NEXT: pushq %rbp 1275; CHECK-NEXT: .cfi_def_cfa_offset 16 1276; CHECK-NEXT: .cfi_offset %rbp, -16 1277; CHECK-NEXT: movq %rsp, %rbp 1278; CHECK-NEXT: 
.cfi_def_cfa_register %rbp 1279; CHECK-NEXT: andq $-32, %rsp 1280; CHECK-NEXT: subq $64, %rsp 1281; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1282; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1283; CHECK-NEXT: andl $7, %edi 1284; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1285; CHECK-NEXT: movq %rbp, %rsp 1286; CHECK-NEXT: popq %rbp 1287; CHECK-NEXT: vzeroupper 1288; CHECK-NEXT: retq 1289 %t2 = extractelement <8 x float> %t1, i32 %index 1290 ret float %t2 1291} 1292 1293define float @test_extractelement_variable_v16f32(<16 x float> %t1, i32 %index) { 1294; CHECK-LABEL: test_extractelement_variable_v16f32: 1295; CHECK: ## %bb.0: 1296; CHECK-NEXT: pushq %rbp 1297; CHECK-NEXT: .cfi_def_cfa_offset 16 1298; CHECK-NEXT: .cfi_offset %rbp, -16 1299; CHECK-NEXT: movq %rsp, %rbp 1300; CHECK-NEXT: .cfi_def_cfa_register %rbp 1301; CHECK-NEXT: andq $-64, %rsp 1302; CHECK-NEXT: subq $128, %rsp 1303; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1304; CHECK-NEXT: vmovaps %zmm0, (%rsp) 1305; CHECK-NEXT: andl $15, %edi 1306; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1307; CHECK-NEXT: movq %rbp, %rsp 1308; CHECK-NEXT: popq %rbp 1309; CHECK-NEXT: vzeroupper 1310; CHECK-NEXT: retq 1311 %t2 = extractelement <16 x float> %t1, i32 %index 1312 ret float %t2 1313} 1314 1315define i16 @test_extractelement_variable_v8i16(<8 x i16> %t1, i32 %index) { 1316; CHECK-LABEL: test_extractelement_variable_v8i16: 1317; CHECK: ## %bb.0: 1318; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1319; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1320; CHECK-NEXT: andl $7, %edi 1321; CHECK-NEXT: movzwl -24(%rsp,%rdi,2), %eax 1322; CHECK-NEXT: retq 1323 %t2 = extractelement <8 x i16> %t1, i32 %index 1324 ret i16 %t2 1325} 1326 1327define i16 @test_extractelement_variable_v16i16(<16 x i16> %t1, i32 %index) { 1328; CHECK-LABEL: test_extractelement_variable_v16i16: 1329; CHECK: ## %bb.0: 1330; CHECK-NEXT: pushq %rbp 1331; CHECK-NEXT: .cfi_def_cfa_offset 16 1332; CHECK-NEXT: 
.cfi_offset %rbp, -16 1333; CHECK-NEXT: movq %rsp, %rbp 1334; CHECK-NEXT: .cfi_def_cfa_register %rbp 1335; CHECK-NEXT: andq $-32, %rsp 1336; CHECK-NEXT: subq $64, %rsp 1337; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1338; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1339; CHECK-NEXT: andl $15, %edi 1340; CHECK-NEXT: movzwl (%rsp,%rdi,2), %eax 1341; CHECK-NEXT: movq %rbp, %rsp 1342; CHECK-NEXT: popq %rbp 1343; CHECK-NEXT: vzeroupper 1344; CHECK-NEXT: retq 1345 %t2 = extractelement <16 x i16> %t1, i32 %index 1346 ret i16 %t2 1347} 1348 1349define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) { 1350; KNL-LABEL: test_extractelement_variable_v32i16: 1351; KNL: ## %bb.0: 1352; KNL-NEXT: pushq %rbp 1353; KNL-NEXT: .cfi_def_cfa_offset 16 1354; KNL-NEXT: .cfi_offset %rbp, -16 1355; KNL-NEXT: movq %rsp, %rbp 1356; KNL-NEXT: .cfi_def_cfa_register %rbp 1357; KNL-NEXT: andq $-64, %rsp 1358; KNL-NEXT: subq $128, %rsp 1359; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1360; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 1361; KNL-NEXT: vmovaps %ymm0, (%rsp) 1362; KNL-NEXT: andl $31, %edi 1363; KNL-NEXT: movzwl (%rsp,%rdi,2), %eax 1364; KNL-NEXT: movq %rbp, %rsp 1365; KNL-NEXT: popq %rbp 1366; KNL-NEXT: vzeroupper 1367; KNL-NEXT: retq 1368; 1369; SKX-LABEL: test_extractelement_variable_v32i16: 1370; SKX: ## %bb.0: 1371; SKX-NEXT: pushq %rbp 1372; SKX-NEXT: .cfi_def_cfa_offset 16 1373; SKX-NEXT: .cfi_offset %rbp, -16 1374; SKX-NEXT: movq %rsp, %rbp 1375; SKX-NEXT: .cfi_def_cfa_register %rbp 1376; SKX-NEXT: andq $-64, %rsp 1377; SKX-NEXT: subq $128, %rsp 1378; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 1379; SKX-NEXT: vmovaps %zmm0, (%rsp) 1380; SKX-NEXT: andl $31, %edi 1381; SKX-NEXT: movzwl (%rsp,%rdi,2), %eax 1382; SKX-NEXT: movq %rbp, %rsp 1383; SKX-NEXT: popq %rbp 1384; SKX-NEXT: vzeroupper 1385; SKX-NEXT: retq 1386 %t2 = extractelement <32 x i16> %t1, i32 %index 1387 ret i16 %t2 1388} 1389 1390define i8 @test_extractelement_variable_v16i8(<16 x i8> 
%t1, i32 %index) { 1391; CHECK-LABEL: test_extractelement_variable_v16i8: 1392; CHECK: ## %bb.0: 1393; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1394; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1395; CHECK-NEXT: andl $15, %edi 1396; CHECK-NEXT: movb -24(%rsp,%rdi), %al 1397; CHECK-NEXT: retq 1398 %t2 = extractelement <16 x i8> %t1, i32 %index 1399 ret i8 %t2 1400} 1401 1402define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) { 1403; CHECK-LABEL: test_extractelement_variable_v32i8: 1404; CHECK: ## %bb.0: 1405; CHECK-NEXT: pushq %rbp 1406; CHECK-NEXT: .cfi_def_cfa_offset 16 1407; CHECK-NEXT: .cfi_offset %rbp, -16 1408; CHECK-NEXT: movq %rsp, %rbp 1409; CHECK-NEXT: .cfi_def_cfa_register %rbp 1410; CHECK-NEXT: andq $-32, %rsp 1411; CHECK-NEXT: subq $64, %rsp 1412; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi 1413; CHECK-NEXT: vmovaps %ymm0, (%rsp) 1414; CHECK-NEXT: andl $31, %edi 1415; CHECK-NEXT: movb (%rsp,%rdi), %al 1416; CHECK-NEXT: movq %rbp, %rsp 1417; CHECK-NEXT: popq %rbp 1418; CHECK-NEXT: vzeroupper 1419; CHECK-NEXT: retq 1420 1421 %t2 = extractelement <32 x i8> %t1, i32 %index 1422 ret i8 %t2 1423} 1424 1425define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) { 1426; KNL-LABEL: test_extractelement_variable_v64i8: 1427; KNL: ## %bb.0: 1428; KNL-NEXT: pushq %rbp 1429; KNL-NEXT: .cfi_def_cfa_offset 16 1430; KNL-NEXT: .cfi_offset %rbp, -16 1431; KNL-NEXT: movq %rsp, %rbp 1432; KNL-NEXT: .cfi_def_cfa_register %rbp 1433; KNL-NEXT: andq $-64, %rsp 1434; KNL-NEXT: subq $128, %rsp 1435; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1436; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 1437; KNL-NEXT: vmovaps %ymm0, (%rsp) 1438; KNL-NEXT: andl $63, %edi 1439; KNL-NEXT: movb (%rsp,%rdi), %al 1440; KNL-NEXT: movq %rbp, %rsp 1441; KNL-NEXT: popq %rbp 1442; KNL-NEXT: vzeroupper 1443; KNL-NEXT: retq 1444; 1445; SKX-LABEL: test_extractelement_variable_v64i8: 1446; SKX: ## %bb.0: 1447; SKX-NEXT: pushq %rbp 1448; SKX-NEXT: 
.cfi_def_cfa_offset 16 1449; SKX-NEXT: .cfi_offset %rbp, -16 1450; SKX-NEXT: movq %rsp, %rbp 1451; SKX-NEXT: .cfi_def_cfa_register %rbp 1452; SKX-NEXT: andq $-64, %rsp 1453; SKX-NEXT: subq $128, %rsp 1454; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 1455; SKX-NEXT: vmovaps %zmm0, (%rsp) 1456; SKX-NEXT: andl $63, %edi 1457; SKX-NEXT: movb (%rsp,%rdi), %al 1458; SKX-NEXT: movq %rbp, %rsp 1459; SKX-NEXT: popq %rbp 1460; SKX-NEXT: vzeroupper 1461; SKX-NEXT: retq 1462 1463 %t2 = extractelement <64 x i8> %t1, i32 %index 1464 ret i8 %t2 1465} 1466 1467define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index) { 1468; KNL-LABEL: test_extractelement_variable_v64i8_indexi8: 1469; KNL: ## %bb.0: 1470; KNL-NEXT: pushq %rbp 1471; KNL-NEXT: .cfi_def_cfa_offset 16 1472; KNL-NEXT: .cfi_offset %rbp, -16 1473; KNL-NEXT: movq %rsp, %rbp 1474; KNL-NEXT: .cfi_def_cfa_register %rbp 1475; KNL-NEXT: andq $-64, %rsp 1476; KNL-NEXT: subq $128, %rsp 1477; KNL-NEXT: addb %dil, %dil 1478; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 1479; KNL-NEXT: vmovaps %ymm0, (%rsp) 1480; KNL-NEXT: movzbl %dil, %eax 1481; KNL-NEXT: andl $63, %eax 1482; KNL-NEXT: movb (%rsp,%rax), %al 1483; KNL-NEXT: movq %rbp, %rsp 1484; KNL-NEXT: popq %rbp 1485; KNL-NEXT: vzeroupper 1486; KNL-NEXT: retq 1487; 1488; SKX-LABEL: test_extractelement_variable_v64i8_indexi8: 1489; SKX: ## %bb.0: 1490; SKX-NEXT: pushq %rbp 1491; SKX-NEXT: .cfi_def_cfa_offset 16 1492; SKX-NEXT: .cfi_offset %rbp, -16 1493; SKX-NEXT: movq %rsp, %rbp 1494; SKX-NEXT: .cfi_def_cfa_register %rbp 1495; SKX-NEXT: andq $-64, %rsp 1496; SKX-NEXT: subq $128, %rsp 1497; SKX-NEXT: addb %dil, %dil 1498; SKX-NEXT: vmovaps %zmm0, (%rsp) 1499; SKX-NEXT: movzbl %dil, %eax 1500; SKX-NEXT: andl $63, %eax 1501; SKX-NEXT: movb (%rsp,%rax), %al 1502; SKX-NEXT: movq %rbp, %rsp 1503; SKX-NEXT: popq %rbp 1504; SKX-NEXT: vzeroupper 1505; SKX-NEXT: retq 1506 1507 %i = add i8 %index, %index 1508 %t2 = extractelement <64 x i8> %t1, i8 %i 1509 ret i8 %t2 
1510} 1511 1512define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) { 1513; KNL-LABEL: test_extractelement_varible_v2i1: 1514; KNL: ## %bb.0: 1515; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1516; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 1517; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 1518; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 1519; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1520; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) 1521; KNL-NEXT: andl $1, %edi 1522; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax 1523; KNL-NEXT: andl $1, %eax 1524; KNL-NEXT: vzeroupper 1525; KNL-NEXT: retq 1526; 1527; SKX-LABEL: test_extractelement_varible_v2i1: 1528; SKX: ## %bb.0: 1529; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 1530; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 1531; SKX-NEXT: vpmovm2q %k0, %xmm0 1532; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1533; SKX-NEXT: andl $1, %edi 1534; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax 1535; SKX-NEXT: andl $1, %eax 1536; SKX-NEXT: retq 1537 %t1 = icmp ugt <2 x i64> %a, %b 1538 %t2 = extractelement <2 x i1> %t1, i32 %index 1539 %res = zext i1 %t2 to i8 1540 ret i8 %res 1541} 1542 1543define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) { 1544; KNL-LABEL: test_extractelement_varible_v4i1: 1545; KNL: ## %bb.0: 1546; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1547; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 1548; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 1549; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 1550; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1551; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) 1552; KNL-NEXT: andl $3, %edi 1553; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax 1554; KNL-NEXT: andl $1, %eax 1555; KNL-NEXT: vzeroupper 1556; KNL-NEXT: retq 1557; 1558; SKX-LABEL: test_extractelement_varible_v4i1: 1559; SKX: ## %bb.0: 1560; SKX-NEXT: ## kill: def $edi killed $edi def 
$rdi 1561; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 1562; SKX-NEXT: vpmovm2d %k0, %xmm0 1563; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1564; SKX-NEXT: andl $3, %edi 1565; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax 1566; SKX-NEXT: andl $1, %eax 1567; SKX-NEXT: retq 1568 %t1 = icmp ugt <4 x i32> %a, %b 1569 %t2 = extractelement <4 x i1> %t1, i32 %index 1570 %res = zext i1 %t2 to i8 1571 ret i8 %res 1572} 1573 1574define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) { 1575; KNL-LABEL: test_extractelement_varible_v8i1: 1576; KNL: ## %bb.0: 1577; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1578; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 1579; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 1580; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 1581; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1582; KNL-NEXT: vpmovdw %zmm0, %ymm0 1583; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1584; KNL-NEXT: andl $7, %edi 1585; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax 1586; KNL-NEXT: andl $1, %eax 1587; KNL-NEXT: vzeroupper 1588; KNL-NEXT: retq 1589; 1590; SKX-LABEL: test_extractelement_varible_v8i1: 1591; SKX: ## %bb.0: 1592; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 1593; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 1594; SKX-NEXT: vpmovm2w %k0, %xmm0 1595; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1596; SKX-NEXT: andl $7, %edi 1597; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax 1598; SKX-NEXT: andl $1, %eax 1599; SKX-NEXT: vzeroupper 1600; SKX-NEXT: retq 1601 %t1 = icmp ugt <8 x i32> %a, %b 1602 %t2 = extractelement <8 x i1> %t1, i32 %index 1603 %res = zext i1 %t2 to i8 1604 ret i8 %res 1605} 1606 1607define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) { 1608; KNL-LABEL: test_extractelement_varible_v16i1: 1609; KNL: ## %bb.0: 1610; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1611; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 1612; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 
1613; KNL-NEXT: vpmovdb %zmm0, -{{[0-9]+}}(%rsp) 1614; KNL-NEXT: andl $15, %edi 1615; KNL-NEXT: movzbl -24(%rsp,%rdi), %eax 1616; KNL-NEXT: andl $1, %eax 1617; KNL-NEXT: vzeroupper 1618; KNL-NEXT: retq 1619; 1620; SKX-LABEL: test_extractelement_varible_v16i1: 1621; SKX: ## %bb.0: 1622; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 1623; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 1624; SKX-NEXT: vpmovm2b %k0, %xmm0 1625; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1626; SKX-NEXT: andl $15, %edi 1627; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax 1628; SKX-NEXT: andl $1, %eax 1629; SKX-NEXT: vzeroupper 1630; SKX-NEXT: retq 1631 %t1 = icmp ugt <16 x i32> %a, %b 1632 %t2 = extractelement <16 x i1> %t1, i32 %index 1633 %res = zext i1 %t2 to i8 1634 ret i8 %res 1635} 1636 1637define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b, i32 %index) { 1638; KNL-LABEL: test_extractelement_varible_v32i1: 1639; KNL: ## %bb.0: 1640; KNL-NEXT: pushq %rbp 1641; KNL-NEXT: .cfi_def_cfa_offset 16 1642; KNL-NEXT: .cfi_offset %rbp, -16 1643; KNL-NEXT: movq %rsp, %rbp 1644; KNL-NEXT: .cfi_def_cfa_register %rbp 1645; KNL-NEXT: andq $-32, %rsp 1646; KNL-NEXT: subq $64, %rsp 1647; KNL-NEXT: ## kill: def $edi killed $edi def $rdi 1648; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1 1649; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1650; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1651; KNL-NEXT: vmovdqa %ymm0, (%rsp) 1652; KNL-NEXT: andl $31, %edi 1653; KNL-NEXT: movzbl (%rsp,%rdi), %eax 1654; KNL-NEXT: andl $1, %eax 1655; KNL-NEXT: movq %rbp, %rsp 1656; KNL-NEXT: popq %rbp 1657; KNL-NEXT: vzeroupper 1658; KNL-NEXT: retq 1659; 1660; SKX-LABEL: test_extractelement_varible_v32i1: 1661; SKX: ## %bb.0: 1662; SKX-NEXT: pushq %rbp 1663; SKX-NEXT: .cfi_def_cfa_offset 16 1664; SKX-NEXT: .cfi_offset %rbp, -16 1665; SKX-NEXT: movq %rsp, %rbp 1666; SKX-NEXT: .cfi_def_cfa_register %rbp 1667; SKX-NEXT: andq $-32, %rsp 1668; SKX-NEXT: subq $64, %rsp 1669; SKX-NEXT: ## kill: def $edi killed $edi def $rdi 
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: vpmovm2b %k0, %ymm0
; SKX-NEXT: vmovdqa %ymm0, (%rsp)
; SKX-NEXT: andl $31, %edi
; SKX-NEXT: movzbl (%rsp,%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <32 x i8> %a, %b
  %t2 = extractelement <32 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Build an <8 x i64> that holds %a in elements 4-5 and zero everywhere else:
; %b appends two zero lanes to %a, %d widens to eight lanes (upper four taken
; from the undef operand), and %e picks four zero lanes (indices 8-11) followed
; by lanes 0-3 of %d.
define <8 x i64> @insert_double_zero(<2 x i64> %a) nounwind {
; CHECK-LABEL: insert_double_zero:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vinsertf32x4 $2, %xmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %d = shufflevector <4 x i64> %b, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %e = shufflevector <8 x i64> %d, <8 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i64> %e
}

; Build a <32 x i1> mask from an unsigned-greater-than-zero compare of %a,
; overwrite the lane selected at run time by %index with the result of
; `icmp ugt i8 %b, 0`, and return the mask bitcast to i32.
define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
; KNL-LABEL: test_insertelement_variable_v32i1:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: andl $31, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0,
%ecx 1720; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1721; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1722; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1723; KNL-NEXT: kmovw %k0, %eax 1724; KNL-NEXT: shll $16, %eax 1725; KNL-NEXT: orl %ecx, %eax 1726; KNL-NEXT: movq %rbp, %rsp 1727; KNL-NEXT: popq %rbp 1728; KNL-NEXT: vzeroupper 1729; KNL-NEXT: retq 1730; 1731; SKX-LABEL: test_insertelement_variable_v32i1: 1732; SKX: ## %bb.0: 1733; SKX-NEXT: pushq %rbp 1734; SKX-NEXT: .cfi_def_cfa_offset 16 1735; SKX-NEXT: .cfi_offset %rbp, -16 1736; SKX-NEXT: movq %rsp, %rbp 1737; SKX-NEXT: .cfi_def_cfa_register %rbp 1738; SKX-NEXT: andq $-32, %rsp 1739; SKX-NEXT: subq $64, %rsp 1740; SKX-NEXT: ## kill: def $esi killed $esi def $rsi 1741; SKX-NEXT: vptestmb %ymm0, %ymm0, %k0 1742; SKX-NEXT: andl $31, %esi 1743; SKX-NEXT: testb %dil, %dil 1744; SKX-NEXT: vpmovm2b %k0, %ymm0 1745; SKX-NEXT: vmovdqa %ymm0, (%rsp) 1746; SKX-NEXT: setne (%rsp,%rsi) 1747; SKX-NEXT: vpsllw $7, (%rsp), %ymm0 1748; SKX-NEXT: vpmovb2m %ymm0, %k0 1749; SKX-NEXT: kmovd %k0, %eax 1750; SKX-NEXT: movq %rbp, %rsp 1751; SKX-NEXT: popq %rbp 1752; SKX-NEXT: vzeroupper 1753; SKX-NEXT: retq 1754 %t1 = icmp ugt <32 x i8> %a, zeroinitializer 1755 %t2 = icmp ugt i8 %b, 0 1756 %t3 = insertelement <32 x i1> %t1, i1 %t2, i32 %index 1757 %t4 = bitcast <32 x i1> %t3 to i32 1758 ret i32 %t4 1759} 1760 1761define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) { 1762; KNL-LABEL: test_insertelement_variable_v64i1: 1763; KNL: ## %bb.0: 1764; KNL-NEXT: pushq %rbp 1765; KNL-NEXT: .cfi_def_cfa_offset 16 1766; KNL-NEXT: .cfi_offset %rbp, -16 1767; KNL-NEXT: movq %rsp, %rbp 1768; KNL-NEXT: .cfi_def_cfa_register %rbp 1769; KNL-NEXT: andq $-64, %rsp 1770; KNL-NEXT: subq $128, %rsp 1771; KNL-NEXT: ## kill: def $esi killed $esi def $rsi 1772; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 1773; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 1774; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1775; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 1776; 
KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 1777; KNL-NEXT: andl $63, %esi 1778; KNL-NEXT: testb %dil, %dil 1779; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) 1780; KNL-NEXT: vmovdqa %ymm0, (%rsp) 1781; KNL-NEXT: setne (%rsp,%rsi) 1782; KNL-NEXT: vmovdqa (%rsp), %ymm0 1783; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 1784; KNL-NEXT: vpmovsxbd %xmm0, %zmm2 1785; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1786; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 1787; KNL-NEXT: kmovw %k0, %eax 1788; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 1789; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1790; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1791; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1792; KNL-NEXT: kmovw %k0, %ecx 1793; KNL-NEXT: shll $16, %ecx 1794; KNL-NEXT: orl %eax, %ecx 1795; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 1796; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1797; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1798; KNL-NEXT: kmovw %k0, %edx 1799; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0 1800; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1801; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1802; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1803; KNL-NEXT: kmovw %k0, %eax 1804; KNL-NEXT: shll $16, %eax 1805; KNL-NEXT: orl %edx, %eax 1806; KNL-NEXT: shlq $32, %rax 1807; KNL-NEXT: orq %rcx, %rax 1808; KNL-NEXT: movq %rbp, %rsp 1809; KNL-NEXT: popq %rbp 1810; KNL-NEXT: vzeroupper 1811; KNL-NEXT: retq 1812; 1813; SKX-LABEL: test_insertelement_variable_v64i1: 1814; SKX: ## %bb.0: 1815; SKX-NEXT: pushq %rbp 1816; SKX-NEXT: .cfi_def_cfa_offset 16 1817; SKX-NEXT: .cfi_offset %rbp, -16 1818; SKX-NEXT: movq %rsp, %rbp 1819; SKX-NEXT: .cfi_def_cfa_register %rbp 1820; SKX-NEXT: andq $-64, %rsp 1821; SKX-NEXT: subq $128, %rsp 1822; SKX-NEXT: ## kill: def $esi killed $esi def $rsi 1823; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0 1824; SKX-NEXT: andl $63, %esi 1825; SKX-NEXT: testb %dil, %dil 1826; SKX-NEXT: vpmovm2b %k0, %zmm0 1827; SKX-NEXT: vmovdqa64 %zmm0, (%rsp) 1828; SKX-NEXT: setne (%rsp,%rsi) 1829; SKX-NEXT: vpsllw $7, (%rsp), %zmm0 1830; SKX-NEXT: vpmovb2m %zmm0, %k0 1831; 
SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <64 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <64 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <64 x i1> %t3 to i64
  ret i64 %t4
}

; Variable-index insertelement into a <96 x i1> mask (a width that is not a
; power of two).
;   %t1 - per-element mask "%a[i] != 0" (written as unsigned "> 0").
;   %t2 - scalar bit "%b != 0", inserted into %t1 at the run-time %index.
;   %t4 - the resulting mask bitcast to i96 and returned.
; What the assertions below record for both KNL and SKX:
;   * the <96 x i8> argument is assembled byte by byte (vmovd/vpinsrb) from
;     %edi..%r9d and stack slots; %b is read at 736(%rbp), %index at 744(%rbp);
;   * the index is masked with 127 and the stack is aligned to 128 bytes;
;   * the mask is stored to the stack as one byte per element, the new element
;     is written with "setne (%rsp,%rax)", and the bytes are reloaded and
;     converted back to mask bits, with the result left in %rax and %rdx.
; The KNL/SKX lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; KNL-LABEL: test_insertelement_variable_v96i1:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-128, %rsp
; KNL-NEXT: subq $256, %rsp ## imm = 0x100
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm1, %xmm1
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vmovd %edi, %xmm2
; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: movl 744(%rbp), %eax
; KNL-NEXT: andl $127, %eax
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: cmpb $0, 736(%rbp)
; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, (%rsp)
; KNL-NEXT: setne (%rsp,%rax)
; KNL-NEXT: vmovdqa (%rsp), %ymm2
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: shll $16, %eax
; KNL-NEXT: orl %edx, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: shll $16, %esi
; KNL-NEXT: orl %ecx, %esi
; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: shll $16, %edx
; KNL-NEXT: orl %ecx, %edx
; KNL-NEXT: shlq $32, %rdx
; KNL-NEXT: orq %rsi, %rdx
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_insertelement_variable_v96i1:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-128, %rsp
; SKX-NEXT: subq $256, %rsp ## imm = 0x100
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0
; SKX-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1
; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: vmovd %edi, %xmm1
; SKX-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1
; SKX-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
; SKX-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; SKX-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1
; SKX-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1
; SKX-NEXT: vpinsrb $6, 16(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $7, 24(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $8, 32(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $9, 40(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $10, 48(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $11, 56(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $12, 64(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $13, 72(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $14, 80(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $15, 88(%rbp), %xmm1, %xmm1
; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SKX-NEXT: vpinsrb $1, 104(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $2, 112(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $3, 120(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $4, 128(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $5, 136(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $6, 144(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $7, 152(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $8, 160(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $9, 168(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $10, 176(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $11, 184(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $12, 192(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $13, 200(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $14, 208(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $15, 216(%rbp), %xmm2, %xmm2
; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX-NEXT: vpinsrb $1, 488(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $2, 496(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $3, 504(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $4, 512(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $5, 520(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $6, 528(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $7, 536(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $8, 544(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $9, 552(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $10, 560(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $11, 568(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $12, 576(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $13, 584(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $14, 592(%rbp), %xmm1, %xmm1
; SKX-NEXT: vpinsrb $15, 600(%rbp), %xmm1, %xmm1
; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SKX-NEXT: vpinsrb $1, 616(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $2, 624(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $3, 632(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $4, 640(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $5, 648(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $6, 656(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $7, 664(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $8, 672(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $9, 680(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $10, 688(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $11, 696(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $12, 704(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $13, 712(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $14, 720(%rbp), %xmm2, %xmm2
; SKX-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2
; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; SKX-NEXT: movl 744(%rbp), %eax
; SKX-NEXT: andl $127, %eax
; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0
; SKX-NEXT: vptestmb %zmm1, %zmm1, %k1
; SKX-NEXT: cmpb $0, 736(%rbp)
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
; SKX-NEXT: setne (%rsp,%rax)
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k1
; SKX-NEXT: kmovq %k1, %rax
; SKX-NEXT: kmovq %k0, %rdx
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <96 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <96 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <96 x i1> %t3 to i96
  ret i96 %t4
}

; Variable-index insertelement into a <128 x i1> mask.
;   %t1 - per-element mask "%a[i] != 0" (written as unsigned "> 0").
;   %t2 - scalar bit "%b != 0", inserted into %t1 at the run-time %index.
;   %t4 - the resulting mask bitcast to i128 and returned.
; What the assertions below record:
;   * %a arrives in vector registers (%ymm0-%ymm3 on KNL, %zmm0-%zmm1 on SKX),
;     %b is tested in %dil and %index is taken from %esi and masked with 127;
;   * the mask is stored to a 128-byte-aligned stack area as one byte per
;     element, the new element is written with "setne (%rsp,%rsi)", and the
;     bytes are reloaded and converted back to mask bits, with the result left
;     in %rax and %rdx.
; The KNL/SKX lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index) {
; KNL-LABEL: test_insertelement_variable_v128i1:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-128, %rsp
; KNL-NEXT: subq $256, %rsp ## imm = 0x100
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3
; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
; KNL-NEXT: andl $127, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vmovdqa (%rsp), %ymm2
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: shll $16, %eax
; KNL-NEXT: orl %edx, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: shll $16, %esi
; KNL-NEXT: orl %ecx, %esi
; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: shll $16, %edx
; KNL-NEXT: orl %ecx, %edx
; KNL-NEXT: shlq $32, %rdx
; KNL-NEXT: orq %rsi, %rdx
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_insertelement_variable_v128i1:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-128, %rsp
; SKX-NEXT: subq $256, %rsp ## imm = 0x100
; SKX-NEXT: ## kill: def $esi killed $esi def $rsi
; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0
; SKX-NEXT: vptestmb %zmm1, %zmm1, %k1
; SKX-NEXT: andl $127, %esi
; SKX-NEXT: testb %dil, %dil
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
; SKX-NEXT: setne (%rsp,%rsi)
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k1
; SKX-NEXT: kmovq %k1, %rax
; SKX-NEXT: kmovq %k0, %rdx
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <128 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <128 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <128 x i1> %t3 to i128
  ret i128 %t4
}