; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,SSE2,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,SSE41,X86-SSE41
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,SSE2,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,SSE41,X64-SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
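;
; Illustrative sketch (an assumption about the pre-fix codegen, not checked
; output): for the IR pattern below,
;   %add = fadd float %2, %1
;   %3 = insertelement <4 x float> %a, float %add, i32 0
; a naive lowering would compute the scalar result into a scratch register
; and then re-insert it into lane 0:
;   addss %xmm1, %xmm2
;   movss %xmm2, %xmm0  # redundant insert of lane 0
; Since addss already writes only lane 0 and preserves lanes 1-3 of its
; destination, the expected codegen is the single "addss %xmm1, %xmm0".
;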
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE-LABEL: test_sqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE: # %bb.0:
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: divss %xmm1, %xmm2
; SSE-NEXT: divss %xmm2, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.
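;
; Informal note (not checked output): a shuffle mask such as
; <i32 0, i32 5, i32 6, i32 7> takes lane 0 from the first operand and
; lanes 1-3 from the second, which SSE4.1's blendps/blendpd can express
; with a single immediate; even so, the backend is expected to fold the
; whole blend pattern back into one scalar fp instruction.
;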
define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_add_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: addss %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_add_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_add_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_add_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_sub_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: subss %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_sub_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_sub_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: subss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_sub_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_mul_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: mulss %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_mul_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_mul_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: mulss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_mul_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_div_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: divss %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_div_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_div_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: divss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_div_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_add_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: addsd %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_add_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_add_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_add_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_sub_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_sub_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_sub_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: subsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_sub_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_mul_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: mulsd %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_mul_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_mul_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: mulsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_mul_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_div_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: divsd %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_div_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_div_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: divsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_div_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.
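;
; Informal reasoning (a sketch, not checked output): in a pattern such as
;   %1 = fadd <4 x float> %a, %b
;   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; lanes 1-3 of the packed result are discarded by the shuffle and replaced
; with the original lanes of %a, so the packed fadd can be shrunk to a
; single addss that computes only lane 0.
;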
define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test5_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_sub_ss:
; SSE2: # %bb.0:
; SSE2-NEXT: subps %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_ss:
; SSE41: # %bb.0:
; SSE41-NEXT: subps %xmm0, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_div_ss:
; SSE2: # %bb.0:
; SSE2-NEXT: divps %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_ss:
; SSE41: # %bb.0:
; SSE41-NEXT: divps %xmm0, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test5_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_sub_sd:
; SSE2: # %bb.0:
; SSE2-NEXT: subpd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_sd:
; SSE41: # %bb.0:
; SSE41-NEXT: subpd %xmm0, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_div_sd:
; SSE2: # %bb.0:
; SSE2-NEXT: divpd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_sd:
; SSE41: # %bb.0:
; SSE41-NEXT: divpd %xmm0, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-SSE2-LABEL: add_ss_mask:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: jne .LBB70_1
; X86-SSE2-NEXT: # %bb.2:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE2-NEXT: retl
; X86-SSE2-NEXT: .LBB70_1:
; X86-SSE2-NEXT: addss %xmm0, %xmm1
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: add_ss_mask:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT: jne .LBB70_1
; X86-SSE41-NEXT: # %bb.2:
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE41-NEXT: retl
; X86-SSE41-NEXT: .LBB70_1:
; X86-SSE41-NEXT: addss %xmm0, %xmm1
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE41-NEXT: retl
;
; X86-AVX1-LABEL: add_ss_mask:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT: je .LBB70_2
; X86-AVX1-NEXT: # %bb.1:
; X86-AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: .LBB70_2:
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: add_ss_mask:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovaps %xmm2, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE2-LABEL: add_ss_mask:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: testb $1, %dil
; X64-SSE2-NEXT: jne .LBB70_1
; X64-SSE2-NEXT: # %bb.2:
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE2-NEXT: retq
; X64-SSE2-NEXT: .LBB70_1:
; X64-SSE2-NEXT: addss %xmm0, %xmm1
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE2-NEXT: retq
;
; X64-SSE41-LABEL: add_ss_mask:
; X64-SSE41: # %bb.0:
; X64-SSE41-NEXT: testb $1, %dil
; X64-SSE41-NEXT: jne .LBB70_1
; X64-SSE41-NEXT: # %bb.2:
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE41-NEXT: retq
; X64-SSE41-NEXT: .LBB70_1:
; X64-SSE41-NEXT: addss %xmm0, %xmm1
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE41-NEXT: retq
;
; X64-AVX1-LABEL: add_ss_mask:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: testb $1, %dil
; X64-AVX1-NEXT: je .LBB70_2
; X64-AVX1-NEXT: # %bb.1:
; X64-AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: .LBB70_2:
; X64-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: add_ss_mask:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: kmovw %edi, %k1
; X64-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT: vmovaps %xmm2, %xmm0
; X64-AVX512-NEXT: retq
  %1 = extractelement <4 x float> %a, i64 0
  %2 = extractelement <4 x float> %b, i64 0
  %3 = fadd float %1, %2
  %4 = extractelement <4 x float> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %3, float %4
  %8 = insertelement <4 x float> %a, float %7, i64 0
  ret <4 x float> %8
}

define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-SSE2-LABEL: add_sd_mask:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: jne .LBB71_1
; X86-SSE2-NEXT: # %bb.2:
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-SSE2-NEXT: retl
; X86-SSE2-NEXT: .LBB71_1:
; X86-SSE2-NEXT: addsd %xmm0, %xmm1
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: add_sd_mask:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT: jne .LBB71_1
; X86-SSE41-NEXT: # %bb.2:
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X86-SSE41-NEXT: retl
; X86-SSE41-NEXT: .LBB71_1:
; X86-SSE41-NEXT: addsd %xmm0, %xmm1
; X86-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE41-NEXT: retl
;
; X86-AVX1-LABEL: add_sd_mask:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT: je .LBB71_2
; X86-AVX1-NEXT: # %bb.1:
; X86-AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: .LBB71_2:
; X86-AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: add_sd_mask:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovapd %xmm2, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE2-LABEL: add_sd_mask:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: testb $1, %dil
; X64-SSE2-NEXT: jne .LBB71_1
; X64-SSE2-NEXT: # %bb.2:
; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-SSE2-NEXT: retq
; X64-SSE2-NEXT: .LBB71_1:
; X64-SSE2-NEXT: addsd %xmm0, %xmm1
; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT: retq
;
; X64-SSE41-LABEL: add_sd_mask:
; X64-SSE41: # %bb.0:
; X64-SSE41-NEXT: testb $1, %dil
; X64-SSE41-NEXT: jne .LBB71_1
; X64-SSE41-NEXT: # %bb.2:
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X64-SSE41-NEXT: retq
; X64-SSE41-NEXT: .LBB71_1:
; X64-SSE41-NEXT: addsd %xmm0, %xmm1
; X64-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE41-NEXT: retq
;
; X64-AVX1-LABEL: add_sd_mask:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: testb $1, %dil
; X64-AVX1-NEXT: je .LBB71_2
; X64-AVX1-NEXT: # %bb.1:
; X64-AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: .LBB71_2:
; X64-AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: add_sd_mask:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: kmovw %edi, %k1
; X64-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT: vmovapd %xmm2, %xmm0
; X64-AVX512-NEXT: retq
  %1 = extractelement <2 x double> %a, i64 0
  %2 = extractelement <2 x double> %b, i64 0
  %3 = fadd double %1, %2
  %4 = extractelement <2 x double> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %3, double %4
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}