; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE2-LABEL: test_sqrt_ss:
; SSE2:       # BB#0:
; SSE2-NEXT:    sqrtss %xmm0, %xmm1
; SSE2-NEXT:    movss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_sqrt_ss:
; SSE41:       # BB#0:
; SSE41-NEXT:    sqrtss %xmm0, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_sqrt_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm1
; SSE-NEXT:    movsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sqrt_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}