; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: divss %xmm1, %xmm2
; SSE-NEXT: divss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}