; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
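; For example, the eight-operand chain in reassociate_adds5 below is expected to be
; split into independent partial sums, roughly
;   (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; as reflected in the CHECK lines, rather than into a fully balanced tree.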

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
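; As in reassociate_adds6 above, the high-latency divide result is expected to feed
; the final multiply rather than being reassociated with the earlier operands.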

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
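; In the IR below, min and max are written as an fcmp followed by a select; these are
; expected to lower to minss/maxss (and the corresponding SD/PS/PD forms) and to be
; reassociated in the same way as the arithmetic ops above.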

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq bar
; AVX-NEXT:  vmovsd %xmm0, 16(%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd %xmm0, 8(%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd %xmm0, (%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd 8(%rsp), %xmm1
; AVX:       vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq bar
; AVX-NEXT:  vmovsd %xmm0, 16(%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd %xmm0, 8(%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd %xmm0, (%rsp)
; AVX-NEXT:  callq bar
; AVX-NEXT:  vmovsd 8(%rsp), %xmm1
; AVX:       vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}