; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
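; (Illustration only, not extra test input: per the CHECK lines below, one
; plausible partially reassociated shape for the 7-add chain is
;   %a = fadd float %x0, %x1
;   %b = fadd float %x2, %x3
;   %c = fadd float %a, %b
;   %d = fadd float %x4, %x5
;   %e = fadd float %d, %x6
;   %f = fadd float %c, %e
;   %t6 = fadd float %f, %x7
; i.e. the serial chain is broken up, but not into a fully balanced tree.
; The names %a..%f are illustrative and do not appear in the test.)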

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
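; (As with reassociate_adds6 above, the checks below assume the high-latency
; divide stays off the critical path of the multiply chain, roughly
;   %q = fdiv double %x0, %x1    ; long latency
;   %p = fmul double %x2, %x3    ; independent of the divide
;   %t2 = fmul double %q, %p
; where %q and %p are illustrative names, not part of the test.)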

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # BB#0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # BB#0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE: # BB#0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE: # BB#0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm2
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX: # BB#0:
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
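; (The min/max tests below spell the operations out as fcmp+select idioms that
; the backend matches to MINSS/MAXSS-style instructions; a sketch of the scalar
; minimum pattern used throughout, with illustrative names:
;   %cmp = fcmp olt float %a, %b
;   %min = select i1 %cmp, float %a, float %b    ; min(%a, %b)
; Treating min/max as associative ignores NaN and signed-zero subtleties, which
; -enable-unsafe-fp-math permits.)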

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: minss %xmm3, %xmm2
; SSE-NEXT: minss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: maxss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: minsd %xmm3, %xmm2
; SSE-NEXT: minsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: maxsd %xmm3, %xmm2
; SSE-NEXT: maxsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE: # BB#0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: minps %xmm3, %xmm2
; SSE-NEXT: minps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE: # BB#0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: maxps %xmm3, %xmm2
; SSE-NEXT: maxps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE: # BB#0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: minpd %xmm3, %xmm2
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE: # BB#0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: maxpd %xmm3, %xmm2
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}
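
; (Note: the two call-based tests above intentionally share identical CHECK
; lines: 'reassociate_adds_from_calls' starts as a serial add chain and is
; expected to be rebalanced into (x0 + x1) + (x2 + x3), while
; 'already_reassociated' is written in that balanced form and should be left
; unchanged.)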