; RUN: opt < %s -instcombine -S | FileCheck %s

; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
; The constants 1.2f and 2.3f are supposed to be folded into one.
define float @fold(float %a) {
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
; CHECK-LABEL: @fold(
; CHECK: fmul fast float %a, 0x4006147AE0000000
}

; Same test case as the one used in fold(), except that the final multiply
; is not 'fast'. The first multiply must therefore be left untouched.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Only the outer multiply is 'fast'; the constants are still expected to fold.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK: fmul fast float %a, 0x4006147AE0000000
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
define double @fold3(double %f1) {
  %t1 = fmul fast double 2.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
; CHECK-LABEL: @fold3(
; CHECK: fmul fast double %f1, 3.000000e+00
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
define float @fold4(float %f1, float %f2) {
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
; CHECK-LABEL: @fold4(
; CHECK: %1 = fadd fast float %f1, %f2
; CHECK: fsub fast float 9.000000e+00, %1
}

; (X + C1) + C2 => X + (C1 + C2)
define float @fold5(float %f1, float %f2) {
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
; CHECK-LABEL: @fold5(
; CHECK: fadd fast float %f1, 9.000000e+00
}

; (X + X) + X => 3.0 * X
define float @fold6(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  ret float %t2
; CHECK-LABEL: @fold6(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; C1 * X + (X + X) = (C1 + 2) * X
define float @fold7(float %f1) {
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold7(
; CHECK: fmul fast float %f1, 7.000000e+00
}

; (X + X) + (X + X) => 4.0 * X
; Anchored with a label directive, like the sibling tests, so a failure is
; attributed to this function instead of leaking into a neighbour.
define float @fold8(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold8(
; CHECK: fmul fast float %f1, 4.000000e+00
}

; X - (X + Y) => 0 - Y
define float @fold9(float %f1, float %f2) {
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3

; CHECK-LABEL: @fold9(
; CHECK: fsub fast float -0.000000e+00, %f2
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing the constant addend at the
; top of the resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
define float @fold10(float %f1, float %f2) {
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold10(
; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
; CHECK: ret float %t3
}

; This once caused a crash/miscompilation. Only successful completion
; (reaching the return) is verified.
define float @fail1(float %f1, float %f2) {
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
; CHECK-LABEL: @fail1(
; CHECK: ret
}

; Only successful completion (reaching the return) is verified here as well.
define double @fail2(double %f1, double %f2) {
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
; CHECK-LABEL: @fail2(
; CHECK: ret
}

; c1 * x - x => (c1 - 1.0) * x
define float @fold13(float %x) {
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
; CHECK-LABEL: @fold13(
; CHECK: fmul fast float %x, 6.000000e+00
; CHECK: ret
}

; -x + y => y - x
define float @fold14(float %x, float %y) {
  %neg = fsub fast float -0.0, %x
  %add = fadd fast float %neg, %y
  ret float %add
; CHECK-LABEL: @fold14(
; CHECK: fsub fast float %y, %x
; CHECK: ret
}

; x + -y => x - y
define float @fold15(float %x, float %y) {
  %neg = fsub fast float -0.0, %y
  %add = fadd fast float %x, %neg
  ret float %add
; CHECK-LABEL: @fold15(
; CHECK: fsub fast float %x, %y
; CHECK: ret
}

; (select X+Y, X-Y) => X + (select Y, -Y)
define float @fold16(float %x, float %y) {
  %cmp = fcmp ogt float %x, %y
  %plus = fadd fast float %x, %y
  %minus = fsub fast float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
; CHECK-LABEL: @fold16(
; CHECK: fsub fast float
; CHECK: select
; CHECK: fadd fast float
; CHECK: ret
}



;
; =========================================================================
;
; Testing-cases about fmul begin
;
; =========================================================================

; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
define float @fmul_distribute1(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fadd float %t1, 2.0e+3
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute1(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fadd fast float %1, 1.000000e+07
}

; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
define double @fmul_distribute2(double %f1, double %f2) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e+1
  ; 0x10000000000000 = DBL_MIN
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute2(
; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
; CHECK: fadd fast double %1, 0x69000000000000
}

; 5.0e-1 * DBL_MIN yields a denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
; be simplified into f1 / (3.0/DBL_MIN) + (5.0e-1*DBL_MIN)
define double @fmul_distribute3(double %f1) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e-1
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute3(
; CHECK: fmul fast double %t2, 0x10000000000000
}

; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) (i.e.
; distribution)
define float @fmul_distribute4(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  ; Subtraction variant: the product is subtracted from the constant.
  %t2 = fsub float 2.0e+3, %t1
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute4(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fsub fast float 1.000000e+07, %1
}

; C1/X * C2 => (C1*C2) / X
define float @fmul2(float %f1) {
  %t1 = fdiv float 2.0e+3, %f1
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul2(
; CHECK: fdiv fast float 1.200000e+07, %f1
}

; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
@fmul2_external = external global float
define float @fmul2_disable(float %f1) {
  %div = fdiv fast float 1.000000e+00, %f1
  ; The store is the second use of %div that blocks the fold.
  store float %div, float* @fmul2_external
  %mul = fmul fast float %div, 2.000000e+00
  ret float %mul
; CHECK-LABEL: @fmul2_disable
; CHECK: store
; CHECK: fmul fast
}

; X/C1 * C2 => X * (C2/C1) (if C2/C1 is a normal FP value)
define float @fmul3(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul3(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; Vector form of fmul3; each lane is folded with its own constant pair.
define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec(
; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
}

; Make sure fmul with constant expression doesn't assert.
define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) {
  ; No particular output is required here. The directives below only anchor
  ; this function so that neighbouring checks cannot match inside it.
  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, %constExprMul
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec_constexpr(
; CHECK: ret <4 x float>
}

; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a special
; value or a denormal. The 0x3810000000000000 here takes the value FLT_MIN.
;
define float @fmul4(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul4(
; CHECK: fmul fast float %t1, 0x3810000000000000
}

; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
; and C1/C2 is a normal value.
;
define float @fmul5(float %f1, float %f2) {
  %t1 = fdiv float %f1, 3.0e+0
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul5(
; CHECK: fdiv fast float %f1, 0x47E8000000000000
}

; (X*Y) * X => (X*X) * Y
define float @fmul6(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  ret float %mul1
; CHECK-LABEL: @fmul6(
; CHECK: fmul fast float %f1, %f1
}

; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
define float @fmul7(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  %add = fadd float %mul1, %mul
  ret float %add
; CHECK-LABEL: @fmul7(
; CHECK: fmul fast float %mul, %f1
}

; =========================================================================
;
; Testing-cases about negation
;
; =========================================================================

; (-X) * (-Y) => X * Y
define float @fneg1(float %f1, float %f2) {
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
; CHECK-LABEL: @fneg1(
; CHECK: fmul float %f1, %f2
}

; With 'nsz', "0.0 - X" is rewritten as "-0.0 - X".
define float @fneg2(float %x) {
  %sub = fsub nsz float 0.0, %x
  ret float %sub
; CHECK-LABEL: @fneg2(
; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
; CHECK-NEXT: ret float
}

; =========================================================================
;
; Testing-cases about div
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
; CHECK-LABEL: @fdiv1(
; CHECK: fmul fast float %x, 0x3FD7303B60000000
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
; CHECK-LABEL: @fdiv2(
; CHECK: fmul fast float %x, 0x3FE0B21660000000
}

; Vector form of fdiv2.
define <2 x float> @fdiv2_vec(<2 x float> %x) {
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1

; CHECK-LABEL: @fdiv2_vec(
; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
;
define float @fdiv3(float %x) {
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; CHECK-LABEL: @fdiv3(
; CHECK: fdiv float %x, 0x47EFFFFFE0000000
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; NOTE(review): neither instruction below carries 'fast', and the constants
; are FLT_MAX and 0.2f, whose quotient overflows rather than underflows.
; Confirm that this still exercises the intended path.
define float @fdiv4(float %x) {
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
; CHECK-LABEL: @fdiv4(
; CHECK: fmul float %x, 0x47EFFFFFE0000000
}

; (X/Y)/Z => X/(Y*Z)
define float @fdiv5(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %t1, %f3
  ret float %t2
; CHECK-LABEL: @fdiv5(
; CHECK: fmul float %f2, %f3
}

; Z/(X/Y) => (Z*Y)/X
define float @fdiv6(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %f3, %t1
  ret float %t2
; CHECK-LABEL: @fdiv6(
; CHECK: fmul float %f3, %f2
}

; C1/(X*C2) => (C1/C2) / X
define float @fdiv7(float %x) {
  %t1 = fmul float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv7(
; CHECK: fdiv fast float 5.000000e+00, %x
}

; C1/(X/C2) => (C1*C2) / X
define float @fdiv8(float %x) {
  %t1 = fdiv float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv8(
; CHECK: fdiv fast float 4.500000e+01, %x
}

; C1/(C2/X) => (C1/C2) * X
define float @fdiv9(float %x) {
  %t1 = fdiv float 3.0e0, %x
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv9(
; CHECK: fmul fast float %x, 5.000000e+00
}

; =========================================================================
;
; Testing-cases about factorization
;
; =========================================================================

; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul1(
; CHECK: fmul fast float %1, %z
}

; z*x - y*z => (x-y) * z
define float @fact_mul2(float %x, float %y, float %z) {
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul2(
; CHECK: fmul fast float %1, %z
}

; z*x - z*y => (x-y) * z
define float @fact_mul3(float %x, float %y, float %z) {
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul3(
; CHECK: fmul fast float %1, %z
}

; x*z - z*y => (x-y) * z
define float @fact_mul4(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul4(
; CHECK: fmul fast float %1, %z
}

; x/y + x/z, no xform
define float @fact_div1(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div1(
; CHECK: fadd fast float %t1, %t2
}

; x/y + z/x; no xform
define float @fact_div2(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div2(
; CHECK: fadd fast float %t1, %t2
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div3(
; CHECK: fdiv fast float %1, %x
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div4(
; CHECK: fdiv fast float %1, %x
}

; y/x + z/x => (y+z)/x is enabled if y+z is a normal value.
define float @fact_div5(float %x) {
  ; 0x3810000000000000 (FLT_MIN) plus 0x3800000000000000 gives
  ; 0x3818000000000000, which is a normal float, so the two divisions are
  ; merged into one.
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div5(
; CHECK: fdiv fast float 0x3818000000000000, %x
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div6(
; CHECK: %t3 = fsub fast float %t1, %t2
}

; =========================================================================
;
; Test-cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.

declare double @llvm.sqrt.f64(double)

define double @sqrt_intrinsic_arg_squared(double %x) {
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_squared(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.
define double @sqrt_intrinsic_three_args1(double %x, double %y) {
  ; Tree shape: (y * x) * x
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args1(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

define double @sqrt_intrinsic_three_args2(double %x, double %y) {
  ; Tree shape: (x * y) * x
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args2(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

define double @sqrt_intrinsic_three_args3(double %x, double %y) {
  ; Tree shape: (x * x) * y
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args3(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

define double @sqrt_intrinsic_three_args4(double %x, double %y) {
  ; Tree shape: x * (y * x)
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args4(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

define double @sqrt_intrinsic_three_args5(double %x, double %y) {
  ; Tree shape: x * (x * y)
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args5(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

define double @sqrt_intrinsic_three_args6(double %x, double %y) {
  ; Tree shape: y * (x * x)
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args6(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; If any operation is not 'fast', we can't simplify.
define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
  ; The inner multiply lacks 'fast', so the whole sequence is expected to
  ; come out unchanged.
  %mul = fmul double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_not_so_fast(
; CHECK-NEXT: %mul = fmul double %x, %x
; CHECK-NEXT: %mul2 = fmul fast double %mul, %y
; CHECK-NEXT: %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
; CHECK-NEXT: ret double %sqrt
}

; sqrt((x*x) * (x*x)) => x*x
define double @sqrt_intrinsic_arg_4th(double %x) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_4th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: ret double %mul
}

; sqrt(((x*x) * x) * (x*x)) => (x*x) * sqrt(x)
define double @sqrt_intrinsic_arg_5th(double %x) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}

; Check that square root calls have the same behavior.
; Non-intrinsic square root functions, one per tested FP type
; (float, double, fp128).
declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

define float @sqrt_call_squared_f32(float %x) {
  %mul = fmul fast float %x, %x
  %sqrt = call fast float @sqrtf(float %mul)
  ret float %sqrt

; CHECK-LABEL: sqrt_call_squared_f32(
; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}

define double @sqrt_call_squared_f64(double %x) {
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @sqrt(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_call_squared_f64(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

define fp128 @sqrt_call_squared_f128(fp128 %x) {
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt

; CHECK-LABEL: sqrt_call_squared_f128(
; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}

; =========================================================================
;
; Test-cases for fmin / fmax
;
; =========================================================================

declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; No NaNs is the minimum requirement to replace these calls.
; This should always be set when unsafe-fp-math is true, but
; alternate the attributes for additional test coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.

; Shrink and remove the call.
define float @max1(float %a, float %b) {
  ; The arguments are widened to double and the result narrowed back; the
  ; expected output compares and selects directly on the float values.
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call fast double @fmax(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: max1(
; CHECK-NEXT: fcmp fast ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; Only 'nnan' on the call; the expected compare carries 'nnan nsz'.
define float @max2(float %a, float %b) {
  %c = call nnan float @fmaxf(float %a, float %b)
  ret float %c

; CHECK-LABEL: max2(
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}


; 'fast' on the call, double operands.
define double @max3(double %a, double %b) {
  %c = call fast double @fmax(double %a, double %b)
  ret double %c

; CHECK-LABEL: max3(
; CHECK-NEXT: fcmp fast ogt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; Only 'nnan' on the call, fp128 operands.
define fp128 @max4(fp128 %a, fp128 %b) {
  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: max4(
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}

; Shrink and remove the call.
define float @min1(float %a, float %b) {
  ; The arguments are widened to double and the result narrowed back; the
  ; expected output compares and selects directly on the float values.
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call nnan double @fmin(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: min1(
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; 'fast' on the call, float operands.
define float @min2(float %a, float %b) {
  %c = call fast float @fminf(float %a, float %b)
  ret float %c

; CHECK-LABEL: min2(
; CHECK-NEXT: fcmp fast olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; Only 'nnan' on the call; the expected compare carries 'nnan nsz'.
define double @min3(double %a, double %b) {
  %c = call nnan double @fmin(double %a, double %b)
  ret double %c

; CHECK-LABEL: min3(
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; 'fast' on the call, fp128 operands.
define fp128 @min4(fp128 %a, fp128 %b) {
  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: min4(
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}