; RUN: opt < %s -instcombine -S | FileCheck %s

; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
; The constants 1.2f and 2.3f are expected to be folded into a single constant.
define float @fold(float %a) {
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
; CHECK-LABEL: @fold(
; CHECK: fmul fast float %a, 0x4006147AE0000000
}

; Same test case as the one used in fold(), except that the outer fmul carries
; no fast-math flags, so the inner multiplication must be left as it is.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Here only the outer fmul is 'fast'; the two constants are still expected to
; be folded.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK: fmul fast float %a, 0x4006147AE0000000
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
define double @fold3(double %f1) {
  %t1 = fmul fast double 2.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
; CHECK-LABEL: @fold3(
; CHECK: fmul fast double %f1, 3.000000e+00
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
define float @fold4(float %f1, float %f2) {
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
; CHECK-LABEL: @fold4(
; CHECK: %1 = fadd fast float %f1, %f2
; CHECK: fsub fast float 9.000000e+00, %1
}

; (X + C1) + C2 => X + (C1 + C2)
define float @fold5(float %f1, float %f2) {
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
; CHECK-LABEL: @fold5(
; CHECK: fadd fast float %f1, 9.000000e+00
}

; (X + X) + X => 3.0 * X
define float @fold6(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  ret float %t2
; CHECK-LABEL: @fold6(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; C1 * X + (X + X) = (C1 + 2) * X
define float @fold7(float %f1) {
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold7(
; CHECK: fmul fast float %f1, 7.000000e+00
}

; (X + X) + (X + X) => 4.0 * X
define float @fold8(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold8(
; CHECK: fmul fast float %f1, 4.000000e+00
}

; X - (X + Y) => 0 - Y
define float @fold9(float %f1, float %f2) {
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3

; CHECK-LABEL: @fold9(
; CHECK: fsub fast float -0.000000e+00, %f2
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing the constant addend at the
; top of the resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
define float @fold10(float %f1, float %f2) {
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold10(
; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
; CHECK: ret float %t3
}

; These inputs once caused a crash/miscompilation. The only expectation is
; that a 'ret' is still produced.
define float @fail1(float %f1, float %f2) {
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
; CHECK-LABEL: @fail1(
; CHECK: ret
}

define double @fail2(double %f1, double %f2) {
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
; CHECK-LABEL: @fail2(
; CHECK: ret
}

; c1 * x - x => (c1 - 1.0) * x
define float @fold13(float %x) {
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
; CHECK-LABEL: @fold13(
; CHECK: fmul fast float %x, 6.000000e+00
; CHECK: ret
}

; -x + y => y - x
define float @fold14(float %x, float %y) {
  %neg = fsub fast float -0.0, %x
  %add = fadd fast float %neg, %y
  ret float %add
; CHECK-LABEL: @fold14(
; CHECK: fsub fast float %y, %x
; CHECK: ret
}

; x + -y => x - y
define float @fold15(float %x, float %y) {
  %neg = fsub fast float -0.0, %y
  %add = fadd fast float %x, %neg
  ret float %add
; CHECK-LABEL: @fold15(
; CHECK: fsub fast float %x, %y
; CHECK: ret
}

; (select X+Y, X-Y) => X + (select Y, -Y)
define float @fold16(float %x, float %y) {
  %cmp = fcmp ogt float %x, %y
  %plus = fadd fast float %x, %y
  %minus = fsub fast float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
; CHECK-LABEL: @fold16(
; CHECK: fsub fast float
; CHECK: select
; CHECK: fadd fast float
; CHECK: ret
}



; =========================================================================
;
;   Test cases for fmul begin here
;
; =========================================================================

; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
define float @fmul_distribute1(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fadd float %t1, 2.0e+3
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute1(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fadd fast float %1, 1.000000e+07
}

; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
define double @fmul_distribute2(double %f1, double %f2) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e+1
  ; 0x10000000000000 = DBL_MIN
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute2(
; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
; CHECK: fadd fast double %1, 0x69000000000000
}

; 5.0e-1 * DBL_MIN yields a denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
; be simplified into f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN)
define double @fmul_distribute3(double %f1) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e-1
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute3(
; CHECK: fmul fast double %t2, 0x10000000000000
}

; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) (i.e. distribution over fsub)
define float @fmul_distribute4(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fsub float 2.0e+3, %t1
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute4(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fsub fast float 1.000000e+07, %1
}

; C1/X * C2 => (C1*C2) / X
define float @fmul2(float %f1) {
  %t1 = fdiv float 2.0e+3, %f1
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul2(
; CHECK: fdiv fast float 1.200000e+07, %f1
}

; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
@fmul2_external = external global float
define float @fmul2_disable(float %f1) {
  %div = fdiv fast float 1.000000e+00, %f1
  store float %div, float* @fmul2_external
  %mul = fmul fast float %div, 2.000000e+00
  ret float %mul
; CHECK-LABEL: @fmul2_disable(
; CHECK: store
; CHECK: fmul fast
}

; X/C1 * C2 => X * (C2/C1) (if C2/C1 is a normal FP value)
define float @fmul3(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul3(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; Vector form of fmul3: the constants are folded lane by lane.
define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec(
; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
}

; Make sure fmul with a constant expression doesn't assert.
define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) {
  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, %constExprMul
  ret <4 x float> %t3
}

; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a
; special value or a denormal. The 0x3810000000000000 here takes the value
; FLT_MIN.
;
define float @fmul4(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul4(
; CHECK: fmul fast float %t1, 0x3810000000000000
}

; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
; and C1/C2 is a normal value.
;
define float @fmul5(float %f1, float %f2) {
  %t1 = fdiv float %f1, 3.0e+0
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul5(
; CHECK: fdiv fast float %f1, 0x47E8000000000000
}

; (X*Y) * X => (X*X) * Y
define float @fmul6(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  ret float %mul1
; CHECK-LABEL: @fmul6(
; CHECK: fmul fast float %f1, %f1
}

; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
define float @fmul7(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  %add = fadd float %mul1, %mul
  ret float %add
; CHECK-LABEL: @fmul7(
; CHECK: fmul fast float %mul, %f1
}

; =========================================================================
;
;   Test cases for negation
;
; =========================================================================

; (-X) * (-Y) => X * Y
define float @fneg1(float %f1, float %f2) {
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
; CHECK-LABEL: @fneg1(
; CHECK: fmul float %f1, %f2
}

; With nsz, "0.0 - X" is rewritten as "-0.0 - X".
define float @fneg2(float %x) {
  %sub = fsub nsz float 0.0, %x
  ret float %sub
; CHECK-LABEL: @fneg2(
; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
; CHECK-NEXT: ret float
}

; =========================================================================
;
;   Test cases for division
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
; CHECK-LABEL: @fdiv1(
; CHECK: fmul fast float %x, 0x3FD7303B60000000
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
; CHECK-LABEL: @fdiv2(
; CHECK: fmul fast float %x, 0x3FE0B21660000000
}

; Vector form of fdiv2: the constants are folded lane by lane.
define <2 x float> @fdiv2_vec(<2 x float> %x) {
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1

; CHECK-LABEL: @fdiv2_vec(
; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
;
define float @fdiv3(float %x) {
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; CHECK-LABEL: @fdiv3(
; CHECK: fdiv float %x, 0x47EFFFFFE0000000
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; NOTE(review): the fdiv below carries no fast-math flags, and the constants
; look like FLT_MAX and 0.2f, whose quotient would overflow rather than be a
; denormal -- confirm that this input exercises the intended condition.
define float @fdiv4(float %x) {
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
; CHECK-LABEL: @fdiv4(
; CHECK: fmul float %x, 0x47EFFFFFE0000000
}

; (X/Y)/Z => X/(Y*Z)
define float @fdiv5(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %t1, %f3
  ret float %t2
; CHECK-LABEL: @fdiv5(
; CHECK: fmul float %f2, %f3
}

; Z/(X/Y) => (Z*Y)/X
define float @fdiv6(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %f3, %t1
  ret float %t2
; CHECK-LABEL: @fdiv6(
; CHECK: fmul float %f3, %f2
}

; C1/(X*C2) => (C1/C2) / X
define float @fdiv7(float %x) {
  %t1 = fmul float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv7(
; CHECK: fdiv fast float 5.000000e+00, %x
}

; C1/(X/C2) => (C1*C2) / X
define float @fdiv8(float %x) {
  %t1 = fdiv float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv8(
; CHECK: fdiv fast float 4.500000e+01, %x
}

; C1/(C2/X) => (C1/C2) * X
define float @fdiv9(float %x) {
  %t1 = fdiv float 3.0e0, %x
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv9(
; CHECK: fmul fast float %x, 5.000000e+00
}

; =========================================================================
;
;   Test cases for factorization
;
; =========================================================================
; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul1(
; CHECK: fmul fast float %1, %z
}

; z*x - y*z => (x-y) * z
define float @fact_mul2(float %x, float %y, float %z) {
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul2(
; CHECK: fmul fast float %1, %z
}

; z*x - z*y => (x-y) * z
define float @fact_mul3(float %x, float %y, float %z) {
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul3(
; CHECK: fmul fast float %1, %z
}

; x*z - z*y => (x-y) * z
define float @fact_mul4(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul4(
; CHECK: fmul fast float %1, %z
}

; x/y + x/z, no xform
define float @fact_div1(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div1(
; CHECK: fadd fast float %t1, %t2
}

; x/y + z/x; no xform
define float @fact_div2(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div2(
; CHECK: fadd fast float %t1, %t2
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div3(
; CHECK: fdiv fast float %1, %x
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div4(
; CHECK: fdiv fast float %1, %x
}

; y/x + z/x => (y+z)/x is still performed when y+z is a normal value.
define float @fact_div5(float %x) {
  ; The two constant numerators are added here; the expectation below is a
  ; single division by %x with the summed constant.
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div5(
; CHECK: fdiv fast float 0x3818000000000000, %x
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div6(
; CHECK: %t3 = fsub fast float %t1, %t2
}

; =========================================================================
;
;   Test cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.
; We have to rely on a function-level attribute to enable this optimization
; because intrinsics don't currently have access to IR-level fast-math
; flags. If that changes, we can relax the requirement on all of these
; tests to just specify 'fast' on the sqrt.

attributes #0 = { "unsafe-fp-math" = "true" }

declare double @llvm.sqrt.f64(double)

; sqrt(x*x) => fabs(x)
define double @sqrt_intrinsic_arg_squared(double %x) #0 {
  %mul = fmul fast double %x, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_squared(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.

; Operand order: (y * x) * x
define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args1(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; Operand order: (x * y) * x
define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args2(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; Operand order: (x * x) * y
define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args3(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; Operand order: x * (y * x)
define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args4(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; Operand order: x * (x * y)
define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args5(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; Operand order: y * (x * x)
define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args6(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt((x*x) * (x*x)) => x*x
define double @sqrt_intrinsic_arg_4th(double %x) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_4th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: ret double %mul
}

; sqrt(((x*x) * x) * (x*x)) => (x*x) * sqrt(x)
define double @sqrt_intrinsic_arg_5th(double %x) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}

; Check that square root library calls behave the same way as the intrinsic.

declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

; sqrtf(x*x) => fabs(x), single precision
define float @sqrt_call_squared_f32(float %x) #0 {
  %mul = fmul fast float %x, %x
  %sqrt = call float @sqrtf(float %mul)
  ret float %sqrt

; CHECK-LABEL: sqrt_call_squared_f32(
; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}

; sqrt(x*x) => fabs(x), double precision
define double @sqrt_call_squared_f64(double %x) #0 {
  %mul = fmul fast double %x, %x
  %sqrt = call double @sqrt(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_call_squared_f64(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; sqrtl(x*x) => fabs(x), fp128
define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt

; CHECK-LABEL: sqrt_call_squared_f128(
; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}

; =========================================================================
;
;   Test cases for fmin / fmax
;
; =========================================================================

declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; "No NaNs" is the minimum requirement for replacing these calls.
; It should always be set when unsafe-fp-math is true, but the tests
; alternate between the two attributes for additional coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.
attributes #1 = { "no-nans-fp-math" = "true" }

; Shrink and remove the call.
define float @max1(float %a, float %b) #0 {
  ; The double-precision fmax sits between an extension of both float
  ; operands and a truncation of the result back to float.
  %a.wide = fpext float %a to double
  %b.wide = fpext float %b to double
  %wide.max = call double @fmax(double %a.wide, double %b.wide)
  %narrow = fptrunc double %wide.max to float
  ret float %narrow

; CHECK-LABEL: max1(
; CHECK-NEXT: fcmp fast ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fmaxf under the no-NaNs attribute becomes a compare plus select.
define float @max2(float %a, float %b) #1 {
  %max = call float @fmaxf(float %a, float %b)
  ret float %max

; CHECK-LABEL: max2(
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}


; fmax under the unsafe-fp-math attribute becomes a compare plus select.
define double @max3(double %a, double %b) #0 {
  %max = call double @fmax(double %a, double %b)
  ret double %max

; CHECK-LABEL: max3(
; CHECK-NEXT: fcmp fast ogt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; fmaxl under the no-NaNs attribute becomes a compare plus select.
define fp128 @max4(fp128 %a, fp128 %b) #1 {
  %max = call fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %max

; CHECK-LABEL: max4(
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}

; Shrink and remove the call.
define float @min1(float %a, float %b) #1 {
  ; The double-precision fmin sits between an extension of both float
  ; operands and a truncation of the result back to float.
  %a.wide = fpext float %a to double
  %b.wide = fpext float %b to double
  %wide.min = call double @fmin(double %a.wide, double %b.wide)
  %narrow = fptrunc double %wide.min to float
  ret float %narrow

; CHECK-LABEL: min1(
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fminf under the unsafe-fp-math attribute becomes a compare plus select.
define float @min2(float %a, float %b) #0 {
  %min = call float @fminf(float %a, float %b)
  ret float %min

; CHECK-LABEL: min2(
; CHECK-NEXT: fcmp fast olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fmin under the no-NaNs attribute becomes a compare plus select.
define double @min3(double %a, double %b) #1 {
  %min = call double @fmin(double %a, double %b)
  ret double %min

; CHECK-LABEL: min3(
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; fminl under the unsafe-fp-math attribute becomes a compare plus select.
define fp128 @min4(fp128 %a, fp128 %b) #0 {
  %min = call fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %min

; CHECK-LABEL: min4(
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}