; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL

; Exercises FP folds that are driven by fast-math-flags attached to individual instructions/nodes.
; The -debug-only=isel runs also inspect the DAG dump to confirm the flags reach newly created nodes.
; The runs with the global unsafe-math options cover the pre-FMF behavior using regular instructions/nodes.

declare float @llvm.fma.f32(float, float, float)
declare float @llvm.sqrt.f32(float)

; X * Y + Z --> fma(X, Y, Z)
; Only the fadd carries 'contract' in this first variant; the fmul is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'

define float @fmul_fadd_contract1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; Flagging the intermediate fmul as well should leave the result unchanged.
; Both the fmul and the fadd carry 'contract' in this variant.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'

define float @fmul_fadd_contract2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul contract float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; Reassociation implies that FMA contraction is allowed.
; Only the fadd carries 'reassoc' here; the resulting fma node is expected to carry it too.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'

define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd reassoc float %prod, %z
  ret float %sum
}

; Flagging the intermediate fmul as well should leave the result unchanged.
; Both the fmul and the fadd carry 'reassoc'. The debug check matches all three fma operands,
; in line with the other fma checks in this file.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'

define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %mul = fmul reassoc float %x, %y
  %add = fadd reassoc float %mul, %z
  ret float %add
}

; NOTE(review): this test is named 'fast1' and was described as using a fully 'fast' fadd, but
; both instructions below only carry 'reassoc', and the debug check expects an fma node flagged
; with just 'reassoc'. The IR is the same as fmul_fadd_reassoc2 above. Confirm whether the full
; 'fast' flag set was intended before changing the IR; the checks would need regenerating.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'

define float @fmul_fadd_fast1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %mul = fmul reassoc float %x, %y
  %add = fadd reassoc float %mul, %z
  ret float %add
}

; This shouldn't change anything - the intermediate fmul result is also flagged.
; NOTE(review): despite the 'fast2' name, both instructions below only carry 'reassoc'.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'

define float @fmul_fadd_fast2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul reassoc float %x, %y
  %sum = fadd reassoc float %prod, %z
  ret float %sum
}

; fma(X, 7.0, X * 42.0) --> X * 49.0
; 'reassoc' on the FMA call alone is the minimum FMF this fold needs; the fmul is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

define float @fmul_fma_reassoc1(float %x) {
; FMF-LABEL: fmul_fma_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul as well should leave the result unchanged.
; Both the fmul and the fma call carry 'reassoc' in this variant.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

define float @fmul_fma_reassoc2(float %x) {
; FMF-LABEL: fmul_fma_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul reassoc float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Same fold once more. NOTE(review): the next test was described as using a fully 'fast' FMA,
; but its call only carries 'reassoc' - confirm the intended flags.
; NOTE(review): named 'fast1', but the fma call below only carries 'reassoc' and the fmul is
; unflagged, which is the same IR as fmul_fma_reassoc1.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

define float @fmul_fma_fast1(float %x) {
; FMF-LABEL: fmul_fma_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul as well should leave the result unchanged.
; NOTE(review): named 'fast2', but both the fmul and the fma call below only carry 'reassoc',
; which is the same IR as fmul_fma_reassoc2.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

define float @fmul_fma_fast2(float %x) {
; FMF-LABEL: fmul_fma_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul reassoc float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; sqrt called with 'afn ninf' under attribute group #0; the checks expect the estimate-based
; sequence (xsrsqrtesp plus refinement) instead of a plain square-root instruction.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'

define float @sqrt_afn_ieee(float %x) #0 {
; FMF-LABEL: sqrt_afn_ieee:
; FMF:       # %bb.0:
; FMF-NEXT:    xsabsdp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
; FMF-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
; FMF-NEXT:    fcmpu 0, 0, 2
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    blt 0, .LBB10_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; FMF-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
; FMF-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
; FMF-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xsmulsp 1, 1, 2
; FMF-NEXT:    xsaddsp 0, 0, 3
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:  .LBB10_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_ieee:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsabsdp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
; GLOBAL-NEXT:    fcmpu 0, 0, 2
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    blt 0, .LBB10_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; GLOBAL-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
; GLOBAL-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 1, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB10_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; Same call with 'afn' only (no 'ninf'): the checks expect a plain xssqrtsp.

define float @sqrt_afn_ieee_inf(float %x) #0 {
; FMF-LABEL: sqrt_afn_ieee_inf:
; FMF:       # %bb.0:
; FMF-NEXT:    xssqrtsp 1, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_ieee_inf:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssqrtsp 1, 1
; GLOBAL-NEXT:    blr
  %root = call afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; 'afn ninf' sqrt again, this time under attribute group #1.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'

define float @sqrt_afn_preserve_sign(float %x) #1 {
; FMF-LABEL: sqrt_afn_preserve_sign:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB12_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
; FMF-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
; FMF-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
; FMF-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xsmulsp 1, 1, 2
; FMF-NEXT:    xsaddsp 0, 0, 3
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:  .LBB12_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB12_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
; GLOBAL-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
; GLOBAL-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 1, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB12_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; 'afn' without 'ninf' under attribute group #1: plain xssqrtsp expected.

define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
; FMF-LABEL: sqrt_afn_preserve_sign_inf:
; FMF:       # %bb.0:
; FMF-NEXT:    xssqrtsp 1, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssqrtsp 1, 1
; GLOBAL-NEXT:    blr
  %root = call afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; NOTE(review): described as a fully 'fast' call, but the call below carries only
; 'reassoc afn ninf'; the checks expect the approximation sequence. Confirm the intended flags.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
; FMFDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
; GLOBALDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'

define float @sqrt_fast_ieee(float %x) #0 {
; FMF-LABEL: sqrt_fast_ieee:
; FMF:       # %bb.0:
; FMF-NEXT:    xsabsdp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
; FMF-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
; FMF-NEXT:    fcmpu 0, 0, 2
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    blt 0, .LBB14_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
; FMF-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
; FMF-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
; FMF-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmaddasp 2, 1, 0
; FMF-NEXT:    xsmulsp 0, 1, 3
; FMF-NEXT:    xsmulsp 0, 0, 2
; FMF-NEXT:  .LBB14_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsabsdp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
; GLOBAL-NEXT:    fcmpu 0, 0, 2
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    blt 0, .LBB14_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
; GLOBAL-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
; GLOBAL-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 1, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB14_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call reassoc afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; NOTE(review): as above - described as fully 'fast', but the call carries only
; 'reassoc ninf afn'. This variant runs under attribute group #1.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
; FMFDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
; GLOBALDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'

define float @sqrt_fast_preserve_sign(float %x) #1 {
; FMF-LABEL: sqrt_fast_preserve_sign:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB15_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
; FMF-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
; FMF-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
; FMF-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmaddasp 2, 1, 0
; FMF-NEXT:    xsmulsp 0, 1, 3
; FMF-NEXT:    xsmulsp 0, 0, 2
; FMF-NEXT:  .LBB15_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB15_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
; GLOBAL-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
; GLOBAL-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
; GLOBAL-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 1, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB15_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call reassoc ninf afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; fcmp can have fast-math-flags.
; The 'nnan' on the compare is expected to show up on the select_cc node.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

define double @fcmp_nnan(double %a, double %y, double %z) {
; FMF-LABEL: fcmp_nnan:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    xscmpudp 0, 1, 0
; FMF-NEXT:    blt 0, .LBB16_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    fmr 3, 2
; FMF-NEXT:  .LBB16_2:
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fcmp_nnan:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    xscmpudp 0, 1, 0
; GLOBAL-NEXT:    blt 0, .LBB16_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    fmr 3, 2
; GLOBAL-NEXT:  .LBB16_2:
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %is.neg = fcmp nnan ult double %a, 0.0
  %picked = select i1 %is.neg, double %z, double %y
  ret double %picked
}

; FP library calls can have fast-math-flags.
; An 'afn' call to the external log2 function; the checks expect it to stay an ordinary call.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; FMFDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; FMFDEBUG:         f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; GLOBALDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; GLOBALDEBUG:         f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

declare double @log2(double)
define double @log2_approx(double %x) nounwind {
; FMF-LABEL: log2_approx:
; FMF:       # %bb.0:
; FMF-NEXT:    mflr 0
; FMF-NEXT:    std 0, 16(1)
; FMF-NEXT:    stdu 1, -32(1)
; FMF-NEXT:    bl log2
; FMF-NEXT:    nop
; FMF-NEXT:    addi 1, 1, 32
; FMF-NEXT:    ld 0, 16(1)
; FMF-NEXT:    mtlr 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: log2_approx:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    mflr 0
; GLOBAL-NEXT:    std 0, 16(1)
; GLOBAL-NEXT:    stdu 1, -32(1)
; GLOBAL-NEXT:    bl log2
; GLOBAL-NEXT:    nop
; GLOBAL-NEXT:    addi 1, 1, 32
; GLOBAL-NEXT:    ld 0, 16(1)
; GLOBAL-NEXT:    mtlr 0
; GLOBAL-NEXT:    blr
  %res = call afn double @log2(double %x)
  ret double %res
}

; -(X - Y) --> (Y - X)
; The negation is written as (0.0 - V) with 'nsz'; the inner subtraction is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
; FMF-LABEL: fneg_fsub_nozeros_1:
; FMF:       # %bb.0:
; FMF-NEXT:    xssubsp 1, 2, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fneg_fsub_nozeros_1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssubsp 1, 2, 1
; GLOBAL-NEXT:    blr
  %diff = fsub float %x, %y
  %negated = fsub nsz float 0.0, %diff
  ret float %negated
}

; Attribute groups referenced by the sqrt tests: #0 selects "ieee" and #1 selects
; "preserve-sign" for "denormal-fp-math".
attributes #0 = { "denormal-fp-math"="ieee,ieee" }
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }