; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s

; Fast-math sqrt and reciprocal-sqrt lowering on AArch64, checked in the two
; configurations given by the two llc invocations above:
;   * -use-reciprocal-square-root, verified with the FAULT prefix, where the
;     expected code uses the fsqrt (and, where needed, fdiv) instructions;
;   * +use-reciprocal-square-root, verified with the default prefix, where the
;     expected code replaces fsqrt with an frsqrte estimate followed by
;     frsqrts/fmul steps.
; The assertion lines are generated output; regenerate them with the script
; named on the first line instead of editing them by hand.

declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0

; Scalar f32 sqrt with fast-math flags. The estimate configuration expects two
; frsqrts steps, a multiply by the input, and an fcmp/fcsel pair that returns
; the input itself when it compares equal to 0.0.
define float @fsqrt(float %a) #0 {
; FAULT-LABEL: fsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte s1, s0
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s2, s2, s0
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; Same call as @fsqrt, but the function carries attribute group #1, which adds
; "denormal-fp-math"="ieee". The expected output recorded here is the same as
; for @fsqrt in both configurations.
define float @fsqrt_ieee_denorms(float %a) #1 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fsqrt_ieee_denorms:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte s1, s0
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s2, s2, s0
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; <2 x float> variant of @fsqrt. The zero-input select is expected as an
; fcmeq/bif pair instead of fcmp/fcsel.
define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f2sqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.2s, v0.2s
; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT: fmul v2.2s, v2.2s, v0.2s
; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT: fcmeq v2.2s, v0.2s, #0.0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  ret <2 x float> %1
}

; <4 x float> variant of @f2sqrt (.4s arrangement).
define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f4sqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.4s, v0.4s
; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v0.4s
; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %1
}

; <8 x float> variant. The expected code handles the value as two .4s halves
; in v0 and v1, each with its own complete sequence (v0 first, then v1).
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
; FAULT-NEXT: fsqrt v1.4s, v1.4s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f8sqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v2.4s, v0.4s
; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT: fmul v3.4s, v3.4s, v0.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT: fcmeq v3.4s, v0.4s, #0.0
; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: frsqrte v2.4s, v1.4s
; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT: fmul v3.4s, v3.4s, v1.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT: fcmeq v3.4s, v1.4s, #0.0
; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  ret <8 x float> %1
}

; Scalar f64 sqrt with fast-math flags. Compared with @fsqrt, the estimate
; configuration expects three frsqrts steps instead of two.
define double @dsqrt(double %a) #0 {
; FAULT-LABEL: dsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: ret
;
; CHECK-LABEL: dsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d2, d2, d0
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; Same call as @dsqrt under attribute group #1 ("denormal-fp-math"="ieee").
; The expected output recorded here is the same as for @dsqrt.
define double @dsqrt_ieee_denorms(double %a) #1 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: ret
;
; CHECK-LABEL: dsqrt_ieee_denorms:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d2, d2, d0
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; <2 x double> variant of @dsqrt; the zero-input select is expected as an
; fcmeq/bif pair.
define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
; FAULT-NEXT: ret
;
; CHECK-LABEL: d2sqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.2d, v0.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v0.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ret <2 x double> %1
}

; <4 x double> variant. The expected code handles the value as two .2d halves
; in v0 and v1, each with its own complete sequence (v0 first, then v1).
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
; FAULT-NEXT: fsqrt v1.2d, v1.2d
; FAULT-NEXT: ret
;
; CHECK-LABEL: d4sqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v2.2d, v0.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v3.2d, v0.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fcmeq v3.2d, v0.2d, #0.0
; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: frsqrte v2.2d, v1.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT: fmul v3.2d, v3.2d, v1.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT: fcmeq v3.2d, v1.2d, #0.0
; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %1
}

; Reciprocal square root, 1.0 / sqrt(a), on f32. The baseline expects fsqrt,
; an fmov of 1.0 and an fdiv; the estimate configuration expects only the
; frsqrte/frsqrts/fmul sequence, with no compare/select on the input.
define float @frsqrt(float %a) #0 {
; FAULT-LABEL: frsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
; FAULT-NEXT: fmov s1, #1.00000000
; FAULT-NEXT: fdiv s0, s1, s0
; FAULT-NEXT: ret
;
; CHECK-LABEL: frsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte s1, s0
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fmul s2, s1, s1
; CHECK-NEXT: frsqrts s0, s0, s2
; CHECK-NEXT: fmul s0, s1, s0
; CHECK-NEXT: ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  %2 = fdiv fast float 1.000000e+00, %1
  ret float %2
}

; <2 x float> variant of @frsqrt.
define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
; FAULT-NEXT: fmov v1.2s, #1.00000000
; FAULT-NEXT: fdiv v0.2s, v1.2s, v0.2s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f2rsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.2s, v0.2s
; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT: frsqrts v0.2s, v0.2s, v2.2s
; CHECK-NEXT: fmul v0.2s, v1.2s, v0.2s
; CHECK-NEXT: ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
  ret <2 x float> %2
}

; <4 x float> variant of @frsqrt.
define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
; FAULT-NEXT: fmov v1.4s, #1.00000000
; FAULT-NEXT: fdiv v0.4s, v1.4s, v0.4s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f4rsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.4s, v0.4s
; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT: frsqrts v0.4s, v0.4s, v2.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <4 x float> %2
}

; <8 x float> variant of @frsqrt. The baseline shares one fmov of 1.0 between
; the two fdivs; in the expected estimate code the sequences for the two .4s
; halves (v0 and v1) are interleaved.
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v1.4s, v1.4s
; FAULT-NEXT: fsqrt v0.4s, v0.4s
; FAULT-NEXT: fmov v2.4s, #1.00000000
; FAULT-NEXT: fdiv v0.4s, v2.4s, v0.4s
; FAULT-NEXT: fdiv v1.4s, v2.4s, v1.4s
; FAULT-NEXT: ret
;
; CHECK-LABEL: f8rsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v2.4s, v0.4s
; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrte v3.4s, v1.4s
; CHECK-NEXT: frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT: frsqrts v4.4s, v1.4s, v4.4s
; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT: frsqrts v0.4s, v0.4s, v4.4s
; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT: frsqrts v1.4s, v1.4s, v4.4s
; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s
; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s
; CHECK-NEXT: ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <8 x float> %2
}

; Reciprocal square root on f64. As with @dsqrt, the estimate configuration
; expects three frsqrts steps.
define double @drsqrt(double %a) #0 {
; FAULT-LABEL: drsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: fmov d1, #1.00000000
; FAULT-NEXT: fdiv d0, d1, d0
; FAULT-NEXT: ret
;
; CHECK-LABEL: drsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d0, d0, d2
; CHECK-NEXT: fmul d0, d1, d0
; CHECK-NEXT: ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  %2 = fdiv fast double 1.000000e+00, %1
  ret double %2
}

; <2 x double> variant of @drsqrt.
define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
; FAULT-NEXT: fmov v1.2d, #1.00000000
; FAULT-NEXT: fdiv v0.2d, v1.2d, v0.2d
; FAULT-NEXT: ret
;
; CHECK-LABEL: d2rsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.2d, v0.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v0.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d
; CHECK-NEXT: ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
  ret <2 x double> %2
}

; <4 x double> variant of @drsqrt. In the expected estimate code the sequences
; for the two .2d halves (v0 and v1) are interleaved, three frsqrts steps each.
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v1.2d, v1.2d
; FAULT-NEXT: fsqrt v0.2d, v0.2d
; FAULT-NEXT: fmov v2.2d, #1.00000000
; FAULT-NEXT: fdiv v0.2d, v2.2d, v0.2d
; FAULT-NEXT: fdiv v1.2d, v2.2d, v1.2d
; FAULT-NEXT: ret
;
; CHECK-LABEL: d4rsqrt:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v2.2d, v0.2d
; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrte v3.2d, v1.2d
; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT: frsqrts v0.2d, v0.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT: frsqrts v1.2d, v1.2d, v4.2d
; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d
; CHECK-NEXT: fmul v1.2d, v3.2d, v1.2d
; CHECK-NEXT: ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
  ret <4 x double> %2
}

; x / sqrt(x) with fast-math flags on both operations. The baseline expects a
; single fsqrt and no fdiv; the estimate configuration expects the reciprocal
; square-root sequence followed by a multiply with x, and no compare/select.
define double @sqrt_fdiv_common_operand(double %x) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: ret
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  %r = fdiv fast double %x, %sqrt
  ret double %r
}

; <2 x double> variant of @sqrt_fdiv_common_operand. Here the sqrt call has no
; fast-math flags and the fdiv carries only arcp, nsz and reassoc; the expected
; output has the same shape as the scalar case.
define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand_vec:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte v1.2d, v0.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
  ret <2 x double> %r
}

; As @sqrt_fdiv_common_operand, but the sqrt result is also stored through %p,
; so it has a second use besides the fdiv. The baseline expects one fsqrt whose
; result is both stored and returned; the estimate configuration stores the
; fcsel-selected value and returns the plain product.
define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand_extra_use:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: str d0, [x0]
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand_extra_use:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  store double %sqrt, double* %p
  %r = fdiv fast double %x, %sqrt
  ret double %r
}

; sqrt(x) feeds three fdivs: 1.0 / sqrt, 42.0 / sqrt and x / sqrt. The first
; two results are stored through %p1 and %p2; the third is returned.
; The immediate 4631107791820423168 in the expected output is
; 0x4045000000000000, the bit pattern of the double 42.0.
define double @sqrt_simplify_before_recip_3_uses(double %x, double* %p1, double* %p2) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses:
; FAULT: // %bb.0:
; FAULT-NEXT: mov x8, #4631107791820423168
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: fmov d1, #1.00000000
; FAULT-NEXT: fmov d2, x8
; FAULT-NEXT: fdiv d1, d1, d0
; FAULT-NEXT: fdiv d2, d2, d0
; FAULT-NEXT: str d1, [x0]
; FAULT-NEXT: str d2, [x1]
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: mov x8, #4631107791820423168
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: fmul d2, d1, d2
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: str d2, [x1]
; CHECK-NEXT: ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  %r = fdiv fast double 42.0, %sqrt
  %sqrt_fast = fdiv fast double %x, %sqrt
  store double %rsqrt, double* %p1, align 8
  store double %r, double* %p2, align 8
  ret double %sqrt_fast
}

; Same idea as @sqrt_simplify_before_recip_3_uses with the IR in a different
; order and different numerators: x / sqrt comes first, followed by
; 42.0 / sqrt and 43.0 / sqrt, which are stored through %p1 and %p2.
; The mov/movk pair in the expected output builds 0x4045800000000000, the bit
; pattern of the double 43.0.
define double @sqrt_simplify_before_recip_3_uses_order(double %x, double* %p1, double* %p2) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order:
; FAULT: // %bb.0:
; FAULT-NEXT: mov x9, #140737488355328
; FAULT-NEXT: mov x8, #4631107791820423168
; FAULT-NEXT: movk x9, #16453, lsl #48
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: fmov d1, x8
; FAULT-NEXT: fmov d2, x9
; FAULT-NEXT: fdiv d1, d1, d0
; FAULT-NEXT: fdiv d2, d2, d0
; FAULT-NEXT: str d1, [x0]
; FAULT-NEXT: str d2, [x1]
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: mov x8, #4631107791820423168
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: mov x8, #140737488355328
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: movk x8, #16453, lsl #48
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fmul d2, d1, d2
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: str d2, [x0]
; CHECK-NEXT: str d1, [x1]
; CHECK-NEXT: ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %sqrt_fast = fdiv fast double %x, %sqrt
  %r1 = fdiv fast double 42.0, %sqrt
  %r2 = fdiv fast double 43.0, %sqrt
  store double %r1, double* %p1, align 8
  store double %r2, double* %p2, align 8
  ret double %sqrt_fast
}


; sqrt(x) feeds four fdivs: 1.0 / sqrt, 42.0 / sqrt, 43.0 / sqrt and x / sqrt.
; The first three results are stored through %p1, %p2 and %p3; the last one is
; returned. The baseline expects one fdiv (for 1.0 / sqrt) with the other two
; stored values formed by fmul. In the estimate configuration the recorded
; output still contains an fdiv for the returned value, fed by the
; fcsel-selected square root.
define double @sqrt_simplify_before_recip_4_uses(double %x, double* %p1, double* %p2, double* %p3) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_4_uses:
; FAULT: // %bb.0:
; FAULT-NEXT: mov x8, #4631107791820423168
; FAULT-NEXT: fmov d2, x8
; FAULT-NEXT: mov x8, #140737488355328
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: fmov d1, #1.00000000
; FAULT-NEXT: movk x8, #16453, lsl #48
; FAULT-NEXT: fdiv d1, d1, d0
; FAULT-NEXT: fmov d3, x8
; FAULT-NEXT: fmul d2, d1, d2
; FAULT-NEXT: fmul d3, d1, d3
; FAULT-NEXT: str d1, [x0]
; FAULT-NEXT: str d2, [x1]
; FAULT-NEXT: str d3, [x2]
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_4_uses:
; CHECK: // %bb.0:
; CHECK-NEXT: frsqrte d1, d0
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: mov x8, #4631107791820423168
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: mov x8, #140737488355328
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: movk x8, #16453, lsl #48
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: fmov d4, x8
; CHECK-NEXT: fmul d3, d0, d1
; CHECK-NEXT: fmul d2, d1, d2
; CHECK-NEXT: fmul d4, d1, d4
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: fcsel d1, d0, d3, eq
; CHECK-NEXT: fdiv d0, d0, d1
; CHECK-NEXT: str d2, [x1]
; CHECK-NEXT: str d4, [x2]
; CHECK-NEXT: ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  %r1 = fdiv fast double 42.0, %sqrt
  %r2 = fdiv fast double 43.0, %sqrt
  %sqrt_fast = fdiv fast double %x, %sqrt
  store double %rsqrt, double* %p1, align 8
  store double %r1, double* %p2, align 8
  store double %r2, double* %p3, align 8
  ret double %sqrt_fast
}

; Attribute group #0 sets "unsafe-fp-math"; group #1 additionally sets
; "denormal-fp-math" to "ieee" and is used by the *_ieee_denorms functions.
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }