; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s

; More than one 'arcp' division using a single divisor operand
; should be converted into a reciprocal and multiplication.
; In the expected output below, the optimized form appears as a load from
; memory (presumably the constant 1.0 - confirm against the constant pool),
; a single divss by the shared divisor, and mulss in place of each division.

; Don't do anything for just one division.

define float @div1_arcp(float %x, float %y, float %z) {
; CHECK-LABEL: div1_arcp:
; CHECK: # %bb.0:
; CHECK-NEXT: divss %xmm1, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %y
  ret float %div1
}

; All math instructions are 'arcp', so optimize.
; Both divisions use %z as the divisor.

define float @div2_arcp_all(float %x, float %y, float %z) {
; CHECK-LABEL: div2_arcp_all:
; CHECK: # %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: divss %xmm2, %xmm3
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %z
  %mul = fmul arcp float %div1, %y
  %div2 = fdiv arcp float %mul, %z
  ret float %div2
}

; The first division is not 'arcp', so do not optimize.
; Both divss instructions are expected to remain in the output.

define float @div2_arcp_partial1(float %x, float %y, float %z) {
; CHECK-LABEL: div2_arcp_partial1:
; CHECK: # %bb.0:
; CHECK-NEXT: divss %xmm2, %xmm0
; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: divss %xmm2, %xmm0
; CHECK-NEXT: retq
  ; This fdiv carries no fast-math flags.
  %div1 = fdiv float %x, %z
  %mul = fmul arcp float %div1, %y
  %div2 = fdiv arcp float %mul, %z
  ret float %div2
}

; The second division is not 'arcp', so do not optimize.
define float @div2_arcp_partial2(float %x, float %y, float %z) {
; CHECK-LABEL: div2_arcp_partial2:
; CHECK: # %bb.0:
; CHECK-NEXT: divss %xmm2, %xmm0
; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: divss %xmm2, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %z
  %mul = fmul arcp float %div1, %y
  ; This fdiv carries no fast-math flags, so both divss instructions are
  ; expected to remain in the output.
  %div2 = fdiv float %mul, %z
  ret float %div2
}

; The multiply is not 'arcp', but that does not prevent optimizing the divisions.
; The expected output matches the all-'arcp' case: one divss by %z followed by
; mulss instructions.

define float @div2_arcp_partial3(float %x, float %y, float %z) {
; CHECK-LABEL: div2_arcp_partial3:
; CHECK: # %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: divss %xmm2, %xmm3
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %z
  ; Only the two fdivs carry 'arcp'; this fmul has no fast-math flags.
  %mul = fmul float %div1, %y
  %div2 = fdiv arcp float %mul, %z
  ret float %div2
}

; If the reciprocal is already calculated, we should not
; generate an extra multiplication by 1.0.
define double @div3_arcp(double %x, double %y, double %z) {
; CHECK-LABEL: div3_arcp:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: divsd %xmm1, %xmm2
; CHECK-NEXT: mulsd %xmm2, %xmm0
; CHECK-NEXT: addsd %xmm2, %xmm0
; CHECK-NEXT: retq
  ; %div1 is already 1.0 / %y; %div2 divides by the same %y.
  ; The expected output has one divsd, one mulsd and one addsd.
  %div1 = fdiv fast double 1.0, %y
  %div2 = fdiv fast double %x, %y
  %ret = fadd fast double %div2, %div1
  ret double %ret
}

; A select between two constants (5.0 and 6.0) is divided by the constant 2.0
; with the 'nnan' flag. The expected output contains no division instruction:
; it branches on %arg and each path loads a value from memory.

define float @div_select_constant_fold(i1 zeroext %arg) {
; CHECK-LABEL: div_select_constant_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jne .LBB6_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB6_1:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
  %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
  %B2 = fdiv nnan float %tmp, 2.000000e+00
  ret float %B2
}

; Same select of constants, but divided by 0.0 and with no fast-math flags.
; The expected output is a single load from memory with no branch and no
; division (presumably both select arms fold to the same value - confirm).

define float @div_select_constant_fold_zero(i1 zeroext %arg) {
; CHECK-LABEL: div_select_constant_fold_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
  %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
  %B2 = fdiv float %tmp, 0.000000e+00
  ret float %B2
}

; An endless loop that calls @g, divides the two doubles it returns with an
; 'arcp' fdiv, and feeds the quotient back into the next call through a phi.
; There is only one division by %xv1, and the expected output keeps it as a
; plain divsd.
; NOTE(review): the name suggests a regression test for bug report 24141 -
; confirm against the bug tracker.

define void @PR24141() {
; CHECK-LABEL: PR24141:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB8_1: # %while.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: callq g
; CHECK-NEXT: divsd %xmm1, %xmm0
; CHECK-NEXT: jmp .LBB8_1
entry:
  br label %while.body

while.body:
  ; The first iteration passes undef; later iterations pass the previous quotient.
  %x.0 = phi double [ undef, %entry ], [ %div, %while.body ]
  %call = call { double, double } @g(double %x.0)
  %xv0 = extractvalue { double, double } %call, 0
  %xv1 = extractvalue { double, double } %call, 1
  %div = fdiv arcp double %xv0, %xv1
  br label %while.body
}

; External callee used by @PR24141; it takes one double and returns a pair of doubles.
declare { double, double } @g(double)
