#include <asm/ppc_asm.h>
#include <asm/reg.h>

/*
 * Hand-written assembler is used here so that the exact set of
 * floating-point registers touched by each routine is under our control.
 * Callers must have preemption disabled.
 *
 * Register conventions shared by everything in this file:
 *   r10  - MSR value on entry (restored by fpdisable)
 *   r11  - scratch (modified MSR; address temporary for LDCONST on PPC32)
 *   r12  - return address of the public routine (restored by fpdisable)
 *   fr31 - FPSCR value on entry (restored by fpdisable)
 */

/*
 * Floating-point constants and the LDCONST(fr, name) helper that loads
 * one of them into FP register 'fr'.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

/* 32-bit: build the address in r11, then load a single-precision word. */
#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

/* 64-bit: the constants are doubles in the TOC, addressed through r2. */
#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

/*
 * Loop setup common to all the vector routines: CTR = 4 (one iteration
 * per 4-byte element) and a zero byte offset in register 'idx'.
 * Clobbers r0.
 */
#define VEC_LOOP_INIT(idx)	\
	li	r0,4;		\
	mtctr	r0;		\
	li	idx,0

	.text
/*
 * fpenable - internal prologue shared by the vector routines.
 *
 * Turns on MSR_FP and, per the original description, sets FPSCR to 0.
 * Not callable from C: it does not follow the normal calling convention.
 * The caller must already have copied LR into r12, because the 'bl' that
 * reaches this code overwrites LR.
 *
 * Frame layout (64 bytes, released again by fpdisable):
 *    8(r1) fr31     16(r1) fr1     24(r1) fr0
 *   32(r1)..56(r1)  free for the caller to save fr2..fr5
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10			/* remember the incoming MSR */
	ori	r11,r10,MSR_FP
	mtmsr	r11			/* FP instructions are usable from here */
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31			/* keep the old FPSCR in fr31 */
	MTFSF_L(fr1)			/* load FPSCR from fr1 (zero) */
	blr

/*
 * fpdisable - internal epilogue, reached with a plain branch.
 *
 * Undoes fpenable: restores FPSCR from fr31, reloads fr31/fr1/fr0,
 * restores the MSR from r10, pops the 64-byte frame and returns to the
 * address that the public routine saved in r12.
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * vaddfp - element-wise single-precision add over four elements.
 * Stores r4[i] + r5[i] to r3[i].
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	VEC_LOOP_INIT(r6)
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4			/* step to the next element */
	bdnz	1b
	b	fpdisable

/*
 * vsubfp - element-wise single-precision subtract over four elements.
 * Stores r4[i] - r5[i] to r3[i].
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	VEC_LOOP_INIT(r6)
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * vmaddfp - element-wise single-precision multiply-add over four elements.
 * Stores r4[i] * r6[i] + r5[i] to r3[i].
 * fr2 is needed as a third operand, so it is saved in the frame first.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	VEC_LOOP_INIT(r7)
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1		/* fr0 * fr2 + fr1 */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * vnmsubfp - element-wise single-precision negative multiply-subtract
 * over four elements.
 * Stores -(r4[i] * r6[i] - r5[i]) to r3[i].
 * fr2 is needed as a third operand, so it is saved in the frame first.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	VEC_LOOP_INIT(r7)
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1		/* -(fr0 * fr2 - fr1) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * vrefp - reciprocal "estimate" over four single-precision elements.
 * No estimate instruction is used; each result is simply 1.0 / x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0 for the whole loop */
	VEC_LOOP_INIT(r6)
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * vrsqrtefp - reciprocal square-root estimate over four single-precision
 * elements.
 * frsqrte supplies the starting value, which is then refined with two
 * Newton-Raphson steps to reach sufficient accuracy.
 * r3 -> destination, r4 -> source.
 * fr2..fr5 are used as temporaries/constants and are saved in the frame.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	VEC_LOOP_INIT(r6)
1:	lfsx	fr0,r4,r6		/* s = source element */
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	/* first refinement */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second refinement */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable