1 /* libs/pixelflinger/fixed.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #include <stdio.h>
19
20 #include <private/pixelflinger/ggl_context.h>
21 #include <private/pixelflinger/ggl_fixed.h>
22
23
24 // ------------------------------------------------------------------------
25
gglRecipQNormalized(int32_t x,int * exponent)26 int32_t gglRecipQNormalized(int32_t x, int* exponent)
27 {
28 const int32_t s = x>>31;
29 uint32_t a = s ? -x : x;
30
31 // the result will overflow, so just set it to the biggest/inf value
32 if (ggl_unlikely(a <= 2LU)) {
33 *exponent = 0;
34 return s ? FIXED_MIN : FIXED_MAX;
35 }
36
37 // Newton-Raphson iteration:
38 // x = r*(2 - a*r)
39
40 const int32_t lz = gglClz(a);
41 a <<= lz; // 0.32
42 uint32_t r = a;
43 // note: if a == 0x80000000, this means x was a power-of-2, in this
44 // case we don't need to compute anything. We get the reciprocal for
45 // (almost) free.
46 if (a != 0x80000000) {
47 r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
48 // 0.32 + 2.30 = 2.62 -> 2.30
49 // 2.30 + 2.30 = 4.60 -> 2.30
50 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
51 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
52 }
53
54 // shift right 1-bit to make room for the sign bit
55 *exponent = 30-lz-1;
56 r >>= 1;
57 return s ? -r : r;
58 }
59
gglRecipQ(GGLfixed x,int q)60 int32_t gglRecipQ(GGLfixed x, int q)
61 {
62 int shift;
63 x = gglRecipQNormalized(x, &shift);
64 shift += 16-q;
65 x += 1L << (shift-1); // rounding
66 x >>= shift;
67 return x;
68 }
69
70 // ------------------------------------------------------------------------
71
gglFastDivx(GGLfixed n,GGLfixed d)72 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
73 {
74 if ((d>>24) && ((d>>24)+1)) {
75 n >>= 8;
76 d >>= 8;
77 }
78 return gglMulx(n, gglRecip(d));
79 }
80
81 // ------------------------------------------------------------------------
82
83 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
84 // 1/sqrt(x) with x = 1-N/16, N=[8...1]
85 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
86 };
87
gglSqrtRecipx(GGLfixed x)88 GGLfixed gglSqrtRecipx(GGLfixed x)
89 {
90 if (x == 0) return FIXED_MAX;
91 if (x == FIXED_ONE) return x;
92 const GGLfixed a = x;
93 const int32_t lz = gglClz(x);
94 x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
95 const int32_t exp = lz - 16;
96 if (exp <= 0) x >>= -exp>>1;
97 else x <<= (exp>>1) + (exp & 1);
98 if (exp & 1) {
99 x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
100 }
101 // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
102 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
103 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
104 return x;
105 }
106
gglSqrtx(GGLfixed a)107 GGLfixed gglSqrtx(GGLfixed a)
108 {
109 // Compute a full precision square-root (24 bits accuracy)
110 GGLfixed r = 0;
111 GGLfixed bit = 0x800000;
112 int32_t bshift = 15;
113 do {
114 GGLfixed temp = bit + (r<<1);
115 if (bshift >= 8) temp <<= (bshift-8);
116 else temp >>= (8-bshift);
117 if (a >= temp) {
118 r += bit;
119 a -= temp;
120 }
121 bshift--;
122 } while (bit>>=1);
123 return r;
124 }
125
126 // ------------------------------------------------------------------------
127
128 static const GGLfixed ggl_log_approx_tab[] = {
129 // -ln(x)/ln(2) with x = N/16, N=[8...16]
130 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
131 };
132
133 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
134 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
135 };
136
gglPowx(GGLfixed x,GGLfixed y)137 GGLfixed gglPowx(GGLfixed x, GGLfixed y)
138 {
139 // prerequisite: 0 <= x <= 1, and y >=0
140
141 // pow(x,y) = 2^(y*log2(x))
142 // = 2^(y*log2(x*(2^exp)*(2^-exp))))
143 // = 2^(y*(log2(X)-exp))
144 // = 2^(log2(X)*y - y*exp)
145 // = 2^( - (-log2(X)*y + y*exp) )
146
147 int32_t exp = gglClz(x) - 16;
148 GGLfixed f = x << exp;
149 x = (f & 0x0FFF)<<4;
150 f = (f >> 12) & 0x7;
151 GGLfixed p = gglMulAddx(
152 ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
153 ggl_log_approx_tab[f]);
154 p = gglMulAddx(p, y, y*exp);
155 exp = gglFixedToIntFloor(p);
156 if (exp < 31) {
157 p = gglFracx(p);
158 x = (p & 0x1FFF)<<3;
159 p >>= 13;
160 p = gglMulAddx(
161 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
162 ggl_alog_approx_tab[p]);
163 p >>= exp;
164 } else {
165 p = 0;
166 }
167 return p;
168 // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
169 }
170
171 // ------------------------------------------------------------------------
172
gglDivQ(GGLfixed n,GGLfixed d,int32_t i)173 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
174 {
175 //int32_t r =int32_t((int64_t(n)<<i)/d);
176 const int32_t ds = n^d;
177 if (n<0) n = -n;
178 if (d<0) d = -d;
179 int nd = gglClz(d) - gglClz(n);
180 i += nd + 1;
181 if (nd > 0) d <<= nd;
182 else n <<= -nd;
183 uint32_t q = 0;
184
185 int j = i & 7;
186 i >>= 3;
187
188 // gcc deals with the code below pretty well.
189 // we get 3.75 cycles per bit in the main loop
190 // and 8 cycles per bit in the termination loop
191 if (ggl_likely(i)) {
192 n -= d;
193 do {
194 q <<= 8;
195 if (n>=0) q |= 128;
196 else n += d;
197 n = n*2 - d;
198 if (n>=0) q |= 64;
199 else n += d;
200 n = n*2 - d;
201 if (n>=0) q |= 32;
202 else n += d;
203 n = n*2 - d;
204 if (n>=0) q |= 16;
205 else n += d;
206 n = n*2 - d;
207 if (n>=0) q |= 8;
208 else n += d;
209 n = n*2 - d;
210 if (n>=0) q |= 4;
211 else n += d;
212 n = n*2 - d;
213 if (n>=0) q |= 2;
214 else n += d;
215 n = n*2 - d;
216 if (n>=0) q |= 1;
217 else n += d;
218
219 if (--i == 0)
220 goto finish;
221
222 n = n*2 - d;
223 } while(true);
224 do {
225 q <<= 1;
226 n = n*2 - d;
227 if (n>=0) q |= 1;
228 else n += d;
229 finish: ;
230 } while (j--);
231 return (ds<0) ? -q : q;
232 }
233
234 n -= d;
235 if (n>=0) q |= 1;
236 else n += d;
237 j--;
238 goto finish;
239 }
240
241 // ------------------------------------------------------------------------
242
243 // assumes that the int32_t values of a, b, and c are all positive
244 // use when both a and b are larger than c
245
246 template <typename T>
swap(T & a,T & b)247 static inline void swap(T& a, T& b) {
248 T t(a);
249 a = b;
250 b = t;
251 }
252
253 static __attribute__((noinline))
slow_muldiv(uint32_t a,uint32_t b,uint32_t c)254 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
255 {
256 // first we compute a*b as a 64-bit integer
257 // (GCC generates umull with the code below)
258 uint64_t ab = uint64_t(a)*b;
259 uint32_t hi = ab>>32;
260 uint32_t lo = ab;
261 uint32_t result;
262
263 // now perform the division
264 if (hi >= c) {
265 overflow:
266 result = 0x7fffffff; // basic overflow
267 } else if (hi == 0) {
268 result = lo/c; // note: c can't be 0
269 if ((result >> 31) != 0) // result must fit in 31 bits
270 goto overflow;
271 } else {
272 uint32_t r = hi;
273 int bits = 31;
274 result = 0;
275 do {
276 r = (r << 1) | (lo >> 31);
277 lo <<= 1;
278 result <<= 1;
279 if (r >= c) {
280 r -= c;
281 result |= 1;
282 }
283 } while (bits--);
284 }
285 return int32_t(result);
286 }
287
288 // assumes a >= 0 and c >= b >= 0
289 static inline
quick_muldiv(int32_t a,int32_t b,int32_t c)290 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
291 {
292 int32_t r = 0, q = 0, i;
293 int leading = gglClz(a);
294 i = 32 - leading;
295 a <<= leading;
296 do {
297 r <<= 1;
298 if (a < 0)
299 r += b;
300 a <<= 1;
301 q <<= 1;
302 if (r >= c) {
303 r -= c;
304 q++;
305 }
306 asm(""::); // gcc generates better code this way
307 if (r >= c) {
308 r -= c;
309 q++;
310 }
311 }
312 while (--i);
313 return q;
314 }
315
316 // this function computes a*b/c with 64-bit intermediate accuracy
317 // overflows (e.g. division by 0) are handled and return INT_MAX
318
gglMulDivi(int32_t a,int32_t b,int32_t c)319 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
320 {
321 int32_t result;
322 int32_t sign = a^b^c;
323
324 if (a < 0) a = -a;
325 if (b < 0) b = -b;
326 if (c < 0) c = -c;
327
328 if (a < b) {
329 swap(a, b);
330 }
331
332 if (b <= c) result = quick_muldiv(a, b, c);
333 else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
334
335 if (sign < 0)
336 result = -result;
337
338 return result;
339 }
340