1 /*
2 * Copyright (C) 2005 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_GGL_FIXED_H
18 #define ANDROID_GGL_FIXED_H
19
20 #include <math.h>
21 #include <pixelflinger/pixelflinger.h>
22
23 // ----------------------------------------------------------------------------
24
// Shorthand for the GCC/Clang `const` function attribute. It is appended to
// the prototypes in this header whose result depends only on their arguments.
25 #define CONST __attribute__((const))
// Shorthand for the GCC/Clang `always_inline` function attribute, applied to
// the small conversion helpers declared below.
26 #define ALWAYS_INLINE __attribute__((always_inline))
27
// Parameters of the fixed-point format used by the helpers in this header.
// FIXED_BITS is the number of fractional bits, so with the value 16:
//   FIXED_ONE  == 1 << 16 == 0x10000  (the value 1.0)
//   FIXED_HALF == 1 << 15 == 0x8000   (the value 0.5)
28 const GGLfixed FIXED_BITS = 16;
// One unit in the last fractional bit.
29 const GGLfixed FIXED_EPSILON = 1;
30 const GGLfixed FIXED_ONE = 1L<<FIXED_BITS;
31 const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
// Bit patterns 0x80000000 and 0x7FFFFFFF.
// NOTE(review): these read as the extreme values of a signed 32-bit integer,
// which assumes GGLfixed is a 32-bit signed type -- confirm against pixelflinger.h.
32 const GGLfixed FIXED_MIN = 0x80000000L;
33 const GGLfixed FIXED_MAX = 0x7FFFFFFFL;
34
// Forward declarations of the conversion/rounding helpers. They exist to
// attach the always_inline attribute; the definitions follow immediately
// below and rely on these declarations for their `inline` specifier.
35 inline GGLfixed gglIntToFixed(GGLfixed i) ALWAYS_INLINE ;
36 inline GGLfixed gglFixedToIntRound(GGLfixed f) ALWAYS_INLINE ;
37 inline GGLfixed gglFixedToIntFloor(GGLfixed f) ALWAYS_INLINE ;
38 inline GGLfixed gglFixedToIntCeil(GGLfixed f) ALWAYS_INLINE ;
39 inline GGLfixed gglFracx(GGLfixed v) ALWAYS_INLINE ;
40 inline GGLfixed gglFloorx(GGLfixed v) ALWAYS_INLINE ;
41 inline GGLfixed gglCeilx(GGLfixed v) ALWAYS_INLINE ;
42 inline GGLfixed gglCenterx(GGLfixed v) ALWAYS_INLINE ;
43 inline GGLfixed gglRoundx(GGLfixed v) ALWAYS_INLINE ;
44
gglIntToFixed(GGLfixed i)45 GGLfixed gglIntToFixed(GGLfixed i) {
46 return i<<FIXED_BITS;
47 }
gglFixedToIntRound(GGLfixed f)48 GGLfixed gglFixedToIntRound(GGLfixed f) {
49 return (f + FIXED_HALF)>>FIXED_BITS;
50 }
gglFixedToIntFloor(GGLfixed f)51 GGLfixed gglFixedToIntFloor(GGLfixed f) {
52 return f>>FIXED_BITS;
53 }
gglFixedToIntCeil(GGLfixed f)54 GGLfixed gglFixedToIntCeil(GGLfixed f) {
55 return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
56 }
57
gglFracx(GGLfixed v)58 GGLfixed gglFracx(GGLfixed v) {
59 return v & ((1<<FIXED_BITS)-1);
60 }
gglFloorx(GGLfixed v)61 GGLfixed gglFloorx(GGLfixed v) {
62 return gglFixedToIntFloor(v)<<FIXED_BITS;
63 }
gglCeilx(GGLfixed v)64 GGLfixed gglCeilx(GGLfixed v) {
65 return gglFixedToIntCeil(v)<<FIXED_BITS;
66 }
gglCenterx(GGLfixed v)67 GGLfixed gglCenterx(GGLfixed v) {
68 return gglFloorx(v + FIXED_HALF) | FIXED_HALF;
69 }
gglRoundx(GGLfixed v)70 GGLfixed gglRoundx(GGLfixed v) {
71 return gglFixedToIntRound(v)<<FIXED_BITS;
72 }
73
74 // conversion from (unsigned) int, short, byte to fixed...
// Each macro rescales an integer of the named width so that the largest
// value of that width becomes 0x10000 (1.0 with 16 fractional bits):
//   GGL_B_TO_X(127)         == ((127+1)>>1)<<10        == 0x10000
//   GGL_S_TO_X(32767)       == ((32767+1)>>1)<<2       == 0x10000
//   GGL_I_TO_X(0x7FFFFFFF)  == ((0x3FFFFFFF)+1)>>14    == 0x10000
//   GGL_UB_TO_X(255)        == 255 + (255<<8) + 1      == 0x10000
//   GGL_US_TO_X(65535)      == 65535 + 1               == 0x10000
//   GGL_UI_TO_X(0xFFFFFFFF) == (0x7FFFFFFF+1)>>15      == 0x10000
// NOTE(review): GGL_US_TO_X and GGL_UI_TO_X use _x without a cast, so they
// presumably expect an argument that is already unsigned -- confirm at call sites.
// The arguments are evaluated more than once in GGL_UB_TO_X and GGL_US_TO_X.
75 #define GGL_B_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
76 #define GGL_S_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
77 #define GGL_I_TO_X(_x) GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
78 #define GGL_UB_TO_X(_x) GGLfixed( uint32_t(_x) + \
79 (uint32_t(_x)<<8) + \
80 (uint32_t(_x)>>7) )
81 #define GGL_US_TO_X(_x) GGLfixed( (_x) + ((_x)>>15) )
82 #define GGL_UI_TO_X(_x) GGLfixed( (((_x)>>1)+1)>>15 )
83
84 // ----------------------------------------------------------------------------
85
// Out-of-line math helpers: only the prototypes are visible in this header.
// NOTE(review): what each one computes is suggested by its name only
// (power, square root, reciprocal square root, fast divide, a*b/c) --
// confirm against the implementation file before relying on it.
86 GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
87 GGLfixed gglSqrtx(GGLfixed a) CONST;
88 GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
89 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
// Not marked CONST, unlike its neighbours.
90 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);
91
// Writes through `exponent`, which is consistent with it not being marked CONST.
92 int32_t gglRecipQNormalized(int32_t x, int* exponent);
// Called by gglRecip (q = 16) and gglRecip28 (q = 28) below.
93 int32_t gglRecipQ(GGLfixed x, int q) CONST;
94
95 inline GGLfixed gglRecip(GGLfixed x) CONST;
gglRecip(GGLfixed x)96 inline GGLfixed gglRecip(GGLfixed x) {
97 return gglRecipQ(x, 16);
98 }
99
100 inline GGLfixed gglRecip28(GGLfixed x) CONST;
gglRecip28(GGLfixed x)101 int32_t gglRecip28(GGLfixed x) {
102 return gglRecipQ(x, 28);
103 }
104
105 // ----------------------------------------------------------------------------
106
107 #if defined(__arm__) && !defined(__thumb__)
108
109 // inline ARM implementations
// Fixed-point multiply for ARM (non-Thumb): the 64-bit signed product x*y
// shifted right by `shift`, rounded using the last bit shifted out.
//   smull : 64-bit signed product of x and y into hi:lo
//   movs  : lo = lo >> shift (logical); being flag-setting, it leaves the
//           last bit shifted out in the carry flag
//   adc   : lo = lo + (hi << (32 - shift)) + carry
// The flags are modified, hence the "cc" clobber.
// The two asm statements contain the same instructions; they differ in how
// the shift amounts are passed: as immediates ("I") when `shift` is a
// compile-time constant, in registers ("r") otherwise.
// NOTE(review): the constant-shift variant uses plain "=r" outputs where the
// register variant uses early-clobber "=&r" -- confirm this is intentional.
110 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
gglMulx(GGLfixed x,GGLfixed y,int shift)111 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
112 GGLfixed result, t;
113 if (__builtin_constant_p(shift)) {
114 asm("smull %[lo], %[hi], %[x], %[y] \n"
115 "movs %[lo], %[lo], lsr %[rshift] \n"
116 "adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
117 : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
118 : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
119 : "cc"
120 );
121 } else {
122 asm("smull %[lo], %[hi], %[x], %[y] \n"
123 "movs %[lo], %[lo], lsr %[rshift] \n"
124 "adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
125 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
126 : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
127 : "cc"
128 );
129 }
130 return result;
131 }
132
// Fixed-point multiply-add for ARM (non-Thumb): (x*y >> shift) + a, with no
// rounding step.
//   smull : 64-bit signed product of x and y into hi:lo
//   add   : lo = a + (lo >> shift)          (logical shift)
//   add   : lo = lo + (hi << (32 - shift))
// No flag-setting instruction is used, so there is no "cc" clobber.
// The shift amounts are immediates ("I") when `shift` is a compile-time
// constant and register operands ("r") otherwise.
133 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulAddx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)134 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
135 GGLfixed result, t;
136 if (__builtin_constant_p(shift)) {
137 asm("smull %[lo], %[hi], %[x], %[y] \n"
138 "add %[lo], %[a], %[lo], lsr %[rshift] \n"
139 "add %[lo], %[lo], %[hi], lsl %[lshift] \n"
140 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
141 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
142 );
143 } else {
144 asm("smull %[lo], %[hi], %[x], %[y] \n"
145 "add %[lo], %[a], %[lo], lsr %[rshift] \n"
146 "add %[lo], %[lo], %[hi], lsl %[lshift] \n"
147 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
148 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
149 );
150 }
151 return result;
152 }
153
// Fixed-point multiply-subtract for ARM (non-Thumb): (x*y >> shift) - a,
// with no rounding step.
//   smull : 64-bit signed product of x and y into hi:lo
//   rsb   : lo = (lo >> shift) - a          (reverse subtract, logical shift)
//   add   : lo = lo + (hi << (32 - shift))
// The shift amounts are immediates ("I") when `shift` is a compile-time
// constant and register operands ("r") otherwise.
154 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulSubx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)155 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
156 GGLfixed result, t;
157 if (__builtin_constant_p(shift)) {
158 asm("smull %[lo], %[hi], %[x], %[y] \n"
159 "rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
160 "add %[lo], %[lo], %[hi], lsl %[lshift] \n"
161 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
162 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
163 );
164 } else {
165 asm("smull %[lo], %[hi], %[x], %[y] \n"
166 "rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
167 "add %[lo], %[lo], %[hi], lsl %[lshift] \n"
168 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
169 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
170 );
171 }
172 return result;
173 }
174
// Full 64-bit signed product of two 32-bit integers on ARM (non-Thumb).
// smull writes the low and high words straight into the two halves of an
// anonymous union that overlays them with the int64_t that is returned.
// NOTE(review): the struct lists `lo` before `hi`, so this layout assumes a
// little-endian target -- confirm if big-endian ARM must be supported.
175 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)176 inline int64_t gglMulii(int32_t x, int32_t y)
177 {
178 // 64-bits result: r0=low, r1=high
179 union {
180 struct {
181 int32_t lo;
182 int32_t hi;
183 } s;
184 int64_t res;
185 };
186 asm("smull %0, %1, %2, %3 \n"
187 : "=r"(s.lo), "=&r"(s.hi)
188 : "%r"(x), "r"(y)
189 :
190 );
191 return res;
192 }
193 #elif defined(__mips__) && __mips_isa_rev < 6
194
195 /*inline MIPS implementations*/
// Fixed-point multiply for MIPS before release 6. `mult` leaves the 64-bit
// signed product in the HI/LO registers (declared as clobbers); mfhi/mflo
// read the two halves. The code is specialised on `shift`; see the comment
// ahead of each path.
// NOTE(review): some paths list tmp/tmp2 (or a `shift` input) in their
// operand lists without using them in the instruction text.
196 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)197 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
198 GGLfixed result,tmp,tmp1,tmp2;
199
200 if (__builtin_constant_p(shift)) {
// shift == 0: the result is the low word of the product, unrounded.
201 if (shift == 0) {
202 asm ("mult %[a], %[b] \t\n"
203 "mflo %[res] \t\n"
204 : [res]"=&r"(result),[tmp]"=&r"(tmp)
205 : [a]"r"(a),[b]"r"(b)
206 : "%hi","%lo"
207 );
// shift == 32: the result is the high word, plus the carry out of
// (lo + 0x80000000), plus the sign-extension of that constant
// (sra by 31 turns 0x80000000 into -1), i.e. hi + carry - 1.
// NOTE(review): confirm the "- 1" is intended.
208 } else if (shift == 32)
209 {
210 asm ("mult %[a], %[b] \t\n"
211 "li %[tmp],1\t\n"
212 "sll %[tmp],%[tmp],0x1f\t\n"
213 "mflo %[res] \t\n"
214 "addu %[tmp1],%[tmp],%[res] \t\n"
215 "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/
216 "sra %[tmp],%[tmp],0x1f \t\n"
217 "mfhi %[res] \t\n"
218 "addu %[res],%[res],%[tmp]\t\n"
219 "addu %[res],%[res],%[tmp1]\t\n"
220 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
221 : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
222 : "%hi","%lo"
223 );
// 0 < shift < 32: add 1 << (shift-1) to lo, propagate the carry (tmp1) into
// hi, then combine (hi << (32-shift)) | (lo >> shift).
224 } else if ((shift >0) && (shift < 32))
225 {
226 asm ("mult %[a], %[b] \t\n"
227 "li %[tmp],1 \t\n"
228 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
229 "mflo %[res] \t\n"
230 "addu %[tmp1],%[tmp],%[res] \t\n"
231 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
232 "addu %[res],%[res],%[tmp] \t\n"
233 "mfhi %[tmp] \t\n"
234 "addu %[tmp],%[tmp],%[tmp1] \t\n"
235 "sll %[tmp],%[tmp],%[lshift] \t\n"
236 "srl %[res],%[res],%[rshift] \t\n"
237 "or %[res],%[res],%[tmp] \t\n"
238 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
239 : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
240 : "%hi","%lo"
241 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, but with the shift-derived operands passed as "I" immediates.
242 } else {
243 asm ("mult %[a], %[b] \t\n"
244 "li %[tmp],1 \t\n"
245 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
246 "mflo %[res] \t\n"
247 "addu %[tmp1],%[tmp],%[res] \t\n"
248 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
249 "sra %[tmp2],%[tmp],0x1f \t\n"
250 "addu %[res],%[res],%[tmp] \t\n"
251 "mfhi %[tmp] \t\n"
252 "addu %[tmp],%[tmp],%[tmp2] \t\n"
253 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
254 "srl %[tmp2],%[res],%[rshift] \t\n"
255 "srav %[res], %[tmp],%[rshift]\t\n"
256 "sll %[tmp],%[tmp],1 \t\n"
257 "sll %[tmp],%[tmp],%[norbits] \t\n"
258 "or %[tmp],%[tmp],%[tmp2] \t\n"
259 "movz %[res],%[tmp],%[bit5] \t\n"
260 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
261 : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
262 : "%hi","%lo"
263 );
264 }
// Run-time shift: add 1 << (shift-1) to lo and carry into hi as above, then
// compute both candidates -- the arithmetic shift of hi (srav) and the
// combination ((hi << 1) << ~shift) | (lo >> shift) -- and let movz pick the
// combination when bit 5 of shift (shift & 0x20) is zero.
265 } else {
266 asm ("mult %[a], %[b] \t\n"
267 "li %[tmp],1 \t\n"
268 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
269 "mflo %[res] \t\n"
270 "addu %[tmp1],%[tmp],%[res] \t\n"
271 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
272 "sra %[tmp2],%[tmp],0x1f \t\n"
273 "addu %[res],%[res],%[tmp] \t\n"
274 "mfhi %[tmp] \t\n"
275 "addu %[tmp],%[tmp],%[tmp2] \t\n"
276 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
277 "srl %[tmp2],%[res],%[rshift] \t\n"
278 "srav %[res], %[tmp],%[rshift]\t\n"
279 "sll %[tmp],%[tmp],1 \t\n"
280 "sll %[tmp],%[tmp],%[norbits] \t\n"
281 "or %[tmp],%[tmp],%[tmp2] \t\n"
282 "movz %[res],%[tmp],%[bit5] \t\n"
283 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
284 : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
285 : "%hi","%lo"
286 );
287 }
288
289 return result;
290 }
291
// Fixed-point multiply-add for MIPS before release 6: (a*b >> shift) + c,
// with no rounding step. `mult` leaves the 64-bit signed product in HI/LO
// (declared as clobbers). The code is specialised on `shift`; see the comment
// ahead of each path.
292 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)293 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
294 GGLfixed result,t,tmp1,tmp2;
295
296 if (__builtin_constant_p(shift)) {
// shift == 0: low word of the product, plus c.
297 if (shift == 0) {
298 asm ("mult %[a], %[b] \t\n"
299 "mflo %[lo] \t\n"
300 "addu %[lo],%[lo],%[c] \t\n"
301 : [lo]"=&r"(result)
302 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
303 : "%hi","%lo"
304 );
// shift == 32: high word of the product, plus c.
305 } else if (shift == 32) {
306 asm ("mult %[a], %[b] \t\n"
307 "mfhi %[lo] \t\n"
308 "addu %[lo],%[lo],%[c] \t\n"
309 : [lo]"=&r"(result)
310 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
311 : "%hi","%lo"
312 );
// 0 < shift < 32: ((lo >> shift) | (hi << (32-shift))) + c.
313 } else if ((shift>0) && (shift<32)) {
314 asm ("mult %[a], %[b] \t\n"
315 "mflo %[res] \t\n"
316 "mfhi %[t] \t\n"
317 "srl %[res],%[res],%[rshift] \t\n"
318 "sll %[t],%[t],%[lshift] \t\n"
319 "or %[res],%[res],%[t] \t\n"
320 "addu %[res],%[res],%[c] \t\n"
321 : [res]"=&r"(result),[t]"=&r"(t)
322 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
323 : "%hi","%lo"
324 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, with `shift` passed as an "I" immediate.
325 } else {
326 asm ("mult %[a], %[b] \t\n"
327 "nor %[tmp1],$zero,%[shift]\t\n"
328 "mflo %[res] \t\n"
329 "mfhi %[t] \t\n"
330 "srl %[res],%[res],%[shift] \t\n"
331 "sll %[tmp2],%[t],1 \t\n"
332 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
333 "or %[tmp1],%[tmp2],%[res] \t\n"
334 "srav %[res],%[t],%[shift] \t\n"
335 "andi %[tmp2],%[shift],0x20\t\n"
336 "movz %[res],%[tmp1],%[tmp2]\t\n"
337 "addu %[res],%[res],%[c] \t\n"
338 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
339 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
340 : "%hi","%lo"
341 );
342 }
// Run-time shift: tmp1 = ~shift (nor with $zero). Two candidates are built --
// ((hi << 1) << ~shift) | (lo >> shift) and the arithmetic shift of hi --
// and movz keeps the first when (shift & 0x20) is zero. c is added last.
343 } else {
344 asm ("mult %[a], %[b] \t\n"
345 "nor %[tmp1],$zero,%[shift]\t\n"
346 "mflo %[res] \t\n"
347 "mfhi %[t] \t\n"
348 "srl %[res],%[res],%[shift] \t\n"
349 "sll %[tmp2],%[t],1 \t\n"
350 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
351 "or %[tmp1],%[tmp2],%[res] \t\n"
352 "srav %[res],%[t],%[shift] \t\n"
353 "andi %[tmp2],%[shift],0x20\t\n"
354 "movz %[res],%[tmp1],%[tmp2]\t\n"
355 "addu %[res],%[res],%[c] \t\n"
356 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
357 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
358 : "%hi","%lo"
359 );
360 }
361 return result;
362 }
363
// Fixed-point multiply-subtract for MIPS before release 6:
// (a*b >> shift) - c, with no rounding step. The structure mirrors the
// multiply-add variant, with the final addu replaced by subu. `mult` leaves
// the 64-bit signed product in HI/LO (declared as clobbers).
364 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)365 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
366 GGLfixed result,t,tmp1,tmp2;
367
368 if (__builtin_constant_p(shift)) {
// shift == 0: low word of the product, minus c.
369 if (shift == 0) {
370 asm ("mult %[a], %[b] \t\n"
371 "mflo %[lo] \t\n"
372 "subu %[lo],%[lo],%[c] \t\n"
373 : [lo]"=&r"(result)
374 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
375 : "%hi","%lo"
376 );
// shift == 32: high word of the product, minus c.
377 } else if (shift == 32) {
378 asm ("mult %[a], %[b] \t\n"
379 "mfhi %[lo] \t\n"
380 "subu %[lo],%[lo],%[c] \t\n"
381 : [lo]"=&r"(result)
382 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
383 : "%hi","%lo"
384 );
// 0 < shift < 32: ((lo >> shift) | (hi << (32-shift))) - c.
385 } else if ((shift>0) && (shift<32)) {
386 asm ("mult %[a], %[b] \t\n"
387 "mflo %[res] \t\n"
388 "mfhi %[t] \t\n"
389 "srl %[res],%[res],%[rshift] \t\n"
390 "sll %[t],%[t],%[lshift] \t\n"
391 "or %[res],%[res],%[t] \t\n"
392 "subu %[res],%[res],%[c] \t\n"
393 : [res]"=&r"(result),[t]"=&r"(t)
394 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
395 : "%hi","%lo"
396 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, with `shift` passed as an "I" immediate.
397 } else {
398 asm ("mult %[a], %[b] \t\n"
399 "nor %[tmp1],$zero,%[shift]\t\n"
400 "mflo %[res] \t\n"
401 "mfhi %[t] \t\n"
402 "srl %[res],%[res],%[shift] \t\n"
403 "sll %[tmp2],%[t],1 \t\n"
404 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
405 "or %[tmp1],%[tmp2],%[res] \t\n"
406 "srav %[res],%[t],%[shift] \t\n"
407 "andi %[tmp2],%[shift],0x20\t\n"
408 "movz %[res],%[tmp1],%[tmp2]\t\n"
409 "subu %[res],%[res],%[c] \t\n"
410 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
411 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
412 : "%hi","%lo"
413 );
414 }
// Run-time shift: tmp1 = ~shift (nor with $zero). Two candidates are built --
// ((hi << 1) << ~shift) | (lo >> shift) and the arithmetic shift of hi --
// and movz keeps the first when (shift & 0x20) is zero. c is subtracted last.
415 } else {
416 asm ("mult %[a], %[b] \t\n"
417 "nor %[tmp1],$zero,%[shift]\t\n"
418 "mflo %[res] \t\n"
419 "mfhi %[t] \t\n"
420 "srl %[res],%[res],%[shift] \t\n"
421 "sll %[tmp2],%[t],1 \t\n"
422 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
423 "or %[tmp1],%[tmp2],%[res] \t\n"
424 "srav %[res],%[t],%[shift] \t\n"
425 "andi %[tmp2],%[shift],0x20\t\n"
426 "movz %[res],%[tmp1],%[tmp2]\t\n"
427 "subu %[res],%[res],%[c] \t\n"
428 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
429 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
430 : "%hi","%lo"
431 );
432 }
433 return result;
434 }
435
// Full 64-bit signed product of two 32-bit integers for MIPS before
// release 6. `mult` leaves the product in HI/LO; mfhi/mflo copy the halves
// into a union that overlays them with the int64_t that is returned. The
// order of `lo` and `hi` in the struct is chosen by endianness
// (__MIPSEL__ / __MIPSEB__) so that each half lands in the matching half of
// `res`.
// NOTE(review): if neither endianness macro is defined the struct is empty
// and the asm operands will not compile.
436 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)437 inline int64_t gglMulii(int32_t x, int32_t y) {
438 union {
439 struct {
440 #if defined(__MIPSEL__)
441 int32_t lo;
442 int32_t hi;
443 #elif defined(__MIPSEB__)
444 int32_t hi;
445 int32_t lo;
446 #endif
447 } s;
448 int64_t res;
449 }u;
450 asm("mult %2, %3 \t\n"
451 "mfhi %1 \t\n"
452 "mflo %0 \t\n"
453 : "=r"(u.s.lo), "=&r"(u.s.hi)
454 : "%r"(x), "r"(y)
455 : "%hi","%lo"
456 );
457 return u.res;
458 }
459
460 #elif defined(__aarch64__)
461
462 // inline AArch64 implementations
463
// Fixed-point multiply for AArch64: (x*y + rounding) >> shift, computed in a
// 64-bit register.
//   mov/lsl/lsr : round = (1 << shift) >> 1, which is 0 when shift == 0
//   smaddl      : result = x*y (signed 32x32 -> 64) + round
//   lsr         : result >>= shift (logical); only the low 32 bits are
//                 returned through the GGLfixed result
// The same sequence is used whether or not `shift` is a compile-time constant.
// NOTE(review): `round`, `result` and `shift` are 32-bit C objects accessed
// through the 64-bit %x register view -- confirm this is acceptable to the
// toolchains in use.
464 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
gglMulx(GGLfixed x,GGLfixed y,int shift)465 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
466 {
467 GGLfixed result;
468 GGLfixed round;
469
470 asm("mov %x[round], #1 \n"
471 "lsl %x[round], %x[round], %x[shift] \n"
472 "lsr %x[round], %x[round], #1 \n"
473 "smaddl %x[result], %w[x], %w[y],%x[round] \n"
474 "lsr %x[result], %x[result], %x[shift] \n"
475 : [round]"=&r"(round), [result]"=&r"(result) \
476 : [x]"r"(x), [y]"r"(y), [shift] "r"(shift) \
477 :
478 );
479 return result;
480 }
// Fixed-point multiply-add for AArch64: (x*y >> shift) + a, with no rounding
// step.
//   smull : result = x*y (signed 32x32 -> 64)
//   lsr   : result >>= shift (logical, 64-bit)
//   add   : 32-bit add of a to the low word
481 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulAddx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)482 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
483 {
484 GGLfixed result;
485 asm("smull %x[result], %w[x], %w[y] \n"
486 "lsr %x[result], %x[result], %x[shift] \n"
487 "add %w[result], %w[result], %w[a] \n"
488 : [result]"=&r"(result) \
489 : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
490 :
491 );
492 return result;
493 }
494
495 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
gglMulSubx(GGLfixed x,GGLfixed y,GGLfixed a,int shift)496 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
497 {
498
499 GGLfixed result;
500 int rshift;
501
502 asm("smull %x[result], %w[x], %w[y] \n"
503 "lsr %x[result], %x[result], %x[shift] \n"
504 "sub %w[result], %w[result], %w[a] \n"
505 : [result]"=&r"(result) \
506 : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
507 :
508 );
509 return result;
510 }
// Full 64-bit signed product of two 32-bit integers for AArch64: a single
// smull from the 32-bit views (%w) of x and y into the 64-bit view (%x) of
// the result register.
511 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)512 inline int64_t gglMulii(int32_t x, int32_t y)
513 {
514 int64_t res;
515 asm("smull %x0, %w1, %w2 \n"
516 : "=r"(res)
517 : "%r"(x), "r"(y)
518 :
519 );
520 return res;
521 }
522
523 #elif defined(__mips__) && __mips_isa_rev == 6
524
525 /* inline MIPS release 6 implementations (mul/muh instead of mult with HI/LO) */
// Fixed-point multiply for MIPS release 6. `mul` yields the low word and
// `muh` the high word of the 64-bit signed product, so no HI/LO clobbers are
// declared. The code is specialised on `shift`; see the comment ahead of
// each path.
// NOTE(review): some paths list tmp2 (or a `shift` input) in their operand
// lists without using them in the instruction text.
526 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)527 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
528 GGLfixed result,tmp,tmp1,tmp2;
529
530 if (__builtin_constant_p(shift)) {
// shift == 0: the result is the low word of the product, unrounded.
531 if (shift == 0) {
532 asm ("mul %[res], %[a], %[b] \t\n"
533 : [res]"=&r"(result)
534 : [a]"r"(a),[b]"r"(b)
535 );
// shift == 32: the result is the high word, plus the carry out of
// (lo + 0x80000000), plus the sign-extension of that constant
// (sra by 31 turns 0x80000000 into -1), i.e. hi + carry - 1.
// NOTE(review): confirm the "- 1" is intended.
536 } else if (shift == 32)
537 {
538 asm ("mul %[res], %[a], %[b] \t\n"
539 "li %[tmp],1\t\n"
540 "sll %[tmp],%[tmp],0x1f\t\n"
541 "addu %[tmp1],%[tmp],%[res] \t\n"
542 "muh %[res], %[a], %[b] \t\n"
543 "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/
544 "sra %[tmp],%[tmp],0x1f \t\n"
545 "addu %[res],%[res],%[tmp]\t\n"
546 "addu %[res],%[res],%[tmp1]\t\n"
547 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
548 : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
549 );
// 0 < shift < 32: add 1 << (shift-1) to lo, propagate the carry (tmp1) into
// hi, then combine (hi << (32-shift)) | (lo >> shift).
550 } else if ((shift >0) && (shift < 32))
551 {
552 asm ("mul %[res], %[a], %[b] \t\n"
553 "li %[tmp],1 \t\n"
554 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
555 "addu %[tmp1],%[tmp],%[res] \t\n"
556 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
557 "addu %[res],%[res],%[tmp] \t\n"
558 "muh %[tmp], %[a], %[b] \t\n"
559 "addu %[tmp],%[tmp],%[tmp1] \t\n"
560 "sll %[tmp],%[tmp],%[lshift] \t\n"
561 "srl %[res],%[res],%[rshift] \t\n"
562 "or %[res],%[res],%[tmp] \t\n"
563 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
564 : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
565 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, but with the shift-derived operands passed as "I" immediates.
566 } else {
567 asm ("mul %[res], %[a], %[b] \t\n"
568 "li %[tmp],1 \t\n"
569 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
570 "addu %[tmp1],%[tmp],%[res] \t\n"
571 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
572 "sra %[tmp2],%[tmp],0x1f \t\n"
573 "addu %[res],%[res],%[tmp] \t\n"
574 "muh %[tmp], %[a], %[b] \t\n"
575 "addu %[tmp],%[tmp],%[tmp2] \t\n"
576 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
577 "srl %[tmp2],%[res],%[rshift] \t\n"
578 "srav %[res], %[tmp],%[rshift]\t\n"
579 "sll %[tmp],%[tmp],1 \t\n"
580 "sll %[tmp],%[tmp],%[norbits] \t\n"
581 "or %[tmp],%[tmp],%[tmp2] \t\n"
582 "seleqz %[tmp],%[tmp],%[bit5] \t\n"
583 "selnez %[res],%[res],%[bit5] \t\n"
584 "or %[res],%[res],%[tmp] \t\n"
585 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
586 : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
587 );
588 }
// Run-time shift: add 1 << (shift-1) to lo and carry into hi as above, then
// compute both candidates -- the arithmetic shift of hi (srav) and the
// combination ((hi << 1) << ~shift) | (lo >> shift). seleqz keeps the
// combination when bit 5 of shift is zero, selnez keeps the shifted hi when
// it is non-zero, and the final `or` merges the two (one of them is zero).
589 } else {
590 asm ("mul %[res], %[a], %[b] \t\n"
591 "li %[tmp],1 \t\n"
592 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
593 "addu %[tmp1],%[tmp],%[res] \t\n"
594 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
595 "sra %[tmp2],%[tmp],0x1f \t\n"
596 "addu %[res],%[res],%[tmp] \t\n"
597 "muh %[tmp], %[a], %[b] \t\n"
598 "addu %[tmp],%[tmp],%[tmp2] \t\n"
599 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
600 "srl %[tmp2],%[res],%[rshift] \t\n"
601 "srav %[res], %[tmp],%[rshift]\t\n"
602 "sll %[tmp],%[tmp],1 \t\n"
603 "sll %[tmp],%[tmp],%[norbits] \t\n"
604 "or %[tmp],%[tmp],%[tmp2] \t\n"
605 "seleqz %[tmp],%[tmp],%[bit5] \t\n"
606 "selnez %[res],%[res],%[bit5] \t\n"
607 "or %[res],%[res],%[tmp] \t\n"
608 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
609 : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
610 );
611 }
612 return result;
613 }
614
// Fixed-point multiply-add for MIPS release 6: (a*b >> shift) + c, with no
// rounding step. `mul` yields the low word and `muh` the high word of the
// 64-bit signed product. The code is specialised on `shift`; see the comment
// ahead of each path.
615 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)616 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
617 GGLfixed result,t,tmp1,tmp2;
618
619 if (__builtin_constant_p(shift)) {
// shift == 0: low word of the product, plus c.
620 if (shift == 0) {
621 asm ("mul %[lo], %[a], %[b] \t\n"
622 "addu %[lo],%[lo],%[c] \t\n"
623 : [lo]"=&r"(result)
624 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
625 );
// shift == 32: high word of the product, plus c.
626 } else if (shift == 32) {
627 asm ("muh %[lo], %[a], %[b] \t\n"
628 "addu %[lo],%[lo],%[c] \t\n"
629 : [lo]"=&r"(result)
630 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
631 );
// 0 < shift < 32: ((lo >> shift) | (hi << (32-shift))) + c.
632 } else if ((shift>0) && (shift<32)) {
633 asm ("mul %[res], %[a], %[b] \t\n"
634 "muh %[t], %[a], %[b] \t\n"
635 "srl %[res],%[res],%[rshift] \t\n"
636 "sll %[t],%[t],%[lshift] \t\n"
637 "or %[res],%[res],%[t] \t\n"
638 "addu %[res],%[res],%[c] \t\n"
639 : [res]"=&r"(result),[t]"=&r"(t)
640 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
641 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, with `shift` passed as an "I" immediate.
642 } else {
643 asm ("mul %[res], %[a], %[b] \t\n"
644 "muh %[t], %[a], %[b] \t\n"
645 "nor %[tmp1],$zero,%[shift]\t\n"
646 "srl %[res],%[res],%[shift] \t\n"
647 "sll %[tmp2],%[t],1 \t\n"
648 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
649 "or %[tmp1],%[tmp2],%[res] \t\n"
650 "srav %[res],%[t],%[shift] \t\n"
651 "andi %[tmp2],%[shift],0x20\t\n"
652 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
653 "selnez %[res],%[res],%[tmp2]\t\n"
654 "or %[res],%[res],%[tmp1]\t\n"
655 "addu %[res],%[res],%[c] \t\n"
656 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
657 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
658 );
659 }
// Run-time shift: tmp1 = ~shift (nor with $zero). Two candidates are built --
// ((hi << 1) << ~shift) | (lo >> shift) and the arithmetic shift of hi.
// seleqz/selnez zero out the one not selected by (shift & 0x20), `or` merges
// them, and c is added last.
660 } else {
661 asm ("mul %[res], %[a], %[b] \t\n"
662 "muh %[t], %[a], %[b] \t\n"
663 "nor %[tmp1],$zero,%[shift]\t\n"
664 "srl %[res],%[res],%[shift] \t\n"
665 "sll %[tmp2],%[t],1 \t\n"
666 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
667 "or %[tmp1],%[tmp2],%[res] \t\n"
668 "srav %[res],%[t],%[shift] \t\n"
669 "andi %[tmp2],%[shift],0x20\t\n"
670 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
671 "selnez %[res],%[res],%[tmp2]\t\n"
672 "or %[res],%[res],%[tmp1]\t\n"
673 "addu %[res],%[res],%[c] \t\n"
674 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
675 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
676 );
677 }
678 return result;
679 }
680
// Fixed-point multiply-subtract for MIPS release 6: (a*b >> shift) - c, with
// no rounding step. The structure mirrors the multiply-add variant, with the
// final addu replaced by subu. `mul` yields the low word and `muh` the high
// word of the 64-bit signed product.
681 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)682 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
683 GGLfixed result,t,tmp1,tmp2;
684
685 if (__builtin_constant_p(shift)) {
// shift == 0: low word of the product, minus c.
686 if (shift == 0) {
687 asm ("mul %[lo], %[a], %[b] \t\n"
688 "subu %[lo],%[lo],%[c] \t\n"
689 : [lo]"=&r"(result)
690 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
691 );
// shift == 32: high word of the product, minus c.
692 } else if (shift == 32) {
693 asm ("muh %[lo], %[a], %[b] \t\n"
694 "subu %[lo],%[lo],%[c] \t\n"
695 : [lo]"=&r"(result)
696 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
697 );
// 0 < shift < 32: ((lo >> shift) | (hi << (32-shift))) - c.
698 } else if ((shift>0) && (shift<32)) {
699 asm ("mul %[res], %[a], %[b] \t\n"
700 "muh %[t], %[a], %[b] \t\n"
701 "srl %[res],%[res],%[rshift] \t\n"
702 "sll %[t],%[t],%[lshift] \t\n"
703 "or %[res],%[res],%[t] \t\n"
704 "subu %[res],%[res],%[c] \t\n"
705 : [res]"=&r"(result),[t]"=&r"(t)
706 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
707 );
// Any other constant shift: the same instruction sequence as the run-time
// path below, with `shift` passed as an "I" immediate.
708 } else {
709 asm ("mul %[res], %[a], %[b] \t\n"
710 "muh %[t], %[a], %[b] \t\n"
711 "nor %[tmp1],$zero,%[shift]\t\n"
712 "srl %[res],%[res],%[shift] \t\n"
713 "sll %[tmp2],%[t],1 \t\n"
714 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
715 "or %[tmp1],%[tmp2],%[res] \t\n"
716 "srav %[res],%[t],%[shift] \t\n"
717 "andi %[tmp2],%[shift],0x20\t\n"
718 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
719 "selnez %[res],%[res],%[tmp2]\t\n"
720 "or %[res],%[res],%[tmp1]\t\n"
721 "subu %[res],%[res],%[c] \t\n"
722 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
723 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
724 );
725 }
// Run-time shift: tmp1 = ~shift (nor with $zero). Two candidates are built --
// ((hi << 1) << ~shift) | (lo >> shift) and the arithmetic shift of hi.
// seleqz/selnez zero out the one not selected by (shift & 0x20), `or` merges
// them, and c is subtracted last.
726 } else {
727 asm ("mul %[res], %[a], %[b] \t\n"
728 "muh %[t], %[a], %[b] \t\n"
729 "nor %[tmp1],$zero,%[shift]\t\n"
730 "srl %[res],%[res],%[shift] \t\n"
731 "sll %[tmp2],%[t],1 \t\n"
732 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
733 "or %[tmp1],%[tmp2],%[res] \t\n"
734 "srav %[res],%[t],%[shift] \t\n"
735 "andi %[tmp2],%[shift],0x20\t\n"
736 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
737 "selnez %[res],%[res],%[tmp2]\t\n"
738 "or %[res],%[res],%[tmp1]\t\n"
739 "subu %[res],%[res],%[c] \t\n"
740 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
741 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
742 );
743 }
744 return result;
745 }
746
// Full 64-bit signed product of two 32-bit integers for MIPS release 6.
// `mul` writes the low word and `muh` the high word into a union that
// overlays them with the int64_t that is returned. The order of `lo` and
// `hi` in the struct is chosen by endianness (__MIPSEL__ / __MIPSEB__) so
// that each half lands in the matching half of `res`.
// NOTE(review): if neither endianness macro is defined the struct is empty
// and the asm operands will not compile.
747 inline int64_t gglMulii(int32_t x, int32_t y) CONST;
gglMulii(int32_t x,int32_t y)748 inline int64_t gglMulii(int32_t x, int32_t y) {
749 union {
750 struct {
751 #if defined(__MIPSEL__)
752 int32_t lo;
753 int32_t hi;
754 #elif defined(__MIPSEB__)
755 int32_t hi;
756 int32_t lo;
757 #endif
758 } s;
759 int64_t res;
760 }u;
761 asm("mul %0, %2, %3 \t\n"
762 "muh %1, %2, %3 \t\n"
763 : "=r"(u.s.lo), "=&r"(u.s.hi)
764 : "%r"(x), "r"(y)
765 );
766 return u.res;
767 }
768
769 #else // ----------------------------------------------------------------------
770
771 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
gglMulx(GGLfixed a,GGLfixed b,int shift)772 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
773 return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
774 }
775 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)776 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
777 return GGLfixed((int64_t(a)*b)>>shift) + c;
778 }
779 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c,int shift)780 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
781 return GGLfixed((int64_t(a)*b)>>shift) - c;
782 }
783 inline int64_t gglMulii(int32_t a, int32_t b) CONST;
gglMulii(int32_t a,int32_t b)784 inline int64_t gglMulii(int32_t a, int32_t b) {
785 return int64_t(a)*b;
786 }
787
788 #endif
789
790 // ------------------------------------------------------------------------
791
792 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
gglMulx(GGLfixed a,GGLfixed b)793 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
794 return gglMulx(a, b, 16);
795 }
796 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
gglMulAddx(GGLfixed a,GGLfixed b,GGLfixed c)797 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
798 return gglMulAddx(a, b, c, 16);
799 }
800 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
gglMulSubx(GGLfixed a,GGLfixed b,GGLfixed c)801 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
802 return gglMulSubx(a, b, c, 16);
803 }
804
805 // ------------------------------------------------------------------------
806
807 inline int32_t gglClz(int32_t x) CONST;
gglClz(int32_t x)808 inline int32_t gglClz(int32_t x)
809 {
810 #if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
811 return __builtin_clz(x);
812 #else
813 if (!x) return 32;
814 int32_t exp = 31;
815 if (x & 0xFFFF0000) { exp -=16; x >>= 16; }
816 if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
817 if (x & 0x000000f0) { exp -= 4; x >>= 4; }
818 if (x & 0x0000000c) { exp -= 2; x >>= 2; }
819 if (x & 0x00000002) { exp -= 1; }
820 return exp;
821 #endif
822 }
823
824 // ------------------------------------------------------------------------
825
// Out-of-line divide; only the prototype is visible in this header. The two
// inline wrappers that follow always pass 16 as the third argument.
// NOTE(review): `i` is presumably the number of fractional bits of the
// quotient -- confirm against the implementation.
826 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;
827
828 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
gglDivQ16(GGLfixed n,GGLfixed d)829 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
830 return gglDivQ(n, d, 16);
831 }
832
833 inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
gglDivx(GGLfixed n,GGLfixed d)834 inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
835 return gglDivQ(n, d, 16);
836 }
837
838 // ------------------------------------------------------------------------
839
840 inline GGLfixed gglRecipFast(GGLfixed x) CONST;
gglRecipFast(GGLfixed x)841 inline GGLfixed gglRecipFast(GGLfixed x)
842 {
843 // This is a really bad approximation of 1/x, but it's also
844 // very fast. x must be strictly positive.
845 // if x between [0.5, 1[ , then 1/x = 3-2*x
846 // (we use 2.30 fixed-point)
847 const int32_t lz = gglClz(x);
848 return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
849 }
850
851 // ------------------------------------------------------------------------
852
// Clamp c to the range [0, FIXED_ONE].
// `c & ~(c>>31)` (and the equivalent `bic ..., asr #31`) clears c when it is
// negative: an arithmetic shift by 31 gives all ones for a negative value and
// zero otherwise.
// NOTE(review): this relies on GGLfixed being a 32-bit signed type with an
// arithmetic right shift -- confirm against pixelflinger.h.
853 inline GGLfixed gglClampx(GGLfixed c) CONST;
gglClampx(GGLfixed c)854 inline GGLfixed gglClampx(GGLfixed c)
855 {
856 #if defined(__thumb__)
857 // clamp without branches
// First line: c = max(c, 0), then c = FIXED_ONE - c.
// Second line: negative values (original c > FIXED_ONE) become 0, and the
// final FIXED_ONE - c maps back, giving min(max(c, 0), FIXED_ONE).
858 c &= ~(c>>31); c = FIXED_ONE - c;
859 c &= ~(c>>31); c = FIXED_ONE - c;
860 #else
861 #if defined(__arm__)
862 // I don't know why gcc thinks its smarter than me! The code below
863 // clamps to zero in one instruction, but gcc won't generate it and
864 // replace it by a cmp + movlt (it's quite amazing actually).
865 asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
866 #elif defined(__aarch64__)
867 asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
868 #else
869 c &= ~(c>>31);
870 #endif
// The lower bound is handled above; the upper bound uses an ordinary compare.
871 if (c>FIXED_ONE)
872 c = FIXED_ONE;
873 #endif
874 return c;
875 }
876
877 // ------------------------------------------------------------------------
878
879 #endif // ANDROID_GGL_FIXED_H
880