• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2022 Advanced Micro Devices, Inc.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a
4  * copy of this software and associated documentation files (the "Software"),
5  * to deal in the Software without restriction, including without limitation
6  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7  * and/or sell copies of the Software, and to permit persons to whom the
8  * Software is furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19  * OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors: AMD
22  *
23  */
24 
25 #include "fixed31_32.h"
26 #include "calc_u64.h"
27 
/*
 * @brief
 * Return the absolute value of a signed 64-bit integer as unsigned.
 *
 * The negation is performed in unsigned arithmetic so that
 * abs_i64(LLONG_MIN) is well defined (it returns 2^63); negating
 * LLONG_MIN as a signed value is undefined behavior.
 */
static inline unsigned long long abs_i64(long long arg)
{
    if (arg >= 0)
        return (unsigned long long)arg;
    else
        return 0ULL - (unsigned long long)arg;
}
35 
/*
 * @brief
 * Divide dividend by divisor, returning the quotient and storing the
 * remainder through *remainder. The divisor must be non-zero.
 */
static inline unsigned long long complete_integer_division_u64(
    unsigned long long dividend, unsigned long long divisor, unsigned long long *remainder)
{
    /* division by zero is a caller bug */
    VPE_ASSERT(divisor);

    return div64_u64_rem(dividend, divisor, (uint64_t *)remainder);
}
52 
53 #define FRACTIONAL_PART_MASK ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
54 
55 #define GET_INTEGER_PART(x) ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
56 
57 #define GET_FRACTIONAL_PART(x) (FRACTIONAL_PART_MASK & (x))
58 
vpe_fixpt_from_fraction(long long numerator,long long denominator)59 struct fixed31_32 vpe_fixpt_from_fraction(long long numerator, long long denominator)
60 {
61     struct fixed31_32 res;
62 
63     bool arg1_negative = numerator < 0;
64     bool arg2_negative = denominator < 0;
65 
66     unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -numerator : numerator);
67     unsigned long long arg2_value =
68         (unsigned long long)(arg2_negative ? -denominator : denominator);
69 
70     unsigned long long remainder;
71 
72     /* determine integer part */
73 
74     unsigned long long res_value =
75         complete_integer_division_u64(arg1_value, arg2_value, &remainder);
76 
77     VPE_ASSERT(res_value <= LONG_MAX);
78 
79     /* determine fractional part */
80     {
81         unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
82 
83         do {
84             remainder <<= 1;
85 
86             res_value <<= 1;
87 
88             if (remainder >= arg2_value) {
89                 res_value |= 1;
90                 remainder -= arg2_value;
91             }
92         } while (--i != 0);
93     }
94 
95     /* round up LSB */
96     {
97         unsigned long long summand = (remainder << 1) >= arg2_value;
98 
99         VPE_ASSERT(res_value <= LLONG_MAX - summand);
100 
101         res_value += summand;
102     }
103 
104     res.value = (long long)res_value;
105 
106     if (arg1_negative ^ arg2_negative)
107         res.value = -res.value;
108 
109     return res;
110 }
111 
/*
 * @brief
 * result = arg1 * arg2 in fixed 31.32 format, rounded to the nearest LSB.
 *
 * The product is computed on magnitudes split into 32-bit integer and
 * fractional halves (four partial products), then the sign is re-applied.
 */
struct fixed31_32 vpe_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
    struct fixed31_32 res;

    bool arg1_negative = arg1.value < 0;
    bool arg2_negative = arg2.value < 0;

    /* work on magnitudes; NOTE(review): -value overflows for LLONG_MIN — confirm callers avoid it */
    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -arg1.value : arg1.value);
    unsigned long long arg2_value = (unsigned long long)(arg2_negative ? -arg2.value : arg2.value);

    unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
    unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);

    unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
    unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);

    unsigned long long tmp;

    /* int*int term: lands in the integer part after scaling */
    res.value = (long long)(arg1_int * arg2_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    /* cross terms int*fra are already scaled by 2^32, add directly */
    tmp = arg1_int * arg2_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg2_int * arg1_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* fra*fra term is scaled by 2^64: shift back down and round to nearest */
    tmp = arg1_fra * arg2_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* result is negative iff exactly one operand was negative */
    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}
162 
/*
 * @brief
 * result = arg * arg in fixed 31.32 format, rounded to the nearest LSB.
 *
 * Expands (int + fra)^2 = int^2 + 2*int*fra + fra^2 on the magnitude of
 * the argument; the result of squaring is always non-negative.
 */
struct fixed31_32 vpe_fixpt_sqr(struct fixed31_32 arg)
{
    struct fixed31_32 res;

    unsigned long long arg_value = abs_i64(arg.value);

    unsigned long long arg_int = GET_INTEGER_PART(arg_value);

    unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);

    unsigned long long tmp;

    /* int^2 term: lands in the integer part after scaling */
    res.value = (long long)(arg_int * arg_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg_int * arg_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* the cross term int*fra appears twice in (int + fra)^2,
     * so the same tmp is deliberately added a second time */
    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* fra^2 term is scaled by 2^64: shift back down and round to nearest */
    tmp = arg_fra * arg_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    return res;
}
202 
vpe_fixpt_recip(struct fixed31_32 arg)203 struct fixed31_32 vpe_fixpt_recip(struct fixed31_32 arg)
204 {
205     /*
206      * @note
207      * Good idea to use Newton's method
208      */
209 
210     VPE_ASSERT(arg.value);
211 
212     return vpe_fixpt_from_fraction(vpe_fixpt_one.value, arg.value);
213 }
214 
/*
 * @brief
 * result = sin(arg) / arg (normalized sinc), evaluated as a truncated
 * Taylor series in Horner form after range reduction.
 */
struct fixed31_32 vpe_fixpt_sinc(struct fixed31_32 arg)
{
    struct fixed31_32 square;

    struct fixed31_32 res = vpe_fixpt_one;

    /* the Horner loop below runs for odd n from 27 down to 3 */
    int n = 27;

    struct fixed31_32 arg_norm = arg;

    /* reduce |arg| below 2*pi so the truncated series stays accurate */
    if (vpe_fixpt_le(vpe_fixpt_two_pi, vpe_fixpt_abs(arg))) {
        arg_norm =
            vpe_fixpt_sub(arg_norm, vpe_fixpt_mul_int(vpe_fixpt_two_pi,
                                        (int)div64_s64(arg_norm.value, vpe_fixpt_two_pi.value)));
    }

    square = vpe_fixpt_sqr(arg_norm);

    /* Horner step: res = 1 - x^2 * res / (n * (n - 1)) */
    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n > 2);

    /* if the argument was range-reduced, rescale: sinc(x) = sin(x) / x */
    if (arg.value != arg_norm.value)
        res = vpe_fixpt_div(vpe_fixpt_mul(res, arg_norm), arg);

    return res;
}
245 
vpe_fixpt_sin(struct fixed31_32 arg)246 struct fixed31_32 vpe_fixpt_sin(struct fixed31_32 arg)
247 {
248     return vpe_fixpt_mul(arg, vpe_fixpt_sinc(arg));
249 }
250 
/*
 * @brief
 * result = cos(arg), via the Taylor series
 * cos(x) = 1 - x^2/2! + x^4/4! - ... evaluated in Horner form.
 */
struct fixed31_32 vpe_fixpt_cos(struct fixed31_32 arg)
{

    const struct fixed31_32 square = vpe_fixpt_sqr(arg);

    struct fixed31_32 res = vpe_fixpt_one;

    /* the Horner loop below runs for even n from 26 down to 2 */
    int n = 26;

    /* Horner step: res = 1 - x^2 * res / (n * (n - 1)) */
    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n != 0);

    return res;
}
269 
270 /*
271  * @brief
272  * result = exp(arg),
273  * where abs(arg) < 1
274  *
275  * Calculated as Taylor series.
276  */
static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
{
    /* number of series terms folded in by the Horner loop below */
    unsigned int n = 9;

    /* seed with the ratio of the two innermost Horner coefficients */
    struct fixed31_32 res = vpe_fixpt_from_fraction(n + 2, n + 1);

    /* the series is only accurate for |arg| < 1 */
    VPE_ASSERT(vpe_fixpt_lt(arg, vpe_fixpt_one));

    /* Horner step: res = 1 + arg * res / n, for n = 9 down to 2 */
    do
        res = vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(arg, res), n));
    while (--n != 1);

    /* final term: exp(arg) ~= 1 + arg * res */
    return vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_mul(arg, res));
}
291 
/*
 * @brief
 * result = exp(arg) in fixed 31.32 format.
 */
struct fixed31_32 vpe_fixpt_exp(struct fixed31_32 arg)
{
    /*
     * @brief
     * Main equation is:
     * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
     * where m = round(x / ln(2)), r = x - m * ln(2)
     */

    if (vpe_fixpt_le(vpe_fixpt_ln2_div_2, vpe_fixpt_abs(arg))) {
        int m = vpe_fixpt_round(vpe_fixpt_div(arg, vpe_fixpt_ln2));

        /* residual after removing m multiples of ln(2) */
        struct fixed31_32 r = vpe_fixpt_sub(arg, vpe_fixpt_mul_int(vpe_fixpt_ln2, m));

        /* |arg| >= ln(2)/2 guarantees the rounded multiple is non-zero */
        VPE_ASSERT(m != 0);

        /* residual must be small enough for the Taylor expansion */
        VPE_ASSERT(vpe_fixpt_lt(vpe_fixpt_abs(r), vpe_fixpt_one));

        if (m > 0)
            /* scale up by 2^m */
            return vpe_fixpt_shl(fixed31_32_exp_from_taylor_series(r), (unsigned char)m);
        else
            /* scale down by 2^-m */
            return vpe_fixpt_div_int(fixed31_32_exp_from_taylor_series(r), 1LL << -m);
    } else if (arg.value != 0)
        return fixed31_32_exp_from_taylor_series(arg);
    else
        return vpe_fixpt_one;
}
319 
/*
 * @brief
 * result = ln(arg), via Newton's iteration on f(y) = exp(y) - arg:
 * y_next = y - 1 + arg / exp(y), starting from y = -1 and stopping
 * once the step shrinks to about 100 ULP.
 */
struct fixed31_32 vpe_fixpt_log(struct fixed31_32 arg)
{
    struct fixed31_32 res = vpe_fixpt_neg(vpe_fixpt_one);

    struct fixed31_32 error;

    VPE_ASSERT(arg.value > 0); /*log is defined only for positive numbers*/

    do {
        struct fixed31_32 res1 = vpe_fixpt_add(
            vpe_fixpt_sub(res, vpe_fixpt_one), vpe_fixpt_div(arg, vpe_fixpt_exp(res)));

        /* size of the step taken this iteration */
        error = vpe_fixpt_sub(res, res1);

        res = res1;
    } while (abs_i64(error.value) > 100ULL); /* ~100 * 2^-32 tolerance */

    return res;
}
339 
340 /* this function is a generic helper to translate fixed point value to
341  * specified integer format that will consist of integer_bits integer part and
342  * fractional_bits fractional part. For example it is used in
343  *  vpe_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
344  * part in 32 bits. It is used in hw programming (scaler)
345  */
346 
ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)347 static inline unsigned int ux_dy(
348     long long value, unsigned int integer_bits, unsigned int fractional_bits)
349 {
350     /* 1. create mask of integer part */
351     unsigned int result = (1 << integer_bits) - 1;
352     /* 2. mask out fractional part */
353     unsigned int fractional_part = FRACTIONAL_PART_MASK & (unsigned long long)value;
354     /* 3. shrink fixed point integer part to be of integer_bits width*/
355     result &= GET_INTEGER_PART(value);
356     /* 4. make space for fractional part to be filled in after integer */
357     result <<= fractional_bits;
358     /* 5. shrink fixed point fractional part to of fractional_bits width*/
359     fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
360     /* 6. merge the result */
361     return result | fractional_part;
362 }
363 
clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)364 static inline unsigned int clamp_ux_dy(long long value, unsigned int integer_bits,
365     unsigned int fractional_bits, unsigned int min_clamp)
366 {
367     unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
368 
369     if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
370         return (1 << (integer_bits + fractional_bits)) - 1;
371     else if (truncated_val > min_clamp)
372         return truncated_val;
373     else
374         return min_clamp;
375 }
376 
/* Convert fixed 31.32 to unsigned 4.19 format. */
unsigned int vpe_fixpt_u4d19(struct fixed31_32 arg)
{
    const unsigned int integer_bits = 4, fractional_bits = 19;

    return ux_dy(arg.value, integer_bits, fractional_bits);
}
381 
/* Convert fixed 31.32 to unsigned 3.19 format. */
unsigned int vpe_fixpt_u3d19(struct fixed31_32 arg)
{
    const unsigned int integer_bits = 3, fractional_bits = 19;

    return ux_dy(arg.value, integer_bits, fractional_bits);
}
386 
/* Convert fixed 31.32 to unsigned 2.19 format. */
unsigned int vpe_fixpt_u2d19(struct fixed31_32 arg)
{
    const unsigned int integer_bits = 2, fractional_bits = 19;

    return ux_dy(arg.value, integer_bits, fractional_bits);
}
391 
/* Convert fixed 31.32 to unsigned 0.19 format (fraction only). */
unsigned int vpe_fixpt_u0d19(struct fixed31_32 arg)
{
    const unsigned int integer_bits = 0, fractional_bits = 19;

    return ux_dy(arg.value, integer_bits, fractional_bits);
}
396 
/* Convert fixed 31.32 to unsigned 0.14 format, clamped to at least 1. */
unsigned int vpe_fixpt_clamp_u0d14(struct fixed31_32 arg)
{
    const unsigned int min_clamp = 1;

    return clamp_ux_dy(arg.value, 0, 14, min_clamp);
}
401 
/* Convert fixed 31.32 to unsigned 0.10 format, clamped to at least 1. */
unsigned int vpe_fixpt_clamp_u0d10(struct fixed31_32 arg)
{
    const unsigned int min_clamp = 1;

    return clamp_ux_dy(arg.value, 0, 10, min_clamp);
}
406 
/* Convert fixed 31.32 to signed 4.19 format: the magnitude is converted
 * and the sign re-applied afterwards.
 */
int vpe_fixpt_s4d19(struct fixed31_32 arg)
{
    if (arg.value < 0)
        return -(int)ux_dy(vpe_fixpt_abs(arg).value, 4, 19);

    return (int)ux_dy(arg.value, 4, 19);
}
414 
/*
 * @brief
 * Scale value by d_pix, round to the nearest integer, and mask the
 * result down to the destination fixed-point width.
 *
 * @param decimalBits unused; kept only for interface compatibility
 * @param value       input value to convert
 * @param mask        bit mask of the destination format
 * @param d_pix       scale factor (2^fractional_bits of the target)
 * @return masked fixed-point representation of value * d_pix
 *
 * @note the +0.5 rounding is only round-to-nearest for non-negative
 * products; negative inputs truncate toward zero after the offset.
 */
unsigned int vpe_to_fixed_point(
    unsigned int decimalBits, double value, unsigned int mask, double d_pix)
{
    unsigned int d_i;

    (void)decimalBits; /* unused parameter; silences -Wunused-parameter */

    d_i = (int)((value * d_pix) + 0.5);
    d_i = d_i & mask;
    return d_i;
}
424 
/* Generic conversion of a double into fixpt format AdBu, where A is the
 * number of decimal (integer) bits and B the number of fractional bits.
 * When clamp is set the result saturates at the format's maximum value;
 * otherwise oversized values simply wrap under the mask.
 */
unsigned long long vpe_double_to_fixed_point(
    double x, unsigned long long decimal_bits, unsigned long long fractional_bits, bool clamp)
{
    const unsigned long long one  = 1;
    const unsigned long long mask = (one << (decimal_bits + fractional_bits)) - 1;

    /* scale into fixed point; cast goes through long long as before */
    unsigned long long fixpt = (long long)(x * (double)(one << fractional_bits));

    if (clamp && fixpt > mask)
        return mask;

    return fixpt & mask;
}
445