1 /* Copyright 2022 Advanced Micro Devices, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 * OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors: AMD
22 *
23 */
24
25 #include "fixed31_32.h"
26 #include "calc_u64.h"
27
/*
 * @brief
 * Absolute value of a signed 64-bit integer, returned as unsigned.
 *
 * The negation is done in the unsigned domain because -arg is undefined
 * behavior (signed overflow) when arg == LLONG_MIN; the two's-complement
 * identity ~x + 1 yields the correct magnitude for every input.
 */
static inline unsigned long long abs_i64(long long arg)
{
    if (arg >= 0)
        return (unsigned long long)arg;
    else
        return ~(unsigned long long)arg + 1ULL;
}
35
36 /*
37 * @brief
38 * result = dividend / divisor
39 * *remainder = dividend % divisor
40 */
static inline unsigned long long complete_integer_division_u64(
    unsigned long long dividend, unsigned long long divisor, unsigned long long *remainder)
{
    VPE_ASSERT(divisor); /* division by zero is not defined */

    /* div64_u64_rem produces quotient and remainder in a single call */
    return div64_u64_rem(dividend, divisor, (uint64_t *)remainder);
}
52
53 #define FRACTIONAL_PART_MASK ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
54
55 #define GET_INTEGER_PART(x) ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
56
57 #define GET_FRACTIONAL_PART(x) (FRACTIONAL_PART_MASK & (x))
58
struct fixed31_32 vpe_fixpt_from_fraction(long long numerator, long long denominator)
{
    struct fixed31_32 res;

    /* Work on magnitudes; the combined sign is reapplied at the end. */
    bool arg1_negative = numerator < 0;
    bool arg2_negative = denominator < 0;

    /* NOTE(review): -numerator / -denominator is UB for LLONG_MIN;
     * assumed callers never pass that value — confirm. */
    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -numerator : numerator);
    unsigned long long arg2_value =
        (unsigned long long)(arg2_negative ? -denominator : denominator);

    unsigned long long remainder;

    /* determine integer part */

    unsigned long long res_value =
        complete_integer_division_u64(arg1_value, arg2_value, &remainder);

    VPE_ASSERT(res_value <= LONG_MAX);

    /* determine fractional part: binary long division, producing one
     * quotient bit per iteration for each of the 32 fractional bits */
    {
        unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;

        do {
            remainder <<= 1;

            res_value <<= 1;

            if (remainder >= arg2_value) {
                res_value |= 1;
                remainder -= arg2_value;
            }
        } while (--i != 0);
    }

    /* round up LSB: round-half-up based on the leftover remainder */
    {
        unsigned long long summand = (remainder << 1) >= arg2_value;

        VPE_ASSERT(res_value <= LLONG_MAX - summand);

        res_value += summand;
    }

    res.value = (long long)res_value;

    /* negative result iff exactly one operand was negative */
    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}
111
struct fixed31_32 vpe_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
    struct fixed31_32 res;

    /* Multiply magnitudes; the combined sign is reapplied at the end. */
    bool arg1_negative = arg1.value < 0;
    bool arg2_negative = arg2.value < 0;

    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -arg1.value : arg1.value);
    unsigned long long arg2_value = (unsigned long long)(arg2_negative ? -arg2.value : arg2.value);

    /* Split each operand into integer and fractional halves so the
     * full product can be accumulated without overflowing 64 bits:
     * (i1 + f1) * (i2 + f2) = i1*i2 + i1*f2 + i2*f1 + f1*f2 */
    unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
    unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);

    unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
    unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);

    unsigned long long tmp;

    res.value = (long long)(arg1_int * arg2_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    /* move the integer product up into 31.32 scale */
    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    /* cross terms int*fra are already in 31.32 scale */
    tmp = arg1_int * arg2_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg2_int * arg1_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg1_fra * arg2_fra;

    /* fra*fra lands in .64 scale: shift back down to .32,
     * rounding half up (comparison against 0.5 yields the carry bit) */
    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
        (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* negative result iff exactly one operand was negative */
    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}
162
struct fixed31_32 vpe_fixpt_sqr(struct fixed31_32 arg)
{
    struct fixed31_32 res;

    unsigned long long arg_value = abs_i64(arg.value);

    /* Square the magnitude piecewise to avoid 64-bit overflow:
     * (i + f)^2 = i*i + 2*i*f + f*f */
    unsigned long long arg_int = GET_INTEGER_PART(arg_value);

    unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);

    unsigned long long tmp;

    res.value = (long long)(arg_int * arg_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    /* move the integer product up into 31.32 scale */
    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg_int * arg_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    /* the cross term is added twice on purpose: 2 * arg_int * arg_fra */
    res.value += tmp;

    tmp = arg_fra * arg_fra;

    /* fra*fra lands in .64 scale: shift back down to .32, rounding half up */
    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
        (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    /* note: result is always non-negative, no sign restore needed */
    return res;
}
202
vpe_fixpt_recip(struct fixed31_32 arg)203 struct fixed31_32 vpe_fixpt_recip(struct fixed31_32 arg)
204 {
205 /*
206 * @note
207 * Good idea to use Newton's method
208 */
209
210 VPE_ASSERT(arg.value);
211
212 return vpe_fixpt_from_fraction(vpe_fixpt_one.value, arg.value);
213 }
214
struct fixed31_32 vpe_fixpt_sinc(struct fixed31_32 arg)
{
    struct fixed31_32 square;

    struct fixed31_32 res = vpe_fixpt_one;

    /* series depth: innermost Horner factor uses 27 * 26 */
    int n = 27;

    struct fixed31_32 arg_norm = arg;

    /* range-reduce |arg| >= 2*pi into a single period so the
     * truncated series stays accurate */
    if (vpe_fixpt_le(vpe_fixpt_two_pi, vpe_fixpt_abs(arg))) {
        arg_norm =
            vpe_fixpt_sub(arg_norm, vpe_fixpt_mul_int(vpe_fixpt_two_pi,
                (int)div64_s64(arg_norm.value, vpe_fixpt_two_pi.value)));
    }

    square = vpe_fixpt_sqr(arg_norm);

    /* Horner evaluation of sinc(x) = 1 - x^2/3! + x^4/5! - ... */
    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n > 2);

    /* if the argument was reduced, rescale: sinc(arg) = sin(arg_norm)/arg */
    if (arg.value != arg_norm.value)
        res = vpe_fixpt_div(vpe_fixpt_mul(res, arg_norm), arg);

    return res;
}
245
vpe_fixpt_sin(struct fixed31_32 arg)246 struct fixed31_32 vpe_fixpt_sin(struct fixed31_32 arg)
247 {
248 return vpe_fixpt_mul(arg, vpe_fixpt_sinc(arg));
249 }
250
vpe_fixpt_cos(struct fixed31_32 arg)251 struct fixed31_32 vpe_fixpt_cos(struct fixed31_32 arg)
252 {
253
254 const struct fixed31_32 square = vpe_fixpt_sqr(arg);
255
256 struct fixed31_32 res = vpe_fixpt_one;
257
258 int n = 26;
259
260 do {
261 res = vpe_fixpt_sub(
262 vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));
263
264 n -= 2;
265 } while (n != 0);
266
267 return res;
268 }
269
270 /*
271 * @brief
272 * result = exp(arg),
273 * where abs(arg) < 1
274 *
275 * Calculated as Taylor series.
276 */
static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
{
    unsigned int n = 9;

    /* seed with the innermost Horner factor (n+2)/(n+1) */
    struct fixed31_32 res = vpe_fixpt_from_fraction(n + 2, n + 1);

    /* the truncated series is only accurate for |arg| < 1 */
    VPE_ASSERT(vpe_fixpt_lt(arg, vpe_fixpt_one));

    /* Horner loop over 1 + x/k * (...) for k = 9 down to 2 */
    do
        res = vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(arg, res), n));
    while (--n != 1);

    /* outermost step: exp(arg) ~= 1 + arg * res */
    return vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_mul(arg, res));
}
291
struct fixed31_32 vpe_fixpt_exp(struct fixed31_32 arg)
{
    /*
     * @brief
     * Main equation is:
     * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
     * where m = round(x / ln(2)), r = x - m * ln(2)
     */

    if (vpe_fixpt_le(vpe_fixpt_ln2_div_2, vpe_fixpt_abs(arg))) {
        /* range reduction: split arg into m * ln(2) + r */
        int m = vpe_fixpt_round(vpe_fixpt_div(arg, vpe_fixpt_ln2));

        struct fixed31_32 r = vpe_fixpt_sub(arg, vpe_fixpt_mul_int(vpe_fixpt_ln2, m));

        /* |arg| >= ln(2)/2 guarantees the rounded multiple is nonzero */
        VPE_ASSERT(m != 0);

        /* r must lie inside the Taylor series' convergence window */
        VPE_ASSERT(vpe_fixpt_lt(vpe_fixpt_abs(r), vpe_fixpt_one));

        if (m > 0)
            /* scale up by 2^m */
            return vpe_fixpt_shl(fixed31_32_exp_from_taylor_series(r), (unsigned char)m);
        else
            /* scale down by 2^-m */
            return vpe_fixpt_div_int(fixed31_32_exp_from_taylor_series(r), 1LL << -m);
    } else if (arg.value != 0)
        /* small argument: series converges directly, no reduction */
        return fixed31_32_exp_from_taylor_series(arg);
    else
        return vpe_fixpt_one; /* exp(0) == 1 exactly */
}
319
struct fixed31_32 vpe_fixpt_log(struct fixed31_32 arg)
{
    /* Newton-Raphson on f(x) = exp(x) - arg:
     * x_next = x - 1 + arg / exp(x), starting from x = -1 */
    struct fixed31_32 res = vpe_fixpt_neg(vpe_fixpt_one);

    struct fixed31_32 error;

    VPE_ASSERT(arg.value > 0); /*log is defined only for positive numbers*/

    do {
        struct fixed31_32 res1 = vpe_fixpt_add(
            vpe_fixpt_sub(res, vpe_fixpt_one), vpe_fixpt_div(arg, vpe_fixpt_exp(res)));

        error = vpe_fixpt_sub(res, res1);

        res = res1;
        /* iterate until the step size drops to ~100 raw 31.32 ulps */
    } while (abs_i64(error.value) > 100ULL);

    return res;
}
339
340 /* this function is a generic helper to translate fixed point value to
341 * specified integer format that will consist of integer_bits integer part and
342 * fractional_bits fractional part. For example it is used in
343 * vpe_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
344 * part in 32 bits. It is used in hw programming (scaler)
345 */
346
ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits)347 static inline unsigned int ux_dy(
348 long long value, unsigned int integer_bits, unsigned int fractional_bits)
349 {
350 /* 1. create mask of integer part */
351 unsigned int result = (1 << integer_bits) - 1;
352 /* 2. mask out fractional part */
353 unsigned int fractional_part = FRACTIONAL_PART_MASK & (unsigned long long)value;
354 /* 3. shrink fixed point integer part to be of integer_bits width*/
355 result &= GET_INTEGER_PART(value);
356 /* 4. make space for fractional part to be filled in after integer */
357 result <<= fractional_bits;
358 /* 5. shrink fixed point fractional part to of fractional_bits width*/
359 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
360 /* 6. merge the result */
361 return result | fractional_part;
362 }
363
clamp_ux_dy(long long value,unsigned int integer_bits,unsigned int fractional_bits,unsigned int min_clamp)364 static inline unsigned int clamp_ux_dy(long long value, unsigned int integer_bits,
365 unsigned int fractional_bits, unsigned int min_clamp)
366 {
367 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
368
369 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
370 return (1 << (integer_bits + fractional_bits)) - 1;
371 else if (truncated_val > min_clamp)
372 return truncated_val;
373 else
374 return min_clamp;
375 }
376
unsigned int vpe_fixpt_u4d19(struct fixed31_32 arg)
{
    /* pack as unsigned 4-bit integer / 19-bit fraction */
    const long long raw = arg.value;

    return ux_dy(raw, 4, 19);
}
381
unsigned int vpe_fixpt_u3d19(struct fixed31_32 arg)
{
    /* pack as unsigned 3-bit integer / 19-bit fraction */
    const long long raw = arg.value;

    return ux_dy(raw, 3, 19);
}
386
unsigned int vpe_fixpt_u2d19(struct fixed31_32 arg)
{
    /* pack as unsigned 2-bit integer / 19-bit fraction */
    const long long raw = arg.value;

    return ux_dy(raw, 2, 19);
}
391
unsigned int vpe_fixpt_u0d19(struct fixed31_32 arg)
{
    /* pack as a pure 19-bit fraction (no integer bits) */
    const long long raw = arg.value;

    return ux_dy(raw, 0, 19);
}
396
unsigned int vpe_fixpt_clamp_u0d14(struct fixed31_32 arg)
{
    /* pure 14-bit fraction, saturated at max and floored at 1 */
    const long long raw = arg.value;

    return clamp_ux_dy(raw, 0, 14, 1);
}
401
unsigned int vpe_fixpt_clamp_u0d10(struct fixed31_32 arg)
{
    /* pure 10-bit fraction, saturated at max and floored at 1 */
    const long long raw = arg.value;

    return clamp_ux_dy(raw, 0, 10, 1);
}
406
int vpe_fixpt_s4d19(struct fixed31_32 arg)
{
    /* Signed 4.19: pack the magnitude, then reapply the sign. */
    int magnitude = (int)ux_dy(vpe_fixpt_abs(arg).value, 4, 19);

    return (arg.value < 0) ? -magnitude : magnitude;
}
414
/*
 * @brief
 * Round value * d_pix to the nearest integer and mask the result into
 * the destination register field.
 *
 * @note decimalBits does not affect the computation; it is kept only for
 * interface compatibility with existing callers.
 * Assumes value * d_pix is non-negative (the +0.5 bias only rounds
 * correctly for non-negative products).
 */
unsigned int vpe_to_fixed_point(
    unsigned int decimalBits, double value, unsigned int mask, double d_pix)
{
    unsigned int d_i;

    (void)decimalBits; /* unused; silences -Wunused-parameter */

    d_i = (int)((value * d_pix) + 0.5);
    d_i = d_i & mask;
    return d_i;
}
424
425 /* This function is a generic way to convert a double into fixpt format AdBu, where
426 * A is the decimal bits and B is the fractional bits. If clamp is set, it will
427 * clamp the max value, otherwise there is risk of overflow.
428 */
unsigned long long vpe_double_to_fixed_point(
    double x, unsigned long long decimal_bits, unsigned long long fractional_bits, bool clamp)
{
    /* Convert a double to fixpt AdBu: scale by 2^fractional_bits and
     * keep decimal_bits + fractional_bits bits. With clamp, values
     * above the field maximum saturate instead of wrapping. */
    const unsigned long long one = 1;
    double scale = (double)(one << fractional_bits);
    unsigned long long fixed = (long long)(x * scale);
    unsigned long long field_mask = (one << (decimal_bits + fractional_bits)) - 1;

    if (clamp && fixed > field_mask)
        return field_mask;

    return fixed & field_mask;
}
445