1 /******************************************************************************
2 * @file csky_math.h
3 * @brief Public header file for CSI DSP Library.
4 * @version V1.0
5 * @date 20. Dec 2016
6 ******************************************************************************/
7 /* ---------------------------------------------------------------------------
8 * Copyright (C) 2016 CSKY Limited. All rights reserved.
9 *
10 * Redistribution and use of this software in source and binary forms,
11 * with or without modification, are permitted provided that the following
12 * conditions are met:
13 * * Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * * Neither the name of CSKY Ltd. nor the names of CSKY's contributors may
19 * be used to endorse or promote products derived from this software without
20 * specific prior written permission of CSKY Ltd.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
27 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 * -------------------------------------------------------------------------- */
34
35 /**
36 * @defgroup groupMath Basic Math Functions
37 */
38
39 /**
40 * @defgroup groupFastMath Fast Math Functions
41 * This set of functions provides a fast approximation to sine, cosine, and square root.
42 * As compared to most of the other functions in the CSI math library, the fast math functions
43 * operate on individual values and not arrays.
44 * There are separate functions for Q15, Q31, and floating-point data.
45 *
46 */
47
48 /**
49 * @defgroup groupCmplxMath Complex Math Functions
50 * This set of functions operates on complex data vectors.
51 * The data in the complex arrays is stored in an interleaved fashion
52 * (real, imag, real, imag, ...).
53 * In the API functions, the number of samples in a complex array refers
54 * to the number of complex values; the array contains twice this number of
55 * real values.
56 */
57
58 /**
59 * @defgroup groupFilters Filtering Functions
60 */
61
62 /**
63 * @defgroup groupMatrix Matrix Functions
64 *
65 * This set of functions provides basic matrix math operations.
66 * The functions operate on matrix data structures. For example,
67 * the type
68 * definition for the floating-point matrix structure is shown
69 * below:
70 * <pre>
71 * typedef struct
72 * {
73 * uint16_t numRows; // number of rows of the matrix.
74 * uint16_t numCols; // number of columns of the matrix.
75 * float32_t *pData; // points to the data of the matrix.
76 * } csky_matrix_instance_f32;
77 * </pre>
78 * There are similar definitions for Q15 and Q31 data types.
79 *
80 * The structure specifies the size of the matrix and then points to
81 * an array of data. The array is of size <code>numRows X numCols</code>
82 * and the values are arranged in row order. That is, the
83 * matrix element (i, j) is stored at:
84 * <pre>
85 * pData[i*numCols + j]
86 * </pre>
87 *
88 * \par Init Functions
89 * There is an associated initialization function for each type of matrix
90 * data structure.
91 * The initialization function sets the values of the internal structure fields.
92 * Refer to the function <code>csky_mat_init_f32()</code>, <code>csky_mat_init_q31()</code>
93 * and <code>csky_mat_init_q15()</code> for floating-point, Q31 and Q15 types, respectively.
94 *
95 * \par
 * Use of the initialization function is optional. However, if the initialization function
 * is used, then the instance structure cannot be placed into a const data section.
98 * To place the instance structure in a const data
99 * section, manually initialize the data structure. For example:
100 * <pre>
101 * <code>csky_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
102 * <code>csky_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
103 * <code>csky_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
104 * </pre>
105 * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
106 * specifies the number of columns, and <code>pData</code> points to the
107 * data array.
108 *
109 * \par Size Checking
110 * By default all of the matrix functions perform size checking on the input and
111 * output matrices. For example, the matrix addition function verifies that the
112 * two input matrices and the output matrix all have the same number of rows and
113 * columns. If the size check fails the functions return:
114 * <pre>
115 * CSKY_MATH_SIZE_MISMATCH
116 * </pre>
117 * Otherwise the functions return
118 * <pre>
119 * CSKY_MATH_SUCCESS
120 * </pre>
121 * There is some overhead associated with this matrix size checking.
122 * The matrix size checking is enabled via the \#define
123 * <pre>
124 * CSKY_MATH_MATRIX_CHECK
125 * </pre>
126 * within the library project settings. By default this macro is defined
127 * and size checking is enabled. By changing the project settings and
128 * undefining this macro size checking is eliminated and the functions
129 * run a bit faster. With size checking disabled the functions always
130 * return <code>CSKY_MATH_SUCCESS</code>.
131 */
132
133 /**
134 * @defgroup groupTransforms Transform Functions
135 */
136
137 /**
138 * @defgroup groupController Controller Functions
139 */
140
141 /**
142 * @defgroup groupStats Statistics Functions
143 */
144 /**
145 * @defgroup groupSupport Support Functions
146 */
147
148 /**
149 * @defgroup groupInterpolation Interpolation Functions
150 * These functions perform 1- and 2-dimensional interpolation of data.
151 * Linear interpolation is used for 1-dimensional data and
152 * bilinear interpolation is used for 2-dimensional data.
153 */
154
155 /**
156 * @defgroup groupYunvoice Yunvoice Functions
 * These functions are designed for the Yunvoice project and are adapted
 * from the corresponding CEVA DSP functions, so that software can be ported
 * from CEVA to CSKY in a straightforward way.
160 */
161
162 /**
163 * @defgroup groupExamples Examples
164 */
165
166 #ifndef _CSKY_MATH_H
167 #define _CSKY_MATH_H
168
169 #define __CSI_GENERIC /* disable NVIC and Systick functions */
170
171 #include "csi_core.h"
172
173 #include <float.h>
174 #undef __CSI_GENERIC /* enable NVIC and Systick functions */
175 #include "string.h"
176 #include "math.h"
177 #ifdef __cplusplus
178 extern "C"
179 {
180 #endif
181
/**
 * @brief Macros required for reciprocal calculation in Normalized LMS
 *
 * NOTE(review): PI is defined in this group as well although it is not
 * specific to the Normalized LMS code.
 */

/* Small Q31 constant; presumably added to a denominator to avoid a zero
 * divide -- confirm against the NLMS sources. */
#define DELTA_Q31 (0x100)
/* Q15 counterpart of DELTA_Q31. */
#define DELTA_Q15 0x5
/* Mask that keeps the low 6 bits of a value (an index in the range 0..63). */
#define INDEX_MASK 0x0000003F
/* Single-precision pi; only defined when the including code has not
 * already provided its own PI. */
#ifndef PI
#define PI 3.14159265358979f
#endif
192
/**
 * @brief Macros required for SINE and COSINE Fast math approximations
 *
 * The lookup tables these constants refer to are not visible in this header;
 * the notes below only restate the arithmetic of the definitions.
 */

#define FAST_MATH_TABLE_SIZE 512      /* number of table entries (2^9) */
#define FAST_MATH_Q31_SHIFT (32 - 10) /* evaluates to 22 */
#define FAST_MATH_Q15_SHIFT (16 - 10) /* evaluates to 6 */
#define CONTROLLER_Q31_SHIFT (32 - 9) /* evaluates to 23 */
#define TABLE_SIZE 256                /* number of table entries (2^8) */
#define TABLE_SPACING_Q31 0x400000    /* 2^22 */
#define TABLE_SPACING_Q15 0x80        /* 2^7 */
204
/**
 * @brief Macros required for SINE and COSINE Controller functions
 */
/* 1.31(q31) Fixed value of 2/360 */
/* -1 to +1 is divided into 360 values so total spacing is (2/360) */
/* 0xB60B61 == 11930465, i.e. (2/360) * 2^31 rounded to the nearest integer. */
#define INPUT_SPACING 0xB60B61
211
/**
 * @brief Macro for Unaligned Support
 *
 * ALIGN4 expands to nothing unless UNALIGNED_SUPPORT_DISABLE is defined, in
 * which case it expands to a 4-byte alignment attribute (GCC-style
 * __attribute__ syntax) to be attached to a declaration.
 */
#ifndef UNALIGNED_SUPPORT_DISABLE
#define ALIGN4
#else
#define ALIGN4 __attribute__((aligned(4)))
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
220
221 /**
222 * @brief Macro for log , pow and related fast functions.
223 */
/*
 * Absolute value, maximum and minimum.
 * These are plain macros: each argument may be evaluated more than once, so
 * never pass an expression with side effects (for example i++).
 */
#define ABS(x) (((x) > 0) ? (x) : (-(x)))
#define max(x, y) (((y) > (x)) ? (y) : (x))
#define min(x, y) (((y) < (x)) ? (y) : (x))

/*
 * Splitting constant 2^27 + 1 used by EMULV and MUL12 to cut an IEEE-754
 * double into a high and a low part whose partial products are exact
 * (Veltkamp/Dekker splitting).
 */
#define CN 134217729.0

/* NOTE(review): presumably the indices of the high and low 32-bit words of a
 * double on a little-endian target -- confirm against the code using them. */
#define HIGH_HALF 1
#define LOW_HALF 0

/*
 * All macros below are statement macros wrapped in do { } while (0): invoke
 * them like a function call followed by ';'.  Output and temporary arguments
 * must be assignable lvalues; input arguments are evaluated several times.
 */

/* Exact addition of two single-length floating point numbers.       */
/* The macro produces a double-length number (z,zz) that satisfies   */
/* z+zz = x+y exactly.                                               */

#define EADD(x, y, z, zz) do { \
        (z)=(x)+(y); \
        (zz)=(ABS(x)>ABS(y)) ? (((x)-(z))+(y)) : (((y)-(z))+(x)); \
    } while (0)

/* Exact multiplication of two single-length floating point numbers. */
/* The macro produces a double-length number (z,zz) that             */
/* satisfies z+zz = x*y exactly. p,hx,tx,hy,ty are temporary         */
/* storage variables of type double.                                 */

#define EMULV(x, y, z, zz, p, hx, tx, hy, ty) do { \
        (p)=CN*(x); (hx)=((x)-(p))+(p); (tx)=(x)-(hx); \
        (p)=CN*(y); (hy)=((y)-(p))+(p); (ty)=(y)-(hy); \
        (z)=(x)*(y); \
        (zz)=((((hx)*(hy)-(z))+(hx)*(ty))+(tx)*(hy))+(tx)*(ty); \
    } while (0)

/* Exact multiplication of two single-length floating point numbers.   */
/* The macro produces a nearly double-length number (z,zz) (see Dekker) */
/* that satisfies z+zz = x*y exactly. p,hx,tx,hy,ty,q are temporary     */
/* storage variables of type double.                                    */

#define MUL12(x, y, z, zz, p, hx, tx, hy, ty, q) do { \
        (p)=CN*(x); (hx)=((x)-(p))+(p); (tx)=(x)-(hx); \
        (p)=CN*(y); (hy)=((y)-(p))+(p); (ty)=(y)-(hy); \
        (p)=(hx)*(hy); \
        (q)=(hx)*(ty)+(tx)*(hy); (z)=(p)+(q); \
        (zz)=(((p)-(z))+(q))+(tx)*(ty); \
    } while (0)

/* Double-length addition, Dekker. The macro produces a double-length  */
/* number (z,zz) which satisfies approximately z+zz = x+xx + y+yy.     */
/* An error bound: (abs(x+xx)+abs(y+yy))*4.94e-32. (x,xx), (y,yy)      */
/* are assumed to be double-length numbers. r,s are temporary          */
/* storage variables of type double.                                   */

#define ADD2(x, xx, y, yy, z, zz, r, s) do { \
        (r)=(x)+(y); (s)=(ABS(x)>ABS(y)) ? \
            (((((x)-(r))+(y))+(yy))+(xx)) : \
            (((((y)-(r))+(x))+(xx))+(yy)); \
        (z)=(r)+(s); \
        (zz)=((r)-(z))+(s); \
    } while (0)

/* Double-length subtraction, Dekker. The macro produces a double-length */
/* number (z,zz) which satisfies approximately z+zz = x+xx - (y+yy).     */
/* An error bound: (abs(x+xx)+abs(y+yy))*4.94e-32. (x,xx), (y,yy)        */
/* are assumed to be double-length numbers. r,s are temporary            */
/* storage variables of type double.                                     */

#define SUB2(x, xx, y, yy, z, zz, r, s) do { \
        (r)=(x)-(y); (s)=(ABS(x)>ABS(y)) ? \
            (((((x)-(r))-(y))-(yy))+(xx)) : \
            ((((x)-((y)+(r)))+(xx))-(yy)); \
        (z)=(r)+(s); \
        (zz)=((r)-(z))+(s); \
    } while (0)

/* Double-length multiplication, Dekker. The macro produces a double-length */
/* number (z,zz) which satisfies approximately z+zz = (x+xx)*(y+yy).        */
/* An error bound: abs((x+xx)*(y+yy))*1.24e-31. (x,xx), (y,yy)              */
/* are assumed to be double-length numbers. p,hx,tx,hy,ty,q,c,cc are        */
/* temporary storage variables of type double.                              */
/* MUL12 is a do/while(0) statement macro, so it needs its own ';' here.    */

#define MUL2(x, xx, y, yy, z, zz, p, hx, tx, hy, ty, q, c, cc) do { \
        MUL12((x), (y), (c), (cc), (p), (hx), (tx), (hy), (ty), (q)); \
        (cc)=((x)*(yy)+(xx)*(y))+(cc); \
        (z)=(c)+(cc); \
        (zz)=((c)-(z))+(cc); \
    } while (0)
304
__SSAT_31(int32_t x)305 __STATIC_INLINE int32_t __SSAT_31(int32_t x)
306 {
307 int32_t res = x;
308 if (x > 0x3fffffff) {
309 res = 0x3fffffff;
310 } else if (x < -1073741824) {
311 res = -1073741824;
312 }
313
314 return res;
315 }
316
__SSAT_16(int32_t x)317 __STATIC_INLINE int32_t __SSAT_16(int32_t x)
318 {
319 int32_t res = x;
320 if (x > 0x7fff) {
321 res = 0x7fff;
322 } else if (x < -32768) {
323 res = -32768;
324 }
325
326 return res;
327 }
328
__SSAT_8(int32_t x)329 __STATIC_INLINE int32_t __SSAT_8(int32_t x)
330 {
331 int32_t res = x;
332 if (x > 0x7f) {
333 res = 0x7f;
334 } else if (x < -128) {
335 res = -128;
336 }
337
338 return res;
339 }
340
341 #ifdef CSKY_SIMD
/* SMMLAR */
/*
 * Issues the mula.s32.rhs instruction on a, x and y and returns a.
 * a is tied to operand 0, so it is both the incoming accumulator and the
 * result register.
 * NOTE(review): the tag above names the ARM SMMLAR instruction as the
 * counterpart, i.e. presumably a rounded "a += high 32 bits of x*y" --
 * confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t multAcc_32x32_keep32_R(int32_t a, int32_t x, int32_t y)
{
    __ASM volatile("mula.s32.rhs %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y) : "0" (a), "1" (x), "2" (y));
    return a;
}
349
/* SMMLSR */
/*
 * Issues the muls.s32.rhs instruction on a, x and y and returns a.
 * a is tied to operand 0, so it is both the incoming accumulator and the
 * result register.
 * NOTE(review): the tag above names the ARM SMMLSR instruction as the
 * counterpart, i.e. presumably a rounded "a -= high 32 bits of x*y" --
 * confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t multSub_32x32_keep32_R(int32_t a, int32_t x, int32_t y)
{
    __ASM volatile("muls.s32.rhs %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
357
/* SMMULR */
/*
 * Issues the mul.s32.rh instruction on x and y and returns the value the
 * instruction leaves in operand 0.  The local a is output-only here.
 * NOTE(review): the tag above names the ARM SMMULR instruction as the
 * counterpart, i.e. presumably the rounded high 32 bits of x*y -- confirm
 * against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_32x32_keep32_R(int32_t x, int32_t y)
{
    int32_t a;
    __ASM volatile("mul.s32.rh %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
    return a;
}
366
/* SMMLA */
/*
 * Issues the mula.s32.hs instruction on a, x and y and returns a.
 * a is tied to operand 0, so it is both the incoming accumulator and the
 * result register.
 * NOTE(review): the tag above names the ARM SMMLA instruction as the
 * counterpart, i.e. presumably "a += high 32 bits of x*y" -- confirm
 * against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t multAcc_32x32_keep32(int32_t a, int32_t x, int32_t y)
{
    __ASM volatile("mula.s32.hs %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
374
/* SMMLS */
/*
 * Issues the muls.s32.hs instruction on a, x and y and returns a.
 * a is tied to operand 0, so it is both the incoming accumulator and the
 * result register.
 * NOTE(review): the tag above names the ARM SMMLS instruction as the
 * counterpart, i.e. presumably "a -= high 32 bits of x*y" -- confirm
 * against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t multSub_32x32_keep32(int32_t a, int32_t x, int32_t y)
{
    __ASM volatile("muls.s32.hs %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
382
383 /* SMMUL */
mult_32x32_keep32(int32_t x,int32_t y)384 __STATIC_INLINE int32_t mult_32x32_keep32(int32_t x, int32_t y)
385 {
386 int32_t a;
387 __ASM volatile("mul.s32.h %0, %1, %2\n\t"
388 :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
389 return a;
390 }
391
/*
 * Issues the mulall.s16 instruction on a, x and y and returns a.
 * a is tied to operand 0, so it is both the incoming accumulator and the
 * result register.
 * NOTE(review): judging by the function name this presumably accumulates a
 * 16x16-bit product into a 32-bit sum -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t multAcc_16x16_keep32(int32_t a, int16_t x, int16_t y)
{
    __ASM volatile("mulall.s16 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
398
/*
 * Issues the mulall.s16.e instruction on a, x and y and returns a.
 * a is a 64-bit value tied to operand 0, so it is both the incoming
 * accumulator and the result.
 * NOTE(review): judging by the function name this presumably accumulates a
 * 16x16-bit product into a 64-bit sum -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int64_t multAcc_16x16_keep64(int64_t a, int16_t x, int16_t y)
{
    __ASM volatile("mulall.s16.e %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
405
/*
 * Issues the mul.s32 instruction on x and y and returns the 64-bit value the
 * instruction leaves in operand 0.  The local a is output-only.
 * NOTE(review): presumably the full signed 64-bit product x*y -- confirm
 * against the C-SKY ISA manual.
 */
__STATIC_INLINE int64_t mult_32x32_keep64(int32_t x, int32_t y)
{
    int64_t a;
    __ASM volatile("mul.s32 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
    return a;
}
413
/*
 * Issues the mula.s32 instruction on a, x and y and returns a.
 * a is a 64-bit value tied to operand 0, so it is both the incoming
 * accumulator and the result.
 * NOTE(review): presumably "a += (int64_t)x * y" -- confirm against the
 * C-SKY ISA manual.
 */
__STATIC_INLINE int64_t multAcc_32x32_keep64(int64_t a, int32_t x, int32_t y)
{
    __ASM volatile("mula.s32 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
    return a;
}
420
/*
 * Two-instruction sequence: mul.s32 places its 64-bit result in tmp1
 * (operand 0), then dexti with immediate 31 reads the register pair
 * %0 / %R0 and writes tmp2, which is returned.
 * NOTE(review): presumably equivalent to (int32_t)(((int64_t)x * y) >> 31)
 * -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_32x32_dext_31(int32_t x, int32_t y)
{
    int64_t tmp1;
    int32_t tmp2;
    __ASM volatile("mul.s32 %0, %1, %2\n\t"
                   "dexti %3, %0, %R0, 31"
                   :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
    return tmp2;
}
430
/*
 * Two-instruction sequence: mul.s32 places its 64-bit result in tmp1
 * (operand 0), then dexti with immediate 30 reads the register pair
 * %0 / %R0 and writes tmp2, which is returned.
 * NOTE(review): presumably equivalent to (int32_t)(((int64_t)x * y) >> 30)
 * -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_32x32_dext_30(int32_t x, int32_t y)
{
    int64_t tmp1;
    int32_t tmp2;
    __ASM volatile("mul.s32 %0, %1, %2\n\t"
                   "dexti %3, %0, %R0, 30"
                   :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
    return tmp2;
}
440
/*
 * Two-instruction sequence: mul.s32 places its 64-bit result in tmp1
 * (operand 0), then dexti with immediate 4 reads the register pair
 * %0 / %R0 and writes tmp2, which is returned.
 * NOTE(review): presumably equivalent to (int32_t)(((int64_t)x * y) >> 4)
 * -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_32x32_dext_4(int32_t x, int32_t y)
{
    int64_t tmp1;
    int32_t tmp2;
    __ASM volatile("mul.s32 %0, %1, %2\n\t"
                   "dexti %3, %0, %R0, 4"
                   :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
    return tmp2;
}
450
/*
 * Two-instruction sequence: mul.s32 places its 64-bit result in tmp1
 * (operand 0), then asri shifts %R0 (the second register of that pair) right
 * by 1 into tmp2, which is returned.  Unlike the other dext helpers no dexti
 * is used.
 * NOTE(review): presumably equivalent to (int32_t)(((int64_t)x * y) >> 33),
 * assuming %R0 is the high word -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_32x32_dext_33(int32_t x, int32_t y)
{
    int64_t tmp1;
    int32_t tmp2;
    __ASM volatile("mul.s32 %0, %1, %2\n\t"
                   "asri %3, %R0, 1"
                   :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
    return tmp2;
}
460
/*
 * Issues dexti with immediate 31 on the register pair holding the 64-bit
 * argument x (%1 / %R1) and returns the 32-bit value written to tmp1.
 * NOTE(review): presumably equivalent to (int32_t)(x >> 31) -- confirm
 * against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t dext_31(int64_t x)
{
    int32_t tmp1;
    __ASM volatile(
        "dexti %0, %1, %R1, 31"
        :"=r" (tmp1), "=r" (x) : "1" (x));
    return tmp1;
}
469
/*
 * Issues the mulll.s16 instruction on x and y and returns the value the
 * instruction leaves in operand 0.  The local a is output-only.
 * NOTE(review): judging by the name, presumably the product of the low
 * 16-bit halves of x and y -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_l16xl16_keep32(int32_t x, int32_t y)
{
    int32_t a;
    __ASM volatile("mulll.s16 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
    return a;
}
477
/*
 * Issues the mulhl.s16 instruction on x and y and returns the value the
 * instruction leaves in operand 0.  The local a is output-only.
 * NOTE(review): judging by the name, presumably the product of the high
 * 16-bit half of x and the low 16-bit half of y -- confirm against the
 * C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_h16xl16_keep32(int32_t x, int32_t y)
{
    int32_t a;
    __ASM volatile("mulhl.s16 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
    return a;
}
485
/*
 * Issues the mulhh.s16 instruction on x and y and returns the value the
 * instruction leaves in operand 0.  The local a is output-only.
 * NOTE(review): judging by the name, presumably the product of the high
 * 16-bit halves of x and y -- confirm against the C-SKY ISA manual.
 */
__STATIC_INLINE int32_t mult_h16xh16_keep32(int32_t x, int32_t y)
{
    int32_t a;
    __ASM volatile("mulhh.s16 %0, %1, %2\n\t"
                   :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
    return a;
}
493
494 #endif
495
/**
 * @brief Error status returned by some functions in the library.
 *
 * Success is 0; every error code is a distinct negative value.
 */

typedef enum {
    CSKY_MATH_SUCCESS = 0, /**< No error */
    CSKY_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */
    CSKY_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */
    CSKY_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation. */
    CSKY_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */
    /** Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
    CSKY_MATH_SINGULAR = -5,
    CSKY_MATH_TEST_FAILURE = -6 /**< Test Failed */
} csky_status;
510
/**
 * @brief 8-bit fractional data type in 1.7 format.
 *
 * Stored as a signed 8-bit integer.
 */
typedef int8_t q7_t;

/**
 * @brief 16-bit fractional data type in 1.15 format.
 *
 * Stored as a signed 16-bit integer.
 */
typedef int16_t q15_t;

/**
 * @brief 32-bit fractional data type in 1.31 format.
 *
 * Stored as a signed 32-bit integer.
 */
typedef int32_t q31_t;

/**
 * @brief 64-bit fractional data type in 1.63 format.
 *
 * Stored as a signed 64-bit integer.
 */
typedef int64_t q63_t;

/**
 * @brief 32-bit floating-point type definition.
 *
 * Alias of the C type float.
 */
typedef float float32_t;

/**
 * @brief 64-bit floating-point type definition.
 *
 * Alias of the C type double.
 */
typedef double float64_t;
540
/**
 * @brief 32-bit fractional complex data type in 1.31 format.
 *
 * Two consecutive q31_t members, re first and im second.
 */
typedef struct {
    q31_t re; /**< real part (going by the member name) */
    q31_t im; /**< imaginary part (going by the member name) */
} cq31_t;
/**
 * @brief 16-bit fractional complex data type in 1.15 format.
 *
 * Two consecutive q15_t members, re first and im second.
 */
typedef struct {
    q15_t re; /**< real part (going by the member name) */
    q15_t im; /**< imaginary part (going by the member name) */
} cq15_t;
/**
 * @brief definition to read/write two 16 bit values.
 *
 * The macros below reinterpret pointers so that memory can be accessed one
 * 32-bit (or 64-bit) word at a time.
 * NOTE(review): the casts rely on the target tolerating the resulting
 * alignment and aliasing -- confirm for each call site.
 */
/* Word type used by the __SIMD32 accessors. */
#define __SIMD32_TYPE int32_t
/* Marks a declaration as intentionally unused (GCC-style attribute). */
#define CSI_UNUSED __attribute__((unused))

/* Treats the pointer variable addr itself as an lvalue of type __SIMD32_TYPE *;
 * addr must therefore be an addressable pointer variable. */
#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr))
/* Converts the pointer value addr to __SIMD32_TYPE * without dereferencing it. */
#define __SIMD32_CONST(addr) ((__SIMD32_TYPE *)(addr))
/* Dereferences addr as a single __SIMD32_TYPE word. */
#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE *) (addr))
/* Same as __SIMD32 but yields an lvalue of type int64_t *. */
#define __SIMD64(addr) (*(int64_t **) & (addr))
565
566 #if defined (CSKY_MATH_NO_SIMD)
567 /**
568 * @brief definition to pack two 16 bit values.
569 */
/*
 * __PKHBT(ARG1, ARG2, ARG3): result bits 15..0 come from ARG1, result bits
 * 31..16 come from (ARG2 << ARG3).
 * __PKHTB(ARG1, ARG2, ARG3): result bits 31..16 come from ARG1, result bits
 * 15..0 come from (ARG2 >> ARG3), using a signed right shift.
 *
 * Both expressions have type int32_t.  The masking and the left shift are
 * carried out in uint32_t so that negative halves do not trigger a signed
 * left shift, and ARG3 is parenthesised so that any expression may be passed
 * as the shift count.  ARG3 must be in the range 0..31.
 */
#define __PKHBT(ARG1, ARG2, ARG3) ((int32_t)(((uint32_t)(ARG1) & 0x0000FFFFu) | \
                                            (((uint32_t)(ARG2) << (ARG3)) & 0xFFFF0000u)))
#define __PKHTB(ARG1, ARG2, ARG3) ((int32_t)(((uint32_t)(ARG1) & 0xFFFF0000u) | \
                                            ((uint32_t)((int32_t)(ARG2) >> (ARG3)) & 0x0000FFFFu)))
574
575 #endif
576
/**
 * @brief definition to pack four 8 bit values.
 *
 * Only the low 8 bits of each argument are used.  Without
 * CSKY_MATH_BIG_ENDIAN, v0 lands in bits 7..0 and v3 in bits 31..24; with it
 * the order is reversed.  The expression has type int32_t.
 *
 * The bytes are masked and shifted as uint32_t, so negative q7 inputs do not
 * cause a left shift of a negative signed value (undefined in C); the
 * resulting bit pattern is the same as with the signed formulation.
 */
#ifndef CSKY_MATH_BIG_ENDIAN

#define __PACKq7(v0, v1, v2, v3) ((int32_t)( ((uint32_t)(v0) & 0xFFu)         | \
                                            (((uint32_t)(v1) & 0xFFu) <<  8) | \
                                            (((uint32_t)(v2) & 0xFFu) << 16) | \
                                            (((uint32_t)(v3) & 0xFFu) << 24)))
#else

#define __PACKq7(v0, v1, v2, v3) ((int32_t)( ((uint32_t)(v3) & 0xFFu)         | \
                                            (((uint32_t)(v2) & 0xFFu) <<  8) | \
                                            (((uint32_t)(v1) & 0xFFu) << 16) | \
                                            (((uint32_t)(v0) & 0xFFu) << 24)))

#endif
594
595 /**
596 * @brief Clips Q63 to Q31 values.
597 */
clip_q63_to_q31(q63_t x)598 static __INLINE q31_t clip_q63_to_q31(
599 q63_t x)
600 {
601 return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
602 ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
603 }
604
/**
 * @brief Instance structure for the Q7 FIR filter.
 *
 * Holds the tap count and pointers only; the state and coefficient arrays
 * live outside the structure (csky_fir_init_q7() receives them as arguments).
 */
typedef struct {
    uint16_t numTaps; /**< number of filter coefficients in the filter. */
    q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
} csky_fir_instance_q7;
613
/**
 * @brief Instance structure for the Q15 FIR filter.
 *
 * Holds the tap count and pointers only; the state and coefficient arrays
 * live outside the structure (csky_fir_init_q15() receives them as arguments).
 */
typedef struct {
    uint16_t numTaps; /**< number of filter coefficients in the filter. */
    q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
} csky_fir_instance_q15;
622
/**
 * @brief Instance structure for the Q31 FIR filter.
 *
 * Holds the tap count and pointers only; the state and coefficient arrays
 * live outside the structure (csky_fir_init_q31() receives them as arguments).
 */
typedef struct {
    uint16_t numTaps; /**< number of filter coefficients in the filter. */
    q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
} csky_fir_instance_q31;
631
/**
 * @brief Instance structure for the floating-point FIR filter.
 *
 * Holds the tap count and pointers only; the state and coefficient arrays
 * live outside the structure (csky_fir_init_f32() receives them as arguments).
 */
typedef struct {
    uint16_t numTaps; /**< number of filter coefficients in the filter. */
    float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
} csky_fir_instance_f32;
640
/*
 * FIR filter entry points.  Only the declarations are visible here; the
 * parameter roles below are read off the names and the instance-structure
 * field comments -- confirm against the implementation.
 *
 *   S         filter instance (read-only for the processing functions)
 *   pSrc      presumably the input samples, blockSize of them
 *   pDst      presumably the output samples, blockSize of them
 *   pCoeffs   coefficient array (numTaps entries per the structure comment)
 *   pState    state array (numTaps+blockSize-1 entries per the structure comment)
 *   blockSize presumably the number of samples handled per call
 */

/* Q7 FIR processing function. */
void csky_fir_q7(
    const csky_fir_instance_q7 *S,
    q7_t *pSrc,
    q7_t *pDst,
    uint32_t blockSize);

/* Q7 FIR initialization function. */
void csky_fir_init_q7(
    csky_fir_instance_q7 *S,
    uint16_t numTaps,
    q7_t *pCoeffs,
    q7_t *pState,
    uint32_t blockSize);

/* Q15 FIR processing function. */
void csky_fir_q15(
    const csky_fir_instance_q15 *S,
    q15_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

/* Q15 FIR processing function, "fast" variant (trade-offs not visible here). */
void csky_fir_fast_q15(
    const csky_fir_instance_q15 *S,
    q15_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

/* Q15 FIR initialization function.  Unlike the other FIR init functions it
 * returns a csky_status; which error codes it can produce is not visible here. */
csky_status csky_fir_init_q15(
    csky_fir_instance_q15 *S,
    uint16_t numTaps,
    q15_t *pCoeffs,
    q15_t *pState,
    uint32_t blockSize);

/* Q31 FIR processing function. */
void csky_fir_q31(
    const csky_fir_instance_q31 *S,
    q31_t *pSrc,
    q31_t *pDst,
    uint32_t blockSize);

/* Q31 FIR processing function, "fast" variant (trade-offs not visible here). */
void csky_fir_fast_q31(
    const csky_fir_instance_q31 *S,
    q31_t *pSrc,
    q31_t *pDst,
    uint32_t blockSize);

/* Q31 FIR initialization function. */
void csky_fir_init_q31(
    csky_fir_instance_q31 *S,
    uint16_t numTaps,
    q31_t *pCoeffs,
    q31_t *pState,
    uint32_t blockSize);

/* Floating-point FIR processing function. */
void csky_fir_f32(
    const csky_fir_instance_f32 *S,
    float32_t *pSrc,
    float32_t *pDst,
    uint32_t blockSize);

/* Floating-point FIR initialization function. */
void csky_fir_init_f32(
    csky_fir_instance_f32 *S,
    uint16_t numTaps,
    float32_t *pCoeffs,
    float32_t *pState,
    uint32_t blockSize);
704
/**
 * @brief Instance structure for the Q15 Biquad cascade filter.
 *
 * NOTE(review): numStages is a signed 8-bit field here, whereas the Q31 and
 * floating-point instances use uint32_t and the init function takes a
 * uint8_t -- values above 127 would not survive the conversion.
 */
typedef struct {
    int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
    q15_t *pState; /**< Points to the array of state variables. The array is of length 4*numStages. */
    q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */
    int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */
} csky_biquad_casd_df1_inst_q15;
714
/**
 * @brief Instance structure for the Q31 Biquad cascade filter.
 *
 * NOTE(review): postShift is unsigned here while
 * csky_biquad_cascade_df1_init_q31() takes it as int8_t -- confirm that
 * negative shifts are never passed.
 */
typedef struct {
    uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
    q31_t *pState; /**< Points to the array of state variables. The array is of length 4*numStages. */
    q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */
    uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */
} csky_biquad_casd_df1_inst_q31;
724
725 /**
726 * @brief Instance structure for the Q31 Biquad cascade filter.
727 */
728
/**
 * @brief Instance structure for the floating-point Biquad cascade filter.
 *
 * Unlike the fixed-point instances there is no postShift member.
 */
typedef struct {
    uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
    float32_t *pState; /**< Points to the array of state variables. The array is of length 4*numStages. */
    float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */
} csky_biquad_casd_df1_inst_f32;
737
/*
 * Biquad cascade (direct form 1, per the "df1" in the names) entry points.
 * Only the declarations are visible here; the parameter roles below are read
 * off the names and the instance-structure field comments -- confirm against
 * the implementation.
 *
 *   S         filter instance (read-only for the processing functions)
 *   pSrc      presumably the input samples, blockSize of them
 *   pDst      presumably the output samples, blockSize of them
 *   numStages number of 2nd order stages
 *   pCoeffs   coefficient array (5*numStages entries per the structure comment)
 *   pState    state array (4*numStages entries per the structure comment)
 *   postShift additional shift applied to each output sample
 */

/* Q15 processing function. */
void csky_biquad_cascade_df1_q15(
    const csky_biquad_casd_df1_inst_q15 *S,
    q15_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

/* Q15 initialization function. */
void csky_biquad_cascade_df1_init_q15(
    csky_biquad_casd_df1_inst_q15 *S,
    uint8_t numStages,
    q15_t *pCoeffs,
    q15_t *pState,
    int8_t postShift);

/* Q15 processing function, "fast" variant (trade-offs not visible here). */
void csky_biquad_cascade_df1_fast_q15(
    const csky_biquad_casd_df1_inst_q15 *S,
    q15_t *pSrc,
    q15_t *pDst,
    uint32_t blockSize);

/* Q31 processing function. */
void csky_biquad_cascade_df1_q31(
    const csky_biquad_casd_df1_inst_q31 *S,
    q31_t *pSrc,
    q31_t *pDst,
    uint32_t blockSize);

/* Q31 processing function, "fast" variant (trade-offs not visible here). */
void csky_biquad_cascade_df1_fast_q31(
    const csky_biquad_casd_df1_inst_q31 *S,
    q31_t *pSrc,
    q31_t *pDst,
    uint32_t blockSize);

/* Q31 initialization function. */
void csky_biquad_cascade_df1_init_q31(
    csky_biquad_casd_df1_inst_q31 *S,
    uint8_t numStages,
    q31_t *pCoeffs,
    q31_t *pState,
    int8_t postShift);

/* Floating-point processing function. */
void csky_biquad_cascade_df1_f32(
    const csky_biquad_casd_df1_inst_f32 *S,
    float32_t *pSrc,
    float32_t *pDst,
    uint32_t blockSize);

/* Floating-point initialization function (no postShift argument). */
void csky_biquad_cascade_df1_init_f32(
    csky_biquad_casd_df1_inst_f32 *S,
    uint8_t numStages,
    float32_t *pCoeffs,
    float32_t *pState);
787
/**
 * @brief Instance structure for the floating-point matrix structure.
 *
 * Describes a numRows x numCols matrix of float32_t values; the structure
 * holds only a pointer to the element storage.
 */
typedef struct {
    uint16_t numRows; /**< number of rows of the matrix. */
    uint16_t numCols; /**< number of columns of the matrix. */
    float32_t *pData; /**< points to the data of the matrix. */
} csky_matrix_instance_f32;
796
/**
 * @brief Instance structure for the double-precision (64-bit) floating-point matrix structure.
 *
 * Describes a numRows x numCols matrix of float64_t values; the structure
 * holds only a pointer to the element storage.
 */
typedef struct {
    uint16_t numRows; /**< number of rows of the matrix. */
    uint16_t numCols; /**< number of columns of the matrix. */
    float64_t *pData; /**< points to the data of the matrix. */
} csky_matrix_instance_f64;
805
/**
 * @brief Instance structure for the Q15 matrix structure.
 *
 * Describes a numRows x numCols matrix of q15_t values; the structure holds
 * only a pointer to the element storage.
 */
typedef struct {
    uint16_t numRows; /**< number of rows of the matrix. */
    uint16_t numCols; /**< number of columns of the matrix. */
    q15_t *pData; /**< points to the data of the matrix. */
} csky_matrix_instance_q15;
814
/**
 * @brief Instance structure for the Q31 matrix structure.
 *
 * Describes a numRows x numCols matrix of q31_t values; the structure holds
 * only a pointer to the element storage.
 */
typedef struct {
    uint16_t numRows; /**< number of rows of the matrix. */
    uint16_t numCols; /**< number of columns of the matrix. */
    q31_t *pData; /**< points to the data of the matrix. */
} csky_matrix_instance_q31;
823
/*
 * Matrix addition for f32, Q15 and Q31 matrices.
 * pSrcA and pSrcB are the read-only operands and pDst receives the result.
 * Each function returns a csky_status; implementations are not visible in
 * this header.
 */
csky_status csky_mat_add_f32(
    const csky_matrix_instance_f32 *pSrcA,
    const csky_matrix_instance_f32 *pSrcB,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_add_q15(
    const csky_matrix_instance_q15 *pSrcA,
    const csky_matrix_instance_q15 *pSrcB,
    csky_matrix_instance_q15 *pDst);

csky_status csky_mat_add_q31(
    const csky_matrix_instance_q31 *pSrcA,
    const csky_matrix_instance_q31 *pSrcB,
    csky_matrix_instance_q31 *pDst);
838
/*
 * Complex matrix multiplication for f32, Q15 and Q31 matrices.
 * pSrcA and pSrcB are the read-only operands and pDst receives the result.
 * Only the Q15 variant takes an extra pScratch buffer; its required size is
 * not visible in this header -- check the implementation.
 */
csky_status csky_mat_cmplx_mult_f32(
    const csky_matrix_instance_f32 *pSrcA,
    const csky_matrix_instance_f32 *pSrcB,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_cmplx_mult_q15(
    const csky_matrix_instance_q15 *pSrcA,
    const csky_matrix_instance_q15 *pSrcB,
    csky_matrix_instance_q15 *pDst,
    q15_t *pScratch);

csky_status csky_mat_cmplx_mult_q31(
    const csky_matrix_instance_q31 *pSrcA,
    const csky_matrix_instance_q31 *pSrcB,
    csky_matrix_instance_q31 *pDst);
854
/*
 * Matrix transpose for f32, Q15 and Q31 matrices.
 * pSrc is the read-only input and pDst receives the result.
 * Each function returns a csky_status.
 */
csky_status csky_mat_trans_f32(
    const csky_matrix_instance_f32 *pSrc,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_trans_q15(
    const csky_matrix_instance_q15 *pSrc,
    csky_matrix_instance_q15 *pDst);

csky_status csky_mat_trans_q31(
    const csky_matrix_instance_q31 *pSrc,
    csky_matrix_instance_q31 *pDst);
866
/*
 * Matrix multiplication for f32, Q15 and Q31 matrices, with additional
 * "fast" variants for the fixed-point types (their trade-offs are not
 * visible in this header).
 * pSrcA and pSrcB are the read-only operands and pDst receives the result.
 * The Q15 variants take an extra pState buffer; its required size is not
 * visible here -- check the implementation.
 */
csky_status csky_mat_mult_f32(
    const csky_matrix_instance_f32 *pSrcA,
    const csky_matrix_instance_f32 *pSrcB,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_mult_q15(
    const csky_matrix_instance_q15 *pSrcA,
    const csky_matrix_instance_q15 *pSrcB,
    csky_matrix_instance_q15 *pDst,
    q15_t *pState);

csky_status csky_mat_mult_fast_q15(
    const csky_matrix_instance_q15 *pSrcA,
    const csky_matrix_instance_q15 *pSrcB,
    csky_matrix_instance_q15 *pDst,
    q15_t *pState);

csky_status csky_mat_mult_q31(
    const csky_matrix_instance_q31 *pSrcA,
    const csky_matrix_instance_q31 *pSrcB,
    csky_matrix_instance_q31 *pDst);

csky_status csky_mat_mult_fast_q31(
    const csky_matrix_instance_q31 *pSrcA,
    const csky_matrix_instance_q31 *pSrcB,
    csky_matrix_instance_q31 *pDst);
893
/*
 * Matrix subtraction for f32, Q15 and Q31 matrices.
 * pSrcA and pSrcB are the read-only operands and pDst receives the result
 * (presumably A - B -- confirm against the implementation).
 * Each function returns a csky_status.
 */
csky_status csky_mat_sub_f32(
    const csky_matrix_instance_f32 *pSrcA,
    const csky_matrix_instance_f32 *pSrcB,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_sub_q15(
    const csky_matrix_instance_q15 *pSrcA,
    const csky_matrix_instance_q15 *pSrcB,
    csky_matrix_instance_q15 *pDst);

csky_status csky_mat_sub_q31(
    const csky_matrix_instance_q31 *pSrcA,
    const csky_matrix_instance_q31 *pSrcB,
    csky_matrix_instance_q31 *pDst);
908
/*
 * Matrix scaling for f32, Q15 and Q31 matrices.
 * pSrc is the read-only input and pDst receives the result.
 * The floating-point variant takes a single scale factor; the fixed-point
 * variants take a fractional factor (scaleFract) plus an integer shift.
 * NOTE(review): presumably the effective scale is scaleFract * 2^shift --
 * confirm against the implementation.
 */
csky_status csky_mat_scale_f32(
    const csky_matrix_instance_f32 *pSrc,
    float32_t scale,
    csky_matrix_instance_f32 *pDst);

csky_status csky_mat_scale_q15(
    const csky_matrix_instance_q15 *pSrc,
    q15_t scaleFract,
    int32_t shift,
    csky_matrix_instance_q15 *pDst);

csky_status csky_mat_scale_q31(
    const csky_matrix_instance_q31 *pSrc,
    q31_t scaleFract,
    int32_t shift,
    csky_matrix_instance_q31 *pDst);
925
/*
 * Matrix instance initialization for Q31, Q15 and f32 matrices.
 * S is the instance to fill in; nRows, nColumns and pData correspond to the
 * numRows, numCols and pData members of the instance structures.
 * No f64 initialization function is declared in this part of the header.
 */
void csky_mat_init_q31(
    csky_matrix_instance_q31 *S,
    uint16_t nRows,
    uint16_t nColumns,
    q31_t *pData);

void csky_mat_init_q15(
    csky_matrix_instance_q15 *S,
    uint16_t nRows,
    uint16_t nColumns,
    q15_t *pData);

void csky_mat_init_f32(
    csky_matrix_instance_f32 *S,
    uint16_t nRows,
    uint16_t nColumns,
    float32_t *pData);
943
/**
 * @brief Instance structure for the Q15 PID Control.
 */
typedef struct {
    q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
    q15_t A1; /**< Derived gain; presumably A1 = -Kp - 2Kd as in the Q31 instance -- confirm. */
    q15_t A2; /**< Derived gain; presumably A2 = Kd as in the Q31 instance -- confirm. */
    q15_t state[3]; /**< The state array of length 3. */
    q15_t Kp; /**< The proportional gain. */
    q15_t Ki; /**< The integral gain. */
    q15_t Kd; /**< The derivative gain. */
} csky_pid_instance_q15;
956
/**
 * @brief Instance structure for the Q31 PID Control.
 *
 * Kp, Ki and Kd are the user-facing gains; A0..A2 are gains derived from
 * them according to the formulas given on each member.
 */
typedef struct {
    q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
    q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
    q31_t A2; /**< The derived gain, A2 = Kd . */
    q31_t state[3]; /**< The state array of length 3. */
    q31_t Kp; /**< The proportional gain. */
    q31_t Ki; /**< The integral gain. */
    q31_t Kd; /**< The derivative gain. */
} csky_pid_instance_q31;
969
970 /**
971 * @brief Instance structure for the floating-point PID Control.
972 */
973 typedef struct {
974 float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
975 float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
976 float32_t A2; /**< The derived gain, A2 = Kd . */
977 float32_t state[3]; /**< The state array of length 3. */
978 float32_t Kp; /**< The proportional gain. */
979 float32_t Ki; /**< The integral gain. */
980 float32_t Kd; /**< The derivative gain. */
981 } csky_pid_instance_f32;
982
983 void csky_pid_init_f32(
984 csky_pid_instance_f32 *S,
985 int32_t resetStateFlag);
986
987 void csky_pid_reset_f32(
988 csky_pid_instance_f32 *S);
989
990 void csky_pid_init_q31(
991 csky_pid_instance_q31 *S,
992 int32_t resetStateFlag);
993
994 void csky_pid_reset_q31(
995 csky_pid_instance_q31 *S);
996
997 void csky_pid_init_q15(
998 csky_pid_instance_q15 *S,
999 int32_t resetStateFlag);
1000
1001 void csky_pid_reset_q15(
1002 csky_pid_instance_q15 *S);
1003
1004 /**
1005 * @brief Instance structure for the floating-point Linear Interpolate function.
1006 */
1007 typedef struct {
1008 uint32_t nValues; /**< nValues */
1009 float32_t x1; /**< x1 */
1010 float32_t xSpacing; /**< xSpacing */
1011 float32_t *pYData; /**< pointer to the table of Y values */
1012 } csky_linear_interp_instance_f32;
1013
1014 /**
1015 * @brief Instance structure for the floating-point bilinear interpolation function.
1016 */
1017 typedef struct {
1018 uint16_t numRows; /**< number of rows in the data table. */
1019 uint16_t numCols; /**< number of columns in the data table. */
1020 float32_t *pData; /**< points to the data table. */
1021 } csky_bilinear_interp_instance_f32;
1022
1023 /**
1024 * @brief Instance structure for the Q31 bilinear interpolation function.
1025 */
1026 typedef struct {
1027 uint16_t numRows; /**< number of rows in the data table. */
1028 uint16_t numCols; /**< number of columns in the data table. */
1029 q31_t *pData; /**< points to the data table. */
1030 } csky_bilinear_interp_instance_q31;
1031
1032 /**
1033 * @brief Instance structure for the Q15 bilinear interpolation function.
1034 */
1035 typedef struct {
1036 uint16_t numRows; /**< number of rows in the data table. */
1037 uint16_t numCols; /**< number of columns in the data table. */
1038 q15_t *pData; /**< points to the data table. */
1039 } csky_bilinear_interp_instance_q15;
1040
1041 /**
1042 * @brief Instance structure for the Q15 bilinear interpolation function.
1043 */
1044 typedef struct {
1045 uint16_t numRows; /**< number of rows in the data table. */
1046 uint16_t numCols; /**< number of columns in the data table. */
1047 q7_t *pData; /**< points to the data table. */
1048 } csky_bilinear_interp_instance_q7;
1049
1050 void csky_mult_q7(
1051 q7_t *pSrcA,
1052 q7_t *pSrcB,
1053 q7_t *pDst,
1054 uint32_t blockSize);
1055
1056 void csky_mult_q15(
1057 q15_t *pSrcA,
1058 q15_t *pSrcB,
1059 q15_t *pDst,
1060 uint32_t blockSize);
1061
1062 void csky_mult_rnd_q15(
1063 q15_t *pSrcA,
1064 q15_t *pSrcB,
1065 q15_t *pDst,
1066 uint32_t blockSize);
1067
1068 void csky_mult_q31(
1069 q31_t *pSrcA,
1070 q31_t *pSrcB,
1071 q31_t *pDst,
1072 uint32_t blockSize);
1073
1074 void csky_mult_f32(
1075 float32_t *pSrcA,
1076 float32_t *pSrcB,
1077 float32_t *pDst,
1078 uint32_t blockSize);
1079
1080 /**
1081 * @brief Instance structure for the Q15 CFFT/CIFFT function.
1082 */
1083 typedef struct {
1084 uint16_t fftLen; /**< length of the FFT. */
1085 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1086 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1087 uint8_t bitReverseFlag;
1088 q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */
1089 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1090 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1091 uint16_t twidCoefModifier;
1092 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1093 uint16_t bitRevFactor;
1094 } csky_cfft_radix2_instance_q15;
1095
1096 /**
1097 * @brief Instance structure for the Q15 CFFT/CIFFT function.
1098 */
1099 typedef struct {
1100 uint16_t fftLen; /**< length of the FFT. */
1101 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1102 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1103 uint8_t bitReverseFlag;
1104 q15_t *pTwiddle; /**< points to the twiddle factor table. */
1105 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1106 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1107 uint16_t twidCoefModifier;
1108 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1109 uint16_t bitRevFactor;
1110 } csky_cfft_radix4_instance_q15;
1111
1112 /**
1113 * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
1114 */
1115 typedef struct {
1116 uint16_t fftLen; /**< length of the FFT. */
1117 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1118 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1119 uint8_t bitReverseFlag;
1120 q31_t *pTwiddle; /**< points to the Twiddle factor table. */
1121 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1122 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1123 uint16_t twidCoefModifier;
1124 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1125 uint16_t bitRevFactor;
1126 } csky_cfft_radix2_instance_q31;
1127
1128 /**
1129 * @brief Instance structure for the Q31 CFFT/CIFFT function.
1130 */
1131 typedef struct {
1132 uint16_t fftLen; /**< length of the FFT. */
1133 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1134 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1135 uint8_t bitReverseFlag;
1136 q31_t *pTwiddle; /**< points to the twiddle factor table. */
1137 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1138 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1139 uint16_t twidCoefModifier;
1140 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1141 uint16_t bitRevFactor;
1142 } csky_cfft_radix4_instance_q31;
1143
1144 /**
1145 * @brief Instance structure for the floating-point CFFT/CIFFT function.
1146 */
1147 typedef struct {
1148 uint16_t fftLen; /**< length of the FFT. */
1149 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1150 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1151 uint8_t bitReverseFlag;
1152 float32_t *pTwiddle; /**< points to the Twiddle factor table. */
1153 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1154 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1155 uint16_t twidCoefModifier;
1156 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1157 uint16_t bitRevFactor;
1158 float32_t onebyfftLen; /**< value of 1/fftLen. */
1159 } csky_cfft_radix2_instance_f32;
1160
1161 /**
1162 * @brief Instance structure for the floating-point CFFT/CIFFT function.
1163 */
1164 typedef struct {
1165 uint16_t fftLen; /**< length of the FFT. */
1166 uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1167 /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1168 uint8_t bitReverseFlag;
1169 float32_t *pTwiddle; /**< points to the Twiddle factor table. */
1170 uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1171 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1172 uint16_t twidCoefModifier;
1173 /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1174 uint16_t bitRevFactor;
1175 float32_t onebyfftLen; /**< value of 1/fftLen. */
1176 } csky_cfft_radix4_instance_f32;
1177
1178 /**
1179 * @brief Instance structure for the fixed-point CFFT/CIFFT function.
1180 */
1181 typedef struct {
1182 uint16_t fftLen; /**< length of the FFT. */
1183 const q15_t *pTwiddle; /**< points to the Twiddle factor table. */
1184 const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1185 uint16_t bitRevLength; /**< bit reversal table length. */
1186 } csky_cfft_instance_q15;
1187
1188 void csky_cfft_q15(
1189 const csky_cfft_instance_q15 *S,
1190 q15_t *p1,
1191 uint8_t ifftFlag,
1192 uint8_t bitReverseFlag);
1193
1194 /**
1195 * @brief Instance structure for the fixed-point CFFT/CIFFT function.
1196 */
1197 typedef struct {
1198 uint16_t fftLen; /**< length of the FFT. */
1199 const q31_t *pTwiddle; /**< points to the Twiddle factor table. */
1200 const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1201 uint16_t bitRevLength; /**< bit reversal table length. */
1202 } csky_cfft_instance_q31;
1203
1204 void csky_cfft_q31(
1205 const csky_cfft_instance_q31 *S,
1206 q31_t *p1,
1207 uint8_t ifftFlag,
1208 uint8_t bitReverseFlag);
1209
1210 /**
1211 * @brief Instance structure for the floating-point CFFT/CIFFT function.
1212 */
1213 typedef struct {
1214 uint16_t fftLen; /**< length of the FFT. */
1215 const float32_t *pTwiddle; /**< points to the Twiddle factor table. */
1216 const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
1217 uint16_t bitRevLength; /**< bit reversal table length. */
1218 } csky_cfft_instance_f32;
1219
1220 void csky_cfft_f32(
1221 const csky_cfft_instance_f32 *S,
1222 float32_t *p1,
1223 uint8_t ifftFlag,
1224 uint8_t bitReverseFlag);
1225
1226 /**
1227 * @brief Instance structure for the Q15 RFFT/RIFFT function.
1228 */
1229 typedef struct {
1230 uint32_t fftLenReal; /**< length of the real FFT. */
1231 uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1232 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1233 uint8_t bitReverseFlagR;
1234 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1235 uint32_t twidCoefRModifier;
1236 q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
1237 const csky_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */
1238 } csky_rfft_instance_q15;
1239
1240 csky_status csky_rfft_init_q15(
1241 csky_rfft_instance_q15 *S,
1242 uint32_t fftLenReal,
1243 uint32_t ifftFlagR,
1244 uint32_t bitReverseFlag);
1245
1246 void csky_rfft_q15(
1247 const csky_rfft_instance_q15 *S,
1248 q15_t *pSrc,
1249 q15_t *pDst);
1250
1251 /**
1252 * @brief Instance structure for the Q31 RFFT/RIFFT function.
1253 */
1254 typedef struct {
1255 uint32_t fftLenReal; /**< length of the real FFT. */
1256 uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1257 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1258 uint8_t bitReverseFlagR;
1259 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1260 uint32_t twidCoefRModifier;
1261 q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
1262 const csky_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */
1263 } csky_rfft_instance_q31;
1264
1265 csky_status csky_rfft_init_q31(
1266 csky_rfft_instance_q31 *S,
1267 uint32_t fftLenReal,
1268 uint32_t ifftFlagR,
1269 uint32_t bitReverseFlag);
1270
1271 void csky_rfft_q31(
1272 const csky_rfft_instance_q31 *S,
1273 q31_t *pSrc,
1274 q31_t *pDst);
1275
1276 /**
1277 * @brief Instance structure for the floating-point RFFT/RIFFT function.
1278 */
1279 typedef struct {
1280 uint32_t fftLenReal; /**< length of the real FFT. */
1281 uint16_t fftLenBy2; /**< length of the complex FFT. */
1282 /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1283 uint8_t ifftFlagR;
1284 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1285 uint8_t bitReverseFlagR;
1286 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1287 uint32_t twidCoefRModifier;
1288 float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
1289 float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
1290 csky_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
1291 } csky_rfft_instance_f32;
1292
1293 csky_status csky_rfft_init_f32(
1294 csky_rfft_instance_f32 *S,
1295 csky_cfft_radix4_instance_f32 *S_CFFT,
1296 uint32_t fftLenReal,
1297 uint32_t ifftFlagR,
1298 uint32_t bitReverseFlag);
1299
1300 void csky_rfft_f32(
1301 const csky_rfft_instance_f32 *S,
1302 float32_t *pSrc,
1303 float32_t *pDst);
1304
1305 /**
1306 * @brief Instance structure for the floating-point RFFT/RIFFT function.
1307 */
1308 typedef struct {
1309 csky_cfft_instance_f32 Sint; /**< Internal CFFT structure. */
1310 uint16_t fftLenRFFT; /**< length of the real sequence */
1311 float32_t *pTwiddleRFFT; /**< Twiddle factors real stage */
1312 } csky_rfft_fast_instance_f32 ;
1313
1314 csky_status csky_rfft_fast_init_f32 (
1315 csky_rfft_fast_instance_f32 *S,
1316 uint16_t fftLen);
1317
1318 void csky_rfft_fast_f32(
1319 csky_rfft_fast_instance_f32 *S,
1320 float32_t *p, float32_t *pOut,
1321 uint8_t ifftFlag);
1322
1323 /**
1324 * @brief Instance structure for the floating-point DCT4/IDCT4 function.
1325 */
1326 typedef struct {
1327 uint16_t N; /**< length of the DCT4. */
1328 uint16_t Nby2; /**< half of the length of the DCT4. */
1329 float32_t normalize; /**< normalizing factor. */
1330 float32_t *pTwiddle; /**< points to the twiddle factor table. */
1331 float32_t *pCosFactor; /**< points to the cosFactor table. */
1332 csky_rfft_fast_instance_f32 *pRfft; /**< points to the real FFT fast instance. */
1333 csky_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
1334 } csky_dct4_instance_f32;
1335
1336 csky_status csky_dct4_init_f32(
1337 csky_dct4_instance_f32 *S,
1338 csky_rfft_fast_instance_f32 *S_RFFT,
1339 csky_cfft_radix4_instance_f32 *S_CFFT,
1340 uint16_t N,
1341 uint16_t Nby2,
1342 float32_t normalize);
1343
1344 void csky_dct4_f32(
1345 const csky_dct4_instance_f32 *S,
1346 float32_t *pState,
1347 float32_t *pInlineBuffer);
1348
1349 /**
1350 * @brief Instance structure for the Q31 DCT4/IDCT4 function.
1351 */
1352 typedef struct {
1353 uint16_t N; /**< length of the DCT4. */
1354 uint16_t Nby2; /**< half of the length of the DCT4. */
1355 q31_t normalize; /**< normalizing factor. */
1356 q31_t *pTwiddle; /**< points to the twiddle factor table. */
1357 q31_t *pCosFactor; /**< points to the cosFactor table. */
1358 csky_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */
1359 csky_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
1360 } csky_dct4_instance_q31;
1361
1362 csky_status csky_dct4_init_q31(
1363 csky_dct4_instance_q31 *S,
1364 csky_rfft_instance_q31 *S_RFFT,
1365 csky_cfft_radix4_instance_q31 *S_CFFT,
1366 uint16_t N,
1367 uint16_t Nby2,
1368 q31_t normalize);
1369
1370 void csky_dct4_q31(
1371 const csky_dct4_instance_q31 *S,
1372 q31_t *pState,
1373 q31_t *pInlineBuffer);
1374
1375 /**
1376 * @brief Instance structure for the Q15 DCT4/IDCT4 function.
1377 */
1378 typedef struct {
1379 uint16_t N; /**< length of the DCT4. */
1380 uint16_t Nby2; /**< half of the length of the DCT4. */
1381 q15_t normalize; /**< normalizing factor. */
1382 q15_t *pTwiddle; /**< points to the twiddle factor table. */
1383 q15_t *pCosFactor; /**< points to the cosFactor table. */
1384 csky_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */
1385 csky_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
1386 } csky_dct4_instance_q15;
1387
1388 csky_status csky_dct4_init_q15(
1389 csky_dct4_instance_q15 *S,
1390 csky_rfft_instance_q15 *S_RFFT,
1391 csky_cfft_radix4_instance_q15 *S_CFFT,
1392 uint16_t N,
1393 uint16_t Nby2,
1394 q15_t normalize);
1395
1396 void csky_dct4_q15(
1397 const csky_dct4_instance_q15 *S,
1398 q15_t *pState,
1399 q15_t *pInlineBuffer);
1400
1401 void csky_add_f32(
1402 float32_t *pSrcA,
1403 float32_t *pSrcB,
1404 float32_t *pDst,
1405 uint32_t blockSize);
1406
1407 void csky_add_q7(
1408 q7_t *pSrcA,
1409 q7_t *pSrcB,
1410 q7_t *pDst,
1411 uint32_t blockSize);
1412
1413 void csky_add_q15(
1414 q15_t *pSrcA,
1415 q15_t *pSrcB,
1416 q15_t *pDst,
1417 uint32_t blockSize);
1418
1419 void csky_add_q31(
1420 q31_t *pSrcA,
1421 q31_t *pSrcB,
1422 q31_t *pDst,
1423 uint32_t blockSize);
1424
1425 void csky_sub_f32(
1426 float32_t *pSrcA,
1427 float32_t *pSrcB,
1428 float32_t *pDst,
1429 uint32_t blockSize);
1430
1431 void csky_sub_q7(
1432 q7_t *pSrcA,
1433 q7_t *pSrcB,
1434 q7_t *pDst,
1435 uint32_t blockSize);
1436
1437 void csky_sub_q15(
1438 q15_t *pSrcA,
1439 q15_t *pSrcB,
1440 q15_t *pDst,
1441 uint32_t blockSize);
1442
1443 void csky_sub_q31(
1444 q31_t *pSrcA,
1445 q31_t *pSrcB,
1446 q31_t *pDst,
1447 uint32_t blockSize);
1448
1449 void csky_scale_f32(
1450 float32_t *pSrc,
1451 float32_t scale,
1452 float32_t *pDst,
1453 uint32_t blockSize);
1454
1455 void csky_scale_q7(
1456 q7_t *pSrc,
1457 q7_t scaleFract,
1458 int8_t shift,
1459 q7_t *pDst,
1460 uint32_t blockSize);
1461
1462 void csky_scale_q15(
1463 q15_t *pSrc,
1464 q15_t scaleFract,
1465 int8_t shift,
1466 q15_t *pDst,
1467 uint32_t blockSize);
1468
1469 void csky_scale_q31(
1470 q31_t *pSrc,
1471 q31_t scaleFract,
1472 int8_t shift,
1473 q31_t *pDst,
1474 uint32_t blockSize);
1475
1476 void csky_abs_q7(
1477 q7_t *pSrc,
1478 q7_t *pDst,
1479 uint32_t blockSize);
1480
1481 void csky_abs_f32(
1482 float32_t *pSrc,
1483 float32_t *pDst,
1484 uint32_t blockSize);
1485
1486 void csky_abs_q15(
1487 q15_t *pSrc,
1488 q15_t *pDst,
1489 uint32_t blockSize);
1490
1491 void csky_abs_q31(
1492 q31_t *pSrc,
1493 q31_t *pDst,
1494 uint32_t blockSize);
1495
1496 void csky_abs_max_q15(
1497 q15_t *pSrc,
1498 q15_t *pDst,
1499 uint32_t blockSize);
1500
1501 void csky_abs_max_q31(
1502 q31_t *pSrc,
1503 q31_t *pDst,
1504 uint32_t blockSize);
1505
1506 void csky_dot_prod_f32(
1507 float32_t *pSrcA,
1508 float32_t *pSrcB,
1509 uint32_t blockSize,
1510 float32_t *result);
1511
1512 void csky_dot_prod_q7(
1513 q7_t *pSrcA,
1514 q7_t *pSrcB,
1515 uint32_t blockSize,
1516 q31_t *result);
1517
1518 void csky_dot_prod_q15(
1519 q15_t *pSrcA,
1520 q15_t *pSrcB,
1521 uint32_t blockSize,
1522 q63_t *result);
1523
1524 void csky_dot_prod_q31(
1525 q31_t *pSrcA,
1526 q31_t *pSrcB,
1527 uint32_t blockSize,
1528 q63_t *result);
1529
1530 void csky_shift_q7(
1531 q7_t *pSrc,
1532 int8_t shiftBits,
1533 q7_t *pDst,
1534 uint32_t blockSize);
1535
1536 void csky_shift_q15(
1537 q15_t *pSrc,
1538 int8_t shiftBits,
1539 q15_t *pDst,
1540 uint32_t blockSize);
1541
1542 void csky_shift_q31(
1543 q31_t *pSrc,
1544 int8_t shiftBits,
1545 q31_t *pDst,
1546 uint32_t blockSize);
1547
1548 void csky_offset_f32(
1549 float32_t *pSrc,
1550 float32_t offset,
1551 float32_t *pDst,
1552 uint32_t blockSize);
1553
1554 void csky_offset_q7(
1555 q7_t *pSrc,
1556 q7_t offset,
1557 q7_t *pDst,
1558 uint32_t blockSize);
1559
1560 void csky_offset_q15(
1561 q15_t *pSrc,
1562 q15_t offset,
1563 q15_t *pDst,
1564 uint32_t blockSize);
1565
1566 void csky_offset_q31(
1567 q31_t *pSrc,
1568 q31_t offset,
1569 q31_t *pDst,
1570 uint32_t blockSize);
1571
1572 void csky_negate_f32(
1573 float32_t *pSrc,
1574 float32_t *pDst,
1575 uint32_t blockSize);
1576
1577 void csky_negate_q7(
1578 q7_t *pSrc,
1579 q7_t *pDst,
1580 uint32_t blockSize);
1581
1582 void csky_negate_q15(
1583 q15_t *pSrc,
1584 q15_t *pDst,
1585 uint32_t blockSize);
1586
1587 void csky_negate_q31(
1588 q31_t *pSrc,
1589 q31_t *pDst,
1590 uint32_t blockSize);
1591
1592 void csky_copy_f32(
1593 float32_t *pSrc,
1594 float32_t *pDst,
1595 uint32_t blockSize);
1596
1597 void csky_copy_q7(
1598 q7_t *pSrc,
1599 q7_t *pDst,
1600 uint32_t blockSize);
1601
1602 void csky_copy_q15(
1603 q15_t *pSrc,
1604 q15_t *pDst,
1605 uint32_t blockSize);
1606
1607 void csky_copy_q31(
1608 q31_t *pSrc,
1609 q31_t *pDst,
1610 uint32_t blockSize);
1611
1612 void csky_fill_f32(
1613 float32_t value,
1614 float32_t *pDst,
1615 uint32_t blockSize);
1616
1617 void csky_fill_q7(
1618 q7_t value,
1619 q7_t *pDst,
1620 uint32_t blockSize);
1621
1622 void csky_fill_q15(
1623 q15_t value,
1624 q15_t *pDst,
1625 uint32_t blockSize);
1626
1627 void csky_fill_q31(
1628 q31_t value,
1629 q31_t *pDst,
1630 uint32_t blockSize);
1631
1632 void csky_conv_f32(
1633 float32_t *pSrcA,
1634 uint32_t srcALen,
1635 float32_t *pSrcB,
1636 uint32_t srcBLen,
1637 float32_t *pDst);
1638
1639 void csky_conv_opt_q15(
1640 q15_t *pSrcA,
1641 uint32_t srcALen,
1642 q15_t *pSrcB,
1643 uint32_t srcBLen,
1644 q15_t *pDst,
1645 q15_t *pScratch1,
1646 q15_t *pScratch2);
1647
1648 void csky_conv_q15(
1649 q15_t *pSrcA,
1650 uint32_t srcALen,
1651 q15_t *pSrcB,
1652 uint32_t srcBLen,
1653 q15_t *pDst);
1654
1655 void csky_conv_fast_q15(
1656 q15_t *pSrcA,
1657 uint32_t srcALen,
1658 q15_t *pSrcB,
1659 uint32_t srcBLen,
1660 q15_t *pDst);
1661
1662 void csky_conv_fast_opt_q15(
1663 q15_t *pSrcA,
1664 uint32_t srcALen,
1665 q15_t *pSrcB,
1666 uint32_t srcBLen,
1667 q15_t *pDst,
1668 q15_t *pScratch1,
1669 q15_t *pScratch2);
1670
1671 void csky_conv_q31(
1672 q31_t *pSrcA,
1673 uint32_t srcALen,
1674 q31_t *pSrcB,
1675 uint32_t srcBLen,
1676 q31_t *pDst);
1677
1678 void csky_conv_fast_q31(
1679 q31_t *pSrcA,
1680 uint32_t srcALen,
1681 q31_t *pSrcB,
1682 uint32_t srcBLen,
1683 q31_t *pDst);
1684
1685 void csky_conv_opt_q7(
1686 q7_t *pSrcA,
1687 uint32_t srcALen,
1688 q7_t *pSrcB,
1689 uint32_t srcBLen,
1690 q7_t *pDst,
1691 q15_t *pScratch1,
1692 q15_t *pScratch2);
1693
1694 void csky_conv_q7(
1695 q7_t *pSrcA,
1696 uint32_t srcALen,
1697 q7_t *pSrcB,
1698 uint32_t srcBLen,
1699 q7_t *pDst);
1700
1701 csky_status csky_conv_partial_f32(
1702 float32_t *pSrcA,
1703 uint32_t srcALen,
1704 float32_t *pSrcB,
1705 uint32_t srcBLen,
1706 float32_t *pDst,
1707 uint32_t firstIndex,
1708 uint32_t numPoints);
1709
1710 csky_status csky_conv_partial_opt_q15(
1711 q15_t *pSrcA,
1712 uint32_t srcALen,
1713 q15_t *pSrcB,
1714 uint32_t srcBLen,
1715 q15_t *pDst,
1716 uint32_t firstIndex,
1717 uint32_t numPoints,
1718 q15_t *pScratch1,
1719 q15_t *pScratch2);
1720
1721 csky_status csky_conv_partial_q15(
1722 q15_t *pSrcA,
1723 uint32_t srcALen,
1724 q15_t *pSrcB,
1725 uint32_t srcBLen,
1726 q15_t *pDst,
1727 uint32_t firstIndex,
1728 uint32_t numPoints);
1729
1730 csky_status csky_conv_partial_fast_q15(
1731 q15_t *pSrcA,
1732 uint32_t srcALen,
1733 q15_t *pSrcB,
1734 uint32_t srcBLen,
1735 q15_t *pDst,
1736 uint32_t firstIndex,
1737 uint32_t numPoints);
1738
1739 csky_status csky_conv_partial_fast_opt_q15(
1740 q15_t *pSrcA,
1741 uint32_t srcALen,
1742 q15_t *pSrcB,
1743 uint32_t srcBLen,
1744 q15_t *pDst,
1745 uint32_t firstIndex,
1746 uint32_t numPoints,
1747 q15_t *pScratch1,
1748 q15_t *pScratch2);
1749
1750 csky_status csky_conv_partial_q31(
1751 q31_t *pSrcA,
1752 uint32_t srcALen,
1753 q31_t *pSrcB,
1754 uint32_t srcBLen,
1755 q31_t *pDst,
1756 uint32_t firstIndex,
1757 uint32_t numPoints);
1758
1759 csky_status csky_conv_partial_fast_q31(
1760 q31_t *pSrcA,
1761 uint32_t srcALen,
1762 q31_t *pSrcB,
1763 uint32_t srcBLen,
1764 q31_t *pDst,
1765 uint32_t firstIndex,
1766 uint32_t numPoints);
1767
1768 csky_status csky_conv_partial_opt_q7(
1769 q7_t *pSrcA,
1770 uint32_t srcALen,
1771 q7_t *pSrcB,
1772 uint32_t srcBLen,
1773 q7_t *pDst,
1774 uint32_t firstIndex,
1775 uint32_t numPoints,
1776 q15_t *pScratch1,
1777 q15_t *pScratch2);
1778
1779 csky_status csky_conv_partial_q7(
1780 q7_t *pSrcA,
1781 uint32_t srcALen,
1782 q7_t *pSrcB,
1783 uint32_t srcBLen,
1784 q7_t *pDst,
1785 uint32_t firstIndex,
1786 uint32_t numPoints);
1787
1788 /**
1789 * functions for the yunVoice functions.
1790 */
1791 q15_t csky_dsp_lib_vec_max_abs16(
1792 q15_t *A,
1793 uint32_t N);
1794
1795 q31_t csky_dsp_lib_vec_max_abs32(
1796 q31_t *A,
1797 uint32_t N);
1798
1799 void csky_dsp_lib_vec_abs16(
1800 q15_t *A,
1801 uint32_t N,
1802 q15_t *C);
1803
1804 void csky_dsp_lib_vec_abs32(
1805 q31_t *A,
1806 uint32_t N,
1807 q31_t *C);
1808
1809 void csky_dsp_lib_vec_add16(
1810 q15_t *A,
1811 q15_t *B,
1812 uint32_t N,
1813 q15_t *C);
1814
1815 void csky_dsp_lib_vec_add32(
1816 q31_t *A,
1817 q31_t *B,
1818 uint32_t N,
1819 q31_t *C);
1820
1821 void csky_dsp_lib_vec_cx_conj_q15(
1822 q15_t *A,
1823 uint32_t N,
1824 q15_t *B);
1825
1826 void csky_dsp_lib_vec_cx_conj_q31(
1827 q31_t *A,
1828 uint32_t N,
1829 q31_t *C);
1830
1831 q31_t csky_dsp_lib_vec_dot_q15(
1832 q15_t *A,
1833 q15_t *B,
1834 uint32_t N);
1835
1836 q31_t csky_dsp_lib_vec_dot_q31(
1837 q31_t *A,
1838 q31_t *B,
1839 uint32_t N);
1840
1841 void csky_dsp_lib_mat_cx_add16(
1842 cq15_t *A,
1843 cq15_t *B,
1844 uint32_t N,
1845 uint32_t M,
1846 cq15_t *C);
1847
1848 void csky_dsp_lib_mat_cx_add32(
1849 cq31_t *A,
1850 cq31_t *B,
1851 uint32_t N,
1852 uint32_t M,
1853 cq31_t *C);
1854
1855 void csky_dsp_lib_mat_cx_mul_q15(
1856 cq15_t *A,
1857 cq15_t *B,
1858 uint32_t N,
1859 uint32_t M,
1860 uint32_t L,
1861 cq15_t *C);
1862
1863 void csky_dsp_lib_mat_cx_mul_q31(
1864 cq31_t *A,
1865 cq31_t *B,
1866 uint32_t N,
1867 uint32_t M,
1868 uint32_t L,
1869 cq31_t *C);
1870
1871 void csky_dsp_lib_mat_cx_sub16(
1872 cq15_t *A,
1873 cq15_t *B,
1874 uint32_t N,
1875 uint32_t M,
1876 cq15_t *C);
1877
1878 void csky_dsp_lib_mat_cx_sub32(
1879 cq31_t *A,
1880 cq31_t *B,
1881 uint32_t N,
1882 uint32_t M,
1883 cq31_t *C);
1884
1885 void csky_dsp_lib_vec_mul_q15(
1886 q15_t *A,
1887 q15_t *B,
1888 uint32_t N,
1889 q15_t *C);
1890
1891 void csky_dsp_lib_vec_mul_q31(
1892 q31_t *A,
1893 q31_t *B,
1894 uint32_t N,
1895 q31_t *C);
1896
1897 q31_t csky_dsp_lib_pow_int32(
1898 q31_t arg_in_x,
1899 q15_t arg_exp_in_x,
1900 q31_t arg_in_y,
1901 q15_t arg_exp_in_y,
1902 q31_t *arg_exp_out);
1903
1904 void csky_dsp_lib_vec_scale_q15(
1905 q15_t *A,
1906 q15_t scaleFract,
1907 int8_t shift,
1908 q15_t *B,
1909 uint32_t N);
1910
1911 void csky_dsp_lib_vec_scale_q31(
1912 q31_t *A,
1913 q31_t scaleFract,
1914 int8_t shift,
1915 q31_t *B,
1916 uint32_t N);
1917
1918 void csky_dsp_lib_vec_shf16(
1919 q15_t *A,
1920 int8_t shift_val,
1921 uint32_t N,
1922 q15_t *C);
1923
1924 void csky_dsp_lib_vec_shf32(
1925 q31_t *A,
1926 q31_t shift_val,
1927 uint32_t N,
1928 q31_t *C);
1929
1930 q15_t csky_dsp_lib_sqrt_int32(
1931 q31_t x,
1932 uint32_t rnd_flag);
1933
1934 void csky_dsp_lib_vec_sub16(
1935 q15_t *A,
1936 q15_t *B,
1937 uint32_t N,
1938 q15_t *C);
1939
1940 void csky_dsp_lib_vec_sub32(
1941 q31_t *A,
1942 q31_t *B,
1943 uint32_t N,
1944 q31_t *C);
1945
1946 q63_t csky_dsp_lib_vec_sum16(
1947 q15_t *A,
1948 uint32_t N);
1949
1950 q63_t csky_dsp_lib_vec_sum32(
1951 q31_t *A,
1952 uint32_t N);
1953
1954 void csky_fft_lib_cx16_fft(
1955 q31_t log2_buf_len,
1956 q15_t *in_buf,
1957 q15_t *out_buf,
1958 const q15_t *twi_table,
1959 const uint16_t *bitrev_tbl,
1960 q15_t *temp_buf,
1961 q7_t *ScaleShift,
1962 q31_t br);
1963
1964 void csky_fft_lib_cx32_fft(
1965 q31_t log2_buf_len,
1966 q31_t *in_buf,
1967 q31_t *out_buf,
1968 const q31_t *twi_table,
1969 const uint16_t *bitrev_tbl,
1970 q31_t *temp_buf,
1971 q31_t br);
1972
1973 void csky_fft_lib_cx16_ifft(
1974 q31_t log2_buf_len,
1975 q15_t *in_buf,
1976 q15_t *out_buf,
1977 const q15_t *twi_table,
1978 const uint16_t *bitrev_tbl,
1979 q15_t *temp_buf,
1980 q7_t *ScaleShift,
1981 q31_t br);
1982
1983 void csky_fft_lib_cx32_ifft(
1984 q31_t log2_buf_len,
1985 q31_t *in_buf,
1986 q31_t *out_buf,
1987 const q31_t *twi_table,
1988 const uint16_t *bitrev_tbl,
1989 q31_t *temp_buf,
1990 q31_t br);
1991
1992 void csky_fft_lib_int16_fft(
1993 q31_t log2_buf_len,
1994 q15_t *in_buf,
1995 q15_t *out_buf,
1996 const q15_t *twi_table,
1997 const q15_t *last_stage_twi_table,
1998 const uint16_t *bitrev_tbl,
1999 q15_t *temp_buf,
2000 q7_t *ScaleShift,
2001 q31_t br);
2002
2003 void csky_fft_lib_int32_fft(
2004 q31_t log2_buf_len,
2005 q31_t *in_buf,
2006 q31_t *out_buf,
2007 const q31_t *twi_table,
2008 const q31_t *last_stage_twi_table,
2009 const uint16_t *bitrev_tbl,
2010 q31_t *temp_buf,
2011 q31_t br);
2012
2013 void csky_fft_lib_int16_ifft(
2014 q31_t log2_buf_len,
2015 q15_t *in_buf,
2016 q15_t *out_buf,
2017 const q15_t *twi_table,
2018 const q15_t *last_stage_twi_table,
2019 const uint16_t *bitrev_tbl,
2020 q15_t *temp_buf,
2021 q7_t *ScaleShift,
2022 q31_t br);
2023
2024 void csky_fft_lib_int32_ifft(
2025 q31_t log2_buf_len,
2026 q31_t *in_buf,
2027 q31_t *out_buf,
2028 const q31_t *twi_table,
2029 const q31_t *last_stage_twi_table,
2030 const uint16_t *bitrev_tbl,
2031 q31_t *temp_buf,
2032 q31_t br);
2033
2034 /**
2035 * @brief Instance structure for the Q15 FIR decimator.
2036 */
2037 typedef struct {
2038 uint8_t M; /**< decimation factor. */
2039 uint16_t numTaps; /**< number of coefficients in the filter. */
2040 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2041 q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2042 } csky_fir_decimate_instance_q15;
2043
2044 /**
2045 * @brief Instance structure for the Q31 FIR decimator.
2046 */
2047 typedef struct {
2048 uint8_t M; /**< decimation factor. */
2049 uint16_t numTaps; /**< number of coefficients in the filter. */
2050 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2051 q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2052 } csky_fir_decimate_instance_q31;
2053
2054 /**
2055 * @brief Instance structure for the floating-point FIR decimator.
2056 */
2057 typedef struct {
2058 uint8_t M; /**< decimation factor. */
2059 uint16_t numTaps; /**< number of coefficients in the filter. */
2060 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2061 float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2062 } csky_fir_decimate_instance_f32;
2063
2064 void csky_fir_decimate_f32(
2065 const csky_fir_decimate_instance_f32 *S,
2066 float32_t *pSrc,
2067 float32_t *pDst,
2068 uint32_t blockSize);
2069
2070 csky_status csky_fir_decimate_init_f32(
2071 csky_fir_decimate_instance_f32 *S,
2072 uint16_t numTaps,
2073 uint8_t M,
2074 float32_t *pCoeffs,
2075 float32_t *pState,
2076 uint32_t blockSize);
2077
2078 void csky_fir_decimate_q15(
2079 const csky_fir_decimate_instance_q15 *S,
2080 q15_t *pSrc,
2081 q15_t *pDst,
2082 uint32_t blockSize);
2083
2084 void csky_fir_decimate_fast_q15(
2085 const csky_fir_decimate_instance_q15 *S,
2086 q15_t *pSrc,
2087 q15_t *pDst,
2088 uint32_t blockSize);
2089
2090 csky_status csky_fir_decimate_init_q15(
2091 csky_fir_decimate_instance_q15 *S,
2092 uint16_t numTaps,
2093 uint8_t M,
2094 q15_t *pCoeffs,
2095 q15_t *pState,
2096 uint32_t blockSize);
2097
2098 void csky_fir_decimate_q31(
2099 const csky_fir_decimate_instance_q31 *S,
2100 q31_t *pSrc,
2101 q31_t *pDst,
2102 uint32_t blockSize);
2103
2104 void csky_fir_decimate_fast_q31(
2105 csky_fir_decimate_instance_q31 *S,
2106 q31_t *pSrc,
2107 q31_t *pDst,
2108 uint32_t blockSize);
2109
2110 csky_status csky_fir_decimate_init_q31(
2111 csky_fir_decimate_instance_q31 *S,
2112 uint16_t numTaps,
2113 uint8_t M,
2114 q31_t *pCoeffs,
2115 q31_t *pState,
2116 uint32_t blockSize);
2117
2118 /**
2119 * @brief Instance structure for the Q15 FIR interpolator.
2120 */
2121 typedef struct {
2122 uint8_t L; /**< upsample factor. */
2123 uint16_t phaseLength; /**< length of each polyphase filter component. */
2124 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
2125 q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2126 } csky_fir_interpolate_instance_q15;
2127
2128 /**
2129 * @brief Instance structure for the Q31 FIR interpolator.
2130 */
2131 typedef struct {
2132 uint8_t L; /**< upsample factor. */
2133 uint16_t phaseLength; /**< length of each polyphase filter component. */
2134 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
2135 q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2136 } csky_fir_interpolate_instance_q31;
2137
2138 /**
2139 * @brief Instance structure for the floating-point FIR interpolator.
2140 */
2141 typedef struct {
2142 uint8_t L; /**< upsample factor. */
2143 uint16_t phaseLength; /**< length of each polyphase filter component. */
2144 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
2145 float32_t *pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */
2146 } csky_fir_interpolate_instance_f32;
2147
2148 void csky_fir_interpolate_q15(
2149 const csky_fir_interpolate_instance_q15 *S,
2150 q15_t *pSrc,
2151 q15_t *pDst,
2152 uint32_t blockSize);
2153
2154 csky_status csky_fir_interpolate_init_q15(
2155 csky_fir_interpolate_instance_q15 *S,
2156 uint8_t L,
2157 uint16_t numTaps,
2158 q15_t *pCoeffs,
2159 q15_t *pState,
2160 uint32_t blockSize);
2161
2162 void csky_fir_interpolate_q31(
2163 const csky_fir_interpolate_instance_q31 *S,
2164 q31_t *pSrc,
2165 q31_t *pDst,
2166 uint32_t blockSize);
2167
2168 csky_status csky_fir_interpolate_init_q31(
2169 csky_fir_interpolate_instance_q31 *S,
2170 uint8_t L,
2171 uint16_t numTaps,
2172 q31_t *pCoeffs,
2173 q31_t *pState,
2174 uint32_t blockSize);
2175
2176 void csky_fir_interpolate_f32(
2177 const csky_fir_interpolate_instance_f32 *S,
2178 float32_t *pSrc,
2179 float32_t *pDst,
2180 uint32_t blockSize);
2181
2182 csky_status csky_fir_interpolate_init_f32(
2183 csky_fir_interpolate_instance_f32 *S,
2184 uint8_t L,
2185 uint16_t numTaps,
2186 float32_t *pCoeffs,
2187 float32_t *pState,
2188 uint32_t blockSize);
2189
2190 /**
2191 * @brief Instance structure for the high precision Q31 Biquad cascade filter.
2192 */
2193 typedef struct {
2194 uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
2195 q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */
2196 q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
2197 uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */
2198 } csky_biquad_cas_df1_32x64_ins_q31;
2199
2200 void csky_biquad_cas_df1_32x64_q31(
2201 const csky_biquad_cas_df1_32x64_ins_q31 *S,
2202 q31_t *pSrc,
2203 q31_t *pDst,
2204 uint32_t blockSize);
2205
2206 void csky_biquad_cas_df1_32x64_init_q31(
2207 csky_biquad_cas_df1_32x64_ins_q31 *S,
2208 uint8_t numStages,
2209 q31_t *pCoeffs,
2210 q63_t *pState,
2211 uint8_t postShift);
2212
2213 /**
2214 * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
2215 */
2216 typedef struct {
2217 uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
2218 float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */
2219 float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
2220 } csky_biquad_cascade_df2T_instance_f32;
2221
2222 /**
2223 * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
2224 */
2225 typedef struct {
2226 uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
2227 float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */
2228 float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
2229 } csky_biquad_cascade_stereo_df2T_instance_f32;
2230
2231 /**
2232 * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
2233 */
2234 typedef struct {
2235 uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
2236 float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */
2237 float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
2238 } csky_biquad_cascade_df2T_instance_f64;
2239
2240 void csky_biquad_cascade_df2T_f32(
2241 const csky_biquad_cascade_df2T_instance_f32 *S,
2242 float32_t *pSrc,
2243 float32_t *pDst,
2244 uint32_t blockSize);
2245
2246 void csky_biquad_cascade_stereo_df2T_f32(
2247 const csky_biquad_cascade_stereo_df2T_instance_f32 *S,
2248 float32_t *pSrc,
2249 float32_t *pDst,
2250 uint32_t blockSize);
2251
2252 void csky_biquad_cascade_df2T_f64(
2253 const csky_biquad_cascade_df2T_instance_f64 *S,
2254 float64_t *pSrc,
2255 float64_t *pDst,
2256 uint32_t blockSize);
2257
2258 void csky_biquad_cascade_df2T_init_f32(
2259 csky_biquad_cascade_df2T_instance_f32 *S,
2260 uint8_t numStages,
2261 float32_t *pCoeffs,
2262 float32_t *pState);
2263
2264 void csky_biquad_cascade_stereo_df2T_init_f32(
2265 csky_biquad_cascade_stereo_df2T_instance_f32 *S,
2266 uint8_t numStages,
2267 float32_t *pCoeffs,
2268 float32_t *pState);
2269
2270 void csky_biquad_cascade_df2T_init_f64(
2271 csky_biquad_cascade_df2T_instance_f64 *S,
2272 uint8_t numStages,
2273 float64_t *pCoeffs,
2274 float64_t *pState);
2275
2276 /**
2277 * @brief Instance structure for the Q15 FIR lattice filter.
2278 */
2279 typedef struct {
2280 uint16_t numStages; /**< number of filter stages. */
2281 q15_t *pState; /**< points to the state variable array. The array is of length numStages. */
2282 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
2283 } csky_fir_lattice_instance_q15;
2284
2285 /**
2286 * @brief Instance structure for the Q31 FIR lattice filter.
2287 */
2288 typedef struct {
2289 uint16_t numStages; /**< number of filter stages. */
2290 q31_t *pState; /**< points to the state variable array. The array is of length numStages. */
2291 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
2292 } csky_fir_lattice_instance_q31;
2293
2294 /**
2295 * @brief Instance structure for the floating-point FIR lattice filter.
2296 */
2297 typedef struct {
2298 uint16_t numStages; /**< number of filter stages. */
2299 float32_t *pState; /**< points to the state variable array. The array is of length numStages. */
2300 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
2301 } csky_fir_lattice_instance_f32;
2302
2303 void csky_fir_lattice_init_q15(
2304 csky_fir_lattice_instance_q15 *S,
2305 uint16_t numStages,
2306 q15_t *pCoeffs,
2307 q15_t *pState);
2308
2309 void csky_fir_lattice_q15(
2310 const csky_fir_lattice_instance_q15 *S,
2311 q15_t *pSrc,
2312 q15_t *pDst,
2313 uint32_t blockSize);
2314
2315 void csky_fir_lattice_init_q31(
2316 csky_fir_lattice_instance_q31 *S,
2317 uint16_t numStages,
2318 q31_t *pCoeffs,
2319 q31_t *pState);
2320
2321 void csky_fir_lattice_q31(
2322 const csky_fir_lattice_instance_q31 *S,
2323 q31_t *pSrc,
2324 q31_t *pDst,
2325 uint32_t blockSize);
2326
2327 void csky_fir_lattice_init_f32(
2328 csky_fir_lattice_instance_f32 *S,
2329 uint16_t numStages,
2330 float32_t *pCoeffs,
2331 float32_t *pState);
2332
2333 void csky_fir_lattice_f32(
2334 const csky_fir_lattice_instance_f32 *S,
2335 float32_t *pSrc,
2336 float32_t *pDst,
2337 uint32_t blockSize);
2338
2339 /**
2340 * @brief Instance structure for the Q15 IIR lattice filter.
2341 */
2342 typedef struct {
2343 uint16_t numStages; /**< number of stages in the filter. */
2344 q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
2345 q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
2346 q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2347 } csky_iir_lattice_instance_q15;
2348
2349 /**
2350 * @brief Instance structure for the Q31 IIR lattice filter.
2351 */
2352 typedef struct {
2353 uint16_t numStages; /**< number of stages in the filter. */
2354 q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
2355 q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
2356 q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2357 } csky_iir_lattice_instance_q31;
2358
2359 /**
2360 * @brief Instance structure for the floating-point IIR lattice filter.
2361 */
2362 typedef struct {
2363 uint16_t numStages; /**< number of stages in the filter. */
2364 float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
2365 float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
2366 float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2367 } csky_iir_lattice_instance_f32;
2368
2369 void csky_iir_lattice_f32(
2370 const csky_iir_lattice_instance_f32 *S,
2371 float32_t *pSrc,
2372 float32_t *pDst,
2373 uint32_t blockSize);
2374
2375 void csky_iir_lattice_init_f32(
2376 csky_iir_lattice_instance_f32 *S,
2377 uint16_t numStages,
2378 float32_t *pkCoeffs,
2379 float32_t *pvCoeffs,
2380 float32_t *pState,
2381 uint32_t blockSize);
2382
2383 void csky_iir_lattice_q31(
2384 const csky_iir_lattice_instance_q31 *S,
2385 q31_t *pSrc,
2386 q31_t *pDst,
2387 uint32_t blockSize);
2388
2389 void csky_iir_lattice_init_q31(
2390 csky_iir_lattice_instance_q31 *S,
2391 uint16_t numStages,
2392 q31_t *pkCoeffs,
2393 q31_t *pvCoeffs,
2394 q31_t *pState,
2395 uint32_t blockSize);
2396
2397 void csky_iir_lattice_q15(
2398 const csky_iir_lattice_instance_q15 *S,
2399 q15_t *pSrc,
2400 q15_t *pDst,
2401 uint32_t blockSize);
2402
2403 void csky_iir_lattice_init_q15(
2404 csky_iir_lattice_instance_q15 *S,
2405 uint16_t numStages,
2406 q15_t *pkCoeffs,
2407 q15_t *pvCoeffs,
2408 q15_t *pState,
2409 uint32_t blockSize);
2410
2411 /**
2412 * @brief Instance structure for the floating-point LMS filter.
2413 */
2414 typedef struct {
2415 uint16_t numTaps; /**< number of coefficients in the filter. */
2416 float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2417 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2418 float32_t mu; /**< step size that controls filter coefficient updates. */
2419 } csky_lms_instance_f32;
2420
2421 void csky_lms_f32(
2422 const csky_lms_instance_f32 *S,
2423 float32_t *pSrc,
2424 float32_t *pRef,
2425 float32_t *pOut,
2426 float32_t *pErr,
2427 uint32_t blockSize);
2428
2429 void csky_lms_init_f32(
2430 csky_lms_instance_f32 *S,
2431 uint16_t numTaps,
2432 float32_t *pCoeffs,
2433 float32_t *pState,
2434 float32_t mu,
2435 uint32_t blockSize);
2436
2437 /**
2438 * @brief Instance structure for the Q15 LMS filter.
2439 */
2440 typedef struct {
2441 uint16_t numTaps; /**< number of coefficients in the filter. */
2442 q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2443 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2444 q15_t mu; /**< step size that controls filter coefficient updates. */
2445 uint32_t postShift; /**< bit shift applied to coefficients. */
2446 } csky_lms_instance_q15;
2447
2448 void csky_lms_init_q15(
2449 csky_lms_instance_q15 *S,
2450 uint16_t numTaps,
2451 q15_t *pCoeffs,
2452 q15_t *pState,
2453 q15_t mu,
2454 uint32_t blockSize,
2455 uint32_t postShift);
2456
2457 void csky_lms_q15(
2458 const csky_lms_instance_q15 *S,
2459 q15_t *pSrc,
2460 q15_t *pRef,
2461 q15_t *pOut,
2462 q15_t *pErr,
2463 uint32_t blockSize);
2464
2465 /**
2466 * @brief Instance structure for the Q31 LMS filter.
2467 */
2468 typedef struct {
2469 uint16_t numTaps; /**< number of coefficients in the filter. */
2470 q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2471 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2472 q31_t mu; /**< step size that controls filter coefficient updates. */
2473 uint32_t postShift; /**< bit shift applied to coefficients. */
2474 } csky_lms_instance_q31;
2475
2476 void csky_lms_q31(
2477 const csky_lms_instance_q31 *S,
2478 q31_t *pSrc,
2479 q31_t *pRef,
2480 q31_t *pOut,
2481 q31_t *pErr,
2482 uint32_t blockSize);
2483
2484 void csky_lms_init_q31(
2485 csky_lms_instance_q31 *S,
2486 uint16_t numTaps,
2487 q31_t *pCoeffs,
2488 q31_t *pState,
2489 q31_t mu,
2490 uint32_t blockSize,
2491 uint32_t postShift);
2492
2493 /**
2494 * @brief Instance structure for the floating-point normalized LMS filter.
2495 */
2496 typedef struct {
2497 uint16_t numTaps; /**< number of coefficients in the filter. */
2498 float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2499 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2500 float32_t mu; /**< step size that control filter coefficient updates. */
2501 float32_t energy; /**< saves previous frame energy. */
2502 float32_t x0; /**< saves previous input sample. */
2503 } csky_lms_norm_instance_f32;
2504
2505 void csky_lms_norm_f32(
2506 csky_lms_norm_instance_f32 *S,
2507 float32_t *pSrc,
2508 float32_t *pRef,
2509 float32_t *pOut,
2510 float32_t *pErr,
2511 uint32_t blockSize);
2512
2513 void csky_lms_norm_init_f32(
2514 csky_lms_norm_instance_f32 *S,
2515 uint16_t numTaps,
2516 float32_t *pCoeffs,
2517 float32_t *pState,
2518 float32_t mu,
2519 uint32_t blockSize);
2520
2521 /**
2522 * @brief Instance structure for the Q31 normalized LMS filter.
2523 */
2524 typedef struct {
2525 uint16_t numTaps; /**< number of coefficients in the filter. */
2526 q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2527 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2528 q31_t mu; /**< step size that controls filter coefficient updates. */
2529 uint8_t postShift; /**< bit shift applied to coefficients. */
2530 q31_t *recipTable; /**< points to the reciprocal initial value table. */
2531 q31_t energy; /**< saves previous frame energy. */
2532 q31_t x0; /**< saves previous input sample. */
2533 } csky_lms_norm_instance_q31;
2534
2535 void csky_lms_norm_q31(
2536 csky_lms_norm_instance_q31 *S,
2537 q31_t *pSrc,
2538 q31_t *pRef,
2539 q31_t *pOut,
2540 q31_t *pErr,
2541 uint32_t blockSize);
2542
2543 void csky_lms_norm_init_q31(
2544 csky_lms_norm_instance_q31 *S,
2545 uint16_t numTaps,
2546 q31_t *pCoeffs,
2547 q31_t *pState,
2548 q31_t mu,
2549 uint32_t blockSize,
2550 uint8_t postShift);
2551
2552 /**
2553 * @brief Instance structure for the Q15 normalized LMS filter.
2554 */
2555 typedef struct {
2556 uint16_t numTaps; /**< Number of coefficients in the filter. */
2557 q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2558 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
2559 q15_t mu; /**< step size that controls filter coefficient updates. */
2560 uint8_t postShift; /**< bit shift applied to coefficients. */
2561 q15_t *recipTable; /**< Points to the reciprocal initial value table. */
2562 q15_t energy; /**< saves previous frame energy. */
2563 q15_t x0; /**< saves previous input sample. */
2564 } csky_lms_norm_instance_q15;
2565
2566 void csky_lms_norm_q15(
2567 csky_lms_norm_instance_q15 *S,
2568 q15_t *pSrc,
2569 q15_t *pRef,
2570 q15_t *pOut,
2571 q15_t *pErr,
2572 uint32_t blockSize);
2573
2574 void csky_lms_norm_init_q15(
2575 csky_lms_norm_instance_q15 *S,
2576 uint16_t numTaps,
2577 q15_t *pCoeffs,
2578 q15_t *pState,
2579 q15_t mu,
2580 uint32_t blockSize,
2581 uint8_t postShift);
2582
2583 void csky_correlate_f32(
2584 float32_t *pSrcA,
2585 uint32_t srcALen,
2586 float32_t *pSrcB,
2587 uint32_t srcBLen,
2588 float32_t *pDst);
2589
2590 void csky_correlate_opt_q15(
2591 q15_t *pSrcA,
2592 uint32_t srcALen,
2593 q15_t *pSrcB,
2594 uint32_t srcBLen,
2595 q15_t *pDst,
2596 q15_t *pScratch);
2597
2598 void csky_correlate_q15(
2599 q15_t *pSrcA,
2600 uint32_t srcALen,
2601 q15_t *pSrcB,
2602 uint32_t srcBLen,
2603 q15_t *pDst);
2604
2605 void csky_correlate_fast_q15(
2606 q15_t *pSrcA,
2607 uint32_t srcALen,
2608 q15_t *pSrcB,
2609 uint32_t srcBLen,
2610 q15_t *pDst);
2611
2612 void csky_correlate_fast_opt_q15(
2613 q15_t *pSrcA,
2614 uint32_t srcALen,
2615 q15_t *pSrcB,
2616 uint32_t srcBLen,
2617 q15_t *pDst,
2618 q15_t *pScratch);
2619
2620 void csky_correlate_q31(
2621 q31_t *pSrcA,
2622 uint32_t srcALen,
2623 q31_t *pSrcB,
2624 uint32_t srcBLen,
2625 q31_t *pDst);
2626
2627 void csky_correlate_fast_q31(
2628 q31_t *pSrcA,
2629 uint32_t srcALen,
2630 q31_t *pSrcB,
2631 uint32_t srcBLen,
2632 q31_t *pDst);
2633
2634 void csky_correlate_opt_q7(
2635 q7_t *pSrcA,
2636 uint32_t srcALen,
2637 q7_t *pSrcB,
2638 uint32_t srcBLen,
2639 q7_t *pDst,
2640 q15_t *pScratch1,
2641 q15_t *pScratch2);
2642
2643 void csky_correlate_q7(
2644 q7_t *pSrcA,
2645 uint32_t srcALen,
2646 q7_t *pSrcB,
2647 uint32_t srcBLen,
2648 q7_t *pDst);
2649
2650 /**
2651 * @brief Instance structure for the floating-point sparse FIR filter.
2652 */
2653 typedef struct {
2654 uint16_t numTaps; /**< number of coefficients in the filter. */
2655 uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
2656 float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2657 float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2658 uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
2659 int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
2660 } csky_fir_sparse_instance_f32;
2661
2662 /**
2663 * @brief Instance structure for the Q31 sparse FIR filter.
2664 */
2665 typedef struct {
2666 uint16_t numTaps; /**< number of coefficients in the filter. */
2667 uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
2668 q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2669 q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2670 uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
2671 int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
2672 } csky_fir_sparse_instance_q31;
2673
2674 /**
2675 * @brief Instance structure for the Q15 sparse FIR filter.
2676 */
2677 typedef struct {
2678 uint16_t numTaps; /**< number of coefficients in the filter. */
2679 uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
2680 q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2681 q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2682 uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
2683 int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
2684 } csky_fir_sparse_instance_q15;
2685
2686 /**
2687 * @brief Instance structure for the Q7 sparse FIR filter.
2688 */
2689 typedef struct {
2690 uint16_t numTaps; /**< number of coefficients in the filter. */
2691 uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
2692 q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2693 q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
2694 uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
2695 int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
2696 } csky_fir_sparse_instance_q7;
2697
2698 void csky_fir_sparse_f32(
2699 csky_fir_sparse_instance_f32 *S,
2700 float32_t *pSrc,
2701 float32_t *pDst,
2702 float32_t *pScratchIn,
2703 uint32_t blockSize);
2704
2705 void csky_fir_sparse_init_f32(
2706 csky_fir_sparse_instance_f32 *S,
2707 uint16_t numTaps,
2708 float32_t *pCoeffs,
2709 float32_t *pState,
2710 int32_t *pTapDelay,
2711 uint16_t maxDelay,
2712 uint32_t blockSize);
2713
2714 void csky_fir_sparse_q31(
2715 csky_fir_sparse_instance_q31 *S,
2716 q31_t *pSrc,
2717 q31_t *pDst,
2718 q31_t *pScratchIn,
2719 uint32_t blockSize);
2720
2721 void csky_fir_sparse_init_q31(
2722 csky_fir_sparse_instance_q31 *S,
2723 uint16_t numTaps,
2724 q31_t *pCoeffs,
2725 q31_t *pState,
2726 int32_t *pTapDelay,
2727 uint16_t maxDelay,
2728 uint32_t blockSize);
2729
2730 void csky_fir_sparse_q15(
2731 csky_fir_sparse_instance_q15 *S,
2732 q15_t *pSrc,
2733 q15_t *pDst,
2734 q15_t *pScratchIn,
2735 q31_t *pScratchOut,
2736 uint32_t blockSize);
2737
2738 void csky_fir_sparse_init_q15(
2739 csky_fir_sparse_instance_q15 *S,
2740 uint16_t numTaps,
2741 q15_t *pCoeffs,
2742 q15_t *pState,
2743 int32_t *pTapDelay,
2744 uint16_t maxDelay,
2745 uint32_t blockSize);
2746
2747 void csky_fir_sparse_q7(
2748 csky_fir_sparse_instance_q7 *S,
2749 q7_t *pSrc,
2750 q7_t *pDst,
2751 q7_t *pScratchIn,
2752 q31_t *pScratchOut,
2753 uint32_t blockSize);
2754
2755 void csky_fir_sparse_init_q7(
2756 csky_fir_sparse_instance_q7 *S,
2757 uint16_t numTaps,
2758 q7_t *pCoeffs,
2759 q7_t *pState,
2760 int32_t *pTapDelay,
2761 uint16_t maxDelay,
2762 uint32_t blockSize);
2763
2764 void csky_sin_cos_f32(
2765 float32_t theta,
2766 float32_t *pSinVal,
2767 float32_t *pCosVal);
2768
2769 void csky_sin_cos_q31(
2770 q31_t theta,
2771 q31_t *pSinVal,
2772 q31_t *pCosVal);
2773
2774 void csky_cmplx_conj_f32(
2775 float32_t *pSrc,
2776 float32_t *pDst,
2777 uint32_t numSamples);
2778
2779 void csky_cmplx_conj_q31(
2780 q31_t *pSrc,
2781 q31_t *pDst,
2782 uint32_t numSamples);
2783
2784 void csky_cmplx_conj_q15(
2785 q15_t *pSrc,
2786 q15_t *pDst,
2787 uint32_t numSamples);
2788
2789 void csky_cmplx_mag_squared_f32(
2790 float32_t *pSrc,
2791 float32_t *pDst,
2792 uint32_t numSamples);
2793
2794 void csky_cmplx_mag_squared_q31(
2795 q31_t *pSrc,
2796 q31_t *pDst,
2797 uint32_t numSamples);
2798
2799 void csky_cmplx_mag_squared_q15(
2800 q15_t *pSrc,
2801 q15_t *pDst,
2802 uint32_t numSamples);
2803
2804 /**
2805 * @ingroup groupController
2806 */
2807
2808 /**
2809 * @defgroup PID PID Motor Control
2810 *
2811 * A Proportional Integral Derivative (PID) controller is a generic feedback control
2812 * loop mechanism widely used in industrial control systems.
2813 * A PID controller is the most commonly used type of feedback controller.
2814 *
 * This set of functions implements PID controllers
2816 * for Q15, Q31, and floating-point data types. The functions operate on a single sample
2817 * of data and each call to the function returns a single processed value.
2818 * <code>S</code> points to an instance of the PID control data structure. <code>in</code>
2819 * is the input sample value. The functions return the output value.
2820 *
2821 * \par Algorithm:
2822 * <pre>
2823 * y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
2824 * A0 = Kp + Ki + Kd
2825 * A1 = (-Kp) - (2 * Kd)
2826 * A2 = Kd </pre>
2827 *
2828 * \par
2829 * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
2830 *
2831 * \par
2832 * \image html PID.gif "Proportional Integral Derivative Controller"
2833 *
2834 * \par
2835 * The PID controller calculates an "error" value as the difference between
2836 * the measured output and the reference input.
2837 * The controller attempts to minimize the error by adjusting the process control inputs.
2838 * The proportional value determines the reaction to the current error,
2839 * the integral value determines the reaction based on the sum of recent errors,
2840 * and the derivative value determines the reaction based on the rate at which the error has been changing.
2841 *
2842 * \par Instance Structure
2843 * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
2844 * A separate instance structure must be defined for each PID Controller.
2845 * There are separate instance structure declarations for each of the 3 supported data types.
2846 *
2847 * \par Reset Functions
2848 * There is also an associated reset function for each data type which clears the state array.
2849 *
2850 * \par Initialization Functions
2851 * There is also an associated initialization function for each data type.
2852 * The initialization function performs the following operations:
2853 * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
2854 * - Zeros out the values in the state buffer.
2855 *
2856 * \par
2857 * Instance structure cannot be placed into a const data section
2858 * and it is recommended to use the initialization function.
2859 *
2860 * \par Fixed-Point Behavior
2861 * Care must be taken when using the fixed-point versions of the PID Controller functions.
2862 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
2863 * Refer to the function specific documentation below for usage guidelines.
2864 */
2865
2866 /**
2867 * @addtogroup PID
2868 * @{
2869 */
2870
2871 /**
2872 * @brief Process function for the floating-point PID Control.
2873 * @param[in,out] S is an instance of the floating-point PID Control structure
2874 * @param[in] in input sample to process
2875 * @return out processed output sample.
2876 */
csky_pid_f32(csky_pid_instance_f32 * S,float32_t in)2877 __STATIC_INLINE float32_t csky_pid_f32(
2878 csky_pid_instance_f32 *S,
2879 float32_t in)
2880 {
2881 float32_t out;
2882
2883 /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] */
2884 out = (S->A0 * in) +
2885 (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
2886
2887 /* Update state */
2888 S->state[1] = S->state[0];
2889 S->state[0] = in;
2890 S->state[2] = out;
2891
2892 /* return to application */
2893 return (out);
2894 }
2895
2896 /**
2897 * @}
2898 */ // end of PID group
2899
2900 /**
2901 * @addtogroup PID
2902 * @{
2903 */
2904
2905 /**
2906 * @brief Process function for the Q31 PID Control.
2907 * @param[in,out] S points to an instance of the Q31 PID Control structure
2908 * @param[in] in input sample to process
2909 * @return out processed output sample.
2910 *
2911 * <b>Scaling and Overflow Behavior:</b>
2912 * \par
2913 * The function is implemented using an internal 64-bit accumulator.
2914 * The accumulator has a 2.62 format
2915 * and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
2916 * Thus, if the accumulator result overflows it wraps around rather than clip.
2917 * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
2918 * After all multiply-accumulates are performed,
2919 * the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
2920 */
csky_pid_q31(csky_pid_instance_q31 * S,q31_t in)2921 __STATIC_INLINE q31_t csky_pid_q31(
2922 csky_pid_instance_q31 *S,
2923 q31_t in)
2924 {
2925 q63_t acc;
2926 q31_t out;
2927
2928 #ifdef CSKY_SIMD
2929 /* acc = A0 * x[n] */
2930 acc = mult_32x32_keep64(S->A0, in);
2931
2932 /* acc += A1 * x[n-1] */
2933 acc = multAcc_32x32_keep64(acc, S->A1, S->state[0]);
2934
2935 /* acc += A2 * x[n-2] */
2936 acc = multAcc_32x32_keep64(acc, S->A2, S->state[1]);
2937
2938 /* convert output to 1.31 format to add y[n-1] */
2939 out = dext_31(acc);
2940 #else
2941 /* acc = A0 * x[n] */
2942 acc = (q63_t) S->A0 * in;
2943
2944 /* acc += A1 * x[n-1] */
2945 acc += (q63_t) S->A1 * S->state[0];
2946
2947 /* acc += A2 * x[n-2] */
2948 acc += (q63_t) S->A2 * S->state[1];
2949
2950 /* convert output to 1.31 format to add y[n-1] */
2951 out = (q31_t) (acc >> 31u);
2952 #endif
2953
2954 /* out += y[n-1] */
2955 out += S->state[2];
2956
2957 /* Update state */
2958 S->state[1] = S->state[0];
2959 S->state[0] = in;
2960 S->state[2] = out;
2961
2962 /* return to application */
2963 return (out);
2964 }
2965
2966 /**
2967 * @}
2968 */ // end of PID group
2969
2970 /**
2971 * @addtogroup PID
2972 * @{
2973 */
2974 /**
2975 * @brief Process function for the Q15 PID Control.
2976 * @param[in,out] S points to an instance of the Q15 PID Control structure
2977 * @param[in] in input sample to process
2978 * @return out processed output sample.
2979 *
2980 * <b>Scaling and Overflow Behavior:</b>
2981 * \par
2982 * The function is implemented using a 64-bit internal accumulator.
2983 * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
2984 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
2985 * There is no risk of internal overflow with this approach
2986 * and the full precision of intermediate multiplications is preserved.
2987 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
2988 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
2989 */
csky_pid_q15(csky_pid_instance_q15 * S,q15_t in)2990 __STATIC_INLINE q15_t csky_pid_q15(
2991 csky_pid_instance_q15 *S,
2992 q15_t in)
2993 {
2994 q63_t acc;
2995 q15_t out;
2996
2997 /* acc = A0 * x[n] */
2998 acc = ((q31_t) S->A0) * in;
2999
3000 /* acc += A1 * x[n-1] + A2 * x[n-2] */
3001 acc += (q31_t) S->A1 * S->state[0];
3002 acc += (q31_t) S->A2 * S->state[1];
3003
3004 /* acc += y[n-1] */
3005 acc += (q31_t) S->state[2] << 15;
3006
3007 /* saturate the output */
3008 out = (q15_t) (__SSAT_16((acc >> 15)));
3009
3010 /* Update state */
3011 S->state[1] = S->state[0];
3012 S->state[0] = in;
3013 S->state[2] = out;
3014
3015 /* return to application */
3016 return (out);
3017 }
3018 /**
3019 * @}
3020 */ // end of PID group
3021
3022 csky_status csky_mat_inverse_f32(
3023 const csky_matrix_instance_f32 *src,
3024 csky_matrix_instance_f32 *dst);
3025
3026 csky_status csky_mat_inverse_f64(
3027 const csky_matrix_instance_f64 *src,
3028 csky_matrix_instance_f64 *dst);
3029
3030 /**
3031 * @ingroup groupController
3032 */
3033
3034 /**
3035 * @defgroup clarke Vector Clarke Transform
3036 * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
3037 * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
3038 * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
3039 * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
3040 * \image html clarke.gif Stator current space vector and its components in (a,b).
3041 * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
3042 * can be calculated using only <code>Ia</code> and <code>Ib</code>.
3043 *
3044 * The function operates on a single sample of data and each call to the function returns the processed output.
3045 * The library provides separate functions for Q31 and floating-point data types.
3046 * \par Algorithm
3047 * \image html clarkeFormula.gif
3048 * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
3049 * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
3050 * \par Fixed-Point Behavior
3051 * Care must be taken when using the Q31 version of the Clarke transform.
3052 * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3053 * Refer to the function specific documentation below for usage guidelines.
3054 */
3055
3056 /**
3057 * @addtogroup clarke
3058 * @{
3059 */
3060
3061 /**
3062 *
3063 * @brief Floating-point Clarke transform
3064 * @param[in] Ia input three-phase coordinate a
3065 * @param[in] Ib input three-phase coordinate b
3066 * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
3067 * @param[out] pIbeta points to output two-phase orthogonal vector axis beta
3068 */
csky_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)3069 __STATIC_INLINE void csky_clarke_f32(
3070 float32_t Ia,
3071 float32_t Ib,
3072 float32_t *pIalpha,
3073 float32_t *pIbeta)
3074 {
3075 /* Calculate pIalpha using the equation, pIalpha = Ia */
3076 *pIalpha = Ia;
3077
3078 /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
3079 *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
3080 }
3081
3082 /**
3083 * @}
3084 */ // end of clarke group
3085
3086 /**
3087 * @addtogroup clarke
3088 * @{
3089 */
3090
3091 /**
3092 * @brief Clarke transform for Q31 version
3093 * @param[in] Ia input three-phase coordinate a
3094 * @param[in] Ib input three-phase coordinate b
3095 * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
3096 * @param[out] pIbeta points to output two-phase orthogonal vector axis beta
3097 *
3098 * <b>Scaling and Overflow Behavior:</b>
3099 * \par
3100 * The function is implemented using an internal 32-bit accumulator.
3101 * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3102 * There is saturation on the addition, hence there is no risk of overflow.
3103 */
csky_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)3104 __STATIC_INLINE void csky_clarke_q31(
3105 q31_t Ia,
3106 q31_t Ib,
3107 q31_t *pIalpha,
3108 q31_t *pIbeta)
3109 {
3110 q31_t product1, product2; /* Temporary variables used to store intermediate results */
3111
3112 /* Calculating pIalpha from Ia by equation pIalpha = Ia */
3113 *pIalpha = Ia;
3114
3115 #ifdef CSKY_SIMD
3116 /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
3117 product1 = mult_32x32_dext_30(Ia, 0x24F34E8B);
3118
3119 /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
3120 product2 = mult_32x32_dext_30(Ib, 0x49E69D16);
3121 #else
3122 /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
3123 product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
3124
3125 /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
3126 product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
3127 #endif
3128
3129 /* pIbeta is calculated by adding the intermediate products */
3130 *pIbeta = __QADD(product1, product2);
3131 }
3132
3133 /**
3134 * @}
3135 */ // end of clarke group
3136
3137 void csky_q7_to_q31(
3138 q7_t *pSrc,
3139 q31_t *pDst,
3140 uint32_t blockSize);
3141
3142 /**
3143 * @ingroup groupController
3144 */
3145 /**
3146 * @defgroup inv_clarke Vector Inverse Clarke Transform
3147 * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
3148 *
3149 * The function operates on a single sample of data and each call to the function returns the processed output.
3150 * The library provides separate functions for Q31 and floating-point data types.
3151 * \par Algorithm
3152 * \image html clarkeInvFormula.gif
3153 * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
3154 * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
3155 * \par Fixed-Point Behavior
3156 * Care must be taken when using the Q31 version of the Inverse Clarke transform.
3157 * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3158 * Refer to the function specific documentation below for usage guidelines.
3159 */
3160
3161 /**
3162 * @addtogroup inv_clarke
3163 * @{
3164 */
3165
3166 /**
3167 * @brief Floating-point Inverse Clarke transform
3168 * @param[in] Ialpha input two-phase orthogonal vector axis alpha
3169 * @param[in] Ibeta input two-phase orthogonal vector axis beta
3170 * @param[out] pIa points to output three-phase coordinate <code>a</code>
3171 * @param[out] pIb points to output three-phase coordinate <code>b</code>
3172 */
csky_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)3173 __STATIC_INLINE void csky_inv_clarke_f32(
3174 float32_t Ialpha,
3175 float32_t Ibeta,
3176 float32_t *pIa,
3177 float32_t *pIb)
3178 {
3179 /* Calculating pIa from Ialpha by equation pIa = Ialpha */
3180 *pIa = Ialpha;
3181
3182 /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
3183 *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
3184 }
3185
3186 /**
3187 * @}
3188 */ // end of inv_clarke group
3189
3190 /**
3191 * @addtogroup inv_clarke
3192 * @{
3193 */
3194
3195 /**
3196 * @brief Inverse Clarke transform for Q31 version
3197 * @param[in] Ialpha input two-phase orthogonal vector axis alpha
3198 * @param[in] Ibeta input two-phase orthogonal vector axis beta
3199 * @param[out] pIa points to output three-phase coordinate <code>a</code>
3200 * @param[out] pIb points to output three-phase coordinate <code>b</code>
3201 *
3202 * <b>Scaling and Overflow Behavior:</b>
3203 * \par
3204 * The function is implemented using an internal 32-bit accumulator.
3205 * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3206 * There is saturation on the subtraction, hence there is no risk of overflow.
3207 */
csky_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)3208 __STATIC_INLINE void csky_inv_clarke_q31(
3209 q31_t Ialpha,
3210 q31_t Ibeta,
3211 q31_t *pIa,
3212 q31_t *pIb)
3213 {
3214 q31_t product1, product2; /* Temporary variables used to store intermediate results */
3215
3216 /* Calculating pIa from Ialpha by equation pIa = Ialpha */
3217 *pIa = Ialpha;
3218
3219 #ifdef CSKY_SIMD
3220 /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
3221 product1 = mult_32x32_dext_31(Ialpha, 0x40000000);
3222
3223 /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
3224 product2 = mult_32x32_dext_31(Ibeta, 0x6ED9EBA1);
3225 #else
3226 /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
3227 product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
3228
3229 /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
3230 product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
3231 #endif
3232
3233 /* pIb is calculated by subtracting the products */
3234 *pIb = __QSUB(product2, product1);
3235 }
3236
3237 /**
3238 * @}
3239 */ // end of inv_clarke group
3240
3241 void csky_q7_to_q15(
3242 q7_t *pSrc,
3243 q15_t *pDst,
3244 uint32_t blockSize);
3245
3246 /**
3247 * @ingroup groupController
3248 */
3249 /**
3250 * @defgroup park Vector Park Transform
3251 *
3252 * Forward Park transform converts the input two-coordinate vector to flux and torque components.
3253 * The Park transform can be used to realize the transformation of the <code>Ialpha</code>
3254 * and the <code>Ibeta</code> currents
3255 * from the stationary to the moving reference frame and control the spatial relationship between
3256 * the stator vector current and rotor flux vector.
3257 * If we consider the d axis aligned with the rotor flux, the diagram below shows the
3258 * current vector and the relationship from the two reference frames:
3259 * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
3260 *
3261 * The function operates on a single sample of data and each call to the function returns the processed output.
3262 * The library provides separate functions for Q31 and floating-point data types.
3263 * \par Algorithm
3264 * \image html parkFormula.gif
3265 * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
3266 * <code>pId</code> and <code>pIq</code> are rotor vector components
3267 * and <code>cosVal</code> and <code>sinVal</code> are the
3268 * cosine and sine values of theta (rotor flux position).
3269 * \par Fixed-Point Behavior
3270 * Care must be taken when using the Q31 version of the Park transform.
3271 * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3272 * Refer to the function specific documentation below for usage guidelines.
3273 */
3274 /**
3275 * @addtogroup park
3276 * @{
3277 */
3278 /**
3279 * @brief Floating-point Park transform
3280 * @param[in] Ialpha input two-phase vector coordinate alpha
3281 * @param[in] Ibeta input two-phase vector coordinate beta
3282 * @param[out] pId points to output rotor reference frame d
3283 * @param[out] pIq points to output rotor reference frame q
3284 * @param[in] sinVal sine value of rotation angle theta
3285 * @param[in] cosVal cosine value of rotation angle theta
3286 *
3287 * The function implements the forward Park transform.
3288 *
3289 */
csky_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)3290 __STATIC_INLINE void csky_park_f32(
3291 float32_t Ialpha,
3292 float32_t Ibeta,
3293 float32_t *pId,
3294 float32_t *pIq,
3295 float32_t sinVal,
3296 float32_t cosVal)
3297 {
3298 /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
3299 *pId = Ialpha * cosVal + Ibeta * sinVal;
3300 /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
3301 *pIq = -Ialpha * sinVal + Ibeta * cosVal;
3302 }
3303 /**
3304 * @}
3305 */ // end of park group
3306
3307 /**
3308 * @addtogroup park
3309 * @{
3310 */
3311 /**
3312 * @brief Park transform for Q31 version
3313 * @param[in] Ialpha input two-phase vector coordinate alpha
3314 * @param[in] Ibeta input two-phase vector coordinate beta
3315 * @param[out] pId points to output rotor reference frame d
3316 * @param[out] pIq points to output rotor reference frame q
3317 * @param[in] sinVal sine value of rotation angle theta
3318 * @param[in] cosVal cosine value of rotation angle theta
3319 *
3320 * <b>Scaling and Overflow Behavior:</b>
3321 * \par
3322 * The function is implemented using an internal 32-bit accumulator.
3323 * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3324 * There is saturation on the addition and subtraction, hence there is no risk of overflow.
3325 */
csky_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)3326 __STATIC_INLINE void csky_park_q31(
3327 q31_t Ialpha,
3328 q31_t Ibeta,
3329 q31_t *pId,
3330 q31_t *pIq,
3331 q31_t sinVal,
3332 q31_t cosVal)
3333 {
3334 #ifdef CSKY_SIMD
3335 __ASM volatile(
3336 "rmul.s32.h t0, %0, %3\n\t"
3337 "rmul.s32.h t1, %1, %2\n\t"
3338 "add.s32.s t0, t0, t1\n\t"
3339 "st.w t0, (%4, 0x0)\n\t"
3340 "rmul.s32.h t0, %0, %2\n\t"
3341 "rmul.s32.h t1, %1, %3\n\t"
3342 "sub.s32.s t1, t1, t0\n\t"
3343 "st.w t1, (%5, 0x0)\n\t"
3344 ::"r"(Ialpha), "r"(Ibeta), "r"(sinVal), "r"(cosVal), "r"(pId), "r"(pIq)
3345 :"t0", "t1", "memory");
3346 #else
3347 q31_t product1, product2; /* Temporary variables used to store intermediate results */
3348 q31_t product3, product4; /* Temporary variables used to store intermediate results */
3349 /* Intermediate product is calculated by (Ialpha * cosVal) */
3350 product1 = clip_q63_to_q31 (((q63_t) (Ialpha) * (cosVal)) >> 31);
3351 /* Intermediate product is calculated by (Ibeta * sinVal) */
3352 product2 = clip_q63_to_q31 (((q63_t) (Ibeta) * (sinVal)) >> 31);
3353 /* Intermediate product is calculated by (Ialpha * sinVal) */
3354 product3 = clip_q63_to_q31 (((q63_t) (Ialpha) * (sinVal)) >> 31);
3355 /* Intermediate product is calculated by (Ibeta * cosVal) */
3356 product4 = clip_q63_to_q31 (((q63_t) (Ibeta) * (cosVal)) >> 31);
3357 /* Calculate pId by adding the two intermediate products 1 and 2 */
3358 *pId = __QADD(product1, product2);
3359 /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
3360 *pIq = __QSUB(product4, product3);
3361 #endif
3362 }
3363 /**
3364 * @}
3365 */ // end of park group
3366
3367 void csky_q7_to_float(
3368 q7_t *pSrc,
3369 float32_t *pDst,
3370 uint32_t blockSize);
3371
3372 /**
3373 * @ingroup groupController
3374 */
3375 /**
3376 * @defgroup inv_park Vector Inverse Park transform
3377 * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
3378 *
3379 * The function operates on a single sample of data and each call to the function returns the processed output.
3380 * The library provides separate functions for Q31 and floating-point data types.
3381 * \par Algorithm
3382 * \image html parkInvFormula.gif
3383 * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
3384 * <code>Id</code> and <code>Iq</code> are rotor vector components
3385 * and <code>cosVal</code> and <code>sinVal</code> are the
3386 * cosine and sine values of theta (rotor flux position).
3387 * \par Fixed-Point Behavior
3388 * Care must be taken when using the Q31 version of the Inverse Park transform.
3389 * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3390 * Refer to the function specific documentation below for usage guidelines.
3391 */
3392 /**
3393 * @addtogroup inv_park
3394 * @{
3395 */
3396 /**
3397 * @brief Floating-point Inverse Park transform
3398 * @param[in] Id input coordinate of rotor reference frame d
3399 * @param[in] Iq input coordinate of rotor reference frame q
3400 * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
3401 * @param[out] pIbeta points to output two-phase orthogonal vector axis beta
3402 * @param[in] sinVal sine value of rotation angle theta
3403 * @param[in] cosVal cosine value of rotation angle theta
3404 */
csky_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)3405 __STATIC_INLINE void csky_inv_park_f32(
3406 float32_t Id,
3407 float32_t Iq,
3408 float32_t *pIalpha,
3409 float32_t *pIbeta,
3410 float32_t sinVal,
3411 float32_t cosVal)
3412 {
3413 /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
3414 *pIalpha = Id * cosVal - Iq * sinVal;
3415 /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
3416 *pIbeta = Id * sinVal + Iq * cosVal;
3417 }
3418 /**
3419 * @}
3420 */ // end of inv_park group
3421
3422 /**
3423 * @addtogroup inv_park
3424 * @{
3425 */
3426 /**
3427 * @brief Inverse Park transform for Q31 version
3428 * @param[in] Id input coordinate of rotor reference frame d
3429 * @param[in] Iq input coordinate of rotor reference frame q
3430 * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
3431 * @param[out] pIbeta points to output two-phase orthogonal vector axis beta
3432 * @param[in] sinVal sine value of rotation angle theta
3433 * @param[in] cosVal cosine value of rotation angle theta
3434 *
3435 * <b>Scaling and Overflow Behavior:</b>
3436 * \par
3437 * The function is implemented using an internal 32-bit accumulator.
3438 * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3439 * There is saturation on the addition, hence there is no risk of overflow.
3440 */
csky_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)3441 __STATIC_INLINE void csky_inv_park_q31(
3442 q31_t Id,
3443 q31_t Iq,
3444 q31_t *pIalpha,
3445 q31_t *pIbeta,
3446 q31_t sinVal,
3447 q31_t cosVal)
3448 {
3449 #ifdef CSKY_SIMD
3450 __ASM volatile(
3451 "rmul.s32.h t0, %0, %3\n\t"
3452 "rmul.s32.h t1, %1, %2\n\t"
3453 "sub.s32.s t0, t0, t1\n\t"
3454 "st.w t0, (%4, 0x0)\n\t"
3455 "rmul.s32.h t0, %0, %2\n\t"
3456 "rmul.s32.h t1, %1, %3\n\t"
3457 "add.s32.s t0, t0, t1\n\t"
3458 "st.w t0, (%5, 0x0)\n\t"
3459 ::"r"(Id), "r"(Iq), "r"(sinVal), "r"(cosVal), "r"(pIalpha), "r"(pIbeta)
3460 :"t0", "t1", "memory");
3461
3462 #else
3463 q31_t product1, product2; /* Temporary variables used to store intermediate results */
3464 q31_t product3, product4; /* Temporary variables used to store intermediate results */
3465 /* Intermediate product is calculated by (Id * cosVal) */
3466 product1 = clip_q63_to_q31 (((q63_t) (Id) * (cosVal)) >> 31);
3467 /* Intermediate product is calculated by (Iq * sinVal) */
3468 product2 = clip_q63_to_q31 (((q63_t) (Iq) * (sinVal)) >> 31);
3469 /* Intermediate product is calculated by (Id * sinVal) */
3470 product3 = clip_q63_to_q31 (((q63_t) (Id) * (sinVal)) >> 31);
3471 /* Intermediate product is calculated by (Iq * cosVal) */
3472 product4 = clip_q63_to_q31 (((q63_t) (Iq) * (cosVal)) >> 31);
3473 /* Calculate pIalpha by using the two intermediate products 1 and 2 */
3474 *pIalpha = __QSUB(product1, product2);
3475 /* Calculate pIbeta by using the two intermediate products 3 and 4 */
3476 *pIbeta = __QADD(product4, product3);
3477 #endif
3478 }
3479
3480 /**
3481 * @}
3482 */ // end of inv_park group
3483
3484 void csky_q31_to_float(
3485 q31_t *pSrc,
3486 float32_t *pDst,
3487 uint32_t blockSize);
3488
3489 /**
3490 * @ingroup groupInterpolation
3491 */
3492 /**
3493 * @defgroup LinearInterpolate Linear Interpolation
3494 *
3495 * Linear interpolation is a method of curve fitting using linear polynomials.
3496 * Linear interpolation works by effectively drawing a straight line between two neighboring samples
3497 * and returning the appropriate point along that line
3498 *
3499 * \par
3500 * \image html LinearInterp.gif "Linear interpolation"
3501 *
3502 * \par
3503 * A Linear Interpolate function calculates an output value(y), for the input(x)
3504 * using linear interpolation of the input values x0, x1( nearest input values)
3505 * and the output values y0 and y1(nearest output values)
3506 *
3507 * \par Algorithm:
3508 * <pre>
3509 * y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
3510 * where x0, x1 are nearest values of input x
3511 * y0, y1 are nearest values to output y
3512 * </pre>
3513 *
3514 * \par
3515 * This set of functions implements Linear interpolation process
3516 * for Q7, Q15, Q31, and floating-point data types. The functions operate on a single
3517 * sample of data and each call to the function returns a single processed value.
3518 * <code>S</code> points to an instance of the Linear Interpolate function data structure.
3519 * <code>x</code> is the input sample value. Each function returns the interpolated output value.
3520 *
3521 * \par
3522 * if x is outside of the table boundary, Linear interpolation returns first value of the table
3523 * if x is below input range and returns last value of table if x is above range.
3524 */
3525 /**
3526 * @addtogroup LinearInterpolate
3527 * @{
3528 */
3529 /**
3530 * @brief Process function for the floating-point Linear Interpolation Function.
3531 * @param[in,out] S is an instance of the floating-point Linear Interpolation structure
3532 * @param[in] x input sample to process
3533 * @return y processed output sample.
3534 *
3535 */
csky_linear_interp_f32(csky_linear_interp_instance_f32 * S,float32_t x)3536 __STATIC_INLINE float32_t csky_linear_interp_f32(
3537 csky_linear_interp_instance_f32 * S,
3538 float32_t x)
3539 {
3540 float32_t y;
3541 float32_t x0, x1; /* Nearest input values */
3542 float32_t y0, y1; /* Nearest output values */
3543 float32_t xSpacing = S->xSpacing; /* spacing between input values */
3544 int32_t i; /* Index variable */
3545 float32_t *pYData = S->pYData; /* pointer to output table */
3546 /* Calculation of index */
3547 i = (int32_t) ((x - S->x1) / xSpacing);
3548 if (i < 0) {
3549 /* Iniatilize output for below specified range as least output value of table */
3550 y = pYData[0];
3551 } else if ((uint32_t)i >= S->nValues) {
3552 /* Iniatilize output for above specified range as last output value of table */
3553 y = pYData[S->nValues - 1];
3554 } else {
3555 /* Calculation of nearest input values */
3556 x0 = S->x1 + i * xSpacing;
3557 x1 = S->x1 + (i + 1) * xSpacing;
3558 /* Read of nearest output values */
3559 y0 = pYData[i];
3560 y1 = pYData[i + 1];
3561 /* Calculation of output */
3562 y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
3563 }
3564 /* returns output value */
3565 return (y);
3566 }
3567 /**
3568 * @}
3569 */ // end of LinearInterpolate group
3570
3571 /**
3572 * @addtogroup LinearInterpolate
3573 * @{
3574 */
3575
3576 /**
3577 * @brief Process function for the Q31 Linear Interpolation Function.
3578 * @param[in] pYData pointer to Q31 Linear Interpolation table
3579 * @param[in] x input sample to process
3580 * @param[in] nValues number of table values
3581 * @return y processed output sample.
3582 *
3583 * \par
3584 * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3585 * and 20 bits for fractional part.
3586 * This function can support maximum of table size 2^12.
3587 *
3588 */
csky_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)3589 __STATIC_INLINE q31_t csky_linear_interp_q31(
3590 q31_t *pYData,
3591 q31_t x,
3592 uint32_t nValues)
3593 {
3594 q31_t y; /* output */
3595 q31_t y0, y1; /* Nearest output values */
3596 q31_t fract; /* fractional part */
3597 int32_t index; /* Index to read nearest output values */
3598 /* Input is in 12.20 format */
3599 /* 12 bits for the table index */
3600 /* Index value calculation */
3601 index = ((x & (q31_t)0xFFF00000) >> 20);
3602 if (index >= (int32_t)(nValues - 1)) {
3603 return (pYData[nValues - 1]);
3604 } else if (index < 0) {
3605 return (pYData[0]);
3606 } else {
3607 /* 20 bits for the fractional part */
3608 /* shift left by 11 to keep fract in 1.31 format */
3609 fract = (x & 0x000FFFFF) << 11;
3610 /* Read two nearest output values from the index in 1.31(q31) format */
3611 y0 = pYData[index];
3612 y1 = pYData[index + 1];
3613 #ifdef CSKY_SIMD
3614 /* Calculation of y0 * (1-fract) and y is in 2.30 format */
3615 y = mult_32x32_keep32(y0, (0x7FFFFFFF - fract));
3616 /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
3617 y = multAcc_32x32_keep32(y, y1, fract);
3618 #else
3619 /* Calculation of y0 * (1-fract) and y is in 2.30 format */
3620 y = ((q31_t) (((q63_t) y0 * (0x7FFFFFFF - fract)) >> 32));
3621 /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
3622 y += ((q31_t) (((q63_t) y1 * fract) >> 32));
3623 #endif
3624 /* Convert y to 1.31 format */
3625 return (y << 1u);
3626 }
3627 }
3628 /**
3629 * @}
3630 */ // end of LinearInterpolate group
3631
3632 /**
3633 * @addtogroup LinearInterpolate
3634 * @{
3635 */
3636 /**
3637 *
3638 * @brief Process function for the Q15 Linear Interpolation Function.
3639 * @param[in] pYData pointer to Q15 Linear Interpolation table
3640 * @param[in] x input sample to process
3641 * @param[in] nValues number of table values
3642 * @return y processed output sample.
3643 *
3644 * \par
3645 * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3646 * and 20 bits for fractional part.
3647 * This function can support maximum of table size 2^12.
3648 *
3649 */
csky_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)3650 __STATIC_INLINE q15_t csky_linear_interp_q15(
3651 q15_t *pYData,
3652 q31_t x,
3653 uint32_t nValues)
3654 {
3655 q63_t y; /* output */
3656 q15_t y0, y1; /* Nearest output values */
3657 q31_t fract; /* fractional part */
3658 int32_t index; /* Index to read nearest output values */
3659 /* Input is in 12.20 format */
3660 /* 12 bits for the table index */
3661 /* Index value calculation */
3662 index = ((x & (int32_t)0xFFF00000) >> 20);
3663 if (index >= (int32_t)(nValues - 1)) {
3664 return (pYData[nValues - 1]);
3665 } else if (index < 0) {
3666 return (pYData[0]);
3667 } else {
3668 /* 20 bits for the fractional part */
3669 /* fract is in 12.20 format */
3670 fract = (x & 0x000FFFFF);
3671 /* Read two nearest output values from the index */
3672 y0 = pYData[index];
3673 y1 = pYData[index + 1];
3674 #ifdef CSKY_SIMD
3675 /* Calculation of y0 * (1-fract) and y is in 13.35 format */
3676 y = mult_32x32_keep64(y0, (0xFFFFF - fract));
3677 /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
3678 y = multAcc_32x32_keep64(y, y1, (fract));
3679 #else
3680 /* Calculation of y0 * (1-fract) and y is in 13.35 format */
3681 y = ((q63_t) y0 * (0xFFFFF - fract));
3682 /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
3683 y += ((q63_t) y1 * (fract));
3684 #endif
3685 /* convert y to 1.15 format */
3686 return (q15_t) (y >> 20);
3687 }
3688 }
3689 /**
3690 * @}
3691 */ // end of LinearInterpolate group
3692
3693 /**
3694 * @addtogroup LinearInterpolate
3695 * @{
3696 */
3697 /**
3698 *
3699 * @brief Process function for the Q7 Linear Interpolation Function.
3700 * @param[in] pYData pointer to Q7 Linear Interpolation table
3701 * @param[in] x input sample to process
3702 * @param[in] nValues number of table values
3703 * @return y processed output sample.
3704 *
3705 * \par
3706 * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3707 * and 20 bits for fractional part.
3708 * This function can support maximum of table size 2^12.
3709 */
csky_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)3710 __STATIC_INLINE q7_t csky_linear_interp_q7(
3711 q7_t *pYData,
3712 q31_t x,
3713 uint32_t nValues)
3714 {
3715 q31_t y; /* output */
3716 q7_t y0, y1; /* Nearest output values */
3717 q31_t fract; /* fractional part */
3718 uint32_t index; /* Index to read nearest output values */
3719 /* Input is in 12.20 format */
3720 /* 12 bits for the table index */
3721 /* Index value calculation */
3722 if (x < 0) {
3723 return (pYData[0]);
3724 }
3725 index = (x >> 20) & 0xfff;
3726 if (index >= (nValues - 1)) {
3727 return (pYData[nValues - 1]);
3728 } else {
3729 /* 20 bits for the fractional part */
3730 /* fract is in 12.20 format */
3731 fract = (x & 0x000FFFFF);
3732 /* Read two nearest output values from the index and are in 1.7(q7) format */
3733 y0 = pYData[index];
3734 y1 = pYData[index + 1];
3735 /* Calculation of y0 * (1-fract) and y is in 13.27(q27) format */
3736 y = ((y0 * (0xFFFFF - fract)));
3737 /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
3738 y += (y1 * fract);
3739 /* convert y to 1.7(q7) format */
3740 return (q7_t) (y >> 20);
3741 }
3742 }
3743 /**
3744 * @}
3745 */ // end of LinearInterpolate group
3746
3747 float32_t csky_sin_f32(
3748 float32_t x);
3749
3750 q31_t csky_sin_q31(
3751 q31_t x);
3752
3753 q15_t csky_sin_q15(
3754 q15_t x);
3755
3756 float32_t csky_cos_f32(
3757 float32_t x);
3758
3759 q31_t csky_cos_q31(
3760 q31_t x);
3761
3762 q15_t csky_cos_q15(
3763 q15_t x);
3764
3765 csky_status csky_sqrt_f32(
3766 float32_t in,
3767 float32_t *pOut);
3768
3769 csky_status csky_sqrt_q31(
3770 q31_t in,
3771 q31_t *pOut);
3772
3773 csky_status csky_sqrt_q15(
3774 q15_t in,
3775 q15_t *pOut);
3776
3777 /* double format */
3778 typedef union _myNumber {
3779 q31_t i[2];
3780 float64_t x;
3781 }mynumber;
3782
3783 /* the coefficient for log2 table looh up */
3784 typedef union {
3785 q31_t i[5800];
3786 float64_t x[2900];
3787 }log2_cof1;
3788
3789 typedef union {
3790 q31_t i[4350];
3791 float64_t x[2175];
3792 }log2_cof2;
3793
3794 /* the coefficient for exp table looh up */
3795 typedef union {
3796 q31_t i[1424];
3797 float64_t x[712];
3798 }exp_cof1;
3799
3800 typedef union {
3801 q31_t i[2048];
3802 float64_t x[1024];
3803 }exp_cof2;
3804
3805 union ieee754_double {
3806 float64_t d;
3807
3808 struct {
3809 unsigned int mantissa1 : 32;
3810 unsigned int mantissa0 : 20;
3811 unsigned int exponent : 11;
3812 unsigned int negative : 1;
3813 } ieee;
3814 struct {
3815 unsigned int mantissa1 : 32;
3816 unsigned int mantissa0 : 19;
3817 unsigned int quiet_nan : 1;
3818 unsigned int exponent : 11;
3819 unsigned int negative : 1;
3820 } ieee_nan;
3821 };
3822
3823 typedef struct {
3824 q31_t e;
3825 long d[40];
3826 }mp_no;
3827
3828 float64_t csky_pow_f64(
3829 float64_t x,
3830 float64_t y);
3831
3832 float64_t csky_log_f64(
3833 float64_t x);
3834
3835 float64_t csky_exp_f64(
3836 float64_t x);
3837
3838 float64_t csky_pow2_f64(
3839 float64_t x);
3840
3841 float64_t csky_log2_f64(
3842 float64_t x);
3843
3844 float64_t csky_log10_f64(
3845 float64_t x);
3846
3847 void csky_power_q31(
3848 q31_t *pSrc,
3849 uint32_t blockSize,
3850 q63_t *pResult);
3851
3852 void csky_power_int32(
3853 int32_t *pSrc,
3854 uint32_t blockSize,
3855 q63_t *pResult);
3856
3857 void csky_power_int32(
3858 int32_t *pSrc,
3859 uint32_t blockSize,
3860 q63_t *pResult);
3861
3862 void csky_power_f32(
3863 float32_t *pSrc,
3864 uint32_t blockSize,
3865 float32_t *pResult);
3866
3867 void csky_power_q15(
3868 q15_t *pSrc,
3869 uint32_t blockSize,
3870 q63_t *pResult);
3871
3872 void csky_power_q7(
3873 q7_t *pSrc,
3874 uint32_t blockSize,
3875 q31_t *pResult);
3876
3877 void csky_mean_q7(
3878 q7_t *pSrc,
3879 uint32_t blockSize,
3880 q7_t *pResult);
3881
3882 void csky_mean_q15(
3883 q15_t *pSrc,
3884 uint32_t blockSize,
3885 q15_t *pResult);
3886
3887 void csky_mean_q31(
3888 q31_t *pSrc,
3889 uint32_t blockSize,
3890 q31_t *pResult);
3891
3892 void csky_mean_f32(
3893 float32_t *pSrc,
3894 uint32_t blockSize,
3895 float32_t *pResult);
3896
3897 void csky_var_f32(
3898 float32_t *pSrc,
3899 uint32_t blockSize,
3900 float32_t *pResult);
3901
3902 void csky_var_q31(
3903 q31_t *pSrc,
3904 uint32_t blockSize,
3905 q31_t *pResult);
3906
3907 void csky_var_q15(
3908 q15_t *pSrc,
3909 uint32_t blockSize,
3910 q15_t *pResult);
3911
3912 void csky_rms_f32(
3913 float32_t *pSrc,
3914 uint32_t blockSize,
3915 float32_t *pResult);
3916
3917 void csky_rms_q31(
3918 q31_t *pSrc,
3919 uint32_t blockSize,
3920 q31_t *pResult);
3921
3922 void csky_rms_q15(
3923 q15_t *pSrc,
3924 uint32_t blockSize,
3925 q15_t *pResult);
3926
3927 void csky_std_f32(
3928 float32_t *pSrc,
3929 uint32_t blockSize,
3930 float32_t *pResult);
3931
3932 void csky_std_q31(
3933 q31_t *pSrc,
3934 uint32_t blockSize,
3935 q31_t *pResult);
3936
3937 void csky_std_q15(
3938 q15_t *pSrc,
3939 uint32_t blockSize,
3940 q15_t *pResult);
3941
3942 void csky_cmplx_mag_f32(
3943 float32_t *pSrc,
3944 float32_t *pDst,
3945 uint32_t numSamples);
3946
3947 void csky_cmplx_mag_q31(
3948 q31_t *pSrc,
3949 q31_t *pDst,
3950 uint32_t numSamples);
3951
3952 void csky_cmplx_mag_q15(
3953 q15_t *pSrc,
3954 q15_t *pDst,
3955 uint32_t numSamples);
3956
3957 void csky_cmplx_dot_prod_q15(
3958 q15_t *pSrcA,
3959 q15_t *pSrcB,
3960 uint32_t numSamples,
3961 q31_t *realResult,
3962 q31_t *imagResult);
3963
3964 void csky_cmplx_dot_prod_q31(
3965 q31_t *pSrcA,
3966 q31_t *pSrcB,
3967 uint32_t numSamples,
3968 q63_t *realResult,
3969 q63_t *imagResult);
3970
3971 void csky_cmplx_dot_prod_f32(
3972 float32_t *pSrcA,
3973 float32_t *pSrcB,
3974 uint32_t numSamples,
3975 float32_t *realResult,
3976 float32_t *imagResult);
3977
3978 void csky_cmplx_mult_real_q15(
3979 q15_t *pSrcCmplx,
3980 q15_t *pSrcReal,
3981 q15_t *pCmplxDst,
3982 uint32_t numSamples);
3983
3984 void csky_cmplx_mult_real_q31(
3985 q31_t *pSrcCmplx,
3986 q31_t *pSrcReal,
3987 q31_t *pCmplxDst,
3988 uint32_t numSamples);
3989
3990 void csky_cmplx_mult_real_f32(
3991 float32_t *pSrcCmplx,
3992 float32_t *pSrcReal,
3993 float32_t *pCmplxDst,
3994 uint32_t numSamples);
3995
3996 void csky_min_q7(
3997 q7_t *pSrc,
3998 uint32_t blockSize,
3999 q7_t *result,
4000 uint32_t *index);
4001
4002 void csky_min_q15(
4003 q15_t *pSrc,
4004 uint32_t blockSize,
4005 q15_t *pResult,
4006 uint32_t *pIndex);
4007
4008 void csky_min_q31(
4009 q31_t *pSrc,
4010 uint32_t blockSize,
4011 q31_t *pResult,
4012 uint32_t *pIndex);
4013
4014 void csky_min_f32(
4015 float32_t *pSrc,
4016 uint32_t blockSize,
4017 float32_t *pResult,
4018 uint32_t *pIndex);
4019
4020 void csky_max_q7(
4021 q7_t *pSrc,
4022 uint32_t blockSize,
4023 q7_t *pResult,
4024 uint32_t *pIndex);
4025
4026 void csky_max_q15(
4027 q15_t *pSrc,
4028 uint32_t blockSize,
4029 q15_t *pResult,
4030 uint32_t *pIndex);
4031
4032 void csky_max_q31(
4033 q31_t *pSrc,
4034 uint32_t blockSize,
4035 q31_t *pResult,
4036 uint32_t *pIndex);
4037
4038 void csky_max_f32(
4039 float32_t *pSrc,
4040 uint32_t blockSize,
4041 float32_t *pResult,
4042 uint32_t *pIndex);
4043
4044 void csky_cmplx_mult_cmplx_q15(
4045 q15_t *pSrcA,
4046 q15_t *pSrcB,
4047 q15_t *pDst,
4048 uint32_t numSamples);
4049
4050 void csky_cmplx_mult_cmplx_q31(
4051 q31_t *pSrcA,
4052 q31_t *pSrcB,
4053 q31_t *pDst,
4054 uint32_t numSamples);
4055
4056 void csky_cmplx_mult_cmplx_f32(
4057 float32_t *pSrcA,
4058 float32_t *pSrcB,
4059 float32_t *pDst,
4060 uint32_t numSamples);
4061
4062 void csky_cmplx_mult_cmplx_re_q15(
4063 q15_t *pSrcA,
4064 q15_t *pSrcB,
4065 q15_t *pDst,
4066 uint32_t numSamples);
4067
4068 void csky_cmplx_mult_cmplx_re_q31(
4069 q31_t *pSrcA,
4070 q31_t *pSrcB,
4071 q31_t *pDst,
4072 uint32_t numSamples);
4073
4074 void csky_cmplx_mult_cmplx_re_f32(
4075 float32_t *pSrcA,
4076 float32_t *pSrcB,
4077 float32_t *pDst,
4078 uint32_t numSamples);
4079
4080 void csky_float_to_q31(
4081 float32_t *pSrc,
4082 q31_t *pDst,
4083 uint32_t blockSize);
4084
4085 void csky_float_to_q15(
4086 float32_t *pSrc,
4087 q15_t *pDst,
4088 uint32_t blockSize);
4089
4090 void csky_float_to_q7(
4091 float32_t *pSrc,
4092 q7_t *pDst,
4093 uint32_t blockSize);
4094
4095 void csky_q31_to_q15(
4096 q31_t *pSrc,
4097 q15_t *pDst,
4098 uint32_t blockSize);
4099
4100 void csky_q31_to_q7(
4101 q31_t *pSrc,
4102 q7_t *pDst,
4103 uint32_t blockSize);
4104
4105 void csky_q15_to_float(
4106 q15_t *pSrc,
4107 float32_t *pDst,
4108 uint32_t blockSize);
4109
4110 void csky_q15_to_q31(
4111 q15_t *pSrc,
4112 q31_t *pDst,
4113 uint32_t blockSize);
4114
4115 void csky_q15_to_q7(
4116 q15_t *pSrc,
4117 q7_t *pDst,
4118 uint32_t blockSize);
4119
4120 /**
4121 * @ingroup groupInterpolation
4122 */
4123 /**
4124 * @defgroup BilinearInterpolate Bilinear Interpolation
4125 *
4126 * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
4127 * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
4128 * determines values between the grid points.
4129 * Bilinear interpolation is equivalent to two step linear interpolation,
4130 * first in the x-dimension and then in the y-dimension.
4131 * Bilinear interpolation is often used in image processing to rescale images.
4132 * The CSI DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
4133 *
4134 * <b>Algorithm</b>
4135 * \par
4136 * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
4137 * For floating-point, the instance structure is defined as:
4138 * <pre>
4139 * typedef struct
4140 * {
4141 * uint16_t numRows;
4142 * uint16_t numCols;
4143 * float32_t *pData;
4144 * } csky_bilinear_interp_instance_f32;
4145 * </pre>
4146 *
4147 * \par
4148 * where <code>numRows</code> specifies the number of rows in the table;
4149 * <code>numCols</code> specifies the number of columns in the table;
4150 * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
 * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
 * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
4153 *
4154 * \par
4155 * Let <code>(x, y)</code> specify the desired interpolation point. Then define:
4156 * <pre>
4157 * XF = floor(x)
4158 * YF = floor(y)
4159 * </pre>
4160 * \par
4161 * The interpolated output point is computed as:
4162 * <pre>
4163 * f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
4164 * + f(XF+1, YF) * (x-XF)*(1-(y-YF))
4165 * + f(XF, YF+1) * (1-(x-XF))*(y-YF)
4166 * + f(XF+1, YF+1) * (x-XF)*(y-YF)
4167 * </pre>
 * Note that the coordinates (x, y) contain integer and fractional components.
 * The integer components specify which portion of the table to use while the
 * fractional components control the interpolation process.
 *
 * \par
 * If (x, y) is outside of the table boundary, bilinear interpolation returns zero output.
4174 */
4175 /**
4176 * @addtogroup BilinearInterpolate
4177 * @{
4178 */
4179 /**
4180 *
4181 * @brief Floating-point bilinear interpolation.
4182 * @param[in,out] S points to an instance of the interpolation structure.
4183 * @param[in] X interpolation coordinate.
4184 * @param[in] Y interpolation coordinate.
4185 * @return out interpolated value.
4186 */
csky_bilinear_interp_f32(const csky_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)4187 __STATIC_INLINE float32_t csky_bilinear_interp_f32(
4188 const csky_bilinear_interp_instance_f32 * S,
4189 float32_t X,
4190 float32_t Y)
4191 {
4192 float32_t out;
4193 float32_t f00, f01, f10, f11;
4194 float32_t *pData = S->pData;
4195 int32_t xIndex, yIndex, index;
4196 float32_t xdiff, ydiff;
4197 float32_t b1, b2, b3, b4;
4198 xIndex = (int32_t) X;
4199 yIndex = (int32_t) Y;
4200 /* Care taken for table outside boundary */
4201 /* Returns zero output when values are outside table boundary */
4202 if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1)) {
4203 return (0);
4204 }
4205 /* Calculation of index for two nearest points in X-direction */
4206 index = (xIndex - 1) + (yIndex - 1) * S->numCols;
4207 /* Read two nearest points in X-direction */
4208 f00 = pData[index];
4209 f01 = pData[index + 1];
4210 /* Calculation of index for two nearest points in Y-direction */
4211 index = (xIndex - 1) + (yIndex) * S->numCols;
4212 /* Read two nearest points in Y-direction */
4213 f10 = pData[index];
4214 f11 = pData[index + 1];
4215 /* Calculation of intermediate values */
4216 b1 = f00;
4217 b2 = f01 - f00;
4218 b3 = f10 - f00;
4219 b4 = f00 - f01 - f10 + f11;
4220 /* Calculation of fractional part in X */
4221 xdiff = X - xIndex;
4222 /* Calculation of fractional part in Y */
4223 ydiff = Y - yIndex;
4224 /* Calculation of bi-linear interpolated output */
4225 out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
4226 /* return to application */
4227 return (out);
4228 }
4229 /**
4230 * @}
4231 */ // end of BilinearInterpolate group
4232
4233 /**
4234 * @addtogroup BilinearInterpolate
4235 * @{
4236 */
4237 /**
4238 *
4239 * @brief Q31 bilinear interpolation.
4240 * @param[in,out] S points to an instance of the interpolation structure.
4241 * @param[in] X interpolation coordinate in 12.20 format.
4242 * @param[in] Y interpolation coordinate in 12.20 format.
4243 * @return out interpolated value.
4244 */
csky_bilinear_interp_q31(csky_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)4245 __STATIC_INLINE q31_t csky_bilinear_interp_q31(
4246 csky_bilinear_interp_instance_q31 *S,
4247 q31_t X,
4248 q31_t Y)
4249 {
4250 q31_t out; /* Temporary output */
4251 q31_t acc = 0; /* output */
4252 q31_t xfract, yfract; /* X, Y fractional parts */
4253 q31_t x1, x2, y1, y2; /* Nearest output values */
4254 int32_t rI, cI; /* Row and column indices */
4255 q31_t *pYData = S->pData; /* pointer to output table values */
4256 uint32_t nCols = S->numCols; /* num of rows */
4257 /* Input is in 12.20 format */
4258 /* 12 bits for the table index */
4259 /* Index value calculation */
4260 rI = ((X & (q31_t)0xFFF00000) >> 20);
4261 /* Input is in 12.20 format */
4262 /* 12 bits for the table index */
4263 /* Index value calculation */
4264 cI = ((Y & (q31_t)0xFFF00000) >> 20);
4265 /* Care taken for table outside boundary */
4266 /* Returns zero output when values are outside table boundary */
4267 if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4268 return (0);
4269 }
4270 /* 20 bits for the fractional part */
4271 /* shift left xfract by 11 to keep 1.31 format */
4272 xfract = (X & 0x000FFFFF) << 11u;
4273 /* Read two nearest output values from the index */
4274 x1 = pYData[(rI) + (int32_t)nCols * (cI) ];
4275 x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
4276 /* 20 bits for the fractional part */
4277 /* shift left yfract by 11 to keep 1.31 format */
4278 yfract = (Y & 0x000FFFFF) << 11u;
4279 /* Read two nearest output values from the index */
4280 y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ];
4281 y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
4282 #ifdef CSKY_SIMD
4283 /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 3.29(q29) format */
4284 out = mult_32x32_keep32(x1, (0x7FFFFFFF - xfract));
4285 acc = mult_32x32_keep32(out, (0x7FFFFFFF - yfract));
4286 /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */
4287 out = mult_32x32_keep32(x2, (0x7FFFFFFF - yfract));
4288 acc = multAcc_32x32_keep32(acc, out, xfract);
4289 /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */
4290 out = mult_32x32_keep32(y1, (0x7FFFFFFF - xfract));
4291 acc = multAcc_32x32_keep32(acc, out, yfract);
4292 /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */
4293 out = mult_32x32_keep32(y2, xfract);
4294 acc = multAcc_32x32_keep32(acc, out, yfract);
4295 #else
4296 /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 3.29(q29) format */
4297 out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32)); // 32:byte alignment
4298 acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32)); // 32:byte alignment
4299 /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */
4300 out = ((q31_t) (((q63_t) x2 * (0x7FFFFFFF - yfract)) >> 32)); // 32:byte alignment
4301 acc += ((q31_t) (((q63_t) out * (xfract)) >> 32)); // 32:byte alignment
4302 /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */
4303 out = ((q31_t) (((q63_t) y1 * (0x7FFFFFFF - xfract)) >> 32)); // 32:byte alignment
4304 acc += ((q31_t) (((q63_t) out * (yfract)) >> 32)); // 32:byte alignment
4305 /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */
4306 out = ((q31_t) (((q63_t) y2 * (xfract)) >> 32)); // 32:byte alignment
4307 acc += ((q31_t) (((q63_t) out * (yfract)) >> 32)); // 32:byte alignment
4308 #endif
4309 /* Convert acc to 1.31(q31) format */
4310 return ((q31_t)(acc << 2)); // 2:byte alignment
4311 }
4312 /**
4313 * @}
4314 */ // end of BilinearInterpolate group
4315
4316 /**
4317 * @addtogroup BilinearInterpolate
4318 * @{
4319 */
4320 /**
4321 * @brief Q15 bilinear interpolation.
4322 * @param[in,out] S points to an instance of the interpolation structure.
4323 * @param[in] X interpolation coordinate in 12.20 format.
4324 * @param[in] Y interpolation coordinate in 12.20 format.
4325 * @return out interpolated value.
4326 */
csky_bilinear_interp_q15(csky_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)4327 __STATIC_INLINE q15_t csky_bilinear_interp_q15(
4328 csky_bilinear_interp_instance_q15 * S,
4329 q31_t X,
4330 q31_t Y)
4331 {
4332 q63_t acc = 0; /* output */
4333 q31_t out; /* Temporary output */
4334 q15_t x1, x2, y1, y2; /* Nearest output values */
4335 q31_t xfract, yfract; /* X, Y fractional parts */
4336 int32_t rI, cI; /* Row and column indices */
4337 q15_t *pYData = S->pData; /* pointer to output table values */
4338 uint32_t nCols = S->numCols; /* num of rows */
4339 /* Input is in 12.20 format */
4340 /* 12 bits for the table index */
4341 /* Index value calculation */
4342 rI = ((X & (q31_t)0xFFF00000) >> 20);
4343 /* Input is in 12.20 format */
4344 /* 12 bits for the table index */
4345 /* Index value calculation */
4346 cI = ((Y & (q31_t)0xFFF00000) >> 20);
4347 /* Care taken for table outside boundary */
4348 /* Returns zero output when values are outside table boundary */
4349 if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4350 return (0);
4351 }
4352 /* 20 bits for the fractional part */
4353 /* xfract should be in 12.20 format */
4354 xfract = (X & 0x000FFFFF);
4355 /* Read two nearest output values from the index */
4356 x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ];
4357 x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
4358 /* 20 bits for the fractional part */
4359 /* yfract should be in 12.20 format */
4360 yfract = (Y & 0x000FFFFF);
4361 /* Read two nearest output values from the index */
4362 y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ];
4363 y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
4364 /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 13.51 format */
4365 /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
4366 /* convert 13.35 to 13.31 by right shifting and out is in 1.31 */
4367 #ifdef CSKY_SIMD
4368 out = mult_32x32_dext_4(x1, (0xFFFFF - xfract));
4369 acc = mult_32x32_keep64(out, (0xFFFFF - yfract));
4370 /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */
4371 out = mult_32x32_dext_4(x2, (0xFFFFF - yfract));
4372 acc = multAcc_32x32_keep64(acc, out, (xfract));
4373 /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */
4374 out = mult_32x32_dext_4(y1, (0xFFFFF - xfract));
4375 acc = multAcc_32x32_keep64(acc, out, (yfract));
4376 /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */
4377 out = mult_32x32_dext_4(y2, (xfract));
4378 acc = multAcc_32x32_keep64(acc, out, (yfract));
4379 #else
4380 out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
4381 acc = ((q63_t) out * (0xFFFFF - yfract));
4382 /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */
4383 out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
4384 acc += ((q63_t) out * (xfract));
4385 /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */
4386 out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
4387 acc += ((q63_t) out * (yfract));
4388 /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */
4389 out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
4390 acc += ((q63_t) out * (yfract));
4391 #endif
4392 /* acc is in 13.51 format and down shift acc by 36 times */
4393 /* Convert out to 1.15 format */
4394 return ((q15_t)(acc >> 36));
4395 }
4396 /**
4397 * @}
4398 */ // end of BilinearInterpolate group
4399
4400 void test(q7_t *pSrc, q7_t *pDst);
4401
4402 /**
4403 * @addtogroup BilinearInterpolate
4404 * @{
4405 */
4406 /**
4407 * @brief Q7 bilinear interpolation.
4408 * @param[in,out] S points to an instance of the interpolation structure.
4409 * @param[in] X interpolation coordinate in 12.20 format.
4410 * @param[in] Y interpolation coordinate in 12.20 format.
4411 * @return out interpolated value.
4412 */
csky_bilinear_interp_q7(csky_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)4413 __STATIC_INLINE q7_t csky_bilinear_interp_q7(
4414 csky_bilinear_interp_instance_q7 * S,
4415 q31_t X,
4416 q31_t Y)
4417 {
4418 q63_t acc = 0; /* output */
4419 q31_t out; /* Temporary output */
4420 q31_t xfract, yfract; /* X, Y fractional parts */
4421 q7_t x1, x2, y1, y2; /* Nearest output values */
4422 int32_t rI, cI; /* Row and column indices */
4423 q7_t *pYData = S->pData; /* pointer to output table values */
4424 uint32_t nCols = S->numCols; /* num of rows */
4425 /* Input is in 12.20 format */
4426 /* 12 bits for the table index */
4427 /* Index value calculation */
4428 rI = ((X & (q31_t)0xFFF00000) >> 20);
4429 /* Input is in 12.20 format */
4430 /* 12 bits for the table index */
4431 /* Index value calculation */
4432 cI = ((Y & (q31_t)0xFFF00000) >> 20);
4433 /* Care taken for table outside boundary */
4434 /* Returns zero output when values are outside table boundary */
4435 if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4436 return (0);
4437 }
4438 /* 20 bits for the fractional part */
4439 /* xfract should be in 12.20 format */
4440 xfract = (X & (q31_t)0x000FFFFF);
4441 /* Read two nearest output values from the index */
4442 x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ];
4443 x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
4444 /* 20 bits for the fractional part */
4445 /* yfract should be in 12.20 format */
4446 yfract = (Y & (q31_t)0x000FFFFF);
4447 /* Read two nearest output values from the index */
4448 y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ];
4449 y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
4450 /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 16.47 format */
4451 out = ((x1 * (0xFFFFF - xfract)));
4452 #ifdef CSKY_SIMD
4453 acc = multAcc_32x32_keep64(acc, out, (0xFFFFF - yfract));
4454 /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */
4455 out = ((x2 * (0xFFFFF - yfract)));
4456 acc = multAcc_32x32_keep64(acc, out, xfract);
4457 /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */
4458 out = ((y1 * (0xFFFFF - xfract)));
4459 acc = multAcc_32x32_keep64(acc, out, yfract);
4460 /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */
4461 out = ((y2 * (yfract)));
4462 acc = multAcc_32x32_keep64(acc, out, xfract);
4463 #else
4464 acc = (((q63_t) out * (0xFFFFF - yfract)));
4465 /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */
4466 out = ((x2 * (0xFFFFF - yfract)));
4467 acc += (((q63_t) out * (xfract)));
4468 /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */
4469 out = ((y1 * (0xFFFFF - xfract)));
4470 acc += (((q63_t) out * (yfract)));
4471 /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */
4472 out = ((y2 * (yfract)));
4473 acc += (((q63_t) out * (xfract)));
4474 #endif
4475 /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
4476 return ((q7_t)(acc >> 40));
4477 }
4478 /**
4479 * @}
4480 */ // end of BilinearInterpolate group
4481
4482 /**
4483 * @ingroup groupMath
4484 */
4485
4486 /**
4487 * @defgroup ShiftRight Right Shift
4488 *
 * Shifts the input value right by the given number of bits. The basic form is:
 * <pre>
 * a = (a) >> (shift), 1 <= shift <= bitof(a) - 1.
 * </pre>
 * The basic form is provided for q31 only.
 *
 * The extended form rounds the result (rounding to +inf):
 * <pre>
 * a = ((a) + (1 << (shift - 1))) >> (shift), 1 <= shift <= bitof(a) - 1.
 * </pre>
 *
 * The extended form is provided for q31, positive q31 and q63.
4501 */
4502
4503 /**
4504 * @addtogroup ShiftRight
4505 * @{
4506 */
4507 /**
4508 * @brief right shift Q31 version
4509 * @param[in] a input value to be shift.
4510 * @param[in] shift input positive value, the number of bits to be shift.
4511 * @param[out] result the shifted a.
4512 *
4513 * <b>Scaling and Overflow Behavior:</b>
4514 * \par
4515 * The function is only used for right shift. So, the value of shift is
4516 * between[1,31].
4517 */
csky_shr_q31(q31_t a,q31_t shift)4518 __STATIC_INLINE q31_t csky_shr_q31(
4519 q31_t a,
4520 q31_t shift)
4521 {
4522 q31_t res;
4523 #ifdef CSKY_SIMD
4524 __ASM volatile(
4525 "asr %0, %1, %2\n\t"
4526 :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4527 #else
4528 res = ((a) >> (shift));
4529 #endif
4530 return res;
4531 }
4532
4533 #define SHR(a, shift) csky_shr_q31(a, shift)
4534
4535 /**
4536 * @}
4537 */ // end of ShiftRight group
4538
4539 /**
4540 * @addtogroup ShiftRight
4541 * @{
4542 */
4543 /**
4544 * @brief right shift Q31 version
4545 * @param[in] a input value to be shift.
4546 * @param[in] shift input positive value, the number of bits to be shift.
4547 * @param[out] result the shifted a.
4548 *
4549 * <b>Scaling and Overflow Behavior:</b>
4550 * \par
4551 * The function is only used for right shift. So, the value of shift is
4552 * between[1,31]. And the output value is rounding to +inf.
4553 */
csky_pshr_q31(q31_t a,q31_t shift)4554 __STATIC_INLINE q31_t csky_pshr_q31(
4555 q31_t a,
4556 q31_t shift)
4557 {
4558 q31_t res;
4559 #ifdef CSKY_SIMD
4560 __ASM volatile(
4561 "asr.s32.r %0, %1, %2\n\t"
4562 :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4563 #else
4564 res = (a >= 0?(SHR((a) + (1<<(shift - 1)), shift))\
4565 :(SHR((a) + ((1<<shift)>>1) -1, shift)));
4566 #endif
4567 return res;
4568 }
4569
4570 /**
4571 * @}
4572 */ // end of ShiftRight group
4573
4574 /**
4575 * @addtogroup ShiftRight
4576 * @{
4577 */
4578 /**
4579 * @brief right shift Q31 version
4580 * @param[in] a input positive value to be shift.
4581 * @param[in] shift input positive value, the number of bits to be shift.
4582 * @param[out] result the shifted a.
4583 *
4584 * <b>Scaling and Overflow Behavior:</b>
4585 * \par
4586 * The function is only used for right shift. So, the value of shift is
4587 * between[1,31]. And the output value is rounding to +inf.
4588 */
csky_pshr_pos_q31(q31_t a,q31_t shift)4589 __STATIC_INLINE q31_t csky_pshr_pos_q31(
4590 q31_t a,
4591 q31_t shift)
4592 {
4593 q31_t res;
4594 #ifdef CSKY_SIMD
4595 __ASM volatile(
4596 "asr.s32.r %0, %1, %2\n\t"
4597 :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4598 #else
4599 res = SHR((a) + (1<<(shift - 1)), shift);
4600 #endif
4601 return res;
4602 }
4603
4604 /**
4605 * @}
4606 */ // end of ShiftRight group
4607
4608 /**
4609 * @addtogroup ShiftRight
4610 * @{
4611 */
4612 /**
4613 * @brief right shift Q63 version
4614 * @param[in] a input value to be shift.
4615 * @param[in] shift input positive value, the number of bits to be shift.
4616 * @param[out] result the shifted a.
4617 *
4618 * <b>Scaling and Overflow Behavior:</b>
4619 * \par
4620 * The function is only used for right shift. So, the value of shift is
4621 * between[1,63]. And the output value is rounding to +inf.
4622 */
csky_pshr_q63(q63_t a,q31_t shift)4623 __STATIC_INLINE q63_t csky_pshr_q63(
4624 q63_t a,
4625 q31_t shift)
4626 {
4627 q63_t res;
4628 #ifdef CSKY_SIMD
4629 __ASM volatile(
4630 "subi t0, %2, 1\n\t"
4631 "cmphsi t0, 32\n\t"
4632 "bt 1f\n\t"
4633 "movi t1, 1\n\t"
4634 "lsl t0, t1, t0\n\t"
4635 "movi t1, 0\n\t"
4636 "add.s64.s %1, %1, t0\n\t"
4637 "dext %0, %1, %R1, %2\n\t"
4638 "asr %R0, %R1, %2\n\t"
4639 "br 2f\n\t"
4640 "1:\n\t"
4641 "subi %2, %2, 32\n\t"
4642 "subi t0, t0, 32\n\t"
4643 "movi t1, 1\n\t"
4644 "lsl t1, t1, t0\n\t"
4645 "add.s32.s %R1, %R1, t1\n\t"
4646 "asr %0, %R1, %2\n\t"
4647 "asri %R0, %R1, 31\n\t"
4648 "2:\n\t"
4649 :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift):"t0", "t1");
4650 #else
4651 res = (a >= 0?(SHR((a) + ((q63_t)1<<(shift - 1)), shift))\
4652 :(SHR((a) + (((q63_t)1<<shift)>>1) -1, shift)));
4653 #endif
4654 return res;
4655 }
4656
4657 /**
4658 * @}
4659 */ // end of ShiftRight group
4660
/* Short spellings of the rounding right-shift helpers. */
#define PSHR(a, shift)          csky_pshr_q31((a), (shift))      /* q31, any sign */
#define PSHR_POSITIVE(a, shift) csky_pshr_pos_q31((a), (shift))  /* q31, input meant to be positive */
#define PSHR64(a, shift)        csky_pshr_q63((a), (shift))      /* q63 */
4664
#ifndef CSKY_SIMD
/*
 * Plain-C 32x32 multiply helpers, only defined when CSKY_SIMD is not set.
 * Each one forms the 64-bit product x * y and keeps its upper 32 bits; the
 * result is assigned to (or accumulated into) the first argument, which
 * therefore has to be an lvalue and is evaluated more than once in the
 * accumulate/subtract forms. The "_R" forms add 0x80000000 before dropping
 * the lower 32 bits, i.e. they round instead of truncating.
 * The upper-case tags are the labels carried by the original comments.
 */

/* SMMLAR: a = high32((a << 32) + x * y), rounded */
#define multAcc_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL) >> 32)

/* SMMLSR: a = high32((a << 32) - x * y), rounded */
#define multSub_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL) >> 32)

/* SMMULR: a = high32(x * y), rounded */
#define mult_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL) >> 32)

/* SMMLA: a += high32(x * y), truncated */
#define multAcc_32x32_keep32(a, x, y) \
    (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMLS: a -= high32(x * y), truncated */
#define multSub_32x32_keep32(a, x, y) \
    (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMUL: a = high32(x * y), truncated */
#define mult_32x32_keep32(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y)) >> 32)
#endif /* !CSKY_SIMD */
4691
4692 #ifdef __cplusplus
4693 }
4694 #endif
4695
4696 #endif /* _CSKY_MATH_H */
4697
4698 /**
4699 *
4700 * End of file.
4701 */