• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  * @file     csky_math.h
3  * @brief    Public header file for CSI DSP Library.
4  * @version  V1.0
5  * @date     20. Dec 2016
6  ******************************************************************************/
7 /* ---------------------------------------------------------------------------
8  * Copyright (C) 2016 CSKY Limited. All rights reserved.
9  *
10  * Redistribution and use of this software in source and binary forms,
11  * with or without modification, are permitted provided that the following
12  * conditions are met:
13  *   * Redistributions of source code must retain the above copyright notice,
14  *     this list of conditions and the following disclaimer.
15  *   * Redistributions in binary form must reproduce the above copyright notice,
16  *     this list of conditions and the following disclaimer in the documentation
17  *     and/or other materials provided with the distribution.
18  *   * Neither the name of CSKY Ltd. nor the names of CSKY's contributors may
19  *     be used to endorse or promote products derived from this software without
20  *     specific prior written permission of CSKY Ltd.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
27  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  * -------------------------------------------------------------------------- */
34 
35 /**
36  * @defgroup groupMath Basic Math Functions
37  */
38 
39 /**
40  * @defgroup groupFastMath Fast Math Functions
41  * This set of functions provides a fast approximation to sine, cosine, and square root.
42  * As compared to most of the other functions in the CSI math library, the fast math functions
43  * operate on individual values and not arrays.
44  * There are separate functions for Q15, Q31, and floating-point data.
45  *
46  */
47 
48 /**
49  * @defgroup groupCmplxMath Complex Math Functions
50  * This set of functions operates on complex data vectors.
51  * The data in the complex arrays is stored in an interleaved fashion
52  * (real, imag, real, imag, ...).
53  * In the API functions, the number of samples in a complex array refers
54  * to the number of complex values; the array contains twice this number of
55  * real values.
56  */
57 
58 /**
59  * @defgroup groupFilters Filtering Functions
60  */
61 
62 /**
63  * @defgroup groupMatrix Matrix Functions
64  *
65  * This set of functions provides basic matrix math operations.
66  * The functions operate on matrix data structures.  For example,
67  * the type
68  * definition for the floating-point matrix structure is shown
69  * below:
70  * <pre>
71  *     typedef struct
72  *     {
73  *       uint16_t numRows;     // number of rows of the matrix.
74  *       uint16_t numCols;     // number of columns of the matrix.
75  *       float32_t *pData;     // points to the data of the matrix.
76  *     } csky_matrix_instance_f32;
77  * </pre>
78  * There are similar definitions for Q15 and Q31 data types.
79  *
80  * The structure specifies the size of the matrix and then points to
81  * an array of data.  The array is of size <code>numRows X numCols</code>
82  * and the values are arranged in row order.  That is, the
83  * matrix element (i, j) is stored at:
84  * <pre>
85  *     pData[i*numCols + j]
86  * </pre>
87  *
88  * \par Init Functions
89  * There is an associated initialization function for each type of matrix
90  * data structure.
91  * The initialization function sets the values of the internal structure fields.
92  * Refer to the function <code>csky_mat_init_f32()</code>, <code>csky_mat_init_q31()</code>
93  * and <code>csky_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
94  *
95  * \par
96  * Use of the initialization function is optional. However, if the initialization function is used,
97  * then the instance structure cannot be placed into a const data section.
98  * To place the instance structure in a const data
99  * section, manually initialize the data structure.  For example:
100  * <pre>
101  * <code>csky_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
102  * <code>csky_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
103  * <code>csky_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
104  * </pre>
105  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
106  * specifies the number of columns, and <code>pData</code> points to the
107  * data array.
108  *
109  * \par Size Checking
110  * By default all of the matrix functions perform size checking on the input and
111  * output matrices.  For example, the matrix addition function verifies that the
112  * two input matrices and the output matrix all have the same number of rows and
113  * columns.  If the size check fails the functions return:
114  * <pre>
115  *     CSKY_MATH_SIZE_MISMATCH
116  * </pre>
117  * Otherwise the functions return
118  * <pre>
119  *     CSKY_MATH_SUCCESS
120  * </pre>
121  * There is some overhead associated with this matrix size checking.
122  * The matrix size checking is enabled via the \#define
123  * <pre>
124  *     CSKY_MATH_MATRIX_CHECK
125  * </pre>
126  * within the library project settings.  By default this macro is defined
127  * and size checking is enabled.  By changing the project settings and
128  * undefining this macro size checking is eliminated and the functions
129  * run a bit faster.  With size checking disabled the functions always
130  * return <code>CSKY_MATH_SUCCESS</code>.
131  */
132 
133 /**
134  * @defgroup groupTransforms Transform Functions
135  */
136 
137 /**
138  * @defgroup groupController Controller Functions
139  */
140 
141 /**
142  * @defgroup groupStats Statistics Functions
143  */
144 /**
145  * @defgroup groupSupport Support Functions
146  */
147 
148 /**
149  * @defgroup groupInterpolation Interpolation Functions
150  * These functions perform 1- and 2-dimensional interpolation of data.
151  * Linear interpolation is used for 1-dimensional data and
152  * bilinear interpolation is used for 2-dimensional data.
153  */
154 
155 /**
156  * @defgroup groupYunvoice Yunvoice Functions
157  * These functions are designed for the Yunvoice project and are adapted
158  * from the corresponding CEVA DSP functions, so that software can be ported
159  * from CEVA to CSKY straightforwardly.
160  */
161 
162 /**
163  * @defgroup groupExamples Examples
164  */
165 
166 #ifndef _CSKY_MATH_H
167 #define _CSKY_MATH_H
168 
169 #define __CSI_GENERIC         /* disable NVIC and Systick functions */
170 
171 #include "csi_core.h"
172 
173 #include <float.h>
174 #undef  __CSI_GENERIC         /* enable NVIC and Systick functions */
175 #include "string.h"
176 #include "math.h"
177 #ifdef   __cplusplus
178 extern "C"
179 {
180 #endif
181 
/*
 * Constants needed by the reciprocal calculation in Normalized LMS.
 */
#define DELTA_Q31          (0x100)
#define DELTA_Q15          0x5
#define INDEX_MASK         0x0000003F

/* PI is only supplied when the including code has not defined it already. */
#ifndef PI
#define PI                 3.14159265358979f
#endif

/*
 * Constants needed by the SINE and COSINE fast math approximations.
 */
#define FAST_MATH_TABLE_SIZE  512
#define FAST_MATH_Q31_SHIFT   (32 - 10)
#define FAST_MATH_Q15_SHIFT   (16 - 10)
#define CONTROLLER_Q31_SHIFT  (32 - 9)
#define TABLE_SIZE            256
#define TABLE_SPACING_Q31     0x400000
#define TABLE_SPACING_Q15     0x80

/*
 * Constant needed by the SINE and COSINE controller functions:
 * the fixed value 2/360 in 1.31 (q31) format. The range -1 to +1 is
 * divided into 360 values, so the total spacing is 2/360.
 */
#define INPUT_SPACING         0xB60B61
211 
/**
 * @brief Alignment qualifier tied to unaligned-access support.
 *
 * ALIGN4 requests 4-byte alignment only when UNALIGNED_SUPPORT_DISABLE is
 * defined; otherwise it expands to nothing.
 */
#ifdef UNALIGNED_SUPPORT_DISABLE
    #define ALIGN4 __attribute__((aligned(4)))
#else
    #define ALIGN4
#endif   /* #ifdef UNALIGNED_SUPPORT_DISABLE */
220 
/**
 * @brief Helper macros for log, pow and related fast functions.
 *
 * ABS, max and min expand their arguments more than once, so the arguments
 * must not have side effects (e.g. no max(i++, j)).
 */
#define ABS(x)    (((x) > 0)   ? (x) : (-(x)))
/* Both operands are explicit parameters; the one-parameter form silently
 * captured whatever `y` happened to be in scope at the call site. */
#define max(x, y) (((y) > (x)) ? (y) : (x))
#define min(x, y) (((y) < (x)) ? (y) : (x))
/* Dekker/Veltkamp splitting constant for IEEE double: 2^27 + 1.
 * Multiplying by it and cancelling splits a double into two halves whose
 * pairwise products are exact; the constant has to be 2^s + 1 exactly. */
#define CN                     134217729.0
#define HIGH_HALF              1
#define LOW_HALF               0
230 
/*
 * EADD: exact addition of two single-length floating point numbers.
 * Produces the double-length number (z, zz) with z + zz == x + y exactly.
 * The operand of larger magnitude is cancelled against z first so that the
 * rounding error of the sum is recovered in zz.
 * Arguments are expanded more than once; pass side-effect-free expressions.
 */
#define  EADD(x, y, z, zz) do {                 \
        (z) = (x) + (y);                        \
        if (ABS(x) > ABS(y)) {                  \
            (zz) = (((x) - (z)) + (y));         \
        } else {                                \
            (zz) = (((y) - (z)) + (x));         \
        }                                       \
} while (0)
239 
/*
 * EMULV: exact multiplication of two single-length floating point numbers.
 * Produces the double-length number (z, zz) with z + zz == x * y exactly.
 * p, hx, tx, hy, ty are temporary storage variables of type double.
 * Each operand is first split into a head (hx/hy) and tail (tx/ty) using
 * the constant CN; the statement order below must be kept as is.
 */
# define  EMULV(x, y, z, zz, p, hx, tx, hy, ty) do {    \
        (p)  = CN * (x);                                \
        (hx) = ((x) - (p)) + (p);                       \
        (tx) = (x) - (hx);                              \
        (p)  = CN * (y);                                \
        (hy) = ((y) - (p)) + (p);                       \
        (ty) = (y) - (hy);                              \
        (z)  = (x) * (y);                               \
        (zz) = ((((hx) * (hy) - (z)) + (hx) * (ty)) + (tx) * (hy)) + (tx) * (ty); \
} while (0)
/*
 * MUL12: exact multiplication of two single-length floating point numbers.
 * Produces a nearly double-length number (z, zz) (see Dekker) with
 * z + zz == x * y exactly. p, hx, tx, hy, ty, q are temporary storage
 * variables of type double. Operands are split with the constant CN before
 * the partial products are accumulated; statement order matters.
 */
# define  MUL12(x, y, z, zz, p, hx, tx, hy, ty, q) do {  \
        (p)  = CN * (x);                                \
        (hx) = ((x) - (p)) + (p);                       \
        (tx) = (x) - (hx);                              \
        (p)  = CN * (y);                                \
        (hy) = ((y) - (p)) + (p);                       \
        (ty) = (y) - (hy);                              \
        (p)  = (hx) * (hy);                             \
        (q)  = (hx) * (ty) + (tx) * (hy);               \
        (z)  = (p) + (q);                               \
        (zz) = (((p) - (z)) + (q)) + (tx) * (ty);       \
} while (0)
263 
/*
 * ADD2: double-length addition (Dekker).
 * Produces a double-length number (z, zz) satisfying approximately
 *     z + zz = (x + xx) + (y + yy).
 * Error bound: (abs(x+xx) + abs(y+yy)) * 4.94e-32.
 * (x, xx) and (y, yy) are assumed to be double-length numbers; r and s are
 * temporary storage variables of type double.
 */
#define  ADD2(x, xx, y, yy, z, zz, r, s) do {                   \
        (r) = (x) + (y);                                        \
        if (ABS(x) > ABS(y)) {                                  \
            (s) = (((((x) - (r)) + (y)) + (yy)) + (xx));        \
        } else {                                                \
            (s) = (((((y) - (r)) + (x)) + (xx)) + (yy));        \
        }                                                       \
        (z)  = (r) + (s);                                       \
        (zz) = ((r) - (z)) + (s);                               \
} while (0)
277 
/*
 * SUB2: double-length subtraction (Dekker).
 * Produces a double-length number (z, zz) satisfying approximately
 *     z + zz = (x + xx) - (y + yy).
 * Error bound: (abs(x+xx) + abs(y+yy)) * 4.94e-32.
 * (x, xx) and (y, yy) are assumed to be double-length numbers; r and s are
 * temporary storage variables of type double.
 */
#define  SUB2(x, xx, y, yy, z, zz, r, s) do {                   \
        (r) = (x) - (y);                                        \
        if (ABS(x) > ABS(y)) {                                  \
            (s) = (((((x) - (r)) - (y)) - (yy)) + (xx));        \
        } else {                                                \
            (s) = ((((x) - ((y) + (r))) + (xx)) - (yy));        \
        }                                                       \
        (z)  = (r) + (s);                                       \
        (zz) = ((r) - (z)) + (s);                               \
} while (0)
291 
/*
 * MUL2: double-length multiplication (Dekker).
 * Produces a double-length number (z, zz) satisfying approximately
 *     z + zz = (x + xx) * (y + yy).
 * Error bound: abs((x+xx)*(y+yy)) * 1.24e-31.
 * (x, xx) and (y, yy) are assumed to be double-length numbers;
 * p, hx, tx, hy, ty, q, c, cc are temporary storage variables of type double.
 *
 * MUL12 expands to a do { ... } while (0) statement, so its invocation must
 * be terminated with ';' before the next statement.
 */
#define  MUL2(x, xx, y, yy, z, zz, p, hx, tx, hy, ty, q, c, cc) do {     \
        MUL12((x), (y), (c), (cc), (p), (hx), (tx), (hy), (ty), (q));   \
        (cc) = ((x) * (yy) + (xx) * (y)) + (cc);                        \
        (z)  = (c) + (cc);                                              \
        (zz) = ((c) - (z)) + (cc);                                      \
} while (0)
304 
__SSAT_31(int32_t x)305 __STATIC_INLINE int32_t __SSAT_31(int32_t x)
306 {
307     int32_t res = x;
308     if (x > 0x3fffffff) {
309         res = 0x3fffffff;
310     } else if (x < -1073741824) {
311         res = -1073741824;
312     }
313 
314     return res;
315 }
316 
__SSAT_16(int32_t x)317 __STATIC_INLINE int32_t __SSAT_16(int32_t x)
318 {
319     int32_t res = x;
320     if (x > 0x7fff) {
321         res = 0x7fff;
322     } else if (x < -32768) {
323         res = -32768;
324     }
325 
326     return res;
327 }
328 
__SSAT_8(int32_t x)329 __STATIC_INLINE int32_t __SSAT_8(int32_t x)
330 {
331     int32_t res = x;
332     if (x > 0x7f) {
333         res = 0x7f;
334     } else if (x < -128) {
335         res = -128;
336     }
337 
338     return res;
339 }
340 
341 #ifdef CSKY_SIMD
342 /* SMMLAR */
multAcc_32x32_keep32_R(int32_t a,int32_t x,int32_t y)343 __STATIC_INLINE int32_t multAcc_32x32_keep32_R(int32_t a, int32_t x, int32_t y)
344 {
345     __ASM volatile("mula.s32.rhs %0, %1, %2\n\t"
346                    :"=r" (a), "=r" (x), "=r" (y) : "0" (a), "1" (x), "2" (y));
347     return a;
348 }
349 
350 /* SMMLSR */
multSub_32x32_keep32_R(int32_t a,int32_t x,int32_t y)351 __STATIC_INLINE int32_t multSub_32x32_keep32_R(int32_t a, int32_t x, int32_t y)
352 {
353     __ASM volatile("muls.s32.rhs %0, %1, %2\n\t"
354                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
355     return a;
356 }
357 
358 /* SMMULR */
mult_32x32_keep32_R(int32_t x,int32_t y)359 __STATIC_INLINE int32_t mult_32x32_keep32_R(int32_t x, int32_t y)
360 {
361     int32_t a;
362     __ASM volatile("mul.s32.rh %0, %1, %2\n\t"
363                    :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
364     return a;
365 }
366 
367 /* SMMLA */
multAcc_32x32_keep32(int32_t a,int32_t x,int32_t y)368 __STATIC_INLINE int32_t multAcc_32x32_keep32(int32_t a, int32_t x, int32_t y)
369 {
370     __ASM volatile("mula.s32.hs %0, %1, %2\n\t"
371                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
372     return a;
373 }
374 
375 /* SMMLS */
multSub_32x32_keep32(int32_t a,int32_t x,int32_t y)376 __STATIC_INLINE int32_t multSub_32x32_keep32(int32_t a, int32_t x, int32_t y)
377 {
378     __ASM volatile("muls.s32.hs %0, %1, %2\n\t"
379                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
380     return a;
381 }
382 
383 /* SMMUL */
mult_32x32_keep32(int32_t x,int32_t y)384 __STATIC_INLINE int32_t mult_32x32_keep32(int32_t x, int32_t y)
385 {
386     int32_t a;
387     __ASM volatile("mul.s32.h %0, %1, %2\n\t"
388                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
389     return a;
390 }
391 
multAcc_16x16_keep32(int32_t a,int16_t x,int16_t y)392 __STATIC_INLINE int32_t multAcc_16x16_keep32(int32_t a, int16_t x, int16_t y)
393 {
394     __ASM volatile("mulall.s16 %0, %1, %2\n\t"
395                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
396     return a;
397 }
398 
multAcc_16x16_keep64(int64_t a,int16_t x,int16_t y)399 __STATIC_INLINE int64_t multAcc_16x16_keep64(int64_t a, int16_t x, int16_t y)
400 {
401     __ASM volatile("mulall.s16.e %0, %1, %2\n\t"
402                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
403     return a;
404 }
405 
mult_32x32_keep64(int32_t x,int32_t y)406 __STATIC_INLINE int64_t mult_32x32_keep64(int32_t x, int32_t y)
407 {
408     int64_t a;
409     __ASM volatile("mul.s32 %0, %1, %2\n\t"
410                    :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
411     return a;
412 }
413 
multAcc_32x32_keep64(int64_t a,int32_t x,int32_t y)414 __STATIC_INLINE int64_t multAcc_32x32_keep64(int64_t a, int32_t x, int32_t y)
415 {
416     __ASM volatile("mula.s32 %0, %1, %2\n\t"
417                    :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y));
418     return a;
419 }
420 
mult_32x32_dext_31(int32_t x,int32_t y)421 __STATIC_INLINE int32_t mult_32x32_dext_31(int32_t x, int32_t y)
422 {
423     int64_t tmp1;
424     int32_t tmp2;
425     __ASM volatile("mul.s32 %0, %1, %2\n\t"
426                    "dexti %3, %0, %R0, 31"
427                    :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
428     return tmp2;
429 }
430 
mult_32x32_dext_30(int32_t x,int32_t y)431 __STATIC_INLINE int32_t mult_32x32_dext_30(int32_t x, int32_t y)
432 {
433     int64_t tmp1;
434     int32_t tmp2;
435     __ASM volatile("mul.s32 %0, %1, %2\n\t"
436                    "dexti %3, %0, %R0, 30"
437                    :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
438     return tmp2;
439 }
440 
mult_32x32_dext_4(int32_t x,int32_t y)441 __STATIC_INLINE int32_t mult_32x32_dext_4(int32_t x, int32_t y)
442 {
443     int64_t tmp1;
444     int32_t tmp2;
445     __ASM volatile("mul.s32 %0, %1, %2\n\t"
446                    "dexti %3, %0, %R0, 4"
447                    :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
448     return tmp2;
449 }
450 
mult_32x32_dext_33(int32_t x,int32_t y)451 __STATIC_INLINE int32_t mult_32x32_dext_33(int32_t x, int32_t y)
452 {
453     int64_t tmp1;
454     int32_t tmp2;
455     __ASM volatile("mul.s32 %0, %1, %2\n\t"
456                    "asri %3, %R0, 1"
457                    :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y));
458     return tmp2;
459 }
460 
dext_31(int64_t x)461 __STATIC_INLINE int32_t dext_31(int64_t x)
462 {
463     int32_t tmp1;
464     __ASM volatile(
465                    "dexti %0, %1, %R1, 31"
466                    :"=r" (tmp1), "=r" (x) : "1" (x));
467     return tmp1;
468 }
469 
mult_l16xl16_keep32(int32_t x,int32_t y)470 __STATIC_INLINE int32_t mult_l16xl16_keep32(int32_t x, int32_t y)
471 {
472     int32_t a;
473     __ASM volatile("mulll.s16 %0, %1, %2\n\t"
474                    :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
475     return a;
476 }
477 
mult_h16xl16_keep32(int32_t x,int32_t y)478 __STATIC_INLINE int32_t mult_h16xl16_keep32(int32_t x, int32_t y)
479 {
480     int32_t a;
481     __ASM volatile("mulhl.s16 %0, %1, %2\n\t"
482                    :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
483     return a;
484 }
485 
mult_h16xh16_keep32(int32_t x,int32_t y)486 __STATIC_INLINE int32_t mult_h16xh16_keep32(int32_t x, int32_t y)
487 {
488     int32_t a;
489     __ASM volatile("mulhh.s16 %0, %1, %2\n\t"
490                    :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y));
491     return a;
492 }
493 
494 #endif
495 
/**
 * @brief Error status returned by some functions in the library.
 *
 * Success is zero; every error code is a distinct negative value.
 */
typedef enum {
    CSKY_MATH_SUCCESS        =  0,  /**< No error */
    CSKY_MATH_ARGUMENT_ERROR = -1,  /**< One or more arguments are incorrect */
    CSKY_MATH_LENGTH_ERROR   = -2,  /**< Length of data buffer is incorrect */
    CSKY_MATH_SIZE_MISMATCH  = -3,  /**< Size of matrices is not compatible with the operation. */
    CSKY_MATH_NANINF         = -4,  /**< Not-a-number (NaN) or infinity is generated */
    CSKY_MATH_SINGULAR       = -5,  /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
    CSKY_MATH_TEST_FAILURE   = -6   /**< Test Failed  */
} csky_status;
510 
/*
 * Scalar data types used throughout the library.
 */

/** @brief 8-bit fractional data type in 1.7 format. */
typedef int8_t q7_t;

/** @brief 16-bit fractional data type in 1.15 format. */
typedef int16_t q15_t;

/** @brief 32-bit fractional data type in 1.31 format. */
typedef int32_t q31_t;

/** @brief 64-bit fractional data type in 1.63 format. */
typedef int64_t q63_t;

/** @brief 32-bit floating-point type definition. */
typedef float float32_t;

/** @brief 64-bit floating-point type definition. */
typedef double float64_t;
540 
541    /**
542    * @brief 32-bit fractional complex data type in 1.31 format.
543    */
544 typedef struct {
545     q31_t re;
546     q31_t im;
547 } cq31_t;
548   /**
549    * @brief 16-bit fractional complex data type in 1.15 format.
550    */
551 typedef struct {
552     q15_t re;
553     q15_t im;
554 } cq15_t;
/**
 * @brief Definitions to read/write two 16 bit values as one 32-bit word.
 *
 * __SIMD32(addr)        treats the pointer variable addr as an lvalue of
 *                       type __SIMD32_TYPE * (so it can be post-incremented).
 * __SIMD32_CONST(addr)  casts the pointer value addr to __SIMD32_TYPE *.
 * _SIMD32_OFFSET(addr)  dereferences addr as a __SIMD32_TYPE.
 * __SIMD64(addr)        treats the pointer variable addr as an lvalue of
 *                       type int64_t *.
 * CSI_UNUSED            marks a declaration as intentionally unused.
 */
#define __SIMD32_TYPE int32_t
#define CSI_UNUSED __attribute__((unused))

#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
#define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
#define __SIMD64(addr)        (*(int64_t **) & (addr))
565 
566 #if defined (CSKY_MATH_NO_SIMD)
567   /**
568    * @brief definition to pack two 16 bit values.
569    */
570 #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
571                                          (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000))
572 #define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
573                                          (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF))
574 
575 #endif
576 
/**
 * @brief Definition to pack four 8 bit values into one 32-bit word.
 *
 * Without CSKY_MATH_BIG_ENDIAN, v0 lands in bits 7..0 and v3 in bits
 * 31..24; with it, the byte order is reversed (v3 in bits 7..0, v0 in
 * bits 31..24).
 */
#ifdef CSKY_MATH_BIG_ENDIAN

#define __PACKq7(v0, v1, v2, v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
                                   (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
                                   (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
                                   (((int32_t)(v0) << 24) & (int32_t)0xFF000000))

#else   /* little endian */

#define __PACKq7(v0, v1, v2, v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
                                   (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
                                   (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
                                   (((int32_t)(v3) << 24) & (int32_t)0xFF000000))

#endif  /* CSKY_MATH_BIG_ENDIAN */
594 
595   /**
596    * @brief Clips Q63 to Q31 values.
597    */
clip_q63_to_q31(q63_t x)598 static __INLINE q31_t clip_q63_to_q31(
599     q63_t x)
600 {
601     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
602         ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
603 }
604 
605   /**
606    * @brief Instance structure for the Q7 FIR filter.
607    */
608 typedef struct {
609     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
610     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
611     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
612 } csky_fir_instance_q7;
613 
614   /**
615    * @brief Instance structure for the Q15 FIR filter.
616    */
617 typedef struct {
618     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
619     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
620     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
621 } csky_fir_instance_q15;
622 
623   /**
624    * @brief Instance structure for the Q31 FIR filter.
625    */
626 typedef struct {
627     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
628     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
629     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
630 } csky_fir_instance_q31;
631 
632   /**
633    * @brief Instance structure for the floating-point FIR filter.
634    */
635 typedef struct {
636     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
637     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
638     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
639 } csky_fir_instance_f32;
640 
641 void csky_fir_q7(
642     const csky_fir_instance_q7 *S,
643     q7_t *pSrc,
644     q7_t *pDst,
645     uint32_t blockSize);
646 
647 void csky_fir_init_q7(
648     csky_fir_instance_q7 *S,
649     uint16_t numTaps,
650     q7_t *pCoeffs,
651     q7_t *pState,
652     uint32_t blockSize);
653 
654 void csky_fir_q15(
655     const csky_fir_instance_q15 *S,
656     q15_t *pSrc,
657     q15_t *pDst,
658     uint32_t blockSize);
659 
660 void csky_fir_fast_q15(
661     const csky_fir_instance_q15 *S,
662     q15_t *pSrc,
663     q15_t *pDst,
664     uint32_t blockSize);
665 
666 csky_status csky_fir_init_q15(
667     csky_fir_instance_q15 *S,
668     uint16_t numTaps,
669     q15_t *pCoeffs,
670     q15_t *pState,
671     uint32_t blockSize);
672 
673 void csky_fir_q31(
674     const csky_fir_instance_q31 *S,
675     q31_t *pSrc,
676     q31_t *pDst,
677     uint32_t blockSize);
678 
679 void csky_fir_fast_q31(
680     const csky_fir_instance_q31 *S,
681     q31_t *pSrc,
682     q31_t *pDst,
683     uint32_t blockSize);
684 
685 void csky_fir_init_q31(
686     csky_fir_instance_q31 *S,
687     uint16_t numTaps,
688     q31_t *pCoeffs,
689     q31_t *pState,
690     uint32_t blockSize);
691 
692 void csky_fir_f32(
693     const csky_fir_instance_f32 *S,
694     float32_t *pSrc,
695     float32_t *pDst,
696     uint32_t blockSize);
697 
698 void csky_fir_init_f32(
699     csky_fir_instance_f32 *S,
700     uint16_t numTaps,
701     float32_t *pCoeffs,
702     float32_t *pState,
703     uint32_t blockSize);
704 
705   /**
706    * @brief Instance structure for the Q15 Biquad cascade filter.
707    */
708 typedef struct {
709     int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
710     q15_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
711     q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
712     int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
713 } csky_biquad_casd_df1_inst_q15;
714 
715   /**
716    * @brief Instance structure for the Q31 Biquad cascade filter.
717    */
718 typedef struct {
719     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
720     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
721     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
722     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
723 } csky_biquad_casd_df1_inst_q31;
724 
725  /**
726    * @brief Instance structure for the Q31 Biquad cascade filter.
727    */
728 
729   /**
730    * @brief Instance structure for the floating-point Biquad cascade filter.
731    */
732 typedef struct {
733     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
734     float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
735     float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
736 } csky_biquad_casd_df1_inst_f32;
737 
738 void csky_biquad_cascade_df1_q15(
739     const csky_biquad_casd_df1_inst_q15 *S,
740     q15_t *pSrc,
741     q15_t *pDst,
742     uint32_t blockSize);
743 
744 void csky_biquad_cascade_df1_init_q15(
745     csky_biquad_casd_df1_inst_q15 *S,
746     uint8_t numStages,
747     q15_t *pCoeffs,
748     q15_t *pState,
749     int8_t postShift);
750 
751 void csky_biquad_cascade_df1_fast_q15(
752     const csky_biquad_casd_df1_inst_q15 *S,
753     q15_t *pSrc,
754     q15_t *pDst,
755     uint32_t blockSize);
756 
757 void csky_biquad_cascade_df1_q31(
758     const csky_biquad_casd_df1_inst_q31 *S,
759     q31_t *pSrc,
760     q31_t *pDst,
761     uint32_t blockSize);
762 
763 void csky_biquad_cascade_df1_fast_q31(
764     const csky_biquad_casd_df1_inst_q31 *S,
765     q31_t *pSrc,
766     q31_t *pDst,
767     uint32_t blockSize);
768 
769 void csky_biquad_cascade_df1_init_q31(
770     csky_biquad_casd_df1_inst_q31 *S,
771     uint8_t numStages,
772     q31_t *pCoeffs,
773     q31_t *pState,
774     int8_t postShift);
775 
776 void csky_biquad_cascade_df1_f32(
777     const csky_biquad_casd_df1_inst_f32 *S,
778     float32_t *pSrc,
779     float32_t *pDst,
780     uint32_t blockSize);
781 
782 void csky_biquad_cascade_df1_init_f32(
783     csky_biquad_casd_df1_inst_f32 *S,
784     uint8_t numStages,
785     float32_t *pCoeffs,
786     float32_t *pState);
787 
788   /**
789    * @brief Instance structure for the floating-point matrix structure.
790    */
791 typedef struct {
792     uint16_t numRows;     /**< number of rows of the matrix.     */
793     uint16_t numCols;     /**< number of columns of the matrix.  */
794     float32_t *pData;     /**< points to the data of the matrix. */
795 } csky_matrix_instance_f32;
796 
797   /**
798    * @brief Instance structure for the floating-point matrix structure.
799    */
800 typedef struct {
801     uint16_t numRows;     /**< number of rows of the matrix.     */
802     uint16_t numCols;     /**< number of columns of the matrix.  */
803     float64_t *pData;     /**< points to the data of the matrix. */
804 } csky_matrix_instance_f64;
805 
806   /**
807    * @brief Instance structure for the Q15 matrix structure.
808    */
809 typedef struct {
810     uint16_t numRows;     /**< number of rows of the matrix.     */
811     uint16_t numCols;     /**< number of columns of the matrix.  */
812     q15_t *pData;         /**< points to the data of the matrix. */
813 } csky_matrix_instance_q15;
814 
815   /**
816    * @brief Instance structure for the Q31 matrix structure.
817    */
818 typedef struct {
819     uint16_t numRows;     /**< number of rows of the matrix.     */
820     uint16_t numCols;     /**< number of columns of the matrix.  */
821     q31_t *pData;         /**< points to the data of the matrix. */
822 } csky_matrix_instance_q31;
823 
824 csky_status csky_mat_add_f32(
825     const csky_matrix_instance_f32 *pSrcA,
826     const csky_matrix_instance_f32 *pSrcB,
827     csky_matrix_instance_f32 *pDst);
828 
829 csky_status csky_mat_add_q15(
830     const csky_matrix_instance_q15 *pSrcA,
831     const csky_matrix_instance_q15 *pSrcB,
832     csky_matrix_instance_q15 *pDst);
833 
834 csky_status csky_mat_add_q31(
835     const csky_matrix_instance_q31 *pSrcA,
836     const csky_matrix_instance_q31 *pSrcB,
837     csky_matrix_instance_q31 *pDst);
838 
839 csky_status csky_mat_cmplx_mult_f32(
840     const csky_matrix_instance_f32 *pSrcA,
841     const csky_matrix_instance_f32 *pSrcB,
842     csky_matrix_instance_f32 *pDst);
843 
844 csky_status csky_mat_cmplx_mult_q15(
845     const csky_matrix_instance_q15 *pSrcA,
846     const csky_matrix_instance_q15 *pSrcB,
847     csky_matrix_instance_q15 *pDst,
848     q15_t *pScratch);
849 
850 csky_status csky_mat_cmplx_mult_q31(
851     const csky_matrix_instance_q31 *pSrcA,
852     const csky_matrix_instance_q31 *pSrcB,
853     csky_matrix_instance_q31 *pDst);
854 
855 csky_status csky_mat_trans_f32(
856     const csky_matrix_instance_f32 *pSrc,
857     csky_matrix_instance_f32 *pDst);
858 
859 csky_status csky_mat_trans_q15(
860     const csky_matrix_instance_q15 *pSrc,
861     csky_matrix_instance_q15 *pDst);
862 
863 csky_status csky_mat_trans_q31(
864     const csky_matrix_instance_q31 *pSrc,
865     csky_matrix_instance_q31 *pDst);
866 
867 csky_status csky_mat_mult_f32(
868     const csky_matrix_instance_f32 *pSrcA,
869     const csky_matrix_instance_f32 *pSrcB,
870     csky_matrix_instance_f32 *pDst);
871 
872 csky_status csky_mat_mult_q15(
873     const csky_matrix_instance_q15 *pSrcA,
874     const csky_matrix_instance_q15 *pSrcB,
875     csky_matrix_instance_q15 *pDst,
876     q15_t *pState);
877 
878 csky_status csky_mat_mult_fast_q15(
879     const csky_matrix_instance_q15 *pSrcA,
880     const csky_matrix_instance_q15 *pSrcB,
881     csky_matrix_instance_q15 *pDst,
882     q15_t *pState);
883 
884 csky_status csky_mat_mult_q31(
885     const csky_matrix_instance_q31 *pSrcA,
886     const csky_matrix_instance_q31 *pSrcB,
887     csky_matrix_instance_q31 *pDst);
888 
889 csky_status csky_mat_mult_fast_q31(
890     const csky_matrix_instance_q31 *pSrcA,
891     const csky_matrix_instance_q31 *pSrcB,
892     csky_matrix_instance_q31 *pDst);
893 
894 csky_status csky_mat_sub_f32(
895     const csky_matrix_instance_f32 *pSrcA,
896     const csky_matrix_instance_f32 *pSrcB,
897     csky_matrix_instance_f32 *pDst);
898 
899 csky_status csky_mat_sub_q15(
900     const csky_matrix_instance_q15 *pSrcA,
901     const csky_matrix_instance_q15 *pSrcB,
902     csky_matrix_instance_q15 *pDst);
903 
904 csky_status csky_mat_sub_q31(
905     const csky_matrix_instance_q31 *pSrcA,
906     const csky_matrix_instance_q31 *pSrcB,
907     csky_matrix_instance_q31 *pDst);
908 
909 csky_status csky_mat_scale_f32(
910     const csky_matrix_instance_f32 *pSrc,
911     float32_t scale,
912     csky_matrix_instance_f32 *pDst);
913 
914 csky_status csky_mat_scale_q15(
915     const csky_matrix_instance_q15 *pSrc,
916     q15_t scaleFract,
917     int32_t shift,
918     csky_matrix_instance_q15 *pDst);
919 
920 csky_status csky_mat_scale_q31(
921     const csky_matrix_instance_q31 *pSrc,
922     q31_t scaleFract,
923     int32_t shift,
924     csky_matrix_instance_q31 *pDst);
925 
926 void csky_mat_init_q31(
927     csky_matrix_instance_q31 *S,
928     uint16_t nRows,
929     uint16_t nColumns,
930     q31_t *pData);
931 
932 void csky_mat_init_q15(
933     csky_matrix_instance_q15 *S,
934     uint16_t nRows,
935     uint16_t nColumns,
936     q15_t *pData);
937 
938 void csky_mat_init_f32(
939     csky_matrix_instance_f32 *S,
940     uint16_t nRows,
941     uint16_t nColumns,
942     float32_t *pData);
943 
944   /**
945    * @brief Instance structure for the Q15 PID Control.
946    */
947 typedef struct {
948     q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
949     q15_t A1;           /**< Derived gain; presumably A1 = -Kp - 2Kd as documented for the Q31/f32 variants - TODO confirm. */
950     q15_t A2;           /**< Derived gain; presumably A2 = Kd as documented for the Q31/f32 variants - TODO confirm. */
951     q15_t state[3];     /**< The state array of length 3. */
952     q15_t Kp;           /**< The proportional gain. */
953     q15_t Ki;           /**< The integral gain. */
954     q15_t Kd;           /**< The derivative gain. */
955 } csky_pid_instance_q15;
956 
957   /**
958    * @brief Instance structure for the Q31 PID Control.
959    */
960 typedef struct {
961     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
962     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
963     q31_t A2;            /**< The derived gain, A2 = Kd . */
964     q31_t state[3];      /**< The state array of length 3. */
965     q31_t Kp;            /**< The proportional gain. */
966     q31_t Ki;            /**< The integral gain. */
967     q31_t Kd;            /**< The derivative gain. */
968 } csky_pid_instance_q31;
969 
970   /**
971    * @brief Instance structure for the floating-point PID Control.
972    */
973 typedef struct {
974     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
975     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
976     float32_t A2;          /**< The derived gain, A2 = Kd . */
977     float32_t state[3];    /**< The state array of length 3. */
978     float32_t Kp;          /**< The proportional gain. */
979     float32_t Ki;          /**< The integral gain. */
980     float32_t Kd;          /**< The derivative gain. */
981 } csky_pid_instance_f32;
982 
983 void csky_pid_init_f32(
984     csky_pid_instance_f32 *S,
985     int32_t resetStateFlag);
986 
987 void csky_pid_reset_f32(
988     csky_pid_instance_f32 *S);
989 
990 void csky_pid_init_q31(
991     csky_pid_instance_q31 *S,
992     int32_t resetStateFlag);
993 
994 void csky_pid_reset_q31(
995     csky_pid_instance_q31 *S);
996 
997 void csky_pid_init_q15(
998     csky_pid_instance_q15 *S,
999     int32_t resetStateFlag);
1000 
1001 void csky_pid_reset_q15(
1002     csky_pid_instance_q15 *S);
1003 
1004   /**
1005    * @brief Instance structure for the floating-point Linear Interpolate function.
1006    */
1007 typedef struct {
1008     uint32_t nValues;           /**< presumably the number of entries in the pYData table - TODO confirm */
1009     float32_t x1;               /**< presumably the x value of the first table entry - TODO confirm */
1010     float32_t xSpacing;         /**< presumably the spacing between consecutive x values - TODO confirm */
1011     float32_t *pYData;          /**< pointer to the table of Y values */
1012 } csky_linear_interp_instance_f32;
1013 
1014   /**
1015    * @brief Instance structure for the floating-point bilinear interpolation function.
1016    */
1017   typedef struct {
1018     uint16_t numRows;   /**< number of rows in the data table. */
1019     uint16_t numCols;   /**< number of columns in the data table. */
1020     float32_t *pData;   /**< points to the data table. */
1021 } csky_bilinear_interp_instance_f32;
1022 
1023    /**
1024    * @brief Instance structure for the Q31 bilinear interpolation function.
1025    */
1026 typedef struct {
1027     uint16_t numRows;   /**< number of rows in the data table. */
1028     uint16_t numCols;   /**< number of columns in the data table. */
1029     q31_t *pData;       /**< points to the data table. */
1030 } csky_bilinear_interp_instance_q31;
1031 
1032    /**
1033    * @brief Instance structure for the Q15 bilinear interpolation function.
1034    */
1035 typedef struct {
1036     uint16_t numRows;   /**< number of rows in the data table. */
1037     uint16_t numCols;   /**< number of columns in the data table. */
1038     q15_t *pData;       /**< points to the data table. */
1039 } csky_bilinear_interp_instance_q15;
1040 
1041    /**
1042    * @brief Instance structure for the Q7 bilinear interpolation function.
1043    */
1044 typedef struct {
1045     uint16_t numRows;   /**< number of rows in the data table. */
1046     uint16_t numCols;   /**< number of columns in the data table. */
1047     q7_t *pData;        /**< points to the data table. */
1048 } csky_bilinear_interp_instance_q7;
1049 
1050 void csky_mult_q7(
1051     q7_t *pSrcA,
1052     q7_t *pSrcB,
1053     q7_t *pDst,
1054     uint32_t blockSize);
1055 
1056 void csky_mult_q15(
1057     q15_t *pSrcA,
1058     q15_t *pSrcB,
1059     q15_t *pDst,
1060     uint32_t blockSize);
1061 
1062 void csky_mult_rnd_q15(
1063     q15_t *pSrcA,
1064     q15_t *pSrcB,
1065     q15_t *pDst,
1066     uint32_t blockSize);
1067 
1068 void csky_mult_q31(
1069     q31_t *pSrcA,
1070     q31_t *pSrcB,
1071     q31_t *pDst,
1072     uint32_t blockSize);
1073 
1074 void csky_mult_f32(
1075     float32_t *pSrcA,
1076     float32_t *pSrcB,
1077     float32_t *pDst,
1078     uint32_t blockSize);
1079 
1080   /**
1081    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
1082    */
1083 typedef struct {
1084     uint16_t fftLen;                 /**< length of the FFT. */
1085     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1086     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1087     uint8_t bitReverseFlag;
1088     q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
1089     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1090     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1091     uint16_t twidCoefModifier;
1092     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1093     uint16_t bitRevFactor;
1094 } csky_cfft_radix2_instance_q15;
1095 
1096   /**
1097    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
1098    */
1099 typedef struct {
1100     uint16_t fftLen;                 /**< length of the FFT. */
1101     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1102     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1103     uint8_t bitReverseFlag;
1104     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
1105     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1106     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1107     uint16_t twidCoefModifier;
1108     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1109     uint16_t bitRevFactor;
1110 } csky_cfft_radix4_instance_q15;
1111 
1112   /**
1113    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
1114    */
1115 typedef struct {
1116     uint16_t fftLen;                 /**< length of the FFT. */
1117     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1118     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1119     uint8_t bitReverseFlag;
1120     q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
1121     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1122     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1123     uint16_t twidCoefModifier;
1124     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1125     uint16_t bitRevFactor;
1126 } csky_cfft_radix2_instance_q31;
1127 
1128   /**
1129    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
1130    */
1131 typedef struct {
1132     uint16_t fftLen;                 /**< length of the FFT. */
1133     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1134     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1135     uint8_t bitReverseFlag;
1136     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
1137     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1138     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1139     uint16_t twidCoefModifier;
1140     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1141     uint16_t bitRevFactor;
1142 } csky_cfft_radix4_instance_q31;
1143 
1144   /**
1145    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
1146    */
1147 typedef struct {
1148     uint16_t fftLen;                   /**< length of the FFT. */
1149     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1150     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1151     uint8_t bitReverseFlag;
1152     float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
1153     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
1154     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1155     uint16_t twidCoefModifier;
1156     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1157     uint16_t bitRevFactor;
1158     float32_t onebyfftLen;             /**< value of 1/fftLen. */
1159 } csky_cfft_radix2_instance_f32;
1160 
1161   /**
1162    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
1163    */
1164 typedef struct {
1165     uint16_t fftLen;                   /**< length of the FFT. */
1166     uint8_t ifftFlag;                 /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1167     /** flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1168     uint8_t bitReverseFlag;
1169     float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
1170     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
1171     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1172     uint16_t twidCoefModifier;
1173     /** bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1174     uint16_t bitRevFactor;
1175     float32_t onebyfftLen;             /**< value of 1/fftLen. */
1176 } csky_cfft_radix4_instance_f32;
1177 
1178   /**
1179    * @brief Instance structure for the Q15 fixed-point CFFT/CIFFT function.
1180    */
1181 typedef struct {
1182     uint16_t fftLen;                   /**< length of the FFT. */
1183     const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
1184     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
1185     uint16_t bitRevLength;             /**< bit reversal table length. */
1186 } csky_cfft_instance_q15;
1187 
1188 void csky_cfft_q15(
1189     const csky_cfft_instance_q15 *S,
1190     q15_t *p1,
1191     uint8_t ifftFlag,
1192     uint8_t bitReverseFlag);
1193 
1194   /**
1195    * @brief Instance structure for the Q31 fixed-point CFFT/CIFFT function.
1196    */
1197 typedef struct {
1198     uint16_t fftLen;                   /**< length of the FFT. */
1199     const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
1200     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
1201     uint16_t bitRevLength;             /**< bit reversal table length. */
1202 } csky_cfft_instance_q31;
1203 
1204 void csky_cfft_q31(
1205     const csky_cfft_instance_q31 *S,
1206     q31_t *p1,
1207     uint8_t ifftFlag,
1208     uint8_t bitReverseFlag);
1209 
1210   /**
1211    * @brief Instance structure for the floating-point CFFT/CIFFT function.
1212    */
1213 typedef struct {
1214     uint16_t fftLen;                   /**< length of the FFT. */
1215     const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
1216     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
1217     uint16_t bitRevLength;             /**< bit reversal table length. */
1218 } csky_cfft_instance_f32;
1219 
1220 void csky_cfft_f32(
1221     const csky_cfft_instance_f32 *S,
1222     float32_t *p1,
1223     uint8_t ifftFlag,
1224     uint8_t bitReverseFlag);
1225 
1226     /**
1227      * @brief Instance structure for the Q15 RFFT/RIFFT function.
1228      */
1229 typedef struct {
1230     uint32_t fftLenReal;                      /**< length of the real FFT. */
1231     uint8_t ifftFlagR;           /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1232     /** flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1233     uint8_t bitReverseFlagR;
1234     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1235     uint32_t twidCoefRModifier;
1236     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
1237     const csky_cfft_instance_q15 *pCfft;      /**< points to the complex FFT instance. */
1238 } csky_rfft_instance_q15;
1239 
1240 csky_status csky_rfft_init_q15(
1241     csky_rfft_instance_q15 *S,
1242     uint32_t fftLenReal,
1243     uint32_t ifftFlagR,
1244     uint32_t bitReverseFlag);
1245 
1246 void csky_rfft_q15(
1247     const csky_rfft_instance_q15 *S,
1248     q15_t *pSrc,
1249     q15_t *pDst);
1250 
1251   /**
1252    * @brief Instance structure for the Q31 RFFT/RIFFT function.
1253    */
1254 typedef struct {
1255     uint32_t fftLenReal;                        /**< length of the real FFT. */
1256     uint8_t ifftFlagR;            /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1257     /** flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1258     uint8_t bitReverseFlagR;
1259     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1260     uint32_t twidCoefRModifier;
1261     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
1262     const csky_cfft_instance_q31 *pCfft;        /**< points to the complex FFT instance. */
1263 } csky_rfft_instance_q31;
1264 
1265 csky_status csky_rfft_init_q31(
1266     csky_rfft_instance_q31 *S,
1267     uint32_t fftLenReal,
1268     uint32_t ifftFlagR,
1269     uint32_t bitReverseFlag);
1270 
1271 void csky_rfft_q31(
1272     const csky_rfft_instance_q31 *S,
1273     q31_t *pSrc,
1274     q31_t *pDst);
1275 
1276   /**
1277    * @brief Instance structure for the floating-point RFFT/RIFFT function.
1278   */
1279 typedef struct {
1280     uint32_t fftLenReal;                        /**< length of the real FFT. */
1281     uint16_t fftLenBy2;                         /**< length of the complex FFT. */
1282     /** flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
1283     uint8_t ifftFlagR;
1284     /** flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
1285     uint8_t bitReverseFlagR;
1286     /** twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1287     uint32_t twidCoefRModifier;
1288     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
1289     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
1290     csky_cfft_radix4_instance_f32 *pCfft;       /**< points to the complex FFT instance. */
1291 } csky_rfft_instance_f32;
1292 
1293 csky_status csky_rfft_init_f32(
1294     csky_rfft_instance_f32 *S,
1295     csky_cfft_radix4_instance_f32 *S_CFFT,
1296     uint32_t fftLenReal,
1297     uint32_t ifftFlagR,
1298     uint32_t bitReverseFlag);
1299 
1300 void csky_rfft_f32(
1301     const csky_rfft_instance_f32 *S,
1302     float32_t *pSrc,
1303     float32_t *pDst);
1304 
1305   /**
1306    * @brief Instance structure for the floating-point fast RFFT/RIFFT function.
1307    */
1308 typedef struct {
1309     csky_cfft_instance_f32 Sint;     /**< Internal CFFT structure. */
1310     uint16_t fftLenRFFT;             /**< length of the real sequence */
1311     float32_t *pTwiddleRFFT;        /**< Twiddle factors real stage  */
1312 } csky_rfft_fast_instance_f32 ;
1313 
1314 csky_status csky_rfft_fast_init_f32 (
1315     csky_rfft_fast_instance_f32 *S,
1316     uint16_t fftLen);
1317 
1318 void csky_rfft_fast_f32(
1319     csky_rfft_fast_instance_f32 *S,
1320     float32_t *p, float32_t *pOut,
1321     uint8_t ifftFlag);
1322 
1323   /**
1324    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
1325    */
1326 typedef struct {
1327     uint16_t N;                           /**< length of the DCT4. */
1328     uint16_t Nby2;                        /**< half of the length of the DCT4. */
1329     float32_t normalize;                  /**< normalizing factor. */
1330     float32_t *pTwiddle;                  /**< points to the twiddle factor table. */
1331     float32_t *pCosFactor;                /**< points to the cosFactor table. */
1332     csky_rfft_fast_instance_f32 *pRfft;   /**< points to the real FFT fast instance. */
1333     csky_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
1334 } csky_dct4_instance_f32;
1335 
1336 csky_status csky_dct4_init_f32(
1337     csky_dct4_instance_f32 *S,
1338     csky_rfft_fast_instance_f32 *S_RFFT,
1339     csky_cfft_radix4_instance_f32 *S_CFFT,
1340     uint16_t N,
1341     uint16_t Nby2,
1342     float32_t normalize);
1343 
1344 void csky_dct4_f32(
1345     const csky_dct4_instance_f32 *S,
1346     float32_t *pState,
1347     float32_t *pInlineBuffer);
1348 
1349   /**
1350    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
1351    */
1352 typedef struct {
1353     uint16_t N;                           /**< length of the DCT4. */
1354     uint16_t Nby2;                        /**< half of the length of the DCT4. */
1355     q31_t normalize;                      /**< normalizing factor. */
1356     q31_t *pTwiddle;                      /**< points to the twiddle factor table. */
1357     q31_t *pCosFactor;                    /**< points to the cosFactor table. */
1358     csky_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
1359     csky_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
1360 } csky_dct4_instance_q31;
1361 
1362 csky_status csky_dct4_init_q31(
1363     csky_dct4_instance_q31 *S,
1364     csky_rfft_instance_q31 *S_RFFT,
1365     csky_cfft_radix4_instance_q31 *S_CFFT,
1366     uint16_t N,
1367     uint16_t Nby2,
1368     q31_t normalize);
1369 
1370 void csky_dct4_q31(
1371     const csky_dct4_instance_q31 *S,
1372     q31_t *pState,
1373     q31_t *pInlineBuffer);
1374 
1375   /**
1376    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
1377    */
1378 typedef struct {
1379     uint16_t N;                           /**< length of the DCT4. */
1380     uint16_t Nby2;                        /**< half of the length of the DCT4. */
1381     q15_t normalize;                      /**< normalizing factor. */
1382     q15_t *pTwiddle;                      /**< points to the twiddle factor table. */
1383     q15_t *pCosFactor;                    /**< points to the cosFactor table. */
1384     csky_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
1385     csky_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
1386 } csky_dct4_instance_q15;
1387 
1388 csky_status csky_dct4_init_q15(
1389     csky_dct4_instance_q15 *S,
1390     csky_rfft_instance_q15 *S_RFFT,
1391     csky_cfft_radix4_instance_q15 *S_CFFT,
1392     uint16_t N,
1393     uint16_t Nby2,
1394     q15_t normalize);
1395 
1396 void csky_dct4_q15(
1397     const csky_dct4_instance_q15 *S,
1398     q15_t *pState,
1399     q15_t *pInlineBuffer);
1400 
1401 void csky_add_f32(
1402     float32_t *pSrcA,
1403     float32_t *pSrcB,
1404     float32_t *pDst,
1405     uint32_t blockSize);
1406 
1407 void csky_add_q7(
1408     q7_t *pSrcA,
1409     q7_t *pSrcB,
1410     q7_t *pDst,
1411     uint32_t blockSize);
1412 
1413 void csky_add_q15(
1414     q15_t *pSrcA,
1415     q15_t *pSrcB,
1416     q15_t *pDst,
1417     uint32_t blockSize);
1418 
1419 void csky_add_q31(
1420     q31_t *pSrcA,
1421     q31_t *pSrcB,
1422     q31_t *pDst,
1423     uint32_t blockSize);
1424 
1425 void csky_sub_f32(
1426     float32_t *pSrcA,
1427     float32_t *pSrcB,
1428     float32_t *pDst,
1429     uint32_t blockSize);
1430 
1431 void csky_sub_q7(
1432     q7_t *pSrcA,
1433     q7_t *pSrcB,
1434     q7_t *pDst,
1435     uint32_t blockSize);
1436 
1437 void csky_sub_q15(
1438     q15_t *pSrcA,
1439     q15_t *pSrcB,
1440     q15_t *pDst,
1441     uint32_t blockSize);
1442 
1443 void csky_sub_q31(
1444     q31_t *pSrcA,
1445     q31_t *pSrcB,
1446     q31_t *pDst,
1447     uint32_t blockSize);
1448 
1449 void csky_scale_f32(
1450     float32_t *pSrc,
1451     float32_t scale,
1452     float32_t *pDst,
1453     uint32_t blockSize);
1454 
1455 void csky_scale_q7(
1456     q7_t *pSrc,
1457     q7_t scaleFract,
1458     int8_t shift,
1459     q7_t *pDst,
1460     uint32_t blockSize);
1461 
1462 void csky_scale_q15(
1463     q15_t *pSrc,
1464     q15_t scaleFract,
1465     int8_t shift,
1466     q15_t *pDst,
1467     uint32_t blockSize);
1468 
1469 void csky_scale_q31(
1470     q31_t *pSrc,
1471     q31_t scaleFract,
1472     int8_t shift,
1473     q31_t *pDst,
1474     uint32_t blockSize);
1475 
1476 void csky_abs_q7(
1477     q7_t *pSrc,
1478     q7_t *pDst,
1479     uint32_t blockSize);
1480 
1481 void csky_abs_f32(
1482     float32_t *pSrc,
1483     float32_t *pDst,
1484     uint32_t blockSize);
1485 
1486 void csky_abs_q15(
1487     q15_t *pSrc,
1488     q15_t *pDst,
1489     uint32_t blockSize);
1490 
1491 void csky_abs_q31(
1492     q31_t *pSrc,
1493     q31_t *pDst,
1494     uint32_t blockSize);
1495 
1496 void csky_abs_max_q15(
1497     q15_t *pSrc,
1498     q15_t *pDst,
1499     uint32_t blockSize);
1500 
1501 void csky_abs_max_q31(
1502     q31_t *pSrc,
1503     q31_t *pDst,
1504     uint32_t blockSize);
1505 
1506 void csky_dot_prod_f32(
1507     float32_t *pSrcA,
1508     float32_t *pSrcB,
1509     uint32_t blockSize,
1510     float32_t *result);
1511 
1512 void csky_dot_prod_q7(
1513     q7_t *pSrcA,
1514     q7_t *pSrcB,
1515     uint32_t blockSize,
1516     q31_t *result);
1517 
1518 void csky_dot_prod_q15(
1519     q15_t *pSrcA,
1520     q15_t *pSrcB,
1521     uint32_t blockSize,
1522     q63_t *result);
1523 
1524 void csky_dot_prod_q31(
1525     q31_t *pSrcA,
1526     q31_t *pSrcB,
1527     uint32_t blockSize,
1528     q63_t *result);
1529 
1530 void csky_shift_q7(
1531     q7_t *pSrc,
1532     int8_t shiftBits,
1533     q7_t *pDst,
1534     uint32_t blockSize);
1535 
1536 void csky_shift_q15(
1537     q15_t *pSrc,
1538     int8_t shiftBits,
1539     q15_t *pDst,
1540     uint32_t blockSize);
1541 
1542 void csky_shift_q31(
1543     q31_t *pSrc,
1544     int8_t shiftBits,
1545     q31_t *pDst,
1546     uint32_t blockSize);
1547 
1548 void csky_offset_f32(
1549     float32_t *pSrc,
1550     float32_t offset,
1551     float32_t *pDst,
1552     uint32_t blockSize);
1553 
1554 void csky_offset_q7(
1555     q7_t *pSrc,
1556     q7_t offset,
1557     q7_t *pDst,
1558     uint32_t blockSize);
1559 
1560 void csky_offset_q15(
1561     q15_t *pSrc,
1562     q15_t offset,
1563     q15_t *pDst,
1564     uint32_t blockSize);
1565 
1566 void csky_offset_q31(
1567     q31_t *pSrc,
1568     q31_t offset,
1569     q31_t *pDst,
1570     uint32_t blockSize);
1571 
1572 void csky_negate_f32(
1573     float32_t *pSrc,
1574     float32_t *pDst,
1575     uint32_t blockSize);
1576 
1577 void csky_negate_q7(
1578     q7_t *pSrc,
1579     q7_t *pDst,
1580     uint32_t blockSize);
1581 
1582 void csky_negate_q15(
1583     q15_t *pSrc,
1584     q15_t *pDst,
1585     uint32_t blockSize);
1586 
1587 void csky_negate_q31(
1588     q31_t *pSrc,
1589     q31_t *pDst,
1590     uint32_t blockSize);
1591 
1592 void csky_copy_f32(
1593     float32_t *pSrc,
1594     float32_t *pDst,
1595     uint32_t blockSize);
1596 
1597 void csky_copy_q7(
1598     q7_t *pSrc,
1599     q7_t *pDst,
1600     uint32_t blockSize);
1601 
1602 void csky_copy_q15(
1603     q15_t *pSrc,
1604     q15_t *pDst,
1605     uint32_t blockSize);
1606 
1607 void csky_copy_q31(
1608     q31_t *pSrc,
1609     q31_t *pDst,
1610     uint32_t blockSize);
1611 
1612 void csky_fill_f32(
1613     float32_t value,
1614     float32_t *pDst,
1615     uint32_t blockSize);
1616 
1617 void csky_fill_q7(
1618     q7_t value,
1619     q7_t *pDst,
1620     uint32_t blockSize);
1621 
1622 void csky_fill_q15(
1623     q15_t value,
1624     q15_t *pDst,
1625     uint32_t blockSize);
1626 
1627 void csky_fill_q31(
1628     q31_t value,
1629     q31_t *pDst,
1630     uint32_t blockSize);
1631 
1632 void csky_conv_f32(
1633     float32_t *pSrcA,
1634     uint32_t srcALen,
1635     float32_t *pSrcB,
1636     uint32_t srcBLen,
1637     float32_t *pDst);
1638 
1639 void csky_conv_opt_q15(
1640     q15_t *pSrcA,
1641     uint32_t srcALen,
1642     q15_t *pSrcB,
1643     uint32_t srcBLen,
1644     q15_t *pDst,
1645     q15_t *pScratch1,
1646     q15_t *pScratch2);
1647 
1648 void csky_conv_q15(
1649     q15_t *pSrcA,
1650     uint32_t srcALen,
1651     q15_t *pSrcB,
1652     uint32_t srcBLen,
1653     q15_t *pDst);
1654 
1655 void csky_conv_fast_q15(
1656     q15_t *pSrcA,
1657     uint32_t srcALen,
1658     q15_t *pSrcB,
1659     uint32_t srcBLen,
1660     q15_t *pDst);
1661 
1662 void csky_conv_fast_opt_q15(
1663     q15_t *pSrcA,
1664     uint32_t srcALen,
1665     q15_t *pSrcB,
1666     uint32_t srcBLen,
1667     q15_t *pDst,
1668     q15_t *pScratch1,
1669     q15_t *pScratch2);
1670 
1671 void csky_conv_q31(
1672     q31_t *pSrcA,
1673     uint32_t srcALen,
1674     q31_t *pSrcB,
1675     uint32_t srcBLen,
1676     q31_t *pDst);
1677 
1678 void csky_conv_fast_q31(
1679     q31_t *pSrcA,
1680     uint32_t srcALen,
1681     q31_t *pSrcB,
1682     uint32_t srcBLen,
1683     q31_t *pDst);
1684 
1685 void csky_conv_opt_q7(
1686     q7_t *pSrcA,
1687     uint32_t srcALen,
1688     q7_t *pSrcB,
1689     uint32_t srcBLen,
1690     q7_t *pDst,
1691     q15_t *pScratch1,
1692     q15_t *pScratch2);
1693 
1694 void csky_conv_q7(
1695     q7_t *pSrcA,
1696     uint32_t srcALen,
1697     q7_t *pSrcB,
1698     uint32_t srcBLen,
1699     q7_t *pDst);
1700 
1701 csky_status csky_conv_partial_f32(
1702     float32_t *pSrcA,
1703     uint32_t srcALen,
1704     float32_t *pSrcB,
1705     uint32_t srcBLen,
1706     float32_t *pDst,
1707     uint32_t firstIndex,
1708     uint32_t numPoints);
1709 
1710 csky_status csky_conv_partial_opt_q15(
1711     q15_t *pSrcA,
1712     uint32_t srcALen,
1713     q15_t *pSrcB,
1714     uint32_t srcBLen,
1715     q15_t *pDst,
1716     uint32_t firstIndex,
1717     uint32_t numPoints,
1718     q15_t *pScratch1,
1719     q15_t *pScratch2);
1720 
1721 csky_status csky_conv_partial_q15(
1722     q15_t *pSrcA,
1723     uint32_t srcALen,
1724     q15_t *pSrcB,
1725     uint32_t srcBLen,
1726     q15_t *pDst,
1727     uint32_t firstIndex,
1728     uint32_t numPoints);
1729 
1730 csky_status csky_conv_partial_fast_q15(
1731     q15_t *pSrcA,
1732     uint32_t srcALen,
1733     q15_t *pSrcB,
1734     uint32_t srcBLen,
1735     q15_t *pDst,
1736     uint32_t firstIndex,
1737     uint32_t numPoints);
1738 
1739 csky_status csky_conv_partial_fast_opt_q15(
1740     q15_t *pSrcA,
1741     uint32_t srcALen,
1742     q15_t *pSrcB,
1743     uint32_t srcBLen,
1744     q15_t *pDst,
1745     uint32_t firstIndex,
1746     uint32_t numPoints,
1747     q15_t *pScratch1,
1748     q15_t *pScratch2);
1749 
1750 csky_status csky_conv_partial_q31(
1751     q31_t *pSrcA,
1752     uint32_t srcALen,
1753     q31_t *pSrcB,
1754     uint32_t srcBLen,
1755     q31_t *pDst,
1756     uint32_t firstIndex,
1757     uint32_t numPoints);
1758 
1759 csky_status csky_conv_partial_fast_q31(
1760     q31_t *pSrcA,
1761     uint32_t srcALen,
1762     q31_t *pSrcB,
1763     uint32_t srcBLen,
1764     q31_t *pDst,
1765     uint32_t firstIndex,
1766     uint32_t numPoints);
1767 
/**
 * Q7 partial convolution prototypes.
 *
 * Both take two q7_t inputs with lengths, a q7_t output, firstIndex and
 * numPoints, and return a csky_status. Note that the "opt" variant's two
 * scratch buffers are q15_t, not q7_t.
 * NOTE(review): required scratch sizes are not visible here — confirm.
 */
1768 csky_status csky_conv_partial_opt_q7(
1769     q7_t *pSrcA,
1770     uint32_t srcALen,
1771     q7_t *pSrcB,
1772     uint32_t srcBLen,
1773     q7_t *pDst,
1774     uint32_t firstIndex,
1775     uint32_t numPoints,
1776     q15_t *pScratch1,
1777     q15_t *pScratch2);
1778 
/** Q7 partial convolution, plain variant (no scratch buffers). */
1779 csky_status csky_conv_partial_q7(
1780     q7_t *pSrcA,
1781     uint32_t srcALen,
1782     q7_t *pSrcB,
1783     uint32_t srcBLen,
1784     q7_t *pDst,
1785     uint32_t firstIndex,
1786     uint32_t numPoints);
1787 
1788  /**
1789   * Functions for yunVoice.
1790   */
/**
 * Vector absolute-value prototypes (16- and 32-bit element types).
 *
 * The max_abs functions take an input vector A and a count N and return a
 * single value of the element type; the abs functions additionally take an
 * output vector C and return nothing.
 * NOTE(review): presumably N is the element count and max_abs returns the
 * largest |A[i]| — confirm against the implementation.
 */
1791 q15_t csky_dsp_lib_vec_max_abs16(
1792     q15_t  *A,
1793     uint32_t N);
1794 
/** 32-bit counterpart of csky_dsp_lib_vec_max_abs16. */
1795 q31_t csky_dsp_lib_vec_max_abs32(
1796     q31_t  *A,
1797     uint32_t N);
1798 
/** 16-bit vector absolute value: input A, count N, output C. */
1799 void csky_dsp_lib_vec_abs16(
1800     q15_t  *A,
1801     uint32_t N,
1802     q15_t  *C);
1803 
/** 32-bit vector absolute value: input A, count N, output C. */
1804 void csky_dsp_lib_vec_abs32(
1805     q31_t *A,
1806     uint32_t N,
1807     q31_t *C);
1808 
/**
 * Vector addition prototypes: inputs A and B, count N, output C.
 * NOTE(review): presumably element-wise C[i] = A[i] + B[i]; whether the
 * result saturates is not visible here — confirm.
 */
1809 void csky_dsp_lib_vec_add16(
1810     q15_t *A,
1811     q15_t *B,
1812     uint32_t N,
1813     q15_t *C);
1814 
/** 32-bit counterpart of csky_dsp_lib_vec_add16. */
1815 void csky_dsp_lib_vec_add32(
1816     q31_t *A,
1817     q31_t *B,
1818     uint32_t N,
1819     q31_t *C);
1820 
/**
 * Complex-conjugate prototypes: input A, count N, one output vector.
 *
 * The output parameter is named B in the q15 variant and C in the q31
 * variant; both are plain q15_t/q31_t pointers rather than cq15_t/cq31_t.
 * NOTE(review): presumably the data is interleaved real/imaginary and N
 * counts complex samples — confirm against the implementation.
 */
1821 void csky_dsp_lib_vec_cx_conj_q15(
1822     q15_t *A,
1823     uint32_t N,
1824     q15_t *B);
1825 
/** Q31 complex conjugate; output parameter is named C here. */
1826 void csky_dsp_lib_vec_cx_conj_q31(
1827     q31_t *A,
1828     uint32_t N,
1829     q31_t *C);
1830 
/**
 * Dot-product prototypes: inputs A and B, count N.
 *
 * Both the q15 and the q31 variant return a q31_t.
 * NOTE(review): the scaling/saturation applied to fit the accumulated sum
 * into q31_t is not visible here — confirm against the implementation.
 */
1831 q31_t csky_dsp_lib_vec_dot_q15(
1832     q15_t *A,
1833     q15_t *B,
1834     uint32_t N);
1835 
/** Q31 dot product; also returns q31_t. */
1836 q31_t csky_dsp_lib_vec_dot_q31(
1837     q31_t *A,
1838     q31_t *B,
1839     uint32_t N);
1840 
/**
 * Complex matrix prototypes (add, multiply, subtract) on cq15_t / cq31_t.
 *
 * add/sub take inputs A and B, two dimensions N and M, and an output C.
 * mul takes a third dimension L.
 * NOTE(review): which of N/M/L is rows vs. columns, and the storage order,
 * are not visible in this header — presumably A is N x M and B is M x L for
 * mul; confirm against the implementation.
 */
1841 void csky_dsp_lib_mat_cx_add16(
1842     cq15_t *A,
1843     cq15_t *B,
1844     uint32_t N,
1845     uint32_t M,
1846     cq15_t *C);
1847 
/** 32-bit complex matrix addition; same argument layout as the 16-bit one. */
1848 void csky_dsp_lib_mat_cx_add32(
1849     cq31_t *A,
1850     cq31_t *B,
1851     uint32_t N,
1852     uint32_t M,
1853     cq31_t *C);
1854 
/** Q15 complex matrix multiplication with dimensions N, M and L. */
1855 void csky_dsp_lib_mat_cx_mul_q15(
1856     cq15_t *A,
1857     cq15_t *B,
1858     uint32_t N,
1859     uint32_t M,
1860     uint32_t L,
1861     cq15_t *C);
1862 
/** Q31 complex matrix multiplication with dimensions N, M and L. */
1863 void csky_dsp_lib_mat_cx_mul_q31(
1864     cq31_t *A,
1865     cq31_t *B,
1866     uint32_t N,
1867     uint32_t M,
1868     uint32_t L,
1869     cq31_t *C);
1870 
/** 16-bit complex matrix subtraction; same argument layout as add16. */
1871 void csky_dsp_lib_mat_cx_sub16(
1872     cq15_t *A,
1873     cq15_t *B,
1874     uint32_t N,
1875     uint32_t M,
1876     cq15_t *C);
1877 
/** 32-bit complex matrix subtraction; same argument layout as add32. */
1878 void csky_dsp_lib_mat_cx_sub32(
1879     cq31_t *A,
1880     cq31_t *B,
1881     uint32_t N,
1882     uint32_t M,
1883     cq31_t *C);
1884 
/**
 * Vector multiplication prototypes: inputs A and B, count N, output C.
 * NOTE(review): presumably element-wise fractional multiplication in the
 * respective Q format — confirm against the implementation.
 */
1885 void csky_dsp_lib_vec_mul_q15(
1886     q15_t *A,
1887     q15_t *B,
1888     uint32_t N,
1889     q15_t *C);
1890 
/** Q31 counterpart of csky_dsp_lib_vec_mul_q15. */
1891 void csky_dsp_lib_vec_mul_q31(
1892     q31_t *A,
1893     q31_t *B,
1894     uint32_t N,
1895     q31_t *C);
1896 
/**
 * @brief Prototype of the 32-bit power function.
 *
 * Takes two (q31_t value, q15_t exponent) pairs, x and y, returns a q31_t
 * and has an additional q31_t output pointer, arg_exp_out.
 * NOTE(review): presumably each pair is a mantissa/exponent representation,
 * the result is x raised to y, and the result's exponent is written through
 * arg_exp_out — none of this is visible here; confirm against the
 * implementation.
 */
1897 q31_t csky_dsp_lib_pow_int32(
1898     q31_t arg_in_x,
1899     q15_t arg_exp_in_x,
1900     q31_t arg_in_y,
1901     q15_t arg_exp_in_y,
1902     q31_t *arg_exp_out);
1903 
/**
 * Vector scaling prototypes: input A, fractional scale factor, shift,
 * output B, count N.
 *
 * Unlike the other csky_dsp_lib_vec_* prototypes in this header, the output
 * pointer (B) comes before the count (N).
 * NOTE(review): presumably B[i] = (A[i] * scaleFract) << shift with a signed
 * shift — confirm against the implementation.
 */
1904 void csky_dsp_lib_vec_scale_q15(
1905     q15_t *A,
1906     q15_t scaleFract,
1907     int8_t shift,
1908     q15_t *B,
1909     uint32_t N);
1910 
/** Q31 counterpart of csky_dsp_lib_vec_scale_q15 (same argument order). */
1911 void csky_dsp_lib_vec_scale_q31(
1912     q31_t *A,
1913     q31_t scaleFract,
1914     int8_t shift,
1915     q31_t *B,
1916     uint32_t N);
1917 
/**
 * Vector shift prototypes: input A, shift amount, count N, output C.
 *
 * The shift amount is an int8_t in the 16-bit variant but a q31_t in the
 * 32-bit variant.
 * NOTE(review): presumably a positive shift_val shifts left and a negative
 * one shifts right — confirm against the implementation.
 */
1918 void csky_dsp_lib_vec_shf16(
1919     q15_t *A,
1920     int8_t shift_val,
1921     uint32_t N,
1922     q15_t *C);
1923 
/** 32-bit vector shift; note shift_val is q31_t here. */
1924 void csky_dsp_lib_vec_shf32(
1925     q31_t *A,
1926     q31_t shift_val,
1927     uint32_t N,
1928     q31_t *C);
1929 
/**
 * @brief Prototype of the 32-bit square root: q31_t input, q15_t result.
 * NOTE(review): presumably rnd_flag selects rounding of the result — confirm
 * against the implementation.
 */
1930 q15_t csky_dsp_lib_sqrt_int32(
1931     q31_t x,
1932     uint32_t rnd_flag);
1933 
/**
 * Vector subtraction prototypes: inputs A and B, count N, output C.
 * NOTE(review): presumably element-wise C[i] = A[i] - B[i]; whether the
 * result saturates is not visible here — confirm.
 */
1934 void csky_dsp_lib_vec_sub16(
1935     q15_t *A,
1936     q15_t *B,
1937     uint32_t N,
1938     q15_t *C);
1939 
/** 32-bit counterpart of csky_dsp_lib_vec_sub16. */
1940 void csky_dsp_lib_vec_sub32(
1941     q31_t *A,
1942     q31_t *B,
1943     uint32_t N,
1944     q31_t *C);
1945 
/**
 * Vector sum prototypes: input A and count N; both variants return a q63_t.
 * NOTE(review): presumably the sum of the N elements of A — confirm against
 * the implementation.
 */
1946 q63_t csky_dsp_lib_vec_sum16(
1947     q15_t *A,
1948     uint32_t N);
1949 
/** 32-bit counterpart of csky_dsp_lib_vec_sum16; also returns q63_t. */
1950 q63_t csky_dsp_lib_vec_sum32(
1951     q31_t *A,
1952     uint32_t N);
1953 
/**
 * Complex FFT / inverse FFT prototypes (16- and 32-bit data).
 *
 * Each takes the log2 of the buffer length, an input and an output buffer,
 * a constant twiddle table, a constant uint16_t bit-reversal table, a
 * temporary buffer and a trailing q31_t argument br. Only the 16-bit
 * variants take the extra q7_t *ScaleShift pointer.
 * NOTE(review): presumably ScaleShift receives the scaling applied to the
 * 16-bit result and br enables bit reversal — neither is visible in this
 * header; confirm against the implementation.
 */
1954 void csky_fft_lib_cx16_fft(
1955     q31_t log2_buf_len,
1956     q15_t *in_buf,
1957     q15_t *out_buf,
1958     const q15_t *twi_table,
1959     const uint16_t *bitrev_tbl,
1960     q15_t *temp_buf,
1961     q7_t  *ScaleShift,
1962     q31_t br);
1963 
/** 32-bit complex FFT; no ScaleShift argument. */
1964 void csky_fft_lib_cx32_fft(
1965     q31_t log2_buf_len,
1966     q31_t *in_buf,
1967     q31_t *out_buf,
1968     const q31_t *twi_table,
1969     const uint16_t *bitrev_tbl,
1970     q31_t *temp_buf,
1971     q31_t br);
1972 
/** 16-bit complex inverse FFT; same argument list as csky_fft_lib_cx16_fft. */
1973 void csky_fft_lib_cx16_ifft(
1974     q31_t log2_buf_len,
1975     q15_t *in_buf,
1976     q15_t *out_buf,
1977     const q15_t *twi_table,
1978     const uint16_t *bitrev_tbl,
1979     q15_t *temp_buf,
1980     q7_t  *ScaleShift,
1981     q31_t br);
1982 
/** 32-bit complex inverse FFT; same argument list as csky_fft_lib_cx32_fft. */
1983 void csky_fft_lib_cx32_ifft(
1984     q31_t log2_buf_len,
1985     q31_t *in_buf,
1986     q31_t *out_buf,
1987     const q31_t *twi_table,
1988     const uint16_t *bitrev_tbl,
1989     q31_t *temp_buf,
1990     q31_t br);
1991 
/**
 * "int" FFT / inverse FFT prototypes (16- and 32-bit data).
 *
 * The argument lists match the csky_fft_lib_cx* prototypes with one
 * addition: a second constant table, last_stage_twi_table, after twi_table.
 * Only the 16-bit variants take the q7_t *ScaleShift pointer.
 * NOTE(review): presumably these are the real-input transforms and the
 * extra table serves the final real/complex split stage — confirm against
 * the implementation.
 */
1992 void csky_fft_lib_int16_fft(
1993     q31_t log2_buf_len,
1994     q15_t *in_buf,
1995     q15_t *out_buf,
1996     const q15_t *twi_table,
1997     const q15_t *last_stage_twi_table,
1998     const uint16_t *bitrev_tbl,
1999     q15_t *temp_buf,
2000     q7_t  *ScaleShift,
2001     q31_t br);
2002 
/** 32-bit "int" FFT; no ScaleShift argument. */
2003 void csky_fft_lib_int32_fft(
2004     q31_t log2_buf_len,
2005     q31_t *in_buf,
2006     q31_t *out_buf,
2007     const q31_t *twi_table,
2008     const q31_t *last_stage_twi_table,
2009     const uint16_t *bitrev_tbl,
2010     q31_t *temp_buf,
2011     q31_t br);
2012 
/** 16-bit "int" inverse FFT; same argument list as csky_fft_lib_int16_fft. */
2013 void csky_fft_lib_int16_ifft(
2014     q31_t log2_buf_len,
2015     q15_t *in_buf,
2016     q15_t *out_buf,
2017     const q15_t *twi_table,
2018     const q15_t *last_stage_twi_table,
2019     const uint16_t *bitrev_tbl,
2020     q15_t *temp_buf,
2021     q7_t  *ScaleShift,
2022     q31_t br);
2023 
/** 32-bit "int" inverse FFT; same argument list as csky_fft_lib_int32_fft. */
2024 void csky_fft_lib_int32_ifft(
2025     q31_t log2_buf_len,
2026     q31_t *in_buf,
2027     q31_t *out_buf,
2028     const q31_t *twi_table,
2029     const q31_t *last_stage_twi_table,
2030     const uint16_t *bitrev_tbl,
2031     q31_t *temp_buf,
2032     q31_t br);
2033 
2034 /**
2035  * @brief Instance structure for the Q15 FIR decimator.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_fir_decimate_init_q15() and to the processing
 * functions.
2036  */
2037 typedef struct {
2038     uint8_t M;                  /**< decimation factor. */
2039     uint16_t numTaps;           /**< number of coefficients in the filter. */
2040     q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
2041     q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2042 } csky_fir_decimate_instance_q15;
2043 
2044 /**
2045  * @brief Instance structure for the Q31 FIR decimator.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_fir_decimate_init_q31() and to the processing
 * functions.
2046  */
2047 typedef struct {
2048     uint8_t M;                  /**< decimation factor. */
2049     uint16_t numTaps;           /**< number of coefficients in the filter. */
2050     q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
2051     q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2052 } csky_fir_decimate_instance_q31;
2053 
2054 /**
2055  * @brief Instance structure for the floating-point FIR decimator.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_fir_decimate_init_f32() and to the processing
 * function.
2056  */
2057 typedef struct {
2058     uint8_t M;                  /**< decimation factor. */
2059     uint16_t numTaps;           /**< number of coefficients in the filter. */
2060     float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps. */
2061     float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2062 } csky_fir_decimate_instance_f32;
2063 
/**
 * @brief Prototype of the floating-point FIR decimator processing function.
 *
 * Takes a read-only instance, an input and an output buffer, and blockSize.
 * NOTE(review): presumably blockSize counts input samples and
 * blockSize/M samples are written to pDst — confirm.
 */
2064 void csky_fir_decimate_f32(
2065     const csky_fir_decimate_instance_f32 *S,
2066     float32_t *pSrc,
2067     float32_t *pDst,
2068     uint32_t blockSize);
2069 
/**
 * @brief Prototype of the floating-point FIR decimator initialiser.
 *
 * Receives the values matching the instance members (numTaps, M, pCoeffs,
 * pState) plus blockSize, and returns a csky_status.
 * NOTE(review): presumably the status reports a blockSize that is not a
 * multiple of M — confirm against the implementation.
 */
2070 csky_status csky_fir_decimate_init_f32(
2071     csky_fir_decimate_instance_f32 *S,
2072     uint16_t numTaps,
2073     uint8_t M,
2074     float32_t *pCoeffs,
2075     float32_t *pState,
2076     uint32_t blockSize);
2077 
/**
 * Q15 FIR decimator prototypes: processing, "fast" processing, and init.
 *
 * Both processing functions take a read-only instance, input and output
 * buffers, and blockSize. The initialiser receives the values matching the
 * instance members plus blockSize and returns a csky_status.
 * NOTE(review): what "fast" trades away is not visible in this header —
 * confirm against the implementation.
 */
2078 void csky_fir_decimate_q15(
2079     const csky_fir_decimate_instance_q15 *S,
2080     q15_t *pSrc,
2081     q15_t *pDst,
2082     uint32_t blockSize);
2083 
/** "Fast" Q15 FIR decimator; same argument list as csky_fir_decimate_q15. */
2084 void csky_fir_decimate_fast_q15(
2085     const csky_fir_decimate_instance_q15 *S,
2086     q15_t *pSrc,
2087     q15_t *pDst,
2088     uint32_t blockSize);
2089 
/** Q15 FIR decimator initialiser; returns a csky_status. */
2090 csky_status csky_fir_decimate_init_q15(
2091     csky_fir_decimate_instance_q15 *S,
2092     uint16_t numTaps,
2093     uint8_t M,
2094     q15_t *pCoeffs,
2095     q15_t *pState,
2096     uint32_t blockSize);
2097 
/**
 * Q31 FIR decimator prototypes: processing, "fast" processing, and init.
 *
 * NOTE(review): csky_fir_decimate_fast_q31() takes a non-const instance
 * pointer, unlike every other decimator processing prototype in this
 * header. The qualifier is left as-is because it must match the definition,
 * which is not visible here; check whether const was simply omitted.
 */
2098 void csky_fir_decimate_q31(
2099     const csky_fir_decimate_instance_q31 *S,
2100     q31_t *pSrc,
2101     q31_t *pDst,
2102     uint32_t blockSize);
2103 
/** "Fast" Q31 FIR decimator; instance pointer is not const-qualified. */
2104 void csky_fir_decimate_fast_q31(
2105     csky_fir_decimate_instance_q31 *S,
2106     q31_t *pSrc,
2107     q31_t *pDst,
2108     uint32_t blockSize);
2109 
/** Q31 FIR decimator initialiser; returns a csky_status. */
2110 csky_status csky_fir_decimate_init_q31(
2111     csky_fir_decimate_instance_q31 *S,
2112     uint16_t numTaps,
2113     uint8_t M,
2114     q31_t *pCoeffs,
2115     q31_t *pState,
2116     uint32_t blockSize);
2117 
2118 /**
2119  * @brief Instance structure for the Q15 FIR interpolator.
 *
 * The structure stores phaseLength, whereas csky_fir_interpolate_init_q15()
 * is given numTaps.
 * NOTE(review): presumably phaseLength = numTaps / L — confirm.
2120  */
2121 typedef struct {
2122     uint8_t L;              /**< upsample factor. */
2123     uint16_t phaseLength;   /**< length of each polyphase filter component. */
2124     q15_t *pCoeffs;         /**< points to the coefficient array. The array is of length L*phaseLength. */
2125     q15_t *pState;          /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2126 } csky_fir_interpolate_instance_q15;
2127 
2128 /**
2129  * @brief Instance structure for the Q31 FIR interpolator.
 *
 * The structure stores phaseLength, whereas csky_fir_interpolate_init_q31()
 * is given numTaps.
 * NOTE(review): presumably phaseLength = numTaps / L — confirm.
2130  */
2131 typedef struct {
2132     uint8_t L;              /**< upsample factor. */
2133     uint16_t phaseLength;   /**< length of each polyphase filter component. */
2134     q31_t *pCoeffs;         /**< points to the coefficient array. The array is of length L*phaseLength. */
2135     q31_t *pState;          /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2136 } csky_fir_interpolate_instance_q31;
2137 
2138 /**
2139  * @brief Instance structure for the floating-point FIR interpolator.
 *
 * The structure stores phaseLength, whereas csky_fir_interpolate_init_f32()
 * is given numTaps.
 * NOTE(review): presumably phaseLength = numTaps / L — confirm.
2140  */
2141 typedef struct {
2142     uint8_t L;               /**< upsample factor. */
2143     uint16_t phaseLength;    /**< length of each polyphase filter component. */
2144     float32_t *pCoeffs;      /**< points to the coefficient array. The array is of length L*phaseLength. */
2145     float32_t *pState;       /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2146 } csky_fir_interpolate_instance_f32;
2147 
/**
 * @brief Prototype of the Q15 FIR interpolator processing function.
 *
 * Takes a read-only instance, an input and an output buffer, and blockSize.
 * NOTE(review): presumably blockSize counts input samples and blockSize*L
 * samples are written to pDst — confirm.
 */
2148 void csky_fir_interpolate_q15(
2149     const csky_fir_interpolate_instance_q15 *S,
2150     q15_t *pSrc,
2151     q15_t *pDst,
2152     uint32_t blockSize);
2153 
/**
 * @brief Prototype of the Q15 FIR interpolator initialiser.
 *
 * Receives L, numTaps, the coefficient and state buffers and blockSize, and
 * returns a csky_status.
 * NOTE(review): presumably the status reports a numTaps that is not a
 * multiple of L — confirm against the implementation.
 */
2154 csky_status csky_fir_interpolate_init_q15(
2155     csky_fir_interpolate_instance_q15 *S,
2156     uint8_t L,
2157     uint16_t numTaps,
2158     q15_t *pCoeffs,
2159     q15_t *pState,
2160     uint32_t blockSize);
2161 
/**
 * @brief Prototype of the Q31 FIR interpolator processing function.
 *
 * Takes a read-only instance, an input and an output buffer, and blockSize.
 * NOTE(review): presumably blockSize counts input samples and blockSize*L
 * samples are written to pDst — confirm.
 */
2162 void csky_fir_interpolate_q31(
2163     const csky_fir_interpolate_instance_q31 *S,
2164     q31_t *pSrc,
2165     q31_t *pDst,
2166     uint32_t blockSize);
2167 
/**
 * @brief Prototype of the Q31 FIR interpolator initialiser.
 *
 * Receives L, numTaps, the coefficient and state buffers and blockSize, and
 * returns a csky_status.
 * NOTE(review): presumably the status reports a numTaps that is not a
 * multiple of L — confirm against the implementation.
 */
2168 csky_status csky_fir_interpolate_init_q31(
2169     csky_fir_interpolate_instance_q31 *S,
2170     uint8_t L,
2171     uint16_t numTaps,
2172     q31_t *pCoeffs,
2173     q31_t *pState,
2174     uint32_t blockSize);
2175 
/**
 * @brief Prototype of the floating-point FIR interpolator processing function.
 *
 * Takes a read-only instance, an input and an output buffer, and blockSize.
 * NOTE(review): presumably blockSize counts input samples and blockSize*L
 * samples are written to pDst — confirm.
 */
2176 void csky_fir_interpolate_f32(
2177     const csky_fir_interpolate_instance_f32 *S,
2178     float32_t *pSrc,
2179     float32_t *pDst,
2180     uint32_t blockSize);
2181 
/**
 * @brief Prototype of the floating-point FIR interpolator initialiser.
 *
 * Receives L, numTaps, the coefficient and state buffers and blockSize, and
 * returns a csky_status.
 * NOTE(review): presumably the status reports a numTaps that is not a
 * multiple of L — confirm against the implementation.
 */
2182 csky_status csky_fir_interpolate_init_f32(
2183     csky_fir_interpolate_instance_f32 *S,
2184     uint8_t L,
2185     uint16_t numTaps,
2186     float32_t *pCoeffs,
2187     float32_t *pState,
2188     uint32_t blockSize);
2189 
2190 /**
2191  * @brief Instance structure for the high precision Q31 Biquad cascade filter.
 *
 * Coefficients are q31_t while the state is kept in q63_t.
2192  */
2193 typedef struct {
2194     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
2195     q63_t *pState;           /**< points to the array of state variables.  The array is of length 4*numStages. */
2196     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
2197     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
2198 } csky_biquad_cas_df1_32x64_ins_q31;
2199 
/**
 * @brief Prototype of the high precision Q31 Biquad cascade processing function.
 *
 * Takes a read-only instance, q31_t input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2200 void csky_biquad_cas_df1_32x64_q31(
2201     const csky_biquad_cas_df1_32x64_ins_q31 *S,
2202     q31_t *pSrc,
2203     q31_t *pDst,
2204     uint32_t blockSize);
2205 
/**
 * @brief Prototype of the high precision Q31 Biquad cascade initialiser.
 *
 * Receives one value per instance member (numStages, pCoeffs, pState,
 * postShift); note that pCoeffs precedes pState here, the reverse of the
 * member order in the structure. Returns nothing.
 */
2206 void csky_biquad_cas_df1_32x64_init_q31(
2207     csky_biquad_cas_df1_32x64_ins_q31 *S,
2208     uint8_t numStages,
2209     q31_t *pCoeffs,
2210     q63_t *pState,
2211     uint8_t postShift);
2212 
2213 /**
2214  * @brief Instance structure for the single-precision floating-point transposed direct form II Biquad cascade filter.
2215  */
2216 typedef struct {
2217     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
2218     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
2219     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
2220 } csky_biquad_cascade_df2T_instance_f32;
2221 
2222 /**
2223  * @brief Instance structure for the stereo floating-point transposed direct form II Biquad cascade filter.
 *
 * Differs from csky_biquad_cascade_df2T_instance_f32 only in the documented
 * state length (4*numStages instead of 2*numStages).
2224  */
2225 typedef struct {
2226     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
2227     float32_t *pState;         /**< points to the array of state variables.  The array is of length 4*numStages. */
2228     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
2229 } csky_biquad_cascade_stereo_df2T_instance_f32;
2230 
2231 /**
2232  * @brief Instance structure for the double-precision (float64_t) floating-point transposed direct form II Biquad cascade filter.
2233  */
2234 typedef struct {
2235     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
2236     float64_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
2237     float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
2238 } csky_biquad_cascade_df2T_instance_f64;
2239 
/**
 * Processing prototypes for the transposed direct form II Biquad cascades
 * (float32_t, stereo float32_t, float64_t).
 *
 * Each takes a read-only instance, an input and an output buffer, and
 * blockSize.
 * NOTE(review): for the stereo variant the channel layout of pSrc/pDst
 * (presumably interleaved left/right) and whether blockSize counts frames
 * or samples are not visible here — confirm against the implementation.
 */
2240 void csky_biquad_cascade_df2T_f32(
2241     const csky_biquad_cascade_df2T_instance_f32 *S,
2242     float32_t *pSrc,
2243     float32_t *pDst,
2244     uint32_t blockSize);
2245 
/** Stereo float32_t variant. */
2246 void csky_biquad_cascade_stereo_df2T_f32(
2247     const csky_biquad_cascade_stereo_df2T_instance_f32 *S,
2248     float32_t *pSrc,
2249     float32_t *pDst,
2250     uint32_t blockSize);
2251 
/** float64_t variant. */
2252 void csky_biquad_cascade_df2T_f64(
2253     const csky_biquad_cascade_df2T_instance_f64 *S,
2254     float64_t *pSrc,
2255     float64_t *pDst,
2256     uint32_t blockSize);
2257 
/**
 * Initialiser prototypes for the transposed direct form II Biquad cascades
 * (float32_t, stereo float32_t, float64_t).
 *
 * Each receives one value per instance member (numStages, pCoeffs, pState)
 * and returns nothing; pCoeffs precedes pState, the reverse of the member
 * order in the structures.
 * NOTE(review): presumably the state buffer is cleared by the initialiser —
 * confirm against the implementation.
 */
2258 void csky_biquad_cascade_df2T_init_f32(
2259     csky_biquad_cascade_df2T_instance_f32 *S,
2260     uint8_t numStages,
2261     float32_t *pCoeffs,
2262     float32_t *pState);
2263 
/** Stereo float32_t variant. */
2264 void csky_biquad_cascade_stereo_df2T_init_f32(
2265     csky_biquad_cascade_stereo_df2T_instance_f32 *S,
2266     uint8_t numStages,
2267     float32_t *pCoeffs,
2268     float32_t *pState);
2269 
/** float64_t variant. */
2270 void csky_biquad_cascade_df2T_init_f64(
2271     csky_biquad_cascade_df2T_instance_f64 *S,
2272     uint8_t numStages,
2273     float64_t *pCoeffs,
2274     float64_t *pState);
2275 
2276 /**
2277  * @brief Instance structure for the Q15 FIR lattice filter.
 *
 * Unlike the other filter instances in this header, the documented state
 * length does not depend on blockSize.
2278  */
2279 typedef struct {
2280     uint16_t numStages;                  /**< number of filter stages. */
2281     q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
2282     q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
2283 } csky_fir_lattice_instance_q15;
2284 
2285 /**
2286  * @brief Instance structure for the Q31 FIR lattice filter.
 *
 * Unlike the other filter instances in this header, the documented state
 * length does not depend on blockSize.
2287  */
2288 typedef struct {
2289     uint16_t numStages;                  /**< number of filter stages. */
2290     q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
2291     q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
2292 } csky_fir_lattice_instance_q31;
2293 
2294 /**
2295  * @brief Instance structure for the floating-point FIR lattice filter.
 *
 * Unlike the other filter instances in this header, the documented state
 * length does not depend on blockSize.
2296  */
2297 typedef struct {
2298     uint16_t numStages;                  /**< number of filter stages. */
2299     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
2300     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
2301 } csky_fir_lattice_instance_f32;
2302 
/**
 * @brief Prototype of the Q15 FIR lattice initialiser.
 *
 * Receives one value per instance member (numStages, pCoeffs, pState) and
 * returns nothing.
 */
2303 void csky_fir_lattice_init_q15(
2304     csky_fir_lattice_instance_q15 *S,
2305     uint16_t numStages,
2306     q15_t *pCoeffs,
2307     q15_t *pState);
2308 
/**
 * @brief Prototype of the Q15 FIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2309 void csky_fir_lattice_q15(
2310     const csky_fir_lattice_instance_q15 *S,
2311     q15_t *pSrc,
2312     q15_t *pDst,
2313     uint32_t blockSize);
2314 
/**
 * @brief Prototype of the Q31 FIR lattice initialiser.
 *
 * Receives one value per instance member (numStages, pCoeffs, pState) and
 * returns nothing.
 */
2315 void csky_fir_lattice_init_q31(
2316     csky_fir_lattice_instance_q31 *S,
2317     uint16_t numStages,
2318     q31_t *pCoeffs,
2319     q31_t *pState);
2320 
/**
 * @brief Prototype of the Q31 FIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2321 void csky_fir_lattice_q31(
2322     const csky_fir_lattice_instance_q31 *S,
2323     q31_t *pSrc,
2324     q31_t *pDst,
2325     uint32_t blockSize);
2326 
/**
 * @brief Prototype of the floating-point FIR lattice initialiser.
 *
 * Receives one value per instance member (numStages, pCoeffs, pState) and
 * returns nothing.
 */
2327 void csky_fir_lattice_init_f32(
2328     csky_fir_lattice_instance_f32 *S,
2329     uint16_t numStages,
2330     float32_t *pCoeffs,
2331     float32_t *pState);
2332 
/**
 * @brief Prototype of the floating-point FIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2333 void csky_fir_lattice_f32(
2334     const csky_fir_lattice_instance_f32 *S,
2335     float32_t *pSrc,
2336     float32_t *pDst,
2337     uint32_t blockSize);
2338 
2339 /**
2340  * @brief Instance structure for the Q15 IIR lattice filter.
 *
 * Holds two coefficient arrays: reflection (k) and ladder (v) coefficients.
 * blockSize, which the pState length depends on, is not stored here.
2341  */
2342 typedef struct {
2343     uint16_t numStages;        /**< number of stages in the filter. */
2344     q15_t *pState;             /**< points to the state variable array. The array is of length numStages+blockSize. */
2345     q15_t *pkCoeffs;           /**< points to the reflection coefficient array. The array is of length numStages. */
2346     q15_t *pvCoeffs;           /**< points to the ladder coefficient array. The array is of length numStages+1. */
2347 } csky_iir_lattice_instance_q15;
2348 
2349 /**
2350  * @brief Instance structure for the Q31 IIR lattice filter.
 *
 * Holds two coefficient arrays: reflection (k) and ladder (v) coefficients.
 * blockSize, which the pState length depends on, is not stored here.
2351  */
2352 typedef struct {
2353     uint16_t numStages;         /**< number of stages in the filter. */
2354     q31_t *pState;              /**< points to the state variable array. The array is of length numStages+blockSize. */
2355     q31_t *pkCoeffs;            /**< points to the reflection coefficient array. The array is of length numStages. */
2356     q31_t *pvCoeffs;            /**< points to the ladder coefficient array. The array is of length numStages+1. */
2357 } csky_iir_lattice_instance_q31;
2358 
2359 /**
2360  * @brief Instance structure for the floating-point IIR lattice filter.
 *
 * Holds two coefficient arrays: reflection (k) and ladder (v) coefficients.
 * blockSize, which the pState length depends on, is not stored here.
2361  */
2362 typedef struct {
2363     uint16_t numStages;         /**< number of stages in the filter. */
2364     float32_t *pState;          /**< points to the state variable array. The array is of length numStages+blockSize. */
2365     float32_t *pkCoeffs;        /**< points to the reflection coefficient array. The array is of length numStages. */
2366     float32_t *pvCoeffs;        /**< points to the ladder coefficient array. The array is of length numStages+1. */
2367 } csky_iir_lattice_instance_f32;
2368 
/**
 * @brief Prototype of the floating-point IIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2369 void csky_iir_lattice_f32(
2370     const csky_iir_lattice_instance_f32 *S,
2371     float32_t *pSrc,
2372     float32_t *pDst,
2373     uint32_t blockSize);
2374 
/**
 * @brief Prototype of the floating-point IIR lattice initialiser.
 *
 * Receives numStages, both coefficient arrays (reflection, then ladder),
 * the state buffer and blockSize; returns nothing.
 */
2375 void csky_iir_lattice_init_f32(
2376     csky_iir_lattice_instance_f32 *S,
2377     uint16_t numStages,
2378     float32_t *pkCoeffs,
2379     float32_t *pvCoeffs,
2380     float32_t *pState,
2381     uint32_t blockSize);
2382 
/**
 * @brief Prototype of the Q31 IIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2383 void csky_iir_lattice_q31(
2384     const csky_iir_lattice_instance_q31 *S,
2385     q31_t *pSrc,
2386     q31_t *pDst,
2387     uint32_t blockSize);
2388 
/**
 * @brief Prototype of the Q31 IIR lattice initialiser.
 *
 * Receives numStages, both coefficient arrays (reflection, then ladder),
 * the state buffer and blockSize; returns nothing.
 */
2389 void csky_iir_lattice_init_q31(
2390     csky_iir_lattice_instance_q31 *S,
2391     uint16_t numStages,
2392     q31_t *pkCoeffs,
2393     q31_t *pvCoeffs,
2394     q31_t *pState,
2395     uint32_t blockSize);
2396 
/**
 * @brief Prototype of the Q15 IIR lattice processing function.
 *
 * Takes a read-only instance, input and output buffers, and blockSize.
 * NOTE(review): presumably blockSize is the number of samples to process —
 * confirm against the implementation.
 */
2397 void csky_iir_lattice_q15(
2398     const csky_iir_lattice_instance_q15 *S,
2399     q15_t *pSrc,
2400     q15_t *pDst,
2401     uint32_t blockSize);
2402 
/**
 * @brief Prototype of the Q15 IIR lattice initialiser.
 *
 * Receives numStages, both coefficient arrays (reflection, then ladder),
 * the state buffer and blockSize; returns nothing.
 */
2403 void csky_iir_lattice_init_q15(
2404     csky_iir_lattice_instance_q15 *S,
2405     uint16_t numStages,
2406     q15_t *pkCoeffs,
2407     q15_t *pvCoeffs,
2408     q15_t *pState,
2409     uint32_t blockSize);
2410 
2411 /**
2412  * @brief Instance structure for the floating-point LMS filter.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_lms_init_f32() and csky_lms_f32().
2413  */
2414 typedef struct {
2415     uint16_t numTaps;    /**< number of coefficients in the filter. */
2416     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2417     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
2418     float32_t mu;        /**< step size that controls filter coefficient updates. */
2419 } csky_lms_instance_f32;
2420 
/**
 * @brief Prototype of the floating-point LMS processing function.
 *
 * Takes a read-only instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs (filter output and
 * error signal) of blockSize samples each — confirm against the
 * implementation.
 */
2421 void csky_lms_f32(
2422     const csky_lms_instance_f32 *S,
2423     float32_t *pSrc,
2424     float32_t *pRef,
2425     float32_t *pOut,
2426     float32_t *pErr,
2427     uint32_t blockSize);
2428 
/**
 * @brief Prototype of the floating-point LMS initialiser.
 *
 * Receives one value per instance member (numTaps, pCoeffs, pState, mu)
 * plus blockSize; returns nothing.
 */
2429 void csky_lms_init_f32(
2430     csky_lms_instance_f32 *S,
2431     uint16_t numTaps,
2432     float32_t *pCoeffs,
2433     float32_t *pState,
2434     float32_t mu,
2435     uint32_t blockSize);
2436 
2437 /**
2438  * @brief Instance structure for the Q15 LMS filter.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_lms_init_q15() and csky_lms_q15().
2439  */
2440 typedef struct {
2441     uint16_t numTaps;    /**< number of coefficients in the filter. */
2442     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2443     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
2444     q15_t mu;            /**< step size that controls filter coefficient updates. */
2445     uint32_t postShift;  /**< bit shift applied to coefficients. */
2446 } csky_lms_instance_q15;
2447 
/**
 * @brief Prototype of the Q15 LMS initialiser.
 *
 * Receives one value per instance member (numTaps, pCoeffs, pState, mu,
 * postShift) plus blockSize, which sits between mu and postShift; returns
 * nothing.
 */
2448 void csky_lms_init_q15(
2449     csky_lms_instance_q15 *S,
2450     uint16_t numTaps,
2451     q15_t *pCoeffs,
2452     q15_t *pState,
2453     q15_t mu,
2454     uint32_t blockSize,
2455     uint32_t postShift);
2456 
/**
 * @brief Prototype of the Q15 LMS processing function.
 *
 * Takes a read-only instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs (filter output and
 * error signal) of blockSize samples each — confirm against the
 * implementation.
 */
2457 void csky_lms_q15(
2458     const csky_lms_instance_q15 *S,
2459     q15_t *pSrc,
2460     q15_t *pRef,
2461     q15_t *pOut,
2462     q15_t *pErr,
2463     uint32_t blockSize);
2464 
2465 /**
2466  * @brief Instance structure for the Q31 LMS filter.
 *
 * blockSize, which the pState length depends on, is not stored here; it is
 * passed separately to csky_lms_init_q31() and csky_lms_q31().
2467  */
2468 typedef struct {
2469     uint16_t numTaps;    /**< number of coefficients in the filter. */
2470     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2471     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
2472     q31_t mu;            /**< step size that controls filter coefficient updates. */
2473     uint32_t postShift;  /**< bit shift applied to coefficients. */
2474 } csky_lms_instance_q31;
2475 
/**
 * @brief Prototype of the Q31 LMS processing function.
 *
 * Takes a read-only instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs (filter output and
 * error signal) of blockSize samples each — confirm against the
 * implementation.
 */
2476 void csky_lms_q31(
2477     const csky_lms_instance_q31 *S,
2478     q31_t *pSrc,
2479     q31_t *pRef,
2480     q31_t *pOut,
2481     q31_t *pErr,
2482     uint32_t blockSize);
2483 
/**
 * @brief Prototype of the Q31 LMS initialiser.
 *
 * Receives one value per instance member (numTaps, pCoeffs, pState, mu,
 * postShift) plus blockSize, which sits between mu and postShift; returns
 * nothing.
 */
2484 void csky_lms_init_q31(
2485     csky_lms_instance_q31 *S,
2486     uint16_t numTaps,
2487     q31_t *pCoeffs,
2488     q31_t *pState,
2489     q31_t mu,
2490     uint32_t blockSize,
2491     uint32_t postShift);
2492 
2493 /**
2494  * @brief Instance structure for the floating-point normalized LMS filter.
 *
 * Besides the members of the plain LMS instance, this keeps running values
 * (energy, x0) between calls, which is consistent with csky_lms_norm_f32()
 * taking a non-const instance pointer.
2495  */
2496 typedef struct {
2497     uint16_t numTaps;     /**< number of coefficients in the filter. */
2498     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2499     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
2500     float32_t mu;         /**< step size that controls filter coefficient updates. */
2501     float32_t energy;     /**< saves previous frame energy. */
2502     float32_t x0;         /**< saves previous input sample. */
2503 } csky_lms_norm_instance_f32;
2504 
/**
 * @brief Prototype of the floating-point normalized LMS processing function.
 *
 * Takes a non-const instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs and the instance's
 * energy/x0 members are updated on every call — confirm against the
 * implementation.
 */
2505 void csky_lms_norm_f32(
2506     csky_lms_norm_instance_f32 *S,
2507     float32_t *pSrc,
2508     float32_t *pRef,
2509     float32_t *pOut,
2510     float32_t *pErr,
2511     uint32_t blockSize);
2512 
/**
 * @brief Prototype of the floating-point normalized LMS initialiser.
 *
 * Receives numTaps, pCoeffs, pState, mu and blockSize; there are no
 * arguments for the energy and x0 members.
 * NOTE(review): presumably energy and x0 are reset to zero — confirm.
 */
2513 void csky_lms_norm_init_f32(
2514     csky_lms_norm_instance_f32 *S,
2515     uint16_t numTaps,
2516     float32_t *pCoeffs,
2517     float32_t *pState,
2518     float32_t mu,
2519     uint32_t blockSize);
2520 
2521 /**
2522  * @brief Instance structure for the Q31 normalized LMS filter.
 *
 * postShift is a uint8_t here, whereas the plain Q31 LMS instance uses a
 * uint32_t. recipTable is not an argument of csky_lms_norm_init_q31().
 * NOTE(review): presumably the initialiser points recipTable at a library
 * table — confirm against the implementation.
2523  */
2524 typedef struct {
2525     uint16_t numTaps;     /**< number of coefficients in the filter. */
2526     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2527     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
2528     q31_t mu;             /**< step size that controls filter coefficient updates. */
2529     uint8_t postShift;    /**< bit shift applied to coefficients. */
2530     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
2531     q31_t energy;         /**< saves previous frame energy. */
2532     q31_t x0;             /**< saves previous input sample. */
2533 } csky_lms_norm_instance_q31;
2534 
/**
 * @brief Prototype of the Q31 normalized LMS processing function.
 *
 * Takes a non-const instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs and the instance's
 * energy/x0 members are updated on every call — confirm against the
 * implementation.
 */
2535 void csky_lms_norm_q31(
2536     csky_lms_norm_instance_q31 *S,
2537     q31_t *pSrc,
2538     q31_t *pRef,
2539     q31_t *pOut,
2540     q31_t *pErr,
2541     uint32_t blockSize);
2542 
/**
 * @brief Prototype of the Q31 normalized LMS initialiser.
 *
 * Receives numTaps, pCoeffs, pState, mu, blockSize and postShift; there are
 * no arguments for the recipTable, energy and x0 members.
 */
2543 void csky_lms_norm_init_q31(
2544     csky_lms_norm_instance_q31 *S,
2545     uint16_t numTaps,
2546     q31_t *pCoeffs,
2547     q31_t *pState,
2548     q31_t mu,
2549     uint32_t blockSize,
2550     uint8_t postShift);
2551 
2552 /**
2553  * @brief Instance structure for the Q15 normalized LMS filter.
 *
 * postShift is a uint8_t here, whereas the plain Q15 LMS instance uses a
 * uint32_t. recipTable is not an argument of csky_lms_norm_init_q15().
 * NOTE(review): presumably the initialiser points recipTable at a library
 * table — confirm against the implementation.
2554  */
2555 typedef struct {
2556     uint16_t numTaps;     /**< number of coefficients in the filter. */
2557     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2558     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
2559     q15_t mu;             /**< step size that controls filter coefficient updates. */
2560     uint8_t postShift;    /**< bit shift applied to coefficients. */
2561     q15_t *recipTable;    /**< points to the reciprocal initial value table. */
2562     q15_t energy;         /**< saves previous frame energy. */
2563     q15_t x0;             /**< saves previous input sample. */
2564 } csky_lms_norm_instance_q15;
2565 
/**
 * @brief Prototype of the Q15 normalized LMS processing function.
 *
 * Takes a non-const instance, an input buffer (pSrc), a reference buffer
 * (pRef), two further buffers (pOut, pErr) and blockSize.
 * NOTE(review): presumably pOut and pErr are outputs and the instance's
 * energy/x0 members are updated on every call — confirm against the
 * implementation.
 */
2566 void csky_lms_norm_q15(
2567     csky_lms_norm_instance_q15 *S,
2568     q15_t *pSrc,
2569     q15_t *pRef,
2570     q15_t *pOut,
2571     q15_t *pErr,
2572     uint32_t blockSize);
2573 
/**
 * @brief Prototype of the Q15 normalized LMS initialiser.
 *
 * Receives numTaps, pCoeffs, pState, mu, blockSize and postShift; there are
 * no arguments for the recipTable, energy and x0 members.
 */
2574 void csky_lms_norm_init_q15(
2575     csky_lms_norm_instance_q15 *S,
2576     uint16_t numTaps,
2577     q15_t *pCoeffs,
2578     q15_t *pState,
2579     q15_t mu,
2580     uint32_t blockSize,
2581     uint8_t postShift);
2582 
/**
 * @brief Prototype of the floating-point correlation routine.
 *
 * Takes two float32_t input buffers with their element counts and an output
 * buffer; nothing is returned.
 * NOTE(review): the required pDst length (presumably
 * 2*max(srcALen, srcBLen)-1 samples) is not visible here — confirm against
 * the implementation.
 */
2583 void csky_correlate_f32(
2584     float32_t *pSrcA,
2585     uint32_t srcALen,
2586     float32_t *pSrcB,
2587     uint32_t srcBLen,
2588     float32_t *pDst);
2589 
/**
 * Q15 correlation prototypes.
 *
 * All four take two q15_t inputs with lengths and a q15_t output, and
 * return nothing. The two "opt" variants additionally take a single q15_t
 * scratch buffer (the partial-convolution "opt" prototypes take two).
 * NOTE(review): the trade-offs implied by the "fast" and "opt" suffixes and
 * the required scratch size are not visible in this header — confirm in the
 * implementation.
 */
2590 void csky_correlate_opt_q15(
2591     q15_t *pSrcA,
2592     uint32_t srcALen,
2593     q15_t *pSrcB,
2594     uint32_t srcBLen,
2595     q15_t *pDst,
2596     q15_t *pScratch);
2597 
/** Q15 correlation, plain variant (no scratch buffer). */
2598 void csky_correlate_q15(
2599     q15_t *pSrcA,
2600     uint32_t srcALen,
2601     q15_t *pSrcB,
2602     uint32_t srcBLen,
2603     q15_t *pDst);
2604 
/** Q15 correlation, "fast" variant (no scratch buffer). */
2605 void csky_correlate_fast_q15(
2606     q15_t *pSrcA,
2607     uint32_t srcALen,
2608     q15_t *pSrcB,
2609     uint32_t srcBLen,
2610     q15_t *pDst);
2611 
/** Q15 correlation, "fast opt" variant with one scratch buffer. */
2612 void csky_correlate_fast_opt_q15(
2613     q15_t *pSrcA,
2614     uint32_t srcALen,
2615     q15_t *pSrcB,
2616     uint32_t srcBLen,
2617     q15_t *pDst,
2618     q15_t *pScratch);
2619 
/**
 * Q31 correlation prototypes (plain and "fast").
 *
 * Both take two q31_t inputs with lengths and a q31_t output, and return
 * nothing.
 * NOTE(review): how the "fast" variant differs is not visible here —
 * confirm against the implementation.
 */
2620 void csky_correlate_q31(
2621     q31_t *pSrcA,
2622     uint32_t srcALen,
2623     q31_t *pSrcB,
2624     uint32_t srcBLen,
2625     q31_t *pDst);
2626 
/** Q31 correlation, "fast" variant; same argument list as above. */
2627 void csky_correlate_fast_q31(
2628     q31_t *pSrcA,
2629     uint32_t srcALen,
2630     q31_t *pSrcB,
2631     uint32_t srcBLen,
2632     q31_t *pDst);
2633 
/**
 * Q7 correlation prototypes.
 *
 * Both take two q7_t inputs with lengths and a q7_t output, and return
 * nothing. The "opt" variant additionally takes two scratch buffers, which
 * are q15_t rather than q7_t.
 * NOTE(review): required scratch sizes are not visible here — confirm.
 */
2634 void csky_correlate_opt_q7(
2635     q7_t *pSrcA,
2636     uint32_t srcALen,
2637     q7_t *pSrcB,
2638     uint32_t srcBLen,
2639     q7_t *pDst,
2640     q15_t *pScratch1,
2641     q15_t *pScratch2);
2642 
/** Q7 correlation, plain variant (no scratch buffers). */
2643 void csky_correlate_q7(
2644     q7_t *pSrcA,
2645     uint32_t srcALen,
2646     q7_t *pSrcB,
2647     uint32_t srcBLen,
2648     q7_t *pDst);
2649 
2650 /**
2651  * @brief Instance structure for the floating-point sparse FIR filter.
 *
 * Each coefficient has its own delay, given by the matching pTapDelay
 * entry. stateIndex lives in the instance, which is consistent with
 * csky_fir_sparse_f32() taking a non-const instance pointer.
2652  */
2653 typedef struct {
2654     uint16_t numTaps;            /**< number of coefficients in the filter. */
2655     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
2656     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2657     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
2658     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
2659     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
2660 } csky_fir_sparse_instance_f32;
2661 
2662 /**
2663  * @brief Instance structure for the Q31 sparse FIR filter.
 *
 * Each coefficient has its own delay, given by the matching pTapDelay
 * entry. stateIndex lives in the instance, which is consistent with
 * csky_fir_sparse_q31() taking a non-const instance pointer.
2664  */
2665 typedef struct {
2666     uint16_t numTaps;             /**< number of coefficients in the filter. */
2667     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
2668     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2669     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
2670     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
2671     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
2672 } csky_fir_sparse_instance_q31;
2673 
2674 /**
2675  * @brief Instance structure for the Q15 sparse FIR filter.
 *
 * Each coefficient has its own delay, given by the matching pTapDelay
 * entry. stateIndex lives in the instance, which is consistent with
 * csky_fir_sparse_q15() taking a non-const instance pointer.
2676  */
2677 typedef struct {
2678     uint16_t numTaps;             /**< number of coefficients in the filter. */
2679     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
2680     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2681     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
2682     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
2683     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
2684 } csky_fir_sparse_instance_q15;
2685 
2686 /**
2687  * @brief Instance structure for the Q7 sparse FIR filter.
 *
 * Each coefficient has its own delay, given by the matching pTapDelay
 * entry. stateIndex lives in the instance, which is consistent with
 * csky_fir_sparse_q7() taking a non-const instance pointer.
2688  */
2689 typedef struct {
2690     uint16_t numTaps;             /**< number of coefficients in the filter. */
2691     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
2692     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
2693     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps. */
2694     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
2695     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
2696 } csky_fir_sparse_instance_q7;
2697 
2698 void csky_fir_sparse_f32(
2699     csky_fir_sparse_instance_f32 *S,
2700     float32_t *pSrc,
2701     float32_t *pDst,
2702     float32_t *pScratchIn,
2703     uint32_t blockSize);
2704 
2705 void csky_fir_sparse_init_f32(
2706     csky_fir_sparse_instance_f32 *S,
2707     uint16_t numTaps,
2708     float32_t *pCoeffs,
2709     float32_t *pState,
2710     int32_t *pTapDelay,
2711     uint16_t maxDelay,
2712     uint32_t blockSize);
2713 
2714 void csky_fir_sparse_q31(
2715     csky_fir_sparse_instance_q31 *S,
2716     q31_t *pSrc,
2717     q31_t *pDst,
2718     q31_t *pScratchIn,
2719     uint32_t blockSize);
2720 
2721 void csky_fir_sparse_init_q31(
2722     csky_fir_sparse_instance_q31 *S,
2723     uint16_t numTaps,
2724     q31_t *pCoeffs,
2725     q31_t *pState,
2726     int32_t *pTapDelay,
2727     uint16_t maxDelay,
2728     uint32_t blockSize);
2729 
2730 void csky_fir_sparse_q15(
2731     csky_fir_sparse_instance_q15 *S,
2732     q15_t *pSrc,
2733     q15_t *pDst,
2734     q15_t *pScratchIn,
2735     q31_t *pScratchOut,
2736     uint32_t blockSize);
2737 
2738 void csky_fir_sparse_init_q15(
2739     csky_fir_sparse_instance_q15 *S,
2740     uint16_t numTaps,
2741     q15_t *pCoeffs,
2742     q15_t *pState,
2743     int32_t *pTapDelay,
2744     uint16_t maxDelay,
2745     uint32_t blockSize);
2746 
2747 void csky_fir_sparse_q7(
2748     csky_fir_sparse_instance_q7 *S,
2749     q7_t *pSrc,
2750     q7_t *pDst,
2751     q7_t *pScratchIn,
2752     q31_t *pScratchOut,
2753     uint32_t blockSize);
2754 
2755 void csky_fir_sparse_init_q7(
2756     csky_fir_sparse_instance_q7 *S,
2757     uint16_t numTaps,
2758     q7_t *pCoeffs,
2759     q7_t *pState,
2760     int32_t *pTapDelay,
2761     uint16_t maxDelay,
2762     uint32_t blockSize);
2763 
2764 void csky_sin_cos_f32(
2765     float32_t theta,
2766     float32_t *pSinVal,
2767     float32_t *pCosVal);
2768 
2769 void csky_sin_cos_q31(
2770     q31_t theta,
2771     q31_t *pSinVal,
2772     q31_t *pCosVal);
2773 
2774 void csky_cmplx_conj_f32(
2775     float32_t *pSrc,
2776     float32_t *pDst,
2777     uint32_t numSamples);
2778 
2779 void csky_cmplx_conj_q31(
2780     q31_t *pSrc,
2781     q31_t *pDst,
2782     uint32_t numSamples);
2783 
2784 void csky_cmplx_conj_q15(
2785     q15_t *pSrc,
2786     q15_t *pDst,
2787     uint32_t numSamples);
2788 
2789 void csky_cmplx_mag_squared_f32(
2790     float32_t *pSrc,
2791     float32_t *pDst,
2792     uint32_t numSamples);
2793 
2794 void csky_cmplx_mag_squared_q31(
2795     q31_t *pSrc,
2796     q31_t *pDst,
2797     uint32_t numSamples);
2798 
2799 void csky_cmplx_mag_squared_q15(
2800     q15_t *pSrc,
2801     q15_t *pDst,
2802     uint32_t numSamples);
2803 
2804 /**
2805   * @ingroup groupController
2806   */
2807 
2808 /**
2809  * @defgroup PID PID Motor Control
2810  *
2811  * A Proportional Integral Derivative (PID) controller is a generic feedback control
2812  * loop mechanism widely used in industrial control systems.
2813  * A PID controller is the most commonly used type of feedback controller.
2814  *
2815  * This set of functions implements (PID) controllers
2816  * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
2817  * of data and each call to the function returns a single processed value.
2818  * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
2819  * is the input sample value. The functions return the output value.
2820  *
2821  * \par Algorithm:
2822  * <pre>
2823  *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
2824  *    A0 = Kp + Ki + Kd
2825  *    A1 = (-Kp) - (2 * Kd)
2826  *    A2 = Kd  </pre>
2827  *
2828  * \par
2829  * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
2830  *
2831  * \par
2832  * \image html PID.gif "Proportional Integral Derivative Controller"
2833  *
2834  * \par
2835  * The PID controller calculates an "error" value as the difference between
2836  * the measured output and the reference input.
2837  * The controller attempts to minimize the error by adjusting the process control inputs.
2838  * The proportional value determines the reaction to the current error,
2839  * the integral value determines the reaction based on the sum of recent errors,
2840  * and the derivative value determines the reaction based on the rate at which the error has been changing.
2841  *
2842  * \par Instance Structure
2843  * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
2844  * A separate instance structure must be defined for each PID Controller.
2845  * There are separate instance structure declarations for each of the 3 supported data types.
2846  *
2847  * \par Reset Functions
2848  * There is also an associated reset function for each data type which clears the state array.
2849  *
2850  * \par Initialization Functions
2851  * There is also an associated initialization function for each data type.
2852  * The initialization function performs the following operations:
2853  * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
2854  * - Zeros out the values in the state buffer.
2855  *
2856  * \par
2857  * Instance structure cannot be placed into a const data section
2858  * and it is recommended to use the initialization function.
2859  *
2860  * \par Fixed-Point Behavior
2861  * Care must be taken when using the fixed-point versions of the PID Controller functions.
2862  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
2863  * Refer to the function specific documentation below for usage guidelines.
2864  */
2865 
2866 /**
2867  * @addtogroup PID
2868  * @{
2869  */
2870 
2871 /**
2872  * @brief  Process function for the floating-point PID Control.
2873  * @param[in,out] S   is an instance of the floating-point PID Control structure
2874  * @param[in]     in  input sample to process
2875  * @return out processed output sample.
2876  */
csky_pid_f32(csky_pid_instance_f32 * S,float32_t in)2877 __STATIC_INLINE float32_t csky_pid_f32(
2878     csky_pid_instance_f32 *S,
2879     float32_t in)
2880 {
2881     float32_t out;
2882 
2883     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
2884     out = (S->A0 * in) +
2885       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
2886 
2887     /* Update state */
2888     S->state[1] = S->state[0];
2889     S->state[0] = in;
2890     S->state[2] = out;
2891 
2892     /* return to application */
2893     return (out);
2894 }
2895 
2896 /**
2897  * @}
2898 */ // end of PID group
2899 
2900 /**
2901  * @addtogroup PID
2902  * @{
2903  */
2904 
2905 /**
2906  * @brief  Process function for the Q31 PID Control.
2907  * @param[in,out] S  points to an instance of the Q31 PID Control structure
2908  * @param[in]     in  input sample to process
2909  * @return out processed output sample.
2910  *
2911  * <b>Scaling and Overflow Behavior:</b>
2912  * \par
2913  * The function is implemented using an internal 64-bit accumulator.
2914  * The accumulator has a 2.62 format
2915  * and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
2916  * Thus, if the accumulator result overflows it wraps around rather than clip.
2917  * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
2918  * After all multiply-accumulates are performed,
2919  * the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
2920  */
csky_pid_q31(csky_pid_instance_q31 * S,q31_t in)2921 __STATIC_INLINE q31_t csky_pid_q31(
2922     csky_pid_instance_q31 *S,
2923     q31_t in)
2924 {
2925     q63_t acc;
2926     q31_t out;
2927 
2928 #ifdef CSKY_SIMD
2929     /* acc = A0 * x[n]  */
2930     acc = mult_32x32_keep64(S->A0, in);
2931 
2932     /* acc += A1 * x[n-1] */
2933     acc = multAcc_32x32_keep64(acc, S->A1, S->state[0]);
2934 
2935     /* acc += A2 * x[n-2]  */
2936     acc = multAcc_32x32_keep64(acc, S->A2, S->state[1]);
2937 
2938     /* convert output to 1.31 format to add y[n-1] */
2939     out = dext_31(acc);
2940 #else
2941     /* acc = A0 * x[n]  */
2942     acc = (q63_t) S->A0 * in;
2943 
2944     /* acc += A1 * x[n-1] */
2945     acc += (q63_t) S->A1 * S->state[0];
2946 
2947     /* acc += A2 * x[n-2]  */
2948     acc += (q63_t) S->A2 * S->state[1];
2949 
2950     /* convert output to 1.31 format to add y[n-1] */
2951     out = (q31_t) (acc >> 31u);
2952 #endif
2953 
2954     /* out += y[n-1] */
2955     out += S->state[2];
2956 
2957     /* Update state */
2958     S->state[1] = S->state[0];
2959     S->state[0] = in;
2960     S->state[2] = out;
2961 
2962     /* return to application */
2963     return (out);
2964 }
2965 
2966 /**
2967  * @}
2968  */  // end of PID group
2969 
2970 /**
2971  * @addtogroup PID
2972  * @{
2973  */
2974 /**
2975  * @brief  Process function for the Q15 PID Control.
2976  * @param[in,out] S   points to an instance of the Q15 PID Control structure
2977  * @param[in]     in  input sample to process
2978  * @return out processed output sample.
2979  *
2980  * <b>Scaling and Overflow Behavior:</b>
2981  * \par
2982  * The function is implemented using a 64-bit internal accumulator.
2983  * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
2984  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
2985  * There is no risk of internal overflow with this approach
2986  * and the full precision of intermediate multiplications is preserved.
2987  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
2988  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
2989  */
csky_pid_q15(csky_pid_instance_q15 * S,q15_t in)2990 __STATIC_INLINE q15_t csky_pid_q15(
2991     csky_pid_instance_q15 *S,
2992     q15_t in)
2993 {
2994     q63_t acc;
2995     q15_t out;
2996 
2997     /* acc = A0 * x[n]  */
2998     acc = ((q31_t) S->A0) * in;
2999 
3000     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
3001     acc += (q31_t) S->A1 * S->state[0];
3002     acc += (q31_t) S->A2 * S->state[1];
3003 
3004     /* acc += y[n-1] */
3005     acc += (q31_t) S->state[2] << 15;
3006 
3007     /* saturate the output */
3008     out = (q15_t) (__SSAT_16((acc >> 15)));
3009 
3010     /* Update state */
3011     S->state[1] = S->state[0];
3012     S->state[0] = in;
3013     S->state[2] = out;
3014 
3015     /* return to application */
3016     return (out);
3017 }
3018 /**
3019  * @}
3020  */ // end of PID group
3021 
3022 csky_status csky_mat_inverse_f32(
3023     const csky_matrix_instance_f32 *src,
3024     csky_matrix_instance_f32 *dst);
3025 
3026 csky_status csky_mat_inverse_f64(
3027     const csky_matrix_instance_f64 *src,
3028     csky_matrix_instance_f64 *dst);
3029 
3030 /**
3031  * @ingroup groupController
3032  */
3033 
3034 /**
3035  * @defgroup clarke Vector Clarke Transform
3036  * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
3037  * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
3038  * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
3039  * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
3040  * \image html clarke.gif Stator current space vector and its components in (a,b).
3041  * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
3042  * can be calculated using only <code>Ia</code> and <code>Ib</code>.
3043  *
3044  * The function operates on a single sample of data and each call to the function returns the processed output.
3045  * The library provides separate functions for Q31 and floating-point data types.
3046  * \par Algorithm
3047  * \image html clarkeFormula.gif
3048  * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
3049  * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
3050  * \par Fixed-Point Behavior
3051  * Care must be taken when using the Q31 version of the Clarke transform.
3052  * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3053  * Refer to the function specific documentation below for usage guidelines.
3054  */
3055 
3056 /**
3057  * @addtogroup clarke
3058  * @{
3059  */
3060 
3061 /**
3062  *
3063  * @brief  Floating-point Clarke transform
3064  * @param[in]  Ia       input three-phase coordinate a
3065  * @param[in]  Ib       input three-phase coordinate b
3066  * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
3067  * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
3068  */
csky_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)3069 __STATIC_INLINE void csky_clarke_f32(
3070     float32_t Ia,
3071     float32_t Ib,
3072     float32_t *pIalpha,
3073     float32_t *pIbeta)
3074 {
3075     /* Calculate pIalpha using the equation, pIalpha = Ia */
3076     *pIalpha = Ia;
3077 
3078     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
3079     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
3080 }
3081 
3082 /**
3083  * @}
3084  */ // end of clarke group
3085 
3086 /**
3087  * @addtogroup clarke
3088  * @{
3089  */
3090 
3091 /**
3092  * @brief  Clarke transform for Q31 version
3093  * @param[in]  Ia       input three-phase coordinate a
3094  * @param[in]  Ib       input three-phase coordinate b
3095  * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
3096  * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
3097  *
3098  * <b>Scaling and Overflow Behavior:</b>
3099  * \par
3100  * The function is implemented using an internal 32-bit accumulator.
3101  * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3102  * There is saturation on the addition, hence there is no risk of overflow.
3103  */
csky_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)3104 __STATIC_INLINE void csky_clarke_q31(
3105     q31_t Ia,
3106     q31_t Ib,
3107     q31_t *pIalpha,
3108     q31_t *pIbeta)
3109 {
3110     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
3111 
3112     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
3113     *pIalpha = Ia;
3114 
3115 #ifdef CSKY_SIMD
3116     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
3117     product1 = mult_32x32_dext_30(Ia, 0x24F34E8B);
3118 
3119     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
3120     product2 = mult_32x32_dext_30(Ib, 0x49E69D16);
3121 #else
3122     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
3123     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
3124 
3125     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
3126     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
3127 #endif
3128 
3129     /* pIbeta is calculated by adding the intermediate products */
3130     *pIbeta = __QADD(product1, product2);
3131 }
3132 
3133 /**
3134  * @}
3135  */ // end of clarke group
3136 
3137 void csky_q7_to_q31(
3138     q7_t *pSrc,
3139     q31_t *pDst,
3140     uint32_t blockSize);
3141 
3142 /**
3143  * @ingroup groupController
3144  */
3145 /**
3146  * @defgroup inv_clarke Vector Inverse Clarke Transform
3147  * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
3148  *
3149  * The function operates on a single sample of data and each call to the function returns the processed output.
3150  * The library provides separate functions for Q31 and floating-point data types.
3151  * \par Algorithm
3152  * \image html clarkeInvFormula.gif
3153  * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
3154  * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
3155  * \par Fixed-Point Behavior
3156  * Care must be taken when using the Q31 version of the Clarke transform.
3157  * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3158  * Refer to the function specific documentation below for usage guidelines.
3159  */
3160 
3161 /**
3162  * @addtogroup inv_clarke
3163  * @{
3164  */
3165 
3166  /**
3167  * @brief  Floating-point Inverse Clarke transform
3168  * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
3169  * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
3170  * @param[out] pIa     points to output three-phase coordinate <code>a</code>
3171  * @param[out] pIb     points to output three-phase coordinate <code>b</code>
3172  */
csky_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)3173 __STATIC_INLINE void csky_inv_clarke_f32(
3174     float32_t Ialpha,
3175     float32_t Ibeta,
3176     float32_t *pIa,
3177     float32_t *pIb)
3178 {
3179     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
3180     *pIa = Ialpha;
3181 
3182     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
3183     *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
3184 }
3185 
3186 /**
3187  * @}
3188  */ // end of inv_clarke group
3189 
3190 /**
3191  * @addtogroup inv_clarke
3192  * @{
3193  */
3194 
3195 /**
3196  * @brief  Inverse Clarke transform for Q31 version
3197  * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
3198  * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
3199  * @param[out] pIa     points to output three-phase coordinate <code>a</code>
3200  * @param[out] pIb     points to output three-phase coordinate <code>b</code>
3201  *
3202  * <b>Scaling and Overflow Behavior:</b>
3203  * \par
3204  * The function is implemented using an internal 32-bit accumulator.
3205  * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3206  * There is saturation on the subtraction, hence there is no risk of overflow.
3207  */
csky_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)3208 __STATIC_INLINE void csky_inv_clarke_q31(
3209     q31_t Ialpha,
3210     q31_t Ibeta,
3211     q31_t *pIa,
3212     q31_t *pIb)
3213 {
3214     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
3215 
3216     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
3217     *pIa = Ialpha;
3218 
3219 #ifdef CSKY_SIMD
3220     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
3221     product1 = mult_32x32_dext_31(Ialpha, 0x40000000);
3222 
3223     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
3224     product2 = mult_32x32_dext_31(Ibeta, 0x6ED9EBA1);
3225 #else
3226     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
3227     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
3228 
3229     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
3230     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
3231 #endif
3232 
3233     /* pIb is calculated by subtracting the products */
3234     *pIb = __QSUB(product2, product1);
3235 }
3236 
3237 /**
3238  * @}
3239  */ // end of inv_clarke group
3240 
3241 void csky_q7_to_q15(
3242     q7_t *pSrc,
3243     q15_t *pDst,
3244     uint32_t blockSize);
3245 
3246 /**
3247  * @ingroup groupController
3248  */
3249 /**
3250  * @defgroup park Vector Park Transform
3251  *
3252  * Forward Park transform converts the input two-coordinate vector to flux and torque components.
3253  * The Park transform can be used to realize the transformation of the <code>Ialpha</code>
3254  * and the <code>Ibeta</code> currents
3255  * from the stationary to the moving reference frame and control the spatial relationship between
3256  * the stator vector current and rotor flux vector.
3257  * If we consider the d axis aligned with the rotor flux, the diagram below shows the
3258  * current vector and the relationship from the two reference frames:
3259  * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
3260  *
3261  * The function operates on a single sample of data and each call to the function returns the processed output.
3262  * The library provides separate functions for Q31 and floating-point data types.
3263  * \par Algorithm
3264  * \image html parkFormula.gif
3265  * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
3266  * <code>pId</code> and <code>pIq</code> are rotor vector components
3267  * and <code>cosVal</code> and <code>sinVal</code> are the
3268  * cosine and sine values of theta (rotor flux position).
3269  * \par Fixed-Point Behavior
3270  * Care must be taken when using the Q31 version of the Park transform.
3271  * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3272  * Refer to the function specific documentation below for usage guidelines.
3273  */
3274 /**
3275  * @addtogroup park
3276  * @{
3277  */
3278 /**
3279  * @brief Floating-point Park transform
3280  * @param[in]  Ialpha  input two-phase vector coordinate alpha
3281  * @param[in]  Ibeta   input two-phase vector coordinate beta
3282  * @param[out] pId     points to output   rotor reference frame d
3283  * @param[out] pIq     points to output   rotor reference frame q
3284  * @param[in]  sinVal  sine value of rotation angle theta
3285  * @param[in]  cosVal  cosine value of rotation angle theta
3286  *
3287  * The function implements the forward Park transform.
3288  *
3289  */
csky_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)3290 __STATIC_INLINE void csky_park_f32(
3291     float32_t Ialpha,
3292     float32_t Ibeta,
3293     float32_t *pId,
3294     float32_t *pIq,
3295     float32_t sinVal,
3296     float32_t cosVal)
3297 {
3298     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
3299     *pId = Ialpha * cosVal + Ibeta * sinVal;
3300     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
3301     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
3302 }
3303 /**
3304  * @}
3305  */ // end of park group
3306 
3307 /**
3308  * @addtogroup park
3309  * @{
3310  */
3311 /**
3312  * @brief  Park transform for Q31 version
3313  * @param[in]  Ialpha  input two-phase vector coordinate alpha
3314  * @param[in]  Ibeta   input two-phase vector coordinate beta
3315  * @param[out] pId     points to output rotor reference frame d
3316  * @param[out] pIq     points to output rotor reference frame q
3317  * @param[in]  sinVal  sine value of rotation angle theta
3318  * @param[in]  cosVal  cosine value of rotation angle theta
3319  *
3320  * <b>Scaling and Overflow Behavior:</b>
3321  * \par
3322  * The function is implemented using an internal 32-bit accumulator.
3323  * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3324  * There is saturation on the addition and subtraction, hence there is no risk of overflow.
3325  */
csky_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)3326 __STATIC_INLINE void csky_park_q31(
3327     q31_t Ialpha,
3328     q31_t Ibeta,
3329     q31_t *pId,
3330     q31_t *pIq,
3331     q31_t sinVal,
3332     q31_t cosVal)
3333 {
3334 #ifdef CSKY_SIMD
3335     __ASM volatile(
3336                   "rmul.s32.h t0, %0, %3\n\t"
3337                   "rmul.s32.h t1, %1, %2\n\t"
3338                   "add.s32.s  t0, t0, t1\n\t"
3339                   "st.w       t0, (%4, 0x0)\n\t"
3340                   "rmul.s32.h t0, %0, %2\n\t"
3341                   "rmul.s32.h t1, %1, %3\n\t"
3342                   "sub.s32.s  t1, t1, t0\n\t"
3343                   "st.w       t1, (%5, 0x0)\n\t"
3344                   ::"r"(Ialpha), "r"(Ibeta), "r"(sinVal), "r"(cosVal), "r"(pId), "r"(pIq)
3345                   :"t0", "t1", "memory");
3346 #else
3347     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
3348     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
3349     /* Intermediate product is calculated by (Ialpha * cosVal) */
3350     product1 = clip_q63_to_q31 (((q63_t) (Ialpha) * (cosVal)) >> 31);
3351     /* Intermediate product is calculated by (Ibeta * sinVal) */
3352     product2 = clip_q63_to_q31 (((q63_t) (Ibeta) * (sinVal)) >> 31);
3353     /* Intermediate product is calculated by (Ialpha * sinVal) */
3354     product3 = clip_q63_to_q31 (((q63_t) (Ialpha) * (sinVal)) >> 31);
3355     /* Intermediate product is calculated by (Ibeta * cosVal) */
3356     product4 = clip_q63_to_q31 (((q63_t) (Ibeta) * (cosVal)) >> 31);
3357     /* Calculate pId by adding the two intermediate products 1 and 2 */
3358     *pId = __QADD(product1, product2);
3359     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
3360     *pIq = __QSUB(product4, product3);
3361 #endif
3362 }
3363 /**
3364  * @}
3365  */ // end of park group
3366 
3367 void csky_q7_to_float(
3368     q7_t *pSrc,
3369     float32_t *pDst,
3370     uint32_t blockSize);
3371 
3372 /**
3373  * @ingroup groupController
3374  */
3375 /**
3376  * @defgroup inv_park Vector Inverse Park transform
3377  * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
3378  *
3379  * The function operates on a single sample of data and each call to the function returns the processed output.
3380  * The library provides separate functions for Q31 and floating-point data types.
3381  * \par Algorithm
3382  * \image html parkInvFormula.gif
3383  * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
3384  * <code>Id</code> and <code>Iq</code> are rotor vector components
3385  * and <code>cosVal</code> and <code>sinVal</code> are the
3386  * cosine and sine values of theta (rotor flux position).
3387  * \par Fixed-Point Behavior
3388  * Care must be taken when using the Q31 version of the Park transform.
3389  * In particular, the overflow and saturation behavior of the accumulator used must be considered.
3390  * Refer to the function specific documentation below for usage guidelines.
3391  */
3392 /**
3393  * @addtogroup inv_park
3394  * @{
3395  */
3396  /**
3397  * @brief  Floating-point Inverse Park transform
3398  * @param[in]  Id       input coordinate of rotor reference frame d
3399  * @param[in]  Iq       input coordinate of rotor reference frame q
3400  * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
3401  * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
3402  * @param[in]  sinVal   sine value of rotation angle theta
3403  * @param[in]  cosVal   cosine value of rotation angle theta
3404  */
csky_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)3405 __STATIC_INLINE void csky_inv_park_f32(
3406     float32_t Id,
3407     float32_t Iq,
3408     float32_t *pIalpha,
3409     float32_t *pIbeta,
3410     float32_t sinVal,
3411     float32_t cosVal)
3412 {
3413   /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
3414   *pIalpha = Id * cosVal - Iq * sinVal;
3415   /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
3416   *pIbeta = Id * sinVal + Iq * cosVal;
3417 }
3418 /**
3419  * @}
3420  */ // end of inv_park group
3421 
3422 /**
3423  * @addtogroup inv_park
3424  * @{
3425  */
3426 /**
3427  * @brief  Inverse Park transform for   Q31 version
3428  * @param[in]  Id       input coordinate of rotor reference frame d
3429  * @param[in]  Iq       input coordinate of rotor reference frame q
3430  * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
3431  * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
3432  * @param[in]  sinVal   sine value of rotation angle theta
3433  * @param[in]  cosVal   cosine value of rotation angle theta
3434  *
3435  * <b>Scaling and Overflow Behavior:</b>
3436  * \par
3437  * The function is implemented using an internal 32-bit accumulator.
3438  * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
3439  * There is saturation on the addition, hence there is no risk of overflow.
3440  */
csky_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)3441 __STATIC_INLINE void csky_inv_park_q31(
3442     q31_t Id,
3443     q31_t Iq,
3444     q31_t *pIalpha,
3445     q31_t *pIbeta,
3446     q31_t sinVal,
3447     q31_t cosVal)
3448 {
3449 #ifdef CSKY_SIMD
3450     __ASM volatile(
3451                   "rmul.s32.h t0, %0, %3\n\t"
3452                   "rmul.s32.h t1, %1, %2\n\t"
3453                   "sub.s32.s  t0, t0, t1\n\t"
3454                   "st.w       t0, (%4, 0x0)\n\t"
3455                   "rmul.s32.h t0, %0, %2\n\t"
3456                   "rmul.s32.h t1, %1, %3\n\t"
3457                   "add.s32.s  t0, t0, t1\n\t"
3458                   "st.w       t0, (%5, 0x0)\n\t"
3459                   ::"r"(Id), "r"(Iq), "r"(sinVal), "r"(cosVal), "r"(pIalpha), "r"(pIbeta)
3460                   :"t0", "t1", "memory");
3461 
3462 #else
3463     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
3464     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
3465     /* Intermediate product is calculated by (Id * cosVal) */
3466     product1 = clip_q63_to_q31 (((q63_t) (Id) * (cosVal)) >> 31);
3467     /* Intermediate product is calculated by (Iq * sinVal) */
3468     product2 = clip_q63_to_q31 (((q63_t) (Iq) * (sinVal)) >> 31);
3469     /* Intermediate product is calculated by (Id * sinVal) */
3470     product3 = clip_q63_to_q31 (((q63_t) (Id) * (sinVal)) >> 31);
3471     /* Intermediate product is calculated by (Iq * cosVal) */
3472     product4 = clip_q63_to_q31 (((q63_t) (Iq) * (cosVal)) >> 31);
3473     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
3474     *pIalpha = __QSUB(product1, product2);
3475     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
3476     *pIbeta = __QADD(product4, product3);
3477 #endif
3478 }
3479 
3480 /**
3481  * @}
3482  */ // end of inv_park group
3483 
3484 void csky_q31_to_float(
3485     q31_t *pSrc,
3486     float32_t *pDst,
3487     uint32_t blockSize);
3488 
3489 /**
3490  * @ingroup groupInterpolation
3491  */
3492 /**
3493  * @defgroup LinearInterpolate Linear Interpolation
3494  *
3495  * Linear interpolation is a method of curve fitting using linear polynomials.
3496  * Linear interpolation works by effectively drawing a straight line between two neighboring samples
3497  * and returning the appropriate point along that line
3498  *
3499  * \par
3500  * \image html LinearInterp.gif "Linear interpolation"
3501  *
3502  * \par
3503  * A  Linear Interpolate function calculates an output value(y), for the input(x)
3504  * using linear interpolation of the input values x0, x1( nearest input values)
3505  * and the output values y0 and y1(nearest output values)
3506  *
3507  * \par Algorithm:
3508  * <pre>
3509  *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
3510  *       where x0, x1 are nearest values of input x
3511  *             y0, y1 are nearest values to output y
3512  * </pre>
3513  *
3514  * \par
3515  * This set of functions implements Linear interpolation process
3516  * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
3517  * sample of data and each call to the function returns a single processed value.
3518  * <code>S</code> points to an instance of the Linear Interpolate function data structure.
3519  * <code>x</code> is the input sample value. The functions return the output value.
3520  *
3521  * \par
3522  * If x is below the input range, linear interpolation returns the first value of the table;
3523  * if x is above the input range, it returns the last value of the table.
3524  */
3525 /**
3526  * @addtogroup LinearInterpolate
3527  * @{
3528  */
3529 /**
3530  * @brief  Process function for the floating-point Linear Interpolation Function.
3531  * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
3532  * @param[in]     x  input sample to process
3533  * @return y processed output sample.
3534  *
3535  */
csky_linear_interp_f32(csky_linear_interp_instance_f32 * S,float32_t x)3536 __STATIC_INLINE float32_t csky_linear_interp_f32(
3537     csky_linear_interp_instance_f32 * S,
3538     float32_t x)
3539 {
3540     float32_t y;
3541     float32_t x0, x1;                            /* Nearest input values */
3542     float32_t y0, y1;                            /* Nearest output values */
3543     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
3544     int32_t i;                                   /* Index variable */
3545     float32_t *pYData = S->pYData;               /* pointer to output table */
3546     /* Calculation of index */
3547     i = (int32_t) ((x - S->x1) / xSpacing);
3548     if (i < 0) {
3549         /* Iniatilize output for below specified range as least output value of table */
3550         y = pYData[0];
3551     } else if ((uint32_t)i >= S->nValues) {
3552         /* Iniatilize output for above specified range as last output value of table */
3553         y = pYData[S->nValues - 1];
3554     } else {
3555         /* Calculation of nearest input values */
3556         x0 = S->x1 +  i      * xSpacing;
3557         x1 = S->x1 + (i + 1) * xSpacing;
3558         /* Read of nearest output values */
3559         y0 = pYData[i];
3560         y1 = pYData[i + 1];
3561         /* Calculation of output */
3562         y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
3563     }
3564     /* returns output value */
3565     return (y);
3566 }
3567 /**
3568  * @}
3569  */ // end of LinearInterpolate group
3570 
3571 /**
3572  * @addtogroup LinearInterpolate
3573  * @{
3574  */
3575 
3576 /**
3577  * @brief  Process function for the Q31 Linear Interpolation Function.
3578  * @param[in] pYData   pointer to Q31 Linear Interpolation table
3579  * @param[in] x        input sample to process
3580  * @param[in] nValues  number of table values
3581  * @return y processed output sample.
3582  *
3583  * \par
3584  * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3585  * and 20 bits for fractional part.
3586  * This function can support maximum of table size 2^12.
3587  *
3588  */
csky_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)3589 __STATIC_INLINE q31_t csky_linear_interp_q31(
3590     q31_t *pYData,
3591     q31_t x,
3592     uint32_t nValues)
3593 {
3594     q31_t y;                                     /* output */
3595     q31_t y0, y1;                                /* Nearest output values */
3596     q31_t fract;                                 /* fractional part */
3597     int32_t index;                               /* Index to read nearest output values */
3598     /* Input is in 12.20 format */
3599     /* 12 bits for the table index */
3600     /* Index value calculation */
3601     index = ((x & (q31_t)0xFFF00000) >> 20);
3602     if (index >= (int32_t)(nValues - 1)) {
3603         return (pYData[nValues - 1]);
3604     } else if (index < 0) {
3605         return (pYData[0]);
3606     } else {
3607         /* 20 bits for the fractional part */
3608         /* shift left by 11 to keep fract in 1.31 format */
3609         fract = (x & 0x000FFFFF) << 11;
3610         /* Read two nearest output values from the index in 1.31(q31) format */
3611         y0 = pYData[index];
3612         y1 = pYData[index + 1];
3613 #ifdef CSKY_SIMD
3614         /* Calculation of y0 * (1-fract) and y is in 2.30 format */
3615         y = mult_32x32_keep32(y0, (0x7FFFFFFF - fract));
3616         /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
3617         y = multAcc_32x32_keep32(y, y1, fract);
3618 #else
3619         /* Calculation of y0 * (1-fract) and y is in 2.30 format */
3620         y = ((q31_t) (((q63_t) y0 * (0x7FFFFFFF - fract)) >> 32));
3621         /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
3622         y += ((q31_t) (((q63_t) y1 * fract) >> 32));
3623 #endif
3624         /* Convert y to 1.31 format */
3625         return (y << 1u);
3626     }
3627 }
3628 /**
3629  * @}
3630  */ // end of LinearInterpolate group
3631 
3632 /**
3633  * @addtogroup LinearInterpolate
3634  * @{
3635  */
3636 /**
3637  *
3638  * @brief  Process function for the Q15 Linear Interpolation Function.
3639  * @param[in] pYData   pointer to Q15 Linear Interpolation table
3640  * @param[in] x        input sample to process
3641  * @param[in] nValues  number of table values
3642  * @return y processed output sample.
3643  *
3644  * \par
3645  * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3646  * and 20 bits for fractional part.
3647  * This function can support maximum of table size 2^12.
3648  *
3649  */
csky_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)3650 __STATIC_INLINE q15_t csky_linear_interp_q15(
3651     q15_t *pYData,
3652     q31_t x,
3653     uint32_t nValues)
3654 {
3655     q63_t y;                                     /* output */
3656     q15_t y0, y1;                                /* Nearest output values */
3657     q31_t fract;                                 /* fractional part */
3658     int32_t index;                               /* Index to read nearest output values */
3659     /* Input is in 12.20 format */
3660     /* 12 bits for the table index */
3661     /* Index value calculation */
3662     index = ((x & (int32_t)0xFFF00000) >> 20);
3663     if (index >= (int32_t)(nValues - 1)) {
3664         return (pYData[nValues - 1]);
3665     } else if (index < 0) {
3666         return (pYData[0]);
3667     } else {
3668         /* 20 bits for the fractional part */
3669         /* fract is in 12.20 format */
3670         fract = (x & 0x000FFFFF);
3671         /* Read two nearest output values from the index */
3672         y0 = pYData[index];
3673         y1 = pYData[index + 1];
3674 #ifdef CSKY_SIMD
3675         /* Calculation of y0 * (1-fract) and y is in 13.35 format */
3676         y = mult_32x32_keep64(y0, (0xFFFFF - fract));
3677         /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
3678         y = multAcc_32x32_keep64(y, y1, (fract));
3679 #else
3680         /* Calculation of y0 * (1-fract) and y is in 13.35 format */
3681         y = ((q63_t) y0 * (0xFFFFF - fract));
3682         /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
3683         y += ((q63_t) y1 * (fract));
3684 #endif
3685         /* convert y to 1.15 format */
3686         return (q15_t) (y >> 20);
3687     }
3688 }
3689 /**
3690  * @}
3691  */ // end of LinearInterpolate group
3692 
3693 /**
3694  * @addtogroup LinearInterpolate
3695  * @{
3696  */
3697 /**
3698  *
3699  * @brief  Process function for the Q7 Linear Interpolation Function.
3700  * @param[in] pYData   pointer to Q7 Linear Interpolation table
3701  * @param[in] x        input sample to process
3702  * @param[in] nValues  number of table values
3703  * @return y processed output sample.
3704  *
3705  * \par
3706  * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index
3707  * and 20 bits for fractional part.
3708  * This function can support maximum of table size 2^12.
3709  */
csky_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)3710 __STATIC_INLINE q7_t csky_linear_interp_q7(
3711     q7_t *pYData,
3712     q31_t x,
3713     uint32_t nValues)
3714 {
3715     q31_t y;                                     /* output */
3716     q7_t y0, y1;                                 /* Nearest output values */
3717     q31_t fract;                                 /* fractional part */
3718     uint32_t index;                              /* Index to read nearest output values */
3719     /* Input is in 12.20 format */
3720     /* 12 bits for the table index */
3721     /* Index value calculation */
3722     if (x < 0) {
3723         return (pYData[0]);
3724     }
3725     index = (x >> 20) & 0xfff;
3726     if (index >= (nValues - 1)) {
3727         return (pYData[nValues - 1]);
3728     } else {
3729         /* 20 bits for the fractional part */
3730         /* fract is in 12.20 format */
3731         fract = (x & 0x000FFFFF);
3732         /* Read two nearest output values from the index and are in 1.7(q7) format */
3733         y0 = pYData[index];
3734         y1 = pYData[index + 1];
3735         /* Calculation of y0 * (1-fract) and y is in 13.27(q27) format */
3736         y = ((y0 * (0xFFFFF - fract)));
3737         /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
3738         y += (y1 * fract);
3739         /* convert y to 1.7(q7) format */
3740         return (q7_t) (y >> 20);
3741     }
3742 }
3743 /**
3744  * @}
3745  */ // end of LinearInterpolate group
3746 
3747 float32_t csky_sin_f32(
3748     float32_t x);
3749 
3750 q31_t csky_sin_q31(
3751     q31_t x);
3752 
3753 q15_t csky_sin_q15(
3754     q15_t x);
3755 
3756 float32_t csky_cos_f32(
3757     float32_t x);
3758 
3759 q31_t csky_cos_q31(
3760     q31_t x);
3761 
3762 q15_t csky_cos_q15(
3763     q15_t x);
3764 
3765 csky_status csky_sqrt_f32(
3766     float32_t in,
3767     float32_t *pOut);
3768 
3769 csky_status csky_sqrt_q31(
3770     q31_t in,
3771     q31_t *pOut);
3772 
3773 csky_status csky_sqrt_q15(
3774     q15_t in,
3775     q15_t *pOut);
3776 
3777 /* double format */
/*
 * Overlays one float64_t with an array of two q31_t elements, so the storage
 * of a double can also be accessed through i[0] and i[1].
 * NOTE(review): which element holds the high half depends on byte order,
 * which this header section does not establish -- confirm for the target.
 */
3778 typedef union _myNumber {
3779     q31_t  i[2];    /* the storage viewed as two q31_t elements */
3780     float64_t x;    /* the storage viewed as one float64_t */
3781 }mynumber;
3782 
3783 /* the coefficients for log2 table look-up */
/*
 * The same storage is declared both as 5800 q31_t elements and as 2900
 * float64_t elements (the two views have equal size if float64_t is twice as
 * wide as q31_t -- TODO confirm the typedefs).
 */
3784 typedef union {
3785     q31_t i[5800];        /* table viewed as q31_t elements */
3786     float64_t x[2900];    /* table viewed as float64_t elements */
3787 }log2_cof1;
3788 
/*
 * Table storage declared both as 4350 q31_t elements and as 2175 float64_t
 * elements.
 * NOTE(review): by its name this is presumably a second log2 look-up
 * coefficient table -- confirm against the code that defines the table.
 */
3789 typedef union {
3790     q31_t i[4350];        /* table viewed as q31_t elements */
3791     float64_t x[2175];    /* table viewed as float64_t elements */
3792 }log2_cof2;
3793 
3794 /* the coefficients for exp table look-up */
/*
 * The same storage is declared both as 1424 q31_t elements and as 712
 * float64_t elements.
 */
3795 typedef union {
3796     q31_t i[1424];       /* table viewed as q31_t elements */
3797     float64_t x[712];    /* table viewed as float64_t elements */
3798 }exp_cof1;
3799 
/*
 * Table storage declared both as 2048 q31_t elements and as 1024 float64_t
 * elements.
 * NOTE(review): by its name this is presumably a second exp look-up
 * coefficient table -- confirm against the code that defines the table.
 */
3800 typedef union {
3801     q31_t i[2048];        /* table viewed as q31_t elements */
3802     float64_t x[1024];    /* table viewed as float64_t elements */
3803 }exp_cof2;
3804 
/*
 * Bit-field views of a float64_t.  Each view declares 64 bits in total
 * (32 + 20 + 11 + 1, and 32 + 19 + 1 + 11 + 1).
 * NOTE(review): the declaration order (low mantissa word first, sign last)
 * lines up with the IEEE-754 double fields only if the compiler allocates
 * bit-fields starting from the least-significant bit -- confirm for the
 * toolchain and byte order in use.
 */
3805 union ieee754_double {
3806     float64_t d;                          /* the value as a double */
3807 
         /* General view: sign, exponent and a mantissa split into two fields */
3808     struct {
3809         unsigned int mantissa1 : 32;
3810         unsigned int mantissa0 : 20;
3811         unsigned int exponent : 11;
3812         unsigned int negative : 1;
3813     } ieee;
         /* Same layout with one bit of mantissa0 split off as quiet_nan */
3814     struct {
3815         unsigned int mantissa1 : 32;
3816         unsigned int mantissa0 : 19;
3817         unsigned int quiet_nan : 1;
3818         unsigned int exponent : 11;
3819         unsigned int negative : 1;
3820     } ieee_nan;
3821 };
3822 
/*
 * Aggregate of one q31_t field and an array of 40 long elements.
 * NOTE(review): the name and shape suggest a multi-precision number
 * (exponent `e` plus digits `d`) -- confirm against the code that uses it.
 */
3823 typedef struct {
3824     q31_t e;
3825     long  d[40];
3826 }mp_no;
3827 
3828 float64_t csky_pow_f64(
3829     float64_t x,
3830     float64_t y);
3831 
3832 float64_t csky_log_f64(
3833     float64_t x);
3834 
3835 float64_t csky_exp_f64(
3836     float64_t x);
3837 
3838 float64_t csky_pow2_f64(
3839     float64_t x);
3840 
3841 float64_t csky_log2_f64(
3842     float64_t x);
3843 
3844 float64_t csky_log10_f64(
3845     float64_t x);
3846 
3847 void csky_power_q31(
3848     q31_t *pSrc,
3849     uint32_t blockSize,
3850     q63_t *pResult);
3851 
3852 void csky_power_int32(
3853     int32_t *pSrc,
3854     uint32_t blockSize,
3855     q63_t *pResult);
3856 
3857 void csky_power_int32(
3858     int32_t *pSrc,
3859     uint32_t blockSize,
3860     q63_t *pResult);
3861 
3862 void csky_power_f32(
3863     float32_t *pSrc,
3864     uint32_t blockSize,
3865     float32_t *pResult);
3866 
3867 void csky_power_q15(
3868     q15_t *pSrc,
3869     uint32_t blockSize,
3870     q63_t *pResult);
3871 
3872 void csky_power_q7(
3873     q7_t *pSrc,
3874     uint32_t blockSize,
3875     q31_t *pResult);
3876 
3877 void csky_mean_q7(
3878     q7_t *pSrc,
3879     uint32_t blockSize,
3880     q7_t *pResult);
3881 
3882 void csky_mean_q15(
3883     q15_t *pSrc,
3884     uint32_t blockSize,
3885     q15_t *pResult);
3886 
3887 void csky_mean_q31(
3888     q31_t *pSrc,
3889     uint32_t blockSize,
3890     q31_t *pResult);
3891 
3892 void csky_mean_f32(
3893     float32_t *pSrc,
3894     uint32_t blockSize,
3895     float32_t *pResult);
3896 
3897 void csky_var_f32(
3898     float32_t *pSrc,
3899     uint32_t blockSize,
3900     float32_t *pResult);
3901 
3902 void csky_var_q31(
3903     q31_t *pSrc,
3904     uint32_t blockSize,
3905     q31_t *pResult);
3906 
3907 void csky_var_q15(
3908     q15_t *pSrc,
3909     uint32_t blockSize,
3910     q15_t *pResult);
3911 
3912 void csky_rms_f32(
3913     float32_t *pSrc,
3914     uint32_t blockSize,
3915     float32_t *pResult);
3916 
3917 void csky_rms_q31(
3918     q31_t *pSrc,
3919     uint32_t blockSize,
3920     q31_t *pResult);
3921 
3922 void csky_rms_q15(
3923     q15_t *pSrc,
3924     uint32_t blockSize,
3925     q15_t *pResult);
3926 
3927 void csky_std_f32(
3928     float32_t *pSrc,
3929     uint32_t blockSize,
3930     float32_t *pResult);
3931 
3932 void csky_std_q31(
3933     q31_t *pSrc,
3934     uint32_t blockSize,
3935     q31_t *pResult);
3936 
3937 void csky_std_q15(
3938     q15_t *pSrc,
3939     uint32_t blockSize,
3940     q15_t *pResult);
3941 
3942 void csky_cmplx_mag_f32(
3943     float32_t *pSrc,
3944     float32_t *pDst,
3945     uint32_t numSamples);
3946 
3947 void csky_cmplx_mag_q31(
3948     q31_t *pSrc,
3949     q31_t *pDst,
3950     uint32_t numSamples);
3951 
3952 void csky_cmplx_mag_q15(
3953     q15_t *pSrc,
3954     q15_t *pDst,
3955     uint32_t numSamples);
3956 
3957 void csky_cmplx_dot_prod_q15(
3958     q15_t *pSrcA,
3959     q15_t *pSrcB,
3960     uint32_t numSamples,
3961     q31_t *realResult,
3962     q31_t *imagResult);
3963 
3964 void csky_cmplx_dot_prod_q31(
3965     q31_t *pSrcA,
3966     q31_t *pSrcB,
3967     uint32_t numSamples,
3968     q63_t *realResult,
3969     q63_t *imagResult);
3970 
3971 void csky_cmplx_dot_prod_f32(
3972     float32_t *pSrcA,
3973     float32_t *pSrcB,
3974     uint32_t numSamples,
3975     float32_t *realResult,
3976     float32_t *imagResult);
3977 
3978 void csky_cmplx_mult_real_q15(
3979     q15_t *pSrcCmplx,
3980     q15_t *pSrcReal,
3981     q15_t *pCmplxDst,
3982     uint32_t numSamples);
3983 
3984 void csky_cmplx_mult_real_q31(
3985     q31_t *pSrcCmplx,
3986     q31_t *pSrcReal,
3987     q31_t *pCmplxDst,
3988     uint32_t numSamples);
3989 
3990 void csky_cmplx_mult_real_f32(
3991     float32_t *pSrcCmplx,
3992     float32_t *pSrcReal,
3993     float32_t *pCmplxDst,
3994     uint32_t numSamples);
3995 
3996 void csky_min_q7(
3997     q7_t *pSrc,
3998     uint32_t blockSize,
3999     q7_t *result,
4000     uint32_t *index);
4001 
4002 void csky_min_q15(
4003     q15_t *pSrc,
4004     uint32_t blockSize,
4005     q15_t *pResult,
4006     uint32_t *pIndex);
4007 
4008 void csky_min_q31(
4009     q31_t *pSrc,
4010     uint32_t blockSize,
4011     q31_t *pResult,
4012     uint32_t *pIndex);
4013 
4014 void csky_min_f32(
4015     float32_t *pSrc,
4016     uint32_t blockSize,
4017     float32_t *pResult,
4018     uint32_t *pIndex);
4019 
4020 void csky_max_q7(
4021     q7_t *pSrc,
4022     uint32_t blockSize,
4023     q7_t *pResult,
4024     uint32_t *pIndex);
4025 
4026 void csky_max_q15(
4027     q15_t *pSrc,
4028     uint32_t blockSize,
4029     q15_t *pResult,
4030     uint32_t *pIndex);
4031 
4032 void csky_max_q31(
4033     q31_t *pSrc,
4034     uint32_t blockSize,
4035     q31_t *pResult,
4036     uint32_t *pIndex);
4037 
4038 void csky_max_f32(
4039     float32_t *pSrc,
4040     uint32_t blockSize,
4041     float32_t *pResult,
4042     uint32_t *pIndex);
4043 
4044 void csky_cmplx_mult_cmplx_q15(
4045     q15_t *pSrcA,
4046     q15_t *pSrcB,
4047     q15_t *pDst,
4048     uint32_t numSamples);
4049 
4050 void csky_cmplx_mult_cmplx_q31(
4051     q31_t *pSrcA,
4052     q31_t *pSrcB,
4053     q31_t *pDst,
4054     uint32_t numSamples);
4055 
4056 void csky_cmplx_mult_cmplx_f32(
4057     float32_t *pSrcA,
4058     float32_t *pSrcB,
4059     float32_t *pDst,
4060     uint32_t numSamples);
4061 
4062 void csky_cmplx_mult_cmplx_re_q15(
4063     q15_t *pSrcA,
4064     q15_t *pSrcB,
4065     q15_t *pDst,
4066     uint32_t numSamples);
4067 
4068 void csky_cmplx_mult_cmplx_re_q31(
4069     q31_t *pSrcA,
4070     q31_t *pSrcB,
4071     q31_t *pDst,
4072     uint32_t numSamples);
4073 
4074 void csky_cmplx_mult_cmplx_re_f32(
4075     float32_t *pSrcA,
4076     float32_t *pSrcB,
4077     float32_t *pDst,
4078     uint32_t numSamples);
4079 
4080 void csky_float_to_q31(
4081     float32_t *pSrc,
4082     q31_t *pDst,
4083     uint32_t blockSize);
4084 
4085 void csky_float_to_q15(
4086     float32_t *pSrc,
4087     q15_t *pDst,
4088     uint32_t blockSize);
4089 
4090 void csky_float_to_q7(
4091     float32_t *pSrc,
4092     q7_t *pDst,
4093     uint32_t blockSize);
4094 
4095 void csky_q31_to_q15(
4096     q31_t *pSrc,
4097     q15_t *pDst,
4098     uint32_t blockSize);
4099 
4100 void csky_q31_to_q7(
4101     q31_t *pSrc,
4102     q7_t *pDst,
4103     uint32_t blockSize);
4104 
4105 void csky_q15_to_float(
4106     q15_t *pSrc,
4107     float32_t *pDst,
4108     uint32_t blockSize);
4109 
4110 void csky_q15_to_q31(
4111     q15_t *pSrc,
4112     q31_t *pDst,
4113     uint32_t blockSize);
4114 
4115 void csky_q15_to_q7(
4116     q15_t *pSrc,
4117     q7_t *pDst,
4118     uint32_t blockSize);
4119 
4120 /**
4121  * @ingroup groupInterpolation
4122  */
4123 /**
4124  * @defgroup BilinearInterpolate Bilinear Interpolation
4125  *
4126  * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
4127  * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
4128  * determines values between the grid points.
4129  * Bilinear interpolation is equivalent to two step linear interpolation,
4130  * first in the x-dimension and then in the y-dimension.
4131  * Bilinear interpolation is often used in image processing to rescale images.
4132  * The CSI DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
4133  *
4134  * <b>Algorithm</b>
4135  * \par
4136  * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
4137  * For floating-point, the instance structure is defined as:
4138  * <pre>
4139  *   typedef struct
4140  *   {
4141  *     uint16_t numRows;
4142  *     uint16_t numCols;
4143  *     float32_t *pData;
4144  * } csky_bilinear_interp_instance_f32;
4145  * </pre>
4146  *
4147  * \par
4148  * where <code>numRows</code> specifies the number of rows in the table;
4149  * <code>numCols</code> specifies the number of columns in the table;
4150  * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
4151  * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
4152  * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
4153  *
4154  * \par
4155  * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
4156  * <pre>
4157  *     XF = floor(x)
4158  *     YF = floor(y)
4159  * </pre>
4160  * \par
4161  * The interpolated output point is computed as:
4162  * <pre>
4163  *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
4164  *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
4165  *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
4166  *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
4167  * </pre>
4168  * Note that the coordinates (x, y) contain integer and fractional components.
4169  * The integer components specify which portion of the table to use while the
4170  * fractional components control the interpolation processor.
4171  *
4172  * \par
4173  * If (x, y) is outside of the table boundary, bilinear interpolation returns zero output.
4174  */
4175 /**
4176  * @addtogroup BilinearInterpolate
4177  * @{
4178  */
4179 /**
4180 *
4181 * @brief  Floating-point bilinear interpolation.
4182 * @param[in,out] S  points to an instance of the interpolation structure.
4183 * @param[in]     X  interpolation coordinate.
4184 * @param[in]     Y  interpolation coordinate.
4185 * @return out interpolated value.
4186 */
csky_bilinear_interp_f32(const csky_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)4187 __STATIC_INLINE float32_t csky_bilinear_interp_f32(
4188     const csky_bilinear_interp_instance_f32 * S,
4189     float32_t X,
4190     float32_t Y)
4191 {
4192     float32_t out;
4193     float32_t f00, f01, f10, f11;
4194     float32_t *pData = S->pData;
4195     int32_t xIndex, yIndex, index;
4196     float32_t xdiff, ydiff;
4197     float32_t b1, b2, b3, b4;
4198     xIndex = (int32_t) X;
4199     yIndex = (int32_t) Y;
4200     /* Care taken for table outside boundary */
4201     /* Returns zero output when values are outside table boundary */
4202     if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1)) {
4203         return (0);
4204     }
4205     /* Calculation of index for two nearest points in X-direction */
4206     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
4207     /* Read two nearest points in X-direction */
4208     f00 = pData[index];
4209     f01 = pData[index + 1];
4210     /* Calculation of index for two nearest points in Y-direction */
4211     index = (xIndex - 1) + (yIndex) * S->numCols;
4212     /* Read two nearest points in Y-direction */
4213     f10 = pData[index];
4214     f11 = pData[index + 1];
4215     /* Calculation of intermediate values */
4216     b1 = f00;
4217     b2 = f01 - f00;
4218     b3 = f10 - f00;
4219     b4 = f00 - f01 - f10 + f11;
4220     /* Calculation of fractional part in X */
4221     xdiff = X - xIndex;
4222     /* Calculation of fractional part in Y */
4223     ydiff = Y - yIndex;
4224     /* Calculation of bi-linear interpolated output */
4225     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
4226     /* return to application */
4227     return (out);
4228 }
4229 /**
4230  * @}
4231  */ // end of BilinearInterpolate group
4232 
4233 /**
4234  * @addtogroup BilinearInterpolate
4235  * @{
4236  */
4237 /**
4238 *
4239 * @brief  Q31 bilinear interpolation.
4240 * @param[in,out] S  points to an instance of the interpolation structure.
4241 * @param[in]     X  interpolation coordinate in 12.20 format.
4242 * @param[in]     Y  interpolation coordinate in 12.20 format.
4243 * @return out interpolated value.
4244 */
csky_bilinear_interp_q31(csky_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)4245 __STATIC_INLINE q31_t csky_bilinear_interp_q31(
4246     csky_bilinear_interp_instance_q31 *S,
4247     q31_t X,
4248     q31_t Y)
4249 {
4250     q31_t out;                                   /* Temporary output */
4251     q31_t acc = 0;                               /* output */
4252     q31_t xfract, yfract;                        /* X, Y fractional parts */
4253     q31_t x1, x2, y1, y2;                        /* Nearest output values */
4254     int32_t rI, cI;                              /* Row and column indices */
4255     q31_t *pYData = S->pData;                    /* pointer to output table values */
4256     uint32_t nCols = S->numCols;                 /* num of rows */
4257     /* Input is in 12.20 format */
4258     /* 12 bits for the table index */
4259     /* Index value calculation */
4260     rI = ((X & (q31_t)0xFFF00000) >> 20);
4261     /* Input is in 12.20 format */
4262     /* 12 bits for the table index */
4263     /* Index value calculation */
4264     cI = ((Y & (q31_t)0xFFF00000) >> 20);
4265     /* Care taken for table outside boundary */
4266     /* Returns zero output when values are outside table boundary */
4267     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4268         return (0);
4269     }
4270     /* 20 bits for the fractional part */
4271     /* shift left xfract by 11 to keep 1.31 format */
4272     xfract = (X & 0x000FFFFF) << 11u;
4273     /* Read two nearest output values from the index */
4274     x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
4275     x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
4276     /* 20 bits for the fractional part */
4277     /* shift left yfract by 11 to keep 1.31 format */
4278     yfract = (Y & 0x000FFFFF) << 11u;
4279     /* Read two nearest output values from the index */
4280     y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
4281     y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
4282 #ifdef CSKY_SIMD
4283     /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 3.29(q29) format */
4284     out = mult_32x32_keep32(x1, (0x7FFFFFFF - xfract));
4285     acc = mult_32x32_keep32(out, (0x7FFFFFFF - yfract));
4286     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
4287     out = mult_32x32_keep32(x2, (0x7FFFFFFF - yfract));
4288     acc = multAcc_32x32_keep32(acc, out, xfract);
4289     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
4290     out = mult_32x32_keep32(y1,  (0x7FFFFFFF - xfract));
4291     acc = multAcc_32x32_keep32(acc, out, yfract);
4292     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
4293     out = mult_32x32_keep32(y2, xfract);
4294     acc = multAcc_32x32_keep32(acc, out, yfract);
4295 #else
4296     /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 3.29(q29) format */
4297     out = ((q31_t) (((q63_t) x1  * (0x7FFFFFFF - xfract)) >> 32)); // 32:byte alignment
4298     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32)); // 32:byte alignment
4299     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
4300     out = ((q31_t) (((q63_t) x2 * (0x7FFFFFFF - yfract)) >> 32)); // 32:byte alignment
4301     acc += ((q31_t) (((q63_t) out * (xfract)) >> 32)); // 32:byte alignment
4302     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
4303     out = ((q31_t) (((q63_t) y1 * (0x7FFFFFFF - xfract)) >> 32)); // 32:byte alignment
4304     acc += ((q31_t) (((q63_t) out * (yfract)) >> 32)); // 32:byte alignment
4305     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
4306     out = ((q31_t) (((q63_t) y2 * (xfract)) >> 32)); // 32:byte alignment
4307     acc += ((q31_t) (((q63_t) out * (yfract)) >> 32)); // 32:byte alignment
4308 #endif
4309     /* Convert acc to 1.31(q31) format */
4310     return ((q31_t)(acc << 2)); // 2:byte alignment
4311 }
4312 /**
4313  * @}
4314  */ // end of BilinearInterpolate group
4315 
4316 /**
4317  * @addtogroup BilinearInterpolate
4318  * @{
4319  */
4320 /**
4321 * @brief  Q15 bilinear interpolation.
4322 * @param[in,out] S  points to an instance of the interpolation structure.
4323 * @param[in]     X  interpolation coordinate in 12.20 format.
4324 * @param[in]     Y  interpolation coordinate in 12.20 format.
4325 * @return out interpolated value.
4326 */
csky_bilinear_interp_q15(csky_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)4327 __STATIC_INLINE q15_t csky_bilinear_interp_q15(
4328     csky_bilinear_interp_instance_q15 * S,
4329     q31_t X,
4330     q31_t Y)
4331 {
4332     q63_t acc = 0;                               /* output */
4333     q31_t out;                                   /* Temporary output */
4334     q15_t x1, x2, y1, y2;                        /* Nearest output values */
4335     q31_t xfract, yfract;                        /* X, Y fractional parts */
4336     int32_t rI, cI;                              /* Row and column indices */
4337     q15_t *pYData = S->pData;                    /* pointer to output table values */
4338     uint32_t nCols = S->numCols;                 /* num of rows */
4339     /* Input is in 12.20 format */
4340     /* 12 bits for the table index */
4341     /* Index value calculation */
4342     rI = ((X & (q31_t)0xFFF00000) >> 20);
4343     /* Input is in 12.20 format */
4344     /* 12 bits for the table index */
4345     /* Index value calculation */
4346     cI = ((Y & (q31_t)0xFFF00000) >> 20);
4347     /* Care taken for table outside boundary */
4348     /* Returns zero output when values are outside table boundary */
4349     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4350         return (0);
4351     }
4352     /* 20 bits for the fractional part */
4353     /* xfract should be in 12.20 format */
4354     xfract = (X & 0x000FFFFF);
4355     /* Read two nearest output values from the index */
4356     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
4357     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
4358     /* 20 bits for the fractional part */
4359     /* yfract should be in 12.20 format */
4360     yfract = (Y & 0x000FFFFF);
4361     /* Read two nearest output values from the index */
4362     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
4363     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
4364     /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 13.51 format */
4365     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
4366     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
4367 #ifdef CSKY_SIMD
4368     out = mult_32x32_dext_4(x1, (0xFFFFF - xfract));
4369     acc = mult_32x32_keep64(out, (0xFFFFF - yfract));
4370     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
4371     out = mult_32x32_dext_4(x2, (0xFFFFF - yfract));
4372     acc = multAcc_32x32_keep64(acc, out, (xfract));
4373     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
4374     out = mult_32x32_dext_4(y1, (0xFFFFF - xfract));
4375     acc = multAcc_32x32_keep64(acc, out, (yfract));
4376     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
4377     out = mult_32x32_dext_4(y2, (xfract));
4378     acc = multAcc_32x32_keep64(acc, out, (yfract));
4379 #else
4380     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
4381     acc = ((q63_t) out * (0xFFFFF - yfract));
4382     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
4383     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
4384     acc += ((q63_t) out * (xfract));
4385     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
4386     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
4387     acc += ((q63_t) out * (yfract));
4388     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
4389     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
4390     acc += ((q63_t) out * (yfract));
4391 #endif
4392     /* acc is in 13.51 format and down shift acc by 36 times */
4393     /* Convert out to 1.15 format */
4394     return ((q15_t)(acc >> 36));
4395 }
4396 /**
4397  * @}
4398  */ // end of BilinearInterpolate group
4399 
4400 void test(q7_t *pSrc, q7_t *pDst);
4401 
4402 /**
4403  * @addtogroup BilinearInterpolate
4404  * @{
4405  */
4406 /**
4407 * @brief  Q7 bilinear interpolation.
4408 * @param[in,out] S  points to an instance of the interpolation structure.
4409 * @param[in]     X  interpolation coordinate in 12.20 format.
4410 * @param[in]     Y  interpolation coordinate in 12.20 format.
4411 * @return out interpolated value.
4412 */
csky_bilinear_interp_q7(csky_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)4413 __STATIC_INLINE q7_t csky_bilinear_interp_q7(
4414     csky_bilinear_interp_instance_q7 * S,
4415     q31_t X,
4416     q31_t Y)
4417 {
4418     q63_t acc = 0;                               /* output */
4419     q31_t out;                                   /* Temporary output */
4420     q31_t xfract, yfract;                        /* X, Y fractional parts */
4421     q7_t x1, x2, y1, y2;                         /* Nearest output values */
4422     int32_t rI, cI;                              /* Row and column indices */
4423     q7_t *pYData = S->pData;                     /* pointer to output table values */
4424     uint32_t nCols = S->numCols;                 /* num of rows */
4425     /* Input is in 12.20 format */
4426     /* 12 bits for the table index */
4427     /* Index value calculation */
4428     rI = ((X & (q31_t)0xFFF00000) >> 20);
4429     /* Input is in 12.20 format */
4430     /* 12 bits for the table index */
4431     /* Index value calculation */
4432     cI = ((Y & (q31_t)0xFFF00000) >> 20);
4433     /* Care taken for table outside boundary */
4434     /* Returns zero output when values are outside table boundary */
4435     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) {
4436         return (0);
4437     }
4438     /* 20 bits for the fractional part */
4439     /* xfract should be in 12.20 format */
4440     xfract = (X & (q31_t)0x000FFFFF);
4441     /* Read two nearest output values from the index */
4442     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
4443     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
4444     /* 20 bits for the fractional part */
4445     /* yfract should be in 12.20 format */
4446     yfract = (Y & (q31_t)0x000FFFFF);
4447     /* Read two nearest output values from the index */
4448     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
4449     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
4450     /* Calculation of x1 * (1-xfract) * (1-yfract) and acc is in 16.47 format */
4451     out = ((x1 * (0xFFFFF - xfract)));
4452 #ifdef CSKY_SIMD
4453     acc = multAcc_32x32_keep64(acc, out, (0xFFFFF - yfract));
4454     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
4455     out = ((x2 * (0xFFFFF - yfract)));
4456     acc = multAcc_32x32_keep64(acc, out, xfract);
4457     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
4458     out = ((y1 * (0xFFFFF - xfract)));
4459     acc = multAcc_32x32_keep64(acc, out, yfract);
4460     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
4461     out = ((y2 * (yfract)));
4462     acc = multAcc_32x32_keep64(acc, out, xfract);
4463 #else
4464     acc = (((q63_t) out * (0xFFFFF - yfract)));
4465     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
4466     out = ((x2 * (0xFFFFF - yfract)));
4467     acc += (((q63_t) out * (xfract)));
4468     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
4469     out = ((y1 * (0xFFFFF - xfract)));
4470     acc += (((q63_t) out * (yfract)));
4471     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
4472     out = ((y2 * (yfract)));
4473     acc += (((q63_t) out * (xfract)));
4474 #endif
4475     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
4476     return ((q7_t)(acc >> 40));
4477 }
4478 /**
4479  * @}
4480  */ // end of BilinearInterpolate group
4481 
4482 /**
4483  * @ingroup groupMath
4484  */
4485 
/**
 * @defgroup ShiftRight Right Shift
 *
 * Shifts the input value to the right by the given number of bits. The basic
 * format is:
 * <pre>
 *     a = (a) >> (shift),   1 <= shift <= bitof(a) - 1.
 * </pre>
 * The basic format is only provided for q31.
 *
 * The extended format rounds the result (ties toward +inf):
 * <pre>
 *     a = ((a) + (1 << (shift - 1))) >> (shift),   1 <= shift <= bitof(a) - 1.
 * </pre>
 *
 * The extended format is provided for q31, positive q31 and q63.
 */
4502 
4503 /**
4504  * @addtogroup ShiftRight
4505  * @{
4506  */
4507 /**
4508  * @brief  right shift Q31 version
4509  * @param[in]  a        input value to be shift.
4510  * @param[in]  shift    input positive value, the number of bits to be shift.
4511  * @param[out] result   the shifted a.
4512  *
4513  * <b>Scaling and Overflow Behavior:</b>
4514  * \par
4515  * The function is only used for right shift. So, the value of shift is
4516  * between[1,31].
4517  */
csky_shr_q31(q31_t a,q31_t shift)4518 __STATIC_INLINE q31_t csky_shr_q31(
4519     q31_t a,
4520     q31_t shift)
4521 {
4522     q31_t res;
4523 #ifdef CSKY_SIMD
4524     __ASM volatile(
4525                   "asr        %0, %1, %2\n\t"
4526                   :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4527 #else
4528     res =  ((a) >> (shift));
4529 #endif
4530     return res;
4531 }
4532 
4533 #define SHR(a, shift)                 csky_shr_q31(a, shift)
4534 
4535 /**
4536  * @}
4537  */ // end of ShiftRight group
4538 
4539 /**
4540  * @addtogroup ShiftRight
4541  * @{
4542  */
4543 /**
4544  * @brief  right shift Q31 version
4545  * @param[in]  a        input value to be shift.
4546  * @param[in]  shift    input positive value, the number of bits to be shift.
4547  * @param[out] result   the shifted a.
4548  *
4549  * <b>Scaling and Overflow Behavior:</b>
4550  * \par
4551  * The function is only used for right shift. So, the value of shift is
4552  * between[1,31]. And the output value is rounding to +inf.
4553  */
csky_pshr_q31(q31_t a,q31_t shift)4554 __STATIC_INLINE q31_t csky_pshr_q31(
4555     q31_t a,
4556     q31_t shift)
4557 {
4558     q31_t res;
4559 #ifdef CSKY_SIMD
4560     __ASM volatile(
4561                   "asr.s32.r  %0, %1, %2\n\t"
4562                   :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4563 #else
4564     res =  (a >= 0?(SHR((a) + (1<<(shift - 1)), shift))\
4565                  :(SHR((a) + ((1<<shift)>>1) -1, shift)));
4566 #endif
4567     return res;
4568 }
4569 
4570 /**
4571  * @}
4572  */ // end of ShiftRight group
4573 
4574 /**
4575  * @addtogroup ShiftRight
4576  * @{
4577  */
4578 /**
4579  * @brief  right shift Q31 version
4580  * @param[in]  a        input positive value to be shift.
4581  * @param[in]  shift    input positive value, the number of bits to be shift.
4582  * @param[out] result   the shifted a.
4583  *
4584  * <b>Scaling and Overflow Behavior:</b>
4585  * \par
4586  * The function is only used for right shift. So, the value of shift is
4587  * between[1,31]. And the output value is rounding to +inf.
4588  */
csky_pshr_pos_q31(q31_t a,q31_t shift)4589 __STATIC_INLINE q31_t csky_pshr_pos_q31(
4590     q31_t a,
4591     q31_t shift)
4592 {
4593     q31_t res;
4594 #ifdef CSKY_SIMD
4595     __ASM volatile(
4596                   "asr.s32.r  %0, %1, %2\n\t"
4597                   :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift));
4598 #else
4599     res = SHR((a) + (1<<(shift - 1)), shift);
4600 #endif
4601     return res;
4602 }
4603 
4604 /**
4605  * @}
4606  */ // end of ShiftRight group
4607 
4608 /**
4609  * @addtogroup ShiftRight
4610  * @{
4611  */
4612 /**
4613  * @brief  right shift Q63 version
4614  * @param[in]  a        input value to be shift.
4615  * @param[in]  shift    input positive value, the number of bits to be shift.
4616  * @param[out] result   the shifted a.
4617  *
4618  * <b>Scaling and Overflow Behavior:</b>
4619  * \par
4620  * The function is only used for right shift. So, the value of shift is
4621  * between[1,63]. And the output value is rounding to +inf.
4622  */
csky_pshr_q63(q63_t a,q31_t shift)4623 __STATIC_INLINE q63_t csky_pshr_q63(
4624     q63_t a,
4625     q31_t shift)
4626 {
4627     q63_t res;
4628 #ifdef CSKY_SIMD
4629   __ASM volatile(
4630                 "subi       t0, %2, 1\n\t"
4631                 "cmphsi     t0, 32\n\t"
4632                 "bt         1f\n\t"
4633                 "movi       t1, 1\n\t"
4634                 "lsl        t0, t1, t0\n\t"
4635                 "movi       t1, 0\n\t"
4636                 "add.s64.s  %1, %1, t0\n\t"
4637                 "dext       %0, %1, %R1, %2\n\t"
4638                 "asr        %R0, %R1, %2\n\t"
4639                 "br         2f\n\t"
4640                 "1:\n\t"
4641                 "subi       %2, %2, 32\n\t"
4642                 "subi       t0, t0, 32\n\t"
4643                 "movi       t1, 1\n\t"
4644                 "lsl        t1, t1, t0\n\t"
4645                 "add.s32.s  %R1, %R1, t1\n\t"
4646                 "asr        %0, %R1, %2\n\t"
4647                 "asri       %R0, %R1, 31\n\t"
4648                 "2:\n\t"
4649                 :"=r"(res), "=r"(a), "=r"(shift):"0"(res), "1"(a), "2"(shift):"t0", "t1");
4650 #else
4651   res =  (a >= 0?(SHR((a) + ((q63_t)1<<(shift - 1)), shift))\
4652                :(SHR((a) + (((q63_t)1<<shift)>>1) -1, shift)));
4653 #endif
4654   return res;
4655 }
4656 
4657 /**
4658  * @}
4659  */ // end of ShiftRight group
4660 
/* Short aliases for the rounding right-shift helpers:
 * PSHR -> csky_pshr_q31, PSHR_POSITIVE -> csky_pshr_pos_q31,
 * PSHR64 -> csky_pshr_q63. */
#define PSHR(a, shift)                csky_pshr_q31(a, shift)
#define PSHR_POSITIVE(a, shift)       csky_pshr_pos_q31(a, shift)
#define PSHR64(a, shift)              csky_pshr_q63(a, shift)
4664 
#ifndef CSKY_SIMD
/*
 * Portable C versions of the 32x32 -> upper-32-bit multiply helpers, used
 * when CSKY_SIMD is not defined.  Every macro assigns its result to (a),
 * which must therefore be a modifiable q31_t lvalue; x and y are the 32-bit
 * factors.  The "_R" variants add 0x80000000 before dropping the low 32 bits,
 * i.e. they round instead of truncating.
 */

/* SMMLAR: a = round((a * 2^32 + x * y) / 2^32).
 * The accumulator is scaled by multiplication because left-shifting a
 * negative value is undefined behaviour in C. */
#define multAcc_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) ((((q63_t) (a)) * 4294967296LL + ((q63_t) (x) * (y)) + 0x80000000LL) >> 32))

/* SMMLSR: a = round((a * 2^32 - x * y) / 2^32) */
#define multSub_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) ((((q63_t) (a)) * 4294967296LL - ((q63_t) (x) * (y)) + 0x80000000LL) >> 32))

/* SMMULR: a = round((x * y) / 2^32) */
#define mult_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL) >> 32))

/* SMMLA: a += upper 32 bits of x * y */
#define multAcc_32x32_keep32(a, x, y) \
    ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))

/* SMMLS: a -= upper 32 bits of x * y */
#define multSub_32x32_keep32(a, x, y) \
    ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))

/* SMMUL: a = upper 32 bits of x * y */
#define mult_32x32_keep32(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y)) >> 32))
#endif
4691 
4692 #ifdef   __cplusplus
4693 }
4694 #endif
4695 
4696 #endif /* _CSKY_MATH_H */
4697 
4698 /**
4699  *
4700  * End of file.
4701  */