/******************************************************************************
2  * @file     riscv_math.h
3  * @brief    Public header file for NMSIS DSP Library
4  * @version  V1.6.0
5  * @date     18. March 2019
6  ******************************************************************************/
7 /*
8  * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
9  * Copyright (c) 2019 Nuclei Limited. All rights reserved.
10  *
11  * SPDX-License-Identifier: Apache-2.0
12  *
13  * Licensed under the Apache License, Version 2.0 (the License); you may
14  * not use this file except in compliance with the License.
15  * You may obtain a copy of the License at
16  *
17  * www.apache.org/licenses/LICENSE-2.0
18  *
19  * Unless required by applicable law or agreed to in writing, software
20  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
21  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  * See the License for the specific language governing permissions and
23  * limitations under the License.
24  */
25 
26 /**
27    \mainpage NMSIS DSP Software Library
28    *
29    * Introduction
30    * ------------
31    *
32    * This user manual describes the NMSIS DSP software library,
33    * a suite of common signal processing functions for use on Nuclei N/NX processor based devices.
34    *
35    * The library is divided into a number of functions each covering a specific category:
36    * - Basic math functions
37    * - Fast math functions
38    * - Complex math functions
39    * - Filters
40    * - Matrix functions
41    * - Transform functions
42    * - Motor control functions
43    * - Statistical functions
44    * - Support functions
45    * - Interpolation functions
46    *
   * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
49    *
50    * The library functions are declared in the public file <code>riscv_math.h</code> which is placed in the <code>Include</code> folder.
51    * Simply include this file and link the appropriate library in the application and begin calling the library functions.
   * The library provides a single public header file <code>riscv_math.h</code> for little-endian Nuclei N cores.
   * The same header file is used for floating-point unit (FPU) variants.
54    *
55    * \note Please refer to [NMSIS-DSP](../../../dsp/index.html)
56    *
57    * Examples
58    * --------
59    *
60    * The library ships with a number of examples which demonstrate how to use the library functions.
61    *
62    * Toolchain Support
63    * -----------------
64    *
65    * The library has been developed and tested with nuclei riscv gcc toolchain.
66    *
67    * Building the Library
68    * --------------------
69    *
   * The NMSIS repository contains a Makefile in the <code>NMSIS/</code> folder for rebuilding the libraries with the Nuclei RISC-V GCC toolchain.
   * * In the *NMSIS* folder, run `make gen_dsp_lib` to build and install the DSP library into the **NMSIS/Library/DSP/GCC** folder.
72    *
73    * Preprocessor Macros
74    * -------------------
75    *
   * Each library project has different preprocessor macros.
77    *
78    * - RISCV_MATH_MATRIX_CHECK:
79    *
80    * Define macro RISCV_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
81    *
82    * - RISCV_MATH_ROUNDING:
83    *
84    * Define macro RISCV_MATH_ROUNDING for rounding on support functions
85    *
86    * - RISCV_MATH_LOOPUNROLL:
87    *
88    * Define macro RISCV_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
89    *
90    */
91 
92 
93 /**
94  * @defgroup groupMath Basic Math Functions
95  */
96 
97 /**
98  * @defgroup groupFastMath Fast Math Functions
99  * This set of functions provides a fast approximation to sine, cosine, and square root.
100  * As compared to most of the other functions in the NMSIS math library, the fast math functions
101  * operate on individual values and not arrays.
102  * There are separate functions for Q15, Q31, and floating-point data.
103  *
104  */
105 
106 /**
107  * @defgroup groupCmplxMath Complex Math Functions
108  * This set of functions operates on complex data vectors.
109  * The data in the complex arrays is stored in an interleaved fashion
110  * (real, imag, real, imag, ...).
111  * In the API functions, the number of samples in a complex array refers
112  * to the number of complex values; the array contains twice this number of
113  * real values.
114  */
115 
116 /**
117  * @defgroup groupFilters Filtering Functions
118  */
119 
120 /**
121  * @defgroup groupMatrix Matrix Functions
122  *
123  * This set of functions provides basic matrix math operations.
124  * The functions operate on matrix data structures.  For example,
125  * the type
126  * definition for the floating-point matrix structure is shown
127  * below:
128  * <pre>
129  *     typedef struct
130  *     {
131  *       uint16_t numRows;     // number of rows of the matrix.
132  *       uint16_t numCols;     // number of columns of the matrix.
133  *       float32_t *pData;     // points to the data of the matrix.
134  *     } riscv_matrix_instance_f32;
135  * </pre>
136  * There are similar definitions for Q15 and Q31 data types.
137  *
138  * The structure specifies the size of the matrix and then points to
139  * an array of data.  The array is of size <code>numRows X numCols</code>
140  * and the values are arranged in row order.  That is, the
141  * matrix element (i, j) is stored at:
142  * <pre>
143  *     pData[i*numCols + j]
144  * </pre>
145  *
146  * \par Init Functions
147  * There is an associated initialization function for each type of matrix
148  * data structure.
149  * The initialization function sets the values of the internal structure fields.
150  * Refer to \ref riscv_mat_init_f32(), \ref riscv_mat_init_q31() and \ref riscv_mat_init_q15()
151  * for floating-point, Q31 and Q15 types,  respectively.
152  *
153  * \par
154  * Use of the initialization function is optional. However, if initialization function is used
155  * then the instance structure cannot be placed into a const data section.
156  * To place the instance structure in a const data
157  * section, manually initialize the data structure.  For example:
158  * <pre>
159  * <code>riscv_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
160  * <code>riscv_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
161  * <code>riscv_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
162  * </pre>
163  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
164  * specifies the number of columns, and <code>pData</code> points to the
165  * data array.
166  *
167  * \par Size Checking
168  * By default all of the matrix functions perform size checking on the input and
169  * output matrices. For example, the matrix addition function verifies that the
170  * two input matrices and the output matrix all have the same number of rows and
171  * columns. If the size check fails the functions return:
172  * <pre>
173  *     RISCV_MATH_SIZE_MISMATCH
174  * </pre>
175  * Otherwise the functions return
176  * <pre>
177  *     RISCV_MATH_SUCCESS
178  * </pre>
179  * There is some overhead associated with this matrix size checking.
180  * The matrix size checking is enabled via the \#define
181  * <pre>
182  *     RISCV_MATH_MATRIX_CHECK
183  * </pre>
184  * within the library project settings.  By default this macro is defined
185  * and size checking is enabled. By changing the project settings and
186  * undefining this macro size checking is eliminated and the functions
187  * run a bit faster. With size checking disabled the functions always
188  * return <code>RISCV_MATH_SUCCESS</code>.
189  */
190 
191 /**
192  * @defgroup groupTransforms Transform Functions
193  */
194 
195 /**
196  * @defgroup groupController Controller Functions
197  */
198 
199 /**
200  * @defgroup groupStats Statistics Functions
201  */
202 
203 /**
204  * @defgroup groupSupport Support Functions
205  */
206 
207 /**
208  * @defgroup groupInterpolation Interpolation Functions
209  * These functions perform 1- and 2-dimensional interpolation of data.
210  * Linear interpolation is used for 1-dimensional data and
211  * bilinear interpolation is used for 2-dimensional data.
212  */
213 
214 /**
215  * @defgroup groupExamples Examples
216  */
217 
218 #ifndef _RISCV_MATH_H
219 #define _RISCV_MATH_H
220 
221 #ifdef   __cplusplus
222 extern "C"
223 {
224 #endif
225 
/*
 * Compiler specific diagnostic adjustment.
 *
 * Only the GNU-compatible branch changes anything: it saves the current
 * diagnostic state and silences the conversion / unused-parameter warnings
 * for the code that follows. The matching "diagnostic pop" is not visible in
 * this part of the file. All other recognised compilers need no adjustment;
 * an unrecognised compiler is rejected at compile time.
 */
#if   defined ( __CC_ARM )

#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )

#elif defined ( __GNUC__ )
  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wsign-conversion"
  #pragma GCC diagnostic ignored "-Wconversion"
  #pragma GCC diagnostic ignored "-Wunused-parameter"

#elif defined ( __ICCRISCV__ )

#elif defined ( __TI_RISCV__ )

#elif defined ( __CSMC__ )

#elif defined ( __TASKING__ )

#elif defined ( _MSC_VER )

#else
  #error Unknown compiler
#endif
250 
251 
252 /* Included for instrinsics definitions */
253 #if !defined ( _MSC_VER )
254 
255 #define __NMSIS_GENERIC
256 #if (defined (__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1))
257     #define __DSP_PRESENT   1
258 #endif
259 #include "nmsis_core.h"
260 
261 
262 #else
263 #include <stdint.h>
264 #define __STATIC_FORCEINLINE static __forceinline
265 #define __ALIGNED(x) __declspec(align(x))
266 #define LOW_OPTIMIZATION_ENTER
267 #define LOW_OPTIMIZATION_EXIT
268 #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
269 #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
270 #endif
271 
272 #include "string.h"
273 #include <math.h>
274 #include "float.h"
275 
276 /* evaluate RISCV DSP feature */
277 #if (defined (__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1))
278     #define RISCV_MATH_DSP
279 #endif
280 
/**
 * @brief Macros required for reciprocal calculation in Normalized LMS
 */

#define DELTA_Q31          (0x100)
#define DELTA_Q15          0x5
#define INDEX_MASK         0x0000003F
/* PI is only defined here when the including code has not already done so. */
#ifndef PI
  #define PI               3.14159265358979f
#endif

/**
 * @brief Macros required for SINE and COSINE Fast math approximations
 */

#define FAST_MATH_TABLE_SIZE  512
#define FAST_MATH_Q31_SHIFT   (32 - 10)
#define FAST_MATH_Q15_SHIFT   (16 - 10)
#define CONTROLLER_Q31_SHIFT  (32 - 9)
#define TABLE_SPACING_Q31     0x400000
#define TABLE_SPACING_Q15     0x80

/**
 * @brief Macros required for SINE and COSINE Controller functions
 */
/* 1.31(q31) Fixed value of 2/360 */
/* -1 to +1 is divided into 360 values so total spacing is (2/360) */
#define INPUT_SPACING         0xB60B61
309 
310 
/**
 * @brief Error status returned by some functions in the library.
 *
 * Success is zero; every error code is a distinct negative value.
 */

typedef enum
{
  RISCV_MATH_SUCCESS        =  0,        /**< No error */
  RISCV_MATH_ARGUMENT_ERROR = -1,        /**< One or more arguments are incorrect */
  RISCV_MATH_LENGTH_ERROR   = -2,        /**< Length of data buffer is incorrect */
  RISCV_MATH_SIZE_MISMATCH  = -3,        /**< Size of matrices is not compatible with the operation */
  RISCV_MATH_NANINF         = -4,        /**< Not-a-number (NaN) or infinity is generated */
  RISCV_MATH_SINGULAR       = -5,        /**< Input matrix is singular and cannot be inverted */
  RISCV_MATH_TEST_FAILURE   = -6         /**< Test Failed */
} riscv_status;
325 
/**
 * @brief 8-bit fractional data type in 1.7 format.
 */
typedef int8_t q7_t;

/**
 * @brief 16-bit fractional data type in 1.15 format.
 */
typedef int16_t q15_t;

/**
 * @brief 32-bit fractional data type in 1.31 format.
 */
typedef int32_t q31_t;

/**
 * @brief 64-bit fractional data type in 1.63 format.
 */
typedef int64_t q63_t;

/**
 * @brief 32-bit floating-point type definition.
 */
typedef float float32_t;

/**
 * @brief 64-bit floating-point type definition.
 */
typedef double float64_t;
355 
356 
/**
  @brief definition to read/write two 16 bit values.
  @deprecated

  __SIMD32(addr)        reinterprets the pointer variable addr as an lvalue of type (__SIMD32_TYPE *).
  __SIMD32_CONST(addr)  converts the pointer value addr to (__SIMD32_TYPE *).
  _SIMD32_OFFSET(addr)  accesses the __SIMD32_TYPE object located at addr.
  __SIMD64(addr)        reinterprets the pointer variable addr as an lvalue of type (int64_t *).
 */
#define __SIMD32_TYPE int32_t

#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
#define __SIMD32_CONST(addr)  ( (__SIMD32_TYPE * )   (addr))
#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE * )   (addr))
#define __SIMD64(addr)        (*(      int64_t **) & (addr))
367 
368 /* SIMD replacement */
369 
370 /**
371   @brief         Read 2 Q31 from Q31 pointer and increment pointer afterwards.
372   @param[in]     pQ31      points to input value
373   @return        Q63 value
374  */
read_q31x2_ia(q31_t ** pQ31)375 __STATIC_FORCEINLINE q63_t read_q31x2_ia (
376   q31_t ** pQ31)
377 {
378   q63_t val;
379 #ifndef RISCV_ALIGN_ACCESS
380 #if __RISCV_XLEN == 64
381   val = __LD(*pQ31);
382 #else
383   val = *((q63_t *)*pQ31);
384 #endif /* __RISCV_XLEN == 64 */
385 #else
386   memcpy((void *)(&val), (void *)(*pQ31), 8);
387 #endif
388   *pQ31 += 2;
389   return (val);
390 }
391 
392 /**
393   @brief         Read 2 Q31 from Q31 pointer and decrement pointer afterwards.
394   @param[in]     pQ31      points to input value
395   @return        Q63 value
396  */
read_q31x2_da(q31_t ** pQ31)397 __STATIC_FORCEINLINE q63_t read_q31x2_da (
398   q31_t ** pQ31)
399 {
400   q63_t val;
401 #ifndef RISCV_ALIGN_ACCESS
402 #if __RISCV_XLEN == 64
403   val = __LD(*pQ31);
404 #else
405   val = *((q63_t *)*pQ31);
406 #endif /* __RISCV_XLEN == 64 */
407 #else
408   memcpy((void *)(&val), (void *)(*pQ31), 8);
409 #endif
410   *pQ31 -= 2;
411   return (val);
412 }
413 
414 /**
415   @brief         Read 2 Q31 from Q31 pointer.
416   @param[in]     pQ31      points to input value
417   @return        Q63 value
418  */
read_q31x2(q31_t * pQ31)419 __STATIC_FORCEINLINE q63_t read_q31x2 (
420   q31_t * pQ31)
421 {
422   q63_t val;
423 #ifndef RISCV_ALIGN_ACCESS
424 #if __RISCV_XLEN == 64
425   val = __LD(pQ31);
426 #else
427   val = *((q63_t *)pQ31);
428 #endif /* __RISCV_XLEN == 64 */
429 #else
430   memcpy((void *)(&val), (void *)(pQ31), 8);
431 #endif
432   return (val);
433 }
434 
435 /**
436   @brief         Write 2 Q31 to Q31 pointer and increment pointer afterwards.
437   @param[in]     pQ31      points to input value
438   @param[in]     value     Q63 value
439   @return        none
440  */
write_q31x2_ia(q31_t ** pQ31,q63_t value)441 __STATIC_FORCEINLINE void write_q31x2_ia (
442         q31_t ** pQ31,
443         q63_t    value)
444 {
445 #ifndef RISCV_ALIGN_ACCESS
446 #if __RISCV_XLEN == 64
447   __SD(*pQ31, value);
448 #else
449   *((q63_t *)*pQ31) = value;
450 #endif /* __RISCV_XLEN == 64 */
451 #else
452   memcpy((void *)(*pQ31), (void *)(&value), 8);
453 #endif
454   *pQ31 += 2;
455 }
456 
457 /**
458   @brief         Write 2 Q31 to Q31 pointer.
459   @param[in]     pQ31      points to input value
460   @param[in]     value     Q63 value
461   @return        none
462  */
write_q31x2(q31_t * pQ31,q63_t value)463 __STATIC_FORCEINLINE void write_q31x2 (
464         q31_t * pQ31,
465         q63_t value)
466 {
467 #ifndef RISCV_ALIGN_ACCESS
468 #if __RISCV_XLEN == 64
469   __SD(pQ31, value);
470 #else
471   *((q63_t *)pQ31) = value;
472 #endif /* __RISCV_XLEN == 64 */
473 #else
474   memcpy((void *)(pQ31), (void *)(&value), 8);
475 #endif
476 }
477 
478 /**
479   @brief         Read 2 Q15 from Q15 pointer.
480   @param[in]     pQ15      points to input value
481   @return        Q31 value
482  */
read_q15x2(q15_t * pQ15)483 __STATIC_FORCEINLINE q31_t read_q15x2 (
484   q15_t * pQ15)
485 {
486   q31_t val;
487 
488 #ifndef RISCV_ALIGN_ACCESS
489   __ASM volatile (
490     "lw %0, (%1)"
491     :"=r"(val)
492     :"r"(pQ15)
493   );
494 #else
495   memcpy((void *)(&val), (void *)(pQ15), 4);
496 #endif
497   return (val);
498 }
499 
500 /**
501   @brief         Read 2 Q15 from Q15 pointer and increment pointer afterwards.
502   @param[in]     pQ15      points to input value
503   @return        Q31 value
504  */
read_q15x2_ia(q15_t ** pQ15)505 __STATIC_FORCEINLINE q31_t read_q15x2_ia (
506   q15_t ** pQ15)
507 {
508   q31_t val;
509 
510 #ifndef RISCV_ALIGN_ACCESS
511   __ASM volatile (
512     "lw %0, (%1)"
513     :"=r"(val)
514     :"r"(*pQ15)
515   );
516 #else
517   memcpy((void *)(&val), (void *)(*pQ15), 4);
518 #endif
519   *pQ15 += 2;
520 
521   return (val);
522 }
523 
524 /**
525   @brief         Read 4 Q15 from Q15 pointer and increment pointer afterwards.
526   @param[in]     pQ15      points to input value
527   @return        Q63 value
528  */
read_q15x4_ia(q15_t ** pQ15)529 __STATIC_FORCEINLINE q63_t read_q15x4_ia (
530         q15_t ** pQ15)
531 {
532   q63_t val;
533 #ifndef RISCV_ALIGN_ACCESS
534   val = *((q63_t *)*pQ15);
535 #else
536   memcpy((void *)(&val), (void *)(*pQ15), 8);
537 #endif
538   *pQ15 += 4;
539 
540   return (val);
541 }
542 
543 /**
544   @brief         Read 4 Q15 from Q15 pointer.
545   @param[in]     pQ15      points to input value
546   @return        Q63 value
547  */
read_q15x4(q15_t * pQ15)548 __STATIC_FORCEINLINE q63_t read_q15x4 (
549         q15_t * pQ15)
550 {
551   q63_t val;
552 #ifndef RISCV_ALIGN_ACCESS
553 #if __RISCV_XLEN == 64
554   val = __LD(pQ15);
555 #else
556   val = *((q63_t *)pQ15);
557 #endif /* __RISCV_XLEN == 64 */
558 #else
559   memcpy((void *)(&val), (void *)(pQ15), 8);
560 #endif
561   return (val);
562 }
563 
564 /**
565   @brief         Read 2 Q15 from Q15 pointer and decrement pointer afterwards.
566   @param[in]     pQ15      points to input value
567   @return        Q31 value
568  */
read_q15x2_da(q15_t ** pQ15)569 __STATIC_FORCEINLINE q31_t read_q15x2_da (
570   q15_t ** pQ15)
571 {
572   q31_t val;
573 
574 #ifndef RISCV_ALIGN_ACCESS
575   __ASM volatile (
576     "lw %0, (%1)"
577     :"=r"(val)
578     :"r"(*pQ15)
579   );
580 #else
581   memcpy((void *)(&val), (void *)(*pQ15), 4);
582 #endif
583   *pQ15 -= 2;
584 
585   return (val);
586 }
587 
588 /**
589   @brief         Read 4 Q15 from Q15 pointer and decrement pointer afterwards.
590   @param[in]     pQ15      points to input value
591   @return        Q31 value
592  */
read_q15x4_da(q15_t ** pQ15)593 __STATIC_FORCEINLINE q63_t read_q15x4_da (
594         q15_t ** pQ15)
595 {
596     q63_t val;
597 #ifndef RISCV_ALIGN_ACCESS
598     val = *((q63_t *)*pQ15);
599 #else
600     memcpy((void *)(&val), (void *)(*pQ15), 8);
601 #endif
602     *pQ15 -= 4;
603 
604     return (val);
605 }
606 
607 /**
608   @brief         Write 2 Q15 to Q15 pointer and increment pointer afterwards.
609   @param[in]     pQ15      points to input value
610   @param[in]     value     Q31 value
611   @return        none
612  */
write_q15x2_ia(q15_t ** pQ15,q31_t value)613 __STATIC_FORCEINLINE void write_q15x2_ia (
614   q15_t ** pQ15,
615   q31_t    value)
616 {
617 #ifndef RISCV_ALIGN_ACCESS
618   __ASM volatile (
619     "sw %0, (%1)"
620     :
621     :"r"(value), "r"(*pQ15)
622     :"memory"
623   );
624 #else
625   memcpy((void *)(*pQ15), (void *)(&value), 4);
626 #endif
627   *pQ15 += 2;
628 }
629 
630 /**
631   @brief         Write 4 Q15 to Q15 pointer and increment pointer afterwards.
632   @param[in]     pQ15      points to input value
633   @param[in]     value     Q31 value
634   @return        none
635  */
write_q15x4_ia(q15_t ** pQ15,q63_t value)636 __STATIC_FORCEINLINE void write_q15x4_ia (
637         q15_t ** pQ15,
638         q63_t    value)
639 {
640 #ifndef RISCV_ALIGN_ACCESS
641     *((q63_t *)*pQ15) = value;
642 #else
643     memcpy((void *)(*pQ15), (void *)(&value), 8);
644 #endif
645     *pQ15 += 4;
646 }
647 
648 /**
649   @brief         Write 4 Q15 to Q15 pointer and decrement pointer afterwards.
650   @param[in]     pQ15      points to input value
651   @param[in]     value     Q31 value
652   @return        none
653  */
write_q15x4_da(q15_t ** pQ15,q63_t value)654 __STATIC_FORCEINLINE void write_q15x4_da (
655         q15_t ** pQ15,
656         q63_t    value)
657 {
658 #ifndef RISCV_ALIGN_ACCESS
659     *((q63_t *)*pQ15) = value;
660 #else
661     memcpy((void *)(*pQ15), (void *)(&value), 8);
662 #endif
663     *pQ15 -= 4;
664 }
665 
666 /**
667   @brief         Write 2 Q15 to Q15 pointer.
668   @param[in]     pQ15      points to input value
669   @param[in]     value     Q31 value
670   @return        none
671  */
write_q15x2(q15_t * pQ15,q31_t value)672 __STATIC_FORCEINLINE void write_q15x2 (
673   q15_t * pQ15,
674   q31_t   value)
675 {
676 #ifndef RISCV_ALIGN_ACCESS
677   __ASM volatile (
678     "sw %0, (%1)"
679     :
680     :"r"(value), "r"(pQ15)
681     :"memory"
682   );
683 #else
684   memcpy((void *)(pQ15), (void *)(&value), 4);
685 #endif
686 
687 }
688 
689 /**
690   @brief         Write 4 Q15 to Q15 pointer.
691   @param[in]     pQ15      points to input value
692   @param[in]     value     Q31 value
693   @return        none
694  */
write_q15x4(q15_t * pQ15,q63_t value)695 __STATIC_FORCEINLINE void write_q15x4 (
696         q15_t * pQ15,
697         q63_t   value)
698 {
699 #ifndef RISCV_ALIGN_ACCESS
700   *((q63_t *)pQ15) = value;
701 #else
702   memcpy((void *)(pQ15), (void *)(&value), 8);
703 #endif
704 }
705 
706 /**
707   @brief         Read 8 Q7 from Q7 pointer and increment pointer afterwards.
708   @param[in]     pQ7       points to input value
709   @return        Q63 value
710  */
read_q7x8_ia(q7_t ** pQ7)711 __STATIC_FORCEINLINE q63_t read_q7x8_ia (
712         q7_t ** pQ7)
713 {
714     q63_t val;
715 #ifndef RISCV_ALIGN_ACCESS
716     val = *((q63_t *)*pQ7);
717 #else
718     memcpy((void *)(&val), (void *)(*pQ7), 8);
719 #endif
720     *pQ7 += 8;
721 
722     return val;
723 }
724 
725 /**
726   @brief         Read 8 Q7 from Q7 pointer and decrement pointer afterwards.
727   @param[in]     pQ7       points to input value
728   @return        Q63 value
729  */
read_q7x8_da(q7_t ** pQ7)730 __STATIC_FORCEINLINE q63_t read_q7x8_da (
731         q7_t ** pQ7)
732 {
733     q63_t val;
734 #ifndef RISCV_ALIGN_ACCESS
735     val = *((q63_t *)*pQ7);
736 #else
737     memcpy((void *)(&val), (void *)(*pQ7), 8);
738 #endif
739     *pQ7 -= 8;
740     return val;
741 }
742 
743 /**
744   @brief         Read 4 Q7 from Q7 pointer and increment pointer afterwards.
745   @param[in]     pQ7       points to input value
746   @return        Q31 value
747  */
read_q7x4_ia(q7_t ** pQ7)748 __STATIC_FORCEINLINE q31_t read_q7x4_ia (
749   q7_t ** pQ7)
750 {
751   q31_t val;
752 
753 #ifndef RISCV_ALIGN_ACCESS
754   __ASM volatile (
755     "lw %0, (%1)"
756     :"=r"(val)
757     :"r"(*pQ7)
758   );
759 #else
760   memcpy((void *)(&val), (void *)(*pQ7), 4);
761 #endif
762   *pQ7 += 4;
763 
764   return (val);
765 }
766 
767 /**
768   @brief         Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
769   @param[in]     pQ7       points to input value
770   @return        Q31 value
771  */
read_q7x4_da(q7_t ** pQ7)772 __STATIC_FORCEINLINE q31_t read_q7x4_da (
773   q7_t ** pQ7)
774 {
775   q31_t val;
776 
777 #ifndef RISCV_ALIGN_ACCESS
778   __ASM volatile (
779     "lw %0, (%1)"
780     :"=r"(val)
781     :"r"(*pQ7)
782   );
783 #else
784   memcpy((void *)(&val), (void *)(*pQ7), 4);
785 #endif
786   *pQ7 -= 4;
787 
788   return (val);
789 }
790 
791 /**
792   @brief         Write 8 Q7 to Q7 pointer and increment pointer afterwards.
793   @param[in]     pQ7       points to input value
794   @param[in]     value     Q63 value
795   @return        none
796  */
write_q7x8_ia(q7_t ** pQ7,q63_t value)797 __STATIC_FORCEINLINE void write_q7x8_ia (
798         q7_t ** pQ7,
799         q63_t   value)
800 {
801 #ifndef RISCV_ALIGN_ACCESS
802     *((q63_t *)*pQ7) = value;
803 #else
804     memcpy((void *)(*pQ7), (void *)(&value), 8);
805 #endif
806     *pQ7 += 8;
807 }
808 
809 /**
810   @brief         Write 4 Q7 to Q7 pointer and increment pointer afterwards.
811   @param[in]     pQ7       points to input value
812   @param[in]     value     Q31 value
813   @return        none
814  */
write_q7x4_ia(q7_t ** pQ7,q31_t value)815 __STATIC_FORCEINLINE void write_q7x4_ia (
816   q7_t ** pQ7,
817   q31_t   value)
818 {
819   q31_t val = value;
820 
821 #ifndef RISCV_ALIGN_ACCESS
822   __ASM volatile (
823     "sw %0, (%1)"
824     :
825     :"r"(value), "r"(*pQ7)
826     :"memory"
827   );
828 #else
829   memcpy((void *)(*pQ7), (void *)(&value), 4);
830 #endif
831   *pQ7 += 4;
832 }
833 
/**
 * @brief definition to pack four 8 bit values.
 *
 * The low byte of v0 ends up in bits 7..0, v1 in bits 15..8, v2 in bits
 * 23..16 and v3 in bits 31..24 of the int32_t result. Each argument is first
 * converted to int32_t, then masked and shifted as an unsigned value so that
 * negative inputs and a set top bit never cause a signed left shift.
 */
#define __PACKq7(v0,v1,v2,v3) ( (int32_t)( (((uint32_t)(int32_t)(v0) & 0xFFu) <<  0) | \
                                           (((uint32_t)(int32_t)(v1) & 0xFFu) <<  8) | \
                                           (((uint32_t)(int32_t)(v2) & 0xFFu) << 16) | \
                                           (((uint32_t)(int32_t)(v3) & 0xFFu) << 24) ) )
841 
842 
843 
844   /**
845    * @brief Clips Q63 to Q31 values.
846    */
clip_q63_to_q31(q63_t x)847   __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
848   q63_t x)
849   {
850     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
851       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
852   }
853 
854   /**
855    * @brief Clips Q63 to Q15 values.
856    */
clip_q63_to_q15(q63_t x)857   __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
858   q63_t x)
859   {
860     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
861       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
862   }
863 
864   /**
865    * @brief Clips Q31 to Q7 values.
866    */
clip_q31_to_q7(q31_t x)867   __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
868   q31_t x)
869   {
870     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
871       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
872   }
873 
874   /**
875    * @brief Clips Q31 to Q15 values.
876    */
clip_q31_to_q15(q31_t x)877   __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
878   q31_t x)
879   {
880     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
881       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
882   }
883 
884   /**
885    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
886    */
mult32x64(q63_t x,q31_t y)887   __STATIC_FORCEINLINE q63_t mult32x64(
888   q63_t x,
889   q31_t y)
890   {
891     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
892             (((q63_t) (x >> 32)                * y)      )  );
893   }
894 
895   /**
896    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
897    */
riscv_recip_q31(q31_t in,q31_t * dst,const q31_t * pRecipTable)898   __STATIC_FORCEINLINE uint32_t riscv_recip_q31(
899         q31_t in,
900         q31_t * dst,
901   const q31_t * pRecipTable)
902   {
903     q31_t out;
904     uint32_t tempVal;
905     uint32_t index, i;
906     uint32_t signBits;
907 
908     if (in > 0)
909     {
910       signBits = ((uint32_t) (__CLZ( in) - 1));
911     }
912     else
913     {
914       signBits = ((uint32_t) (__CLZ(-in) - 1));
915     }
916 
917     /* Convert input sample to 1.31 format */
918     in = (in << signBits);
919 
920     /* calculation of index for initial approximated Val */
921     index = (uint32_t)(in >> 24);
922     index = (index & INDEX_MASK);
923 
924     /* 1.31 with exp 1 */
925     out = pRecipTable[index];
926 
927     /* calculation of reciprocal value */
928     /* running approximation for two iterations */
929     for (i = 0U; i < 2U; i++)
930     {
931       tempVal = (uint32_t) (((q63_t) in * out) >> 31);
932       tempVal = 0x7FFFFFFFu - tempVal;
933       /*      1.31 with exp 1 */
934       /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
935       out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
936     }
937 
938     /* write output */
939     *dst = out;
940 
941     /* return num of signbits of out = 1/in value */
942     return (signBits + 1U);
943   }
944 
945 
946   /**
947    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
948    */
riscv_recip_q15(q15_t in,q15_t * dst,const q15_t * pRecipTable)949   __STATIC_FORCEINLINE uint32_t riscv_recip_q15(
950         q15_t in,
951         q15_t * dst,
952   const q15_t * pRecipTable)
953   {
954     q15_t out = 0;
955     uint32_t tempVal = 0;
956     uint32_t index = 0, i = 0;
957     uint32_t signBits = 0;
958 
959     if (in > 0)
960     {
961       signBits = ((uint32_t)(__CLZ( in) - 17));
962     }
963     else
964     {
965       signBits = ((uint32_t)(__CLZ(-in) - 17));
966     }
967 
968     /* Convert input sample to 1.15 format */
969     in = (in << signBits);
970 
971     /* calculation of index for initial approximated Val */
972     index = (uint32_t)(in >>  8);
973     index = (index & INDEX_MASK);
974 
975     /*      1.15 with exp 1  */
976     out = pRecipTable[index];
977 
978     /* calculation of reciprocal value */
979     /* running approximation for two iterations */
980     for (i = 0U; i < 2U; i++)
981     {
982       tempVal = (uint32_t) (((q31_t) in * out) >> 15);
983       tempVal = 0x7FFFu - tempVal;
984       /*      1.15 with exp 1 */
985       out = (q15_t) (((q31_t) out * tempVal) >> 14);
986       /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
987     }
988 
989     /* write output */
990     *dst = out;
991 
992     /* return num of signbits of out = 1/in value */
993     return (signBits + 1);
994   }
995 
996 
997 /*
998  * @brief C custom defined intrinsic functions
999  */
1000 #if !defined (RISCV_MATH_DSP)
1001 
1002   /*
1003    * @brief C custom defined QADD8
1004    */
__QADD8(uint32_t x,uint32_t y)1005   __STATIC_FORCEINLINE uint32_t __QADD8(
1006   uint32_t x,
1007   uint32_t y)
1008   {
1009     q31_t r, s, t, u;
1010 
1011     r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
1012     s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
1013     t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
1014     u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
1015 
1016     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
1017   }
1018 
1019 
1020   /*
1021    * @brief C custom defined QSUB8
1022    */
__QSUB8(uint32_t x,uint32_t y)1023   __STATIC_FORCEINLINE uint32_t __QSUB8(
1024   uint32_t x,
1025   uint32_t y)
1026   {
1027     q31_t r, s, t, u;
1028 
1029     r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
1030     s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
1031     t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
1032     u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
1033 
1034     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
1035   }
1036 
1037 
1038   /*
1039    * @brief C custom defined QADD16
1040    */
__QADD16(uint32_t x,uint32_t y)1041   __STATIC_FORCEINLINE uint32_t __QADD16(
1042   uint32_t x,
1043   uint32_t y)
1044   {
1045 /*  q31_t r,     s;  without initialisation 'riscv_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
1046     q31_t r = 0, s = 0;
1047 
1048     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1049     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1050 
1051     return ((uint32_t)((s << 16) | (r      )));
1052   }
1053 
1054 
1055   /*
1056    * @brief C custom defined SHADD16
1057    */
__SHADD16(uint32_t x,uint32_t y)1058   __STATIC_FORCEINLINE uint32_t __SHADD16(
1059   uint32_t x,
1060   uint32_t y)
1061   {
1062     q31_t r, s;
1063 
1064     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1065     s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1066 
1067     return ((uint32_t)((s << 16) | (r      )));
1068   }
1069 
1070 
1071   /*
1072    * @brief C custom defined QSUB16
1073    */
__QSUB16(uint32_t x,uint32_t y)1074   __STATIC_FORCEINLINE uint32_t __QSUB16(
1075   uint32_t x,
1076   uint32_t y)
1077   {
1078     q31_t r, s;
1079 
1080     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1081     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1082 
1083     return ((uint32_t)((s << 16) | (r      )));
1084   }
1085 
1086 
1087   /*
1088    * @brief C custom defined SHSUB16
1089    */
__SHSUB16(uint32_t x,uint32_t y)1090   __STATIC_FORCEINLINE uint32_t __SHSUB16(
1091   uint32_t x,
1092   uint32_t y)
1093   {
1094     q31_t r, s;
1095 
1096     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1097     s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1098 
1099     return ((uint32_t)((s << 16) | (r      )));
1100   }
1101 
1102 
1103   /*
1104    * @brief C custom defined QASX
1105    */
__QASX(uint32_t x,uint32_t y)1106   __STATIC_FORCEINLINE uint32_t __QASX(
1107   uint32_t x,
1108   uint32_t y)
1109   {
1110     q31_t r, s;
1111 
1112     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1113     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1114 
1115     return ((uint32_t)((s << 16) | (r      )));
1116   }
1117 
1118 
1119   /*
1120    * @brief C custom defined SHASX
1121    */
__SHASX(uint32_t x,uint32_t y)1122   __STATIC_FORCEINLINE uint32_t __SHASX(
1123   uint32_t x,
1124   uint32_t y)
1125   {
1126     q31_t r, s;
1127 
1128     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1129     s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1130 
1131     return ((uint32_t)((s << 16) | (r      )));
1132   }
1133 
1134 
1135   /*
1136    * @brief C custom defined QSAX
1137    */
__QSAX(uint32_t x,uint32_t y)1138   __STATIC_FORCEINLINE uint32_t __QSAX(
1139   uint32_t x,
1140   uint32_t y)
1141   {
1142     q31_t r, s;
1143 
1144     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1145     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1146 
1147     return ((uint32_t)((s << 16) | (r      )));
1148   }
1149 
1150 
1151   /*
1152    * @brief C custom defined SHSAX
1153    */
__SHSAX(uint32_t x,uint32_t y)1154   __STATIC_FORCEINLINE uint32_t __SHSAX(
1155   uint32_t x,
1156   uint32_t y)
1157   {
1158     q31_t r, s;
1159 
1160     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1161     s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1162 
1163     return ((uint32_t)((s << 16) | (r      )));
1164   }
1165 
1166 
1167   /*
1168    * @brief C custom defined SMUSDX
1169    */
__SMUSDX(uint32_t x,uint32_t y)1170   __STATIC_FORCEINLINE uint32_t __SMUSDX(
1171   uint32_t x,
1172   uint32_t y)
1173   {
1174     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
1175                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
1176   }
1177 
1178   /*
1179    * @brief C custom defined SMUADX
1180    */
__SMUADX(uint32_t x,uint32_t y)1181   __STATIC_FORCEINLINE uint32_t __SMUADX(
1182   uint32_t x,
1183   uint32_t y)
1184   {
1185     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1186                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
1187   }
1188 
1189 
1190   /*
1191    * @brief C custom defined QADD
1192    */
__QADD(int32_t x,int32_t y)1193   __STATIC_FORCEINLINE int32_t __QADD(
1194   int32_t x,
1195   int32_t y)
1196   {
1197     return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
1198   }
1199 
1200 
1201   /*
1202    * @brief C custom defined QSUB
1203    */
__QSUB(int32_t x,int32_t y)1204   __STATIC_FORCEINLINE int32_t __QSUB(
1205   int32_t x,
1206   int32_t y)
1207   {
1208     return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
1209   }
1210 
1211 
1212   /*
1213    * @brief C custom defined SMLAD
1214    */
__SMLAD(uint32_t x,uint32_t y,uint32_t sum)1215   __STATIC_FORCEINLINE uint32_t __SMLAD(
1216   uint32_t x,
1217   uint32_t y,
1218   uint32_t sum)
1219   {
1220     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1221                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
1222                        ( ((q31_t)sum    )                                  )   ));
1223   }
1224 
1225 
1226   /*
1227    * @brief C custom defined SMLADX
1228    */
__SMLADX(uint32_t x,uint32_t y,uint32_t sum)1229   __STATIC_FORCEINLINE uint32_t __SMLADX(
1230   uint32_t x,
1231   uint32_t y,
1232   uint32_t sum)
1233   {
1234     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1235                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1236                        ( ((q31_t)sum    )                                  )   ));
1237   }
1238 
1239 
1240   /*
1241    * @brief C custom defined SMLSDX
1242    */
__SMLSDX(uint32_t x,uint32_t y,uint32_t sum)1243   __STATIC_FORCEINLINE uint32_t __SMLSDX(
1244   uint32_t x,
1245   uint32_t y,
1246   uint32_t sum)
1247   {
1248     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
1249                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1250                        ( ((q31_t)sum    )                                  )   ));
1251   }
1252 
1253 
1254   /*
1255    * @brief C custom defined SMLALD
1256    */
__SMLALD(uint32_t x,uint32_t y,uint64_t sum)1257   __STATIC_FORCEINLINE uint64_t __SMLALD(
1258   uint32_t x,
1259   uint32_t y,
1260   uint64_t sum)
1261   {
1262 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
1263     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1264                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
1265                        ( ((q63_t)sum    )                                  )   ));
1266   }
1267 
1268 
1269   /*
1270    * @brief C custom defined SMLALDX
1271    */
__SMLALDX(uint32_t x,uint32_t y,uint64_t sum)1272   __STATIC_FORCEINLINE uint64_t __SMLALDX(
1273   uint32_t x,
1274   uint32_t y,
1275   uint64_t sum)
1276   {
1277 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
1278     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1279                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1280                        ( ((q63_t)sum    )                                  )   ));
1281   }
1282 
1283 
1284   /*
1285    * @brief C custom defined SMUAD
1286    */
__SMUAD(uint32_t x,uint32_t y)1287   __STATIC_FORCEINLINE uint32_t __SMUAD(
1288   uint32_t x,
1289   uint32_t y)
1290   {
1291     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1292                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
1293   }
1294 
1295 
1296   /*
1297    * @brief C custom defined SMUSD
1298    */
__SMUSD(uint32_t x,uint32_t y)1299   __STATIC_FORCEINLINE uint32_t __SMUSD(
1300   uint32_t x,
1301   uint32_t y)
1302   {
1303     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
1304                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
1305   }
1306 
1307 
1308   /*
1309    * @brief C custom defined SXTB16
1310    */
__SXTB16(uint32_t x)1311   __STATIC_FORCEINLINE uint32_t __SXTB16(
1312   uint32_t x)
1313   {
1314     return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
1315                        ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
1316   }
1317 
1318   /*
1319    * @brief C custom defined SMMLA
1320    */
__SMMLA(int32_t x,int32_t y,int32_t sum)1321   __STATIC_FORCEINLINE int32_t __SMMLA(
1322   int32_t x,
1323   int32_t y,
1324   int32_t sum)
1325   {
1326     return (sum + (int32_t) (((int64_t) x * y) >> 32));
1327   }
1328 
1329 #endif /* !defined (RISCV_MATH_DSP) */
1330 
1331 
1332   /**
1333    * @brief Instance structure for the Q7 FIR filter: tap count plus state and coefficient pointers.
1334    */
1335   typedef struct
1336   {
1337           uint16_t numTaps;        /**< Number of filter coefficients (taps). */
1338           q7_t *pState;            /**< Points to the state variable array, of length numTaps+blockSize-1. */
1339     const q7_t *pCoeffs;           /**< Points to the coefficient array, of length numTaps; read-only through this pointer. */
1340   } riscv_fir_instance_q7;
1341 
1342   /**
1343    * @brief Instance structure for the Q15 FIR filter: tap count plus state and coefficient pointers.
1344    */
1345   typedef struct
1346   {
1347           uint16_t numTaps;         /**< Number of filter coefficients (taps). */
1348           q15_t *pState;            /**< Points to the state variable array, of length numTaps+blockSize-1. */
1349     const q15_t *pCoeffs;           /**< Points to the coefficient array, of length numTaps; read-only through this pointer. */
1350   } riscv_fir_instance_q15;
1351 
1352   /**
1353    * @brief Instance structure for the Q31 FIR filter: tap count plus state and coefficient pointers.
1354    */
1355   typedef struct
1356   {
1357           uint16_t numTaps;         /**< Number of filter coefficients (taps). */
1358           q31_t *pState;            /**< Points to the state variable array, of length numTaps+blockSize-1. */
1359     const q31_t *pCoeffs;           /**< Points to the coefficient array, of length numTaps; read-only through this pointer. */
1360   } riscv_fir_instance_q31;
1361 
1362   /**
1363    * @brief Instance structure for the floating-point (float32_t) FIR filter: tap count plus state and coefficient pointers.
1364    */
1365   typedef struct
1366   {
1367           uint16_t numTaps;     /**< Number of filter coefficients (taps). */
1368           float32_t *pState;    /**< Points to the state variable array, of length numTaps+blockSize-1. */
1369     const float32_t *pCoeffs;   /**< Points to the coefficient array, of length numTaps; read-only through this pointer. */
1370   } riscv_fir_instance_f32;
1371 
1372   /**
1373    * @brief Processing function for the Q7 FIR filter.
1374    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
1375    * @param[in]  pSrc       points to the block of input data.
1376    * @param[out] pDst       points to the block of output data.
1377    * @param[in]  blockSize  number of samples to process.
1378    */
1379   void riscv_fir_q7(
1380   const riscv_fir_instance_q7 * S,
1381   const q7_t * pSrc,
1382         q7_t * pDst,
1383         uint32_t blockSize);
1384 
1385   /**
1386    * @brief  Initialization function for the Q7 FIR filter.
1387    * @param[in,out] S          points to an instance of the Q7 FIR structure.
1388    * @param[in]     numTaps    Number of filter coefficients in the filter.
1389    * @param[in]     pCoeffs    points to the filter coefficients.
1390    * @param[in]     pState     points to the state buffer.
1391    * @param[in]     blockSize  number of samples that are processed.
1392    */
1393   void riscv_fir_init_q7(
1394         riscv_fir_instance_q7 * S,
1395         uint16_t numTaps,
1396   const q7_t * pCoeffs,
1397         q7_t * pState,
1398         uint32_t blockSize);
1399 
1400   /**
1401    * @brief Processing function for the Q15 FIR filter.
1402    * @param[in]  S          points to an instance of the Q15 FIR structure.
1403    * @param[in]  pSrc       points to the block of input data.
1404    * @param[out] pDst       points to the block of output data.
1405    * @param[in]  blockSize  number of samples to process.
1406    */
1407   void riscv_fir_q15(
1408   const riscv_fir_instance_q15 * S,
1409   const q15_t * pSrc,
1410         q15_t * pDst,
1411         uint32_t blockSize);
1412 
1413   /**
1414    * @brief Processing function for the Q15 FIR filter (fast version).
1415    * @param[in]  S          points to an instance of the Q15 FIR filter structure.
1416    * @param[in]  pSrc       points to the block of input data.
1417    * @param[out] pDst       points to the block of output data.
1418    * @param[in]  blockSize  number of samples to process.
1419    */
1420   void riscv_fir_fast_q15(
1421   const riscv_fir_instance_q15 * S,
1422   const q15_t * pSrc,
1423         q15_t * pDst,
1424         uint32_t blockSize);
1425 
1426   /**
1427    * @brief  Initialization function for the Q15 FIR filter.
1428    * @param[in,out] S          points to an instance of the Q15 FIR filter structure.
1429    * @param[in]     numTaps    Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1430    * @param[in]     pCoeffs    points to the filter coefficients.
1431    * @param[in]     pState     points to the state buffer.
1432    * @param[in]     blockSize  number of samples that are processed at a time.
1433    * @return     The function returns either
1434    * <code>RISCV_MATH_SUCCESS</code> if initialization was successful or
1435    * <code>RISCV_MATH_ARGUMENT_ERROR</code> if <code>numTaps</code> is not a supported value.
1436    */
1437   riscv_status riscv_fir_init_q15(
1438         riscv_fir_instance_q15 * S,
1439         uint16_t numTaps,
1440   const q15_t * pCoeffs,
1441         q15_t * pState,
1442         uint32_t blockSize);
1443 
1444   /**
1445    * @brief Processing function for the Q31 FIR filter.
1446    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
1447    * @param[in]  pSrc       points to the block of input data.
1448    * @param[out] pDst       points to the block of output data.
1449    * @param[in]  blockSize  number of samples to process.
1450    */
1451   void riscv_fir_q31(
1452   const riscv_fir_instance_q31 * S,
1453   const q31_t * pSrc,
1454         q31_t * pDst,
1455         uint32_t blockSize);
1456 
1457   /**
1458    * @brief Processing function for the Q31 FIR filter (fast version).
1459    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
1460    * @param[in]  pSrc       points to the block of input data.
1461    * @param[out] pDst       points to the block of output data.
1462    * @param[in]  blockSize  number of samples to process.
1463    */
1464   void riscv_fir_fast_q31(
1465   const riscv_fir_instance_q31 * S,
1466   const q31_t * pSrc,
1467         q31_t * pDst,
1468         uint32_t blockSize);
1469 
1470   /**
1471    * @brief  Initialization function for the Q31 FIR filter.
1472    * @param[in,out] S          points to an instance of the Q31 FIR structure.
1473    * @param[in]     numTaps    Number of filter coefficients in the filter.
1474    * @param[in]     pCoeffs    points to the filter coefficients.
1475    * @param[in]     pState     points to the state buffer.
1476    * @param[in]     blockSize  number of samples that are processed at a time.
1477    */
1478   void riscv_fir_init_q31(
1479         riscv_fir_instance_q31 * S,
1480         uint16_t numTaps,
1481   const q31_t * pCoeffs,
1482         q31_t * pState,
1483         uint32_t blockSize);
1484 
1485   /**
1486    * @brief Processing function for the floating-point FIR filter.
1487    * @param[in]  S          points to an instance of the floating-point FIR structure.
1488    * @param[in]  pSrc       points to the block of input data.
1489    * @param[out] pDst       points to the block of output data.
1490    * @param[in]  blockSize  number of samples to process.
1491    */
1492   void riscv_fir_f32(
1493   const riscv_fir_instance_f32 * S,
1494   const float32_t * pSrc,
1495         float32_t * pDst,
1496         uint32_t blockSize);
1497 
1498   /**
1499    * @brief  Initialization function for the floating-point FIR filter.
1500    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
1501    * @param[in]     numTaps    Number of filter coefficients in the filter.
1502    * @param[in]     pCoeffs    points to the filter coefficients.
1503    * @param[in]     pState     points to the state buffer.
1504    * @param[in]     blockSize  number of samples that are processed at a time.
1505    */
1506   void riscv_fir_init_f32(
1507         riscv_fir_instance_f32 * S,
1508         uint16_t numTaps,
1509   const float32_t * pCoeffs,
1510         float32_t * pState,
1511         uint32_t blockSize);
1512 
1513   /**
1514    * @brief Instance structure for the Q15 Biquad cascade filter.
1515    */
1516   typedef struct
1517   {
1518           int8_t numStages;        /**< Number of 2nd order stages in the filter; overall order is 2*numStages. NOTE(review): signed here, while riscv_biquad_cascade_df1_init_q15 takes a uint8_t numStages. */
1519           q15_t *pState;           /**< Points to the state array, of length 4*numStages. */
1520     const q15_t *pCoeffs;          /**< Points to the coefficient array, of length 5*numStages; read-only through this pointer. */
1521           int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
1522   } riscv_biquad_casd_df1_inst_q15;
1523 
1524   /**
1525    * @brief Instance structure for the Q31 Biquad cascade filter.
1526    */
1527   typedef struct
1528   {
1529           uint32_t numStages;      /**< Number of 2nd order stages in the filter; overall order is 2*numStages. */
1530           q31_t *pState;           /**< Points to the state array, of length 4*numStages. */
1531     const q31_t *pCoeffs;          /**< Points to the coefficient array, of length 5*numStages; read-only through this pointer. */
1532           uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. NOTE(review): unsigned here, while riscv_biquad_cascade_df1_init_q31 takes an int8_t postShift. */
1533   } riscv_biquad_casd_df1_inst_q31;
1534 
1535   /**
1536    * @brief Instance structure for the floating-point (float32_t) Biquad cascade filter.
1537    */
1538   typedef struct
1539   {
1540           uint32_t numStages;      /**< Number of 2nd order stages in the filter; overall order is 2*numStages. */
1541           float32_t *pState;       /**< Points to the state array, of length 4*numStages. */
1542     const float32_t *pCoeffs;      /**< Points to the coefficient array, of length 5*numStages; read-only through this pointer. */
1543   } riscv_biquad_casd_df1_inst_f32;
1544 
1545   /**
1546    * @brief Processing function for the Q15 Biquad cascade filter.
1547    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
1548    * @param[in]  pSrc       points to the block of input data.
1549    * @param[out] pDst       points to the block of output data.
1550    * @param[in]  blockSize  number of samples to process.
1551    */
1552   void riscv_biquad_cascade_df1_q15(
1553   const riscv_biquad_casd_df1_inst_q15 * S,
1554   const q15_t * pSrc,
1555         q15_t * pDst,
1556         uint32_t blockSize);
1557 
1558   /**
1559    * @brief  Initialization function for the Q15 Biquad cascade filter.
1560    * @param[in,out] S          points to an instance of the Q15 Biquad cascade structure.
1561    * @param[in]     numStages  number of 2nd order stages in the filter.
1562    * @param[in]     pCoeffs    points to the filter coefficients.
1563    * @param[in]     pState     points to the state buffer.
1564    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
1565    */
1566   void riscv_biquad_cascade_df1_init_q15(
1567         riscv_biquad_casd_df1_inst_q15 * S,
1568         uint8_t numStages,
1569   const q15_t * pCoeffs,
1570         q15_t * pState,
1571         int8_t postShift);
1572 
1573   /**
1574    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for RISC-V Core with DSP enabled.
1575    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
1576    * @param[in]  pSrc       points to the block of input data.
1577    * @param[out] pDst       points to the block of output data.
1578    * @param[in]  blockSize  number of samples to process.
1579    */
1580   void riscv_biquad_cascade_df1_fast_q15(
1581   const riscv_biquad_casd_df1_inst_q15 * S,
1582   const q15_t * pSrc,
1583         q15_t * pDst,
1584         uint32_t blockSize);
1585 
1586   /**
1587    * @brief Processing function for the Q31 Biquad cascade filter
1588    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
1589    * @param[in]  pSrc       points to the block of input data.
1590    * @param[out] pDst       points to the block of output data.
1591    * @param[in]  blockSize  number of samples to process.
1592    */
1593   void riscv_biquad_cascade_df1_q31(
1594   const riscv_biquad_casd_df1_inst_q31 * S,
1595   const q31_t * pSrc,
1596         q31_t * pDst,
1597         uint32_t blockSize);
1598 
1599   /**
1600    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for RISC-V Core with DSP enabled.
1601    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
1602    * @param[in]  pSrc       points to the block of input data.
1603    * @param[out] pDst       points to the block of output data.
1604    * @param[in]  blockSize  number of samples to process.
1605    */
1606   void riscv_biquad_cascade_df1_fast_q31(
1607   const riscv_biquad_casd_df1_inst_q31 * S,
1608   const q31_t * pSrc,
1609         q31_t * pDst,
1610         uint32_t blockSize);
1611 
1612   /**
1613    * @brief  Initialization function for the Q31 Biquad cascade filter.
1614    * @param[in,out] S          points to an instance of the Q31 Biquad cascade structure.
1615    * @param[in]     numStages  number of 2nd order stages in the filter.
1616    * @param[in]     pCoeffs    points to the filter coefficients.
1617    * @param[in]     pState     points to the state buffer.
1618    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
1619    */
1620   void riscv_biquad_cascade_df1_init_q31(
1621         riscv_biquad_casd_df1_inst_q31 * S,
1622         uint8_t numStages,
1623   const q31_t * pCoeffs,
1624         q31_t * pState,
1625         int8_t postShift);
1626 
1627   /**
1628    * @brief Processing function for the floating-point Biquad cascade filter.
1629    * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
1630    * @param[in]  pSrc       points to the block of input data.
1631    * @param[out] pDst       points to the block of output data.
1632    * @param[in]  blockSize  number of samples to process.
1633    */
1634   void riscv_biquad_cascade_df1_f32(
1635   const riscv_biquad_casd_df1_inst_f32 * S,
1636   const float32_t * pSrc,
1637         float32_t * pDst,
1638         uint32_t blockSize);
1639 
1640   /**
1641    * @brief  Initialization function for the floating-point Biquad cascade filter.
1642    * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
1643    * @param[in]     numStages  number of 2nd order stages in the filter.
1644    * @param[in]     pCoeffs    points to the filter coefficients.
1645    * @param[in]     pState     points to the state buffer.
1646    */
1647   void riscv_biquad_cascade_df1_init_f32(
1648         riscv_biquad_casd_df1_inst_f32 * S,
1649         uint8_t numStages,
1650   const float32_t * pCoeffs,
1651         float32_t * pState);
1652 
1653   /**
1654    * @brief Instance structure for a floating-point (float32_t) matrix: dimensions plus a pointer to the element data.
1655    */
1656   typedef struct
1657   {
1658     uint16_t numRows;     /**< Number of rows of the matrix.     */
1659     uint16_t numCols;     /**< Number of columns of the matrix.  */
1660     float32_t *pData;     /**< Points to the data of the matrix. */
1661   } riscv_matrix_instance_f32;
1662 
1663 
1664   /**
1665    * @brief Instance structure for a 64-bit floating-point (float64_t) matrix: dimensions plus a pointer to the element data.
1666    */
1667   typedef struct
1668   {
1669     uint16_t numRows;     /**< Number of rows of the matrix.     */
1670     uint16_t numCols;     /**< Number of columns of the matrix.  */
1671     float64_t *pData;     /**< Points to the float64_t data of the matrix. */
1672   } riscv_matrix_instance_f64;
1673 
1674   /**
1675    * @brief Instance structure for a Q15 matrix: dimensions plus a pointer to the element data.
1676    */
1677   typedef struct
1678   {
1679     uint16_t numRows;     /**< Number of rows of the matrix.     */
1680     uint16_t numCols;     /**< Number of columns of the matrix.  */
1681     q15_t *pData;         /**< Points to the data of the matrix. */
1682   } riscv_matrix_instance_q15;
1683 
1684   /**
1685    * @brief Instance structure for a Q31 matrix: dimensions plus a pointer to the element data.
1686    */
1687   typedef struct
1688   {
1689     uint16_t numRows;     /**< Number of rows of the matrix.     */
1690     uint16_t numCols;     /**< Number of columns of the matrix.  */
1691     q31_t *pData;         /**< Points to the data of the matrix. */
1692   } riscv_matrix_instance_q31;
1693 
1694   /**
1695    * @brief Floating-point matrix addition.
1696    * @param[in]  pSrcA  points to the first input matrix structure
1697    * @param[in]  pSrcB  points to the second input matrix structure
1698    * @param[out] pDst   points to output matrix structure
1699    * @return     The function returns either
1700    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1701    */
1702 riscv_status riscv_mat_add_f32(
1703   const riscv_matrix_instance_f32 * pSrcA,
1704   const riscv_matrix_instance_f32 * pSrcB,
1705         riscv_matrix_instance_f32 * pDst);
1706 
1707   /**
1708    * @brief Q15 matrix addition.
1709    * @param[in]   pSrcA  points to the first input matrix structure
1710    * @param[in]   pSrcB  points to the second input matrix structure
1711    * @param[out]  pDst   points to output matrix structure
1712    * @return     The function returns either
1713    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1714    */
1715 riscv_status riscv_mat_add_q15(
1716   const riscv_matrix_instance_q15 * pSrcA,
1717   const riscv_matrix_instance_q15 * pSrcB,
1718         riscv_matrix_instance_q15 * pDst);
1719 
1720   /**
1721    * @brief Q31 matrix addition.
1722    * @param[in]  pSrcA  points to the first input matrix structure
1723    * @param[in]  pSrcB  points to the second input matrix structure
1724    * @param[out] pDst   points to output matrix structure
1725    * @return     The function returns either
1726    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1727    */
1728 riscv_status riscv_mat_add_q31(
1729   const riscv_matrix_instance_q31 * pSrcA,
1730   const riscv_matrix_instance_q31 * pSrcB,
1731         riscv_matrix_instance_q31 * pDst);
1732 
1733   /**
1734    * @brief Floating-point, complex, matrix multiplication.
1735    * @param[in]  pSrcA  points to the first input matrix structure
1736    * @param[in]  pSrcB  points to the second input matrix structure
1737    * @param[out] pDst   points to output matrix structure
1738    * @return     The function returns either
1739    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1740    */
1741 riscv_status riscv_mat_cmplx_mult_f32(
1742   const riscv_matrix_instance_f32 * pSrcA,
1743   const riscv_matrix_instance_f32 * pSrcB,
1744         riscv_matrix_instance_f32 * pDst);
1745 
1746   /**
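1747    * @brief Q15 complex matrix multiplication.
1748    * @param[in]  pSrcA     points to the first input matrix structure
1749    * @param[in]  pSrcB     points to the second input matrix structure
1750    * @param[out] pDst      points to output matrix structure
        * @param[in]  pScratch  points to a caller-supplied q15_t scratch buffer (presumably for intermediate results; required size is not stated in this header)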
1751    * @return     The function returns either
1752    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1753    */
1754 riscv_status riscv_mat_cmplx_mult_q15(
1755   const riscv_matrix_instance_q15 * pSrcA,
1756   const riscv_matrix_instance_q15 * pSrcB,
1757         riscv_matrix_instance_q15 * pDst,
1758         q15_t * pScratch);
1759 
1760   /**
1761    * @brief Q31, complex, matrix multiplication.
1762    * @param[in]  pSrcA  points to the first input matrix structure
1763    * @param[in]  pSrcB  points to the second input matrix structure
1764    * @param[out] pDst   points to output matrix structure
1765    * @return     The function returns either
1766    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1767    */
1768 riscv_status riscv_mat_cmplx_mult_q31(
1769   const riscv_matrix_instance_q31 * pSrcA,
1770   const riscv_matrix_instance_q31 * pSrcB,
1771         riscv_matrix_instance_q31 * pDst);
1772 
1773   /**
1774    * @brief Floating-point matrix transpose.
1775    * @param[in]  pSrc  points to the input matrix
1776    * @param[out] pDst  points to the output matrix
1777    * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
1778    * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1779    */
1780 riscv_status riscv_mat_trans_f32(
1781   const riscv_matrix_instance_f32 * pSrc,
1782         riscv_matrix_instance_f32 * pDst);
1783 
1784   /**
1785    * @brief Q15 matrix transpose.
1786    * @param[in]  pSrc  points to the input matrix
1787    * @param[out] pDst  points to the output matrix
1788    * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
1789    * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1790    */
1791 riscv_status riscv_mat_trans_q15(
1792   const riscv_matrix_instance_q15 * pSrc,
1793         riscv_matrix_instance_q15 * pDst);
1794 
1795   /**
1796    * @brief Q31 matrix transpose.
1797    * @param[in]  pSrc  points to the input matrix
1798    * @param[out] pDst  points to the output matrix
1799    * @return    The function returns either  <code>RISCV_MATH_SIZE_MISMATCH</code>
1800    * or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1801    */
1802 riscv_status riscv_mat_trans_q31(
1803   const riscv_matrix_instance_q31 * pSrc,
1804         riscv_matrix_instance_q31 * pDst);
1805 
1806   /**
1807    * @brief Floating-point matrix multiplication
1808    * @param[in]  pSrcA  points to the first input matrix structure
1809    * @param[in]  pSrcB  points to the second input matrix structure
1810    * @param[out] pDst   points to output matrix structure
1811    * @return     The function returns either
1812    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1813    */
1814 riscv_status riscv_mat_mult_f32(
1815   const riscv_matrix_instance_f32 * pSrcA,
1816   const riscv_matrix_instance_f32 * pSrcB,
1817         riscv_matrix_instance_f32 * pDst);
1818 
1819   /**
1820    * @brief Q15 matrix multiplication
1821    * @param[in]  pSrcA   points to the first input matrix structure
1822    * @param[in]  pSrcB   points to the second input matrix structure
1823    * @param[out] pDst    points to output matrix structure
1824    * @param[in]  pState  points to the caller-provided array for storing intermediate results (non-const: the function may write to it; the required size is not stated in this header)
1825    * @return     The function returns either
1826    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1827    */
1828 riscv_status riscv_mat_mult_q15(
1829   const riscv_matrix_instance_q15 * pSrcA,
1830   const riscv_matrix_instance_q15 * pSrcB,
1831         riscv_matrix_instance_q15 * pDst,
1832         q15_t * pState);
1833 
1834   /**
1835    * @brief Q15 matrix multiplication (fast variant) for RISC-V Core with DSP enabled
1836    * @param[in]  pSrcA   points to the first input matrix structure
1837    * @param[in]  pSrcB   points to the second input matrix structure
1838    * @param[out] pDst    points to output matrix structure
1839    * @param[in]  pState  points to the caller-provided array for storing intermediate results (non-const: the function may write to it; the required size is not stated in this header)
1840    * @return     The function returns either
1841    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1842    */
1843 riscv_status riscv_mat_mult_fast_q15(
1844   const riscv_matrix_instance_q15 * pSrcA,
1845   const riscv_matrix_instance_q15 * pSrcB,
1846         riscv_matrix_instance_q15 * pDst,
1847         q15_t * pState);
1848 
1849   /**
1850    * @brief Q31 matrix multiplication
1851    * @param[in]  pSrcA  points to the first input matrix structure
1852    * @param[in]  pSrcB  points to the second input matrix structure
1853    * @param[out] pDst   points to output matrix structure
1854    * @return     The function returns either
1855    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1856    */
1857 riscv_status riscv_mat_mult_q31(
1858   const riscv_matrix_instance_q31 * pSrcA,
1859   const riscv_matrix_instance_q31 * pSrcB,
1860         riscv_matrix_instance_q31 * pDst);
1861 
1862   /**
1863    * @brief Q31 matrix multiplication (fast variant) for RISC-V Core with DSP enabled
1864    * @param[in]  pSrcA  points to the first input matrix structure
1865    * @param[in]  pSrcB  points to the second input matrix structure
1866    * @param[out] pDst   points to output matrix structure
1867    * @return     The function returns either
1868    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1869    */
1870 riscv_status riscv_mat_mult_fast_q31(
1871   const riscv_matrix_instance_q31 * pSrcA,
1872   const riscv_matrix_instance_q31 * pSrcB,
1873         riscv_matrix_instance_q31 * pDst);
1874 
1875   /**
1876    * @brief Floating-point matrix subtraction
1877    * @param[in]  pSrcA  points to the first input matrix structure
1878    * @param[in]  pSrcB  points to the second input matrix structure
1879    * @param[out] pDst   points to output matrix structure
1880    * @return     The function returns either
1881    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1882    */
1883 riscv_status riscv_mat_sub_f32(
1884   const riscv_matrix_instance_f32 * pSrcA,
1885   const riscv_matrix_instance_f32 * pSrcB,
1886         riscv_matrix_instance_f32 * pDst);
1887 
1888   /**
1889    * @brief Q15 matrix subtraction
1890    * @param[in]  pSrcA  points to the first input matrix structure
1891    * @param[in]  pSrcB  points to the second input matrix structure
1892    * @param[out] pDst   points to output matrix structure
1893    * @return     The function returns either
1894    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1895    */
1896 riscv_status riscv_mat_sub_q15(
1897   const riscv_matrix_instance_q15 * pSrcA,
1898   const riscv_matrix_instance_q15 * pSrcB,
1899         riscv_matrix_instance_q15 * pDst);
1900 
1901   /**
1902    * @brief Q31 matrix subtraction
1903    * @param[in]  pSrcA  points to the first input matrix structure
1904    * @param[in]  pSrcB  points to the second input matrix structure
1905    * @param[out] pDst   points to output matrix structure
1906    * @return     The function returns either
1907    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1908    */
1909 riscv_status riscv_mat_sub_q31(
1910   const riscv_matrix_instance_q31 * pSrcA,
1911   const riscv_matrix_instance_q31 * pSrcB,
1912         riscv_matrix_instance_q31 * pDst);
1913 
1914   /**
1915    * @brief Floating-point matrix scaling.
1916    * @param[in]  pSrc   points to the input matrix
1917    * @param[in]  scale  scale factor
1918    * @param[out] pDst   points to the output matrix
1919    * @return     The function returns either
1920    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1921    */
1922 riscv_status riscv_mat_scale_f32(
1923   const riscv_matrix_instance_f32 * pSrc,
1924         float32_t scale,
1925         riscv_matrix_instance_f32 * pDst);
1926 
1927   /**
1928    * @brief Q15 matrix scaling.
1929    * @param[in]  pSrc        points to input matrix
1930    * @param[in]  scaleFract  fractional portion of the scale factor
1931    * @param[in]  shift       number of bits to shift the result by
1932    * @param[out] pDst        points to output matrix
1933    * @return     The function returns either
1934    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1935    */
1936 riscv_status riscv_mat_scale_q15(
1937   const riscv_matrix_instance_q15 * pSrc,
1938         q15_t scaleFract,
1939         int32_t shift,
1940         riscv_matrix_instance_q15 * pDst);
1941 
1942   /**
1943    * @brief Q31 matrix scaling.
1944    * @param[in]  pSrc        points to input matrix
1945    * @param[in]  scaleFract  fractional portion of the scale factor
1946    * @param[in]  shift       number of bits to shift the result by
1947    * @param[out] pDst        points to output matrix structure
1948    * @return     The function returns either
1949    * <code>RISCV_MATH_SIZE_MISMATCH</code> or <code>RISCV_MATH_SUCCESS</code> based on the outcome of size checking.
1950    */
1951 riscv_status riscv_mat_scale_q31(
1952   const riscv_matrix_instance_q31 * pSrc,
1953         q31_t scaleFract,
1954         int32_t shift,
1955         riscv_matrix_instance_q31 * pDst);
1956 
1957   /**
1958    * @brief  Q31 matrix initialization.
1959    * @param[in,out] S         points to an instance of the Q31 matrix structure.
1960    * @param[in]     nRows     number of rows in the matrix.
1961    * @param[in]     nColumns  number of columns in the matrix.
1962    * @param[in]     pData     points to the matrix data array.
1963    */
1964 void riscv_mat_init_q31(
1965         riscv_matrix_instance_q31 * S,
1966         uint16_t nRows,
1967         uint16_t nColumns,
1968         q31_t * pData);
1969 
1970   /**
1971    * @brief  Q15 matrix initialization.
1972    * @param[in,out] S         points to an instance of the Q15 matrix structure.
1973    * @param[in]     nRows     number of rows in the matrix.
1974    * @param[in]     nColumns  number of columns in the matrix.
1975    * @param[in]     pData     points to the matrix data array.
1976    */
1977 void riscv_mat_init_q15(
1978         riscv_matrix_instance_q15 * S,
1979         uint16_t nRows,
1980         uint16_t nColumns,
1981         q15_t * pData);
1982 
1983   /**
1984    * @brief  Floating-point matrix initialization.
1985    * @param[in,out] S         points to an instance of the floating-point matrix structure.
1986    * @param[in]     nRows     number of rows in the matrix.
1987    * @param[in]     nColumns  number of columns in the matrix.
1988    * @param[in]     pData     points to the matrix data array.
1989    */
1990 void riscv_mat_init_f32(
1991         riscv_matrix_instance_f32 * S,
1992         uint16_t nRows,
1993         uint16_t nColumns,
1994         float32_t * pData);
1995 
1996 
1997   /**
1998    * @brief Instance structure for the Q15 PID Control. The layout of the A1/A2 gains depends on whether RISCV_MATH_DSP is defined.
1999    */
2000   typedef struct
2001   {
2002           q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
2003 #if !defined (RISCV_MATH_DSP)
2004           q15_t A1;           /**< The derived gain, A1 = -Kp - 2Kd (same formula as documented for the Q31/f32 instances; NOTE(review): confirm against riscv_pid_init_q15). */
2005           q15_t A2;           /**< The derived gain, A2 = Kd (NOTE(review): confirm against riscv_pid_init_q15). */
2006 #else
2007           q31_t A1;           /**< The two derived gains (-Kp - 2Kd) and Kd held together in one 32-bit word; NOTE(review): half-word order is not shown in this header - confirm against riscv_pid_init_q15. */
2008 #endif
2009           q15_t state[3];     /**< The state array of length 3. */
2010           q15_t Kp;           /**< The proportional gain. */
2011           q15_t Ki;           /**< The integral gain. */
2012           q15_t Kd;           /**< The derivative gain. */
2013   } riscv_pid_instance_q15;
2014 
2015   /**
2016    * @brief Instance structure for the Q31 PID Control.
2017    */
2018   typedef struct
2019   {
2020           q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
2021           q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
2022           q31_t A2;            /**< The derived gain, A2 = Kd . */
2023           q31_t state[3];      /**< The state array of length 3. */
2024           q31_t Kp;            /**< The proportional gain. */
2025           q31_t Ki;            /**< The integral gain. */
2026           q31_t Kd;            /**< The derivative gain. */
2027   } riscv_pid_instance_q31;
2028 
2029   /**
2030    * @brief Instance structure for the floating-point PID Control.
2031    */
2032   typedef struct
2033   {
2034           float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
2035           float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
2036           float32_t A2;          /**< The derived gain, A2 = Kd . */
2037           float32_t state[3];    /**< The state array of length 3. */
2038           float32_t Kp;          /**< The proportional gain. */
2039           float32_t Ki;          /**< The integral gain. */
2040           float32_t Kd;          /**< The derivative gain. */
2041   } riscv_pid_instance_f32;
2042 
2043 
2044 
2045   /**
2046    * @brief  Initialization function for the floating-point PID Control.
2047    * @param[in,out] S               points to an instance of the PID structure.
2048    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
2049    */
2050   void riscv_pid_init_f32(
2051         riscv_pid_instance_f32 * S,
2052         int32_t resetStateFlag);
2053 
2054 
2055   /**
2056    * @brief  Reset function for the floating-point PID Control.
2057    * @param[in,out] S  is an instance of the floating-point PID Control structure
2058    */
2059   void riscv_pid_reset_f32(
2060         riscv_pid_instance_f32 * S);
2061 
2062 
2063   /**
2064    * @brief  Initialization function for the Q31 PID Control.
2065    * @param[in,out] S               points to an instance of the Q31 PID structure.
2066    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state, 1 = reset the state.
2067    */
2068   void riscv_pid_init_q31(
2069         riscv_pid_instance_q31 * S,
2070         int32_t resetStateFlag);
2071 
2072 
2073   /**
2074    * @brief  Reset function for the Q31 PID Control.
2075    * @param[in,out] S   points to an instance of the Q31 PID Control structure
2076    */
2077 
2078   void riscv_pid_reset_q31(
2079         riscv_pid_instance_q31 * S);
2080 
2081 
2082   /**
2083    * @brief  Initialization function for the Q15 PID Control.
2084    * @param[in,out] S               points to an instance of the Q15 PID structure.
2085    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
2086    */
2087   void riscv_pid_init_q15(
2088         riscv_pid_instance_q15 * S,
2089         int32_t resetStateFlag);
2090 
2091 
2092   /**
2093    * @brief  Reset function for the Q15 PID Control.
2094    * @param[in,out] S  points to an instance of the Q15 PID Control structure
2095    */
2096   void riscv_pid_reset_q15(
2097         riscv_pid_instance_q15 * S);
2098 
2099 
2100   /**
2101    * @brief Instance structure for the floating-point Linear Interpolate function.
2102    */
2103   typedef struct
2104   {
2105           uint32_t nValues;           /**< number of entries in the Y table (presumably; NOTE(review): confirm against the interpolation routine) */
2106           float32_t x1;               /**< x value associated with the first Y table entry (presumably; NOTE(review): confirm) */
2107           float32_t xSpacing;         /**< spacing between consecutive x values (presumably uniform; NOTE(review): confirm) */
2108           float32_t *pYData;          /**< pointer to the table of Y values */
2109   } riscv_linear_interp_instance_f32;
2110 
2111   /**
2112    * @brief Instance structure for the floating-point bilinear interpolation function.
2113    */
2114   typedef struct
2115   {
2116           uint16_t numRows;   /**< number of rows in the data table. */
2117           uint16_t numCols;   /**< number of columns in the data table. */
2118           float32_t *pData;   /**< points to the data table. */
2119   } riscv_bilinear_interp_instance_f32;
2120 
2121    /**
2122    * @brief Instance structure for the Q31 bilinear interpolation function.
2123    */
2124   typedef struct
2125   {
2126           uint16_t numRows;   /**< number of rows in the data table. */
2127           uint16_t numCols;   /**< number of columns in the data table. */
2128           q31_t *pData;       /**< points to the data table. */
2129   } riscv_bilinear_interp_instance_q31;
2130 
2131    /**
2132    * @brief Instance structure for the Q15 bilinear interpolation function.
2133    */
2134   typedef struct
2135   {
2136           uint16_t numRows;   /**< number of rows in the data table. */
2137           uint16_t numCols;   /**< number of columns in the data table. */
2138           q15_t *pData;       /**< points to the data table. */
2139   } riscv_bilinear_interp_instance_q15;
2140 
2141    /**
2142    * @brief Instance structure for the Q7 bilinear interpolation function.
2143    */
2144   typedef struct
2145   {
2146           uint16_t numRows;   /**< number of rows in the data table. */
2147           uint16_t numCols;   /**< number of columns in the data table. */
2148           q7_t *pData;        /**< points to the data table. */
2149   } riscv_bilinear_interp_instance_q7;
2150 
2151 
2152   /**
2153    * @brief Q7 vector multiplication.
2154    * @param[in]  pSrcA      points to the first input vector
2155    * @param[in]  pSrcB      points to the second input vector
2156    * @param[out] pDst       points to the output vector
2157    * @param[in]  blockSize  number of samples in each vector
2158    */
2159   void riscv_mult_q7(
2160   const q7_t * pSrcA,
2161   const q7_t * pSrcB,
2162         q7_t * pDst,
2163         uint32_t blockSize);
2164 
2165 
2166   /**
2167    * @brief Q15 vector multiplication.
2168    * @param[in]  pSrcA      points to the first input vector
2169    * @param[in]  pSrcB      points to the second input vector
2170    * @param[out] pDst       points to the output vector
2171    * @param[in]  blockSize  number of samples in each vector
2172    */
2173   void riscv_mult_q15(
2174   const q15_t * pSrcA,
2175   const q15_t * pSrcB,
2176         q15_t * pDst,
2177         uint32_t blockSize);
2178 
2179 
2180   /**
2181    * @brief Q31 vector multiplication.
2182    * @param[in]  pSrcA      points to the first input vector
2183    * @param[in]  pSrcB      points to the second input vector
2184    * @param[out] pDst       points to the output vector
2185    * @param[in]  blockSize  number of samples in each vector
2186    */
2187   void riscv_mult_q31(
2188   const q31_t * pSrcA,
2189   const q31_t * pSrcB,
2190         q31_t * pDst,
2191         uint32_t blockSize);
2192 
2193 
2194   /**
2195    * @brief Floating-point vector multiplication.
2196    * @param[in]  pSrcA      points to the first input vector
2197    * @param[in]  pSrcB      points to the second input vector
2198    * @param[out] pDst       points to the output vector
2199    * @param[in]  blockSize  number of samples in each vector
2200    */
2201   void riscv_mult_f32(
2202   const float32_t * pSrcA,
2203   const float32_t * pSrcB,
2204         float32_t * pDst,
2205         uint32_t blockSize);
2206 
2207 
2208   /**
2209    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
2210    */
2211   typedef struct
2212   {
2213           uint16_t fftLen;                 /**< length of the FFT. */
2214           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2215           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2216     const q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
2217     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2218           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2219           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2220   } riscv_cfft_radix2_instance_q15;
2221 
2222 /* Deprecated */
2223   riscv_status riscv_cfft_radix2_init_q15(
2224         riscv_cfft_radix2_instance_q15 * S,
2225         uint16_t fftLen,
2226         uint8_t ifftFlag,
2227         uint8_t bitReverseFlag);
2228 
2229 /* Deprecated */
2230   void riscv_cfft_radix2_q15(
2231   const riscv_cfft_radix2_instance_q15 * S,
2232         q15_t * pSrc);
2233 
2234 
2235   /**
2236    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
2237    */
2238   typedef struct
2239   {
2240           uint16_t fftLen;                 /**< length of the FFT. */
2241           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2242           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2243     const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
2244     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2245           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2246           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2247   } riscv_cfft_radix4_instance_q15;
2248 
2249 /* Deprecated */
2250   riscv_status riscv_cfft_radix4_init_q15(
2251         riscv_cfft_radix4_instance_q15 * S,
2252         uint16_t fftLen,
2253         uint8_t ifftFlag,
2254         uint8_t bitReverseFlag);
2255 
2256 /* Deprecated */
2257   void riscv_cfft_radix4_q15(
2258   const riscv_cfft_radix4_instance_q15 * S,
2259         q15_t * pSrc);
2260 
2261   /**
2262    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
2263    */
2264   typedef struct
2265   {
2266           uint16_t fftLen;                 /**< length of the FFT. */
2267           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2268           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2269     const q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
2270     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2271           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2272           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2273   } riscv_cfft_radix2_instance_q31;
2274 
2275 /* Deprecated */
2276   riscv_status riscv_cfft_radix2_init_q31(
2277         riscv_cfft_radix2_instance_q31 * S,
2278         uint16_t fftLen,
2279         uint8_t ifftFlag,
2280         uint8_t bitReverseFlag);
2281 
2282 /* Deprecated */
2283   void riscv_cfft_radix2_q31(
2284   const riscv_cfft_radix2_instance_q31 * S,
2285         q31_t * pSrc);
2286 
2287   /**
2288    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
2289    */
2290   typedef struct
2291   {
2292           uint16_t fftLen;                 /**< length of the FFT. */
2293           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2294           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2295     const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2296     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2297           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2298           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2299   } riscv_cfft_radix4_instance_q31;
2300 
2301 /* Deprecated */
2302   void riscv_cfft_radix4_q31(
2303   const riscv_cfft_radix4_instance_q31 * S,
2304         q31_t * pSrc);
2305 
2306 /* Deprecated */
2307   riscv_status riscv_cfft_radix4_init_q31(
2308         riscv_cfft_radix4_instance_q31 * S,
2309         uint16_t fftLen,
2310         uint8_t ifftFlag,
2311         uint8_t bitReverseFlag);
2312 
2313   /**
2314    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
2315    */
2316   typedef struct
2317   {
2318           uint16_t fftLen;                   /**< length of the FFT. */
2319           uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2320           uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2321     const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
2322     const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2323           uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2324           uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2325           float32_t onebyfftLen;             /**< value of 1/fftLen. */
2326   } riscv_cfft_radix2_instance_f32;
2327 
2328 /* Deprecated */
2329   riscv_status riscv_cfft_radix2_init_f32(
2330         riscv_cfft_radix2_instance_f32 * S,
2331         uint16_t fftLen,
2332         uint8_t ifftFlag,
2333         uint8_t bitReverseFlag);
2334 
2335 /* Deprecated */
2336   void riscv_cfft_radix2_f32(
2337   const riscv_cfft_radix2_instance_f32 * S,
2338         float32_t * pSrc);
2339 
2340   /**
2341    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
2342    */
2343   typedef struct
2344   {
2345           uint16_t fftLen;                   /**< length of the FFT. */
2346           uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2347           uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2348     const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
2349     const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2350           uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2351           uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2352           float32_t onebyfftLen;             /**< value of 1/fftLen. */
2353   } riscv_cfft_radix4_instance_f32;
2354 
2355 /* Deprecated */
2356   riscv_status riscv_cfft_radix4_init_f32(
2357         riscv_cfft_radix4_instance_f32 * S,
2358         uint16_t fftLen,
2359         uint8_t ifftFlag,
2360         uint8_t bitReverseFlag);
2361 
2362 /* Deprecated */
2363   void riscv_cfft_radix4_f32(
2364   const riscv_cfft_radix4_instance_f32 * S,
2365         float32_t * pSrc);
2366 
2367   /**
2368    * @brief Instance structure for the Q15 fixed-point CFFT/CIFFT function.
2369    */
2370   typedef struct
2371   {
2372           uint16_t fftLen;                   /**< length of the FFT. */
2373     const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
2374     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2375           uint16_t bitRevLength;             /**< bit reversal table length. */
2376   } riscv_cfft_instance_q15;
2377 
2378 void riscv_cfft_q15(                           /* Q15 CFFT/CIFFT entry point operating on a riscv_cfft_instance_q15. */
2379     const riscv_cfft_instance_q15 * S,         /* Q15 CFFT instance; const, so not modified through this pointer. */
2380           q15_t * p1,                          /* Q15 data buffer (non-const). NOTE(review): presumably complex data transformed in place - confirm. */
2381           uint8_t ifftFlag,                    /* NOTE(review): presumably 0 = forward, 1 = inverse, as for the ifftFlag field of the radix-2/4 instances - confirm. */
2382           uint8_t bitReverseFlag);             /* NOTE(review): presumably 1 enables / 0 disables bit reversal of output, as for the like-named instance field - confirm. */
2383 
2384   /**
2385    * @brief Instance structure for the Q31 fixed-point CFFT/CIFFT function.
2386    */
2387   typedef struct
2388   {
2389           uint16_t fftLen;                   /**< length of the FFT. */
2390     const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
2391     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2392           uint16_t bitRevLength;             /**< bit reversal table length. */
2393   } riscv_cfft_instance_q31;
2394 
2395 void riscv_cfft_q31(                           /* Q31 CFFT/CIFFT entry point operating on a riscv_cfft_instance_q31. */
2396     const riscv_cfft_instance_q31 * S,         /* Q31 CFFT instance; const, so not modified through this pointer. */
2397           q31_t * p1,                          /* Q31 data buffer (non-const). NOTE(review): presumably complex data transformed in place - confirm. */
2398           uint8_t ifftFlag,                    /* NOTE(review): presumably 0 = forward, 1 = inverse, as for the ifftFlag field of the radix-2/4 instances - confirm. */
2399           uint8_t bitReverseFlag);             /* NOTE(review): presumably 1 enables / 0 disables bit reversal of output, as for the like-named instance field - confirm. */
2400 
2401   /**
2402    * @brief Instance structure for the floating-point CFFT/CIFFT function.
2403    */
2404   typedef struct
2405   {
2406           uint16_t fftLen;                   /**< length of the FFT. */
2407     const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
2408     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2409           uint16_t bitRevLength;             /**< bit reversal table length. */
2410   } riscv_cfft_instance_f32;
2411 
2412   void riscv_cfft_f32(                         /* Floating-point CFFT/CIFFT entry point operating on a riscv_cfft_instance_f32. */
2413   const riscv_cfft_instance_f32 * S,           /* floating-point CFFT instance; const, so not modified through this pointer. */
2414         float32_t * p1,                        /* float32_t data buffer (non-const). NOTE(review): presumably complex data transformed in place - confirm. */
2415         uint8_t ifftFlag,                      /* NOTE(review): presumably 0 = forward, 1 = inverse, as for the ifftFlag field of the radix-2/4 instances - confirm. */
2416         uint8_t bitReverseFlag);               /* NOTE(review): presumably 1 enables / 0 disables bit reversal of output, as for the like-named instance field - confirm. */
2417 
2418   /**
2419    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2420    */
2421   typedef struct
2422   {
2423           uint32_t fftLenReal;                      /**< length of the real FFT. */
2424           uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2425           uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2426           uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2427     const q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2428     const q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
2429     const riscv_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
2430   } riscv_rfft_instance_q15;
2431 
2432   riscv_status riscv_rfft_init_q15(
2433         riscv_rfft_instance_q15 * S,
2434         uint32_t fftLenReal,
2435         uint32_t ifftFlagR,
2436         uint32_t bitReverseFlag);
2437 
2438   void riscv_rfft_q15(
2439   const riscv_rfft_instance_q15 * S,
2440         q15_t * pSrc,
2441         q15_t * pDst);
2442 
2443   /**
2444    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2445    */
2446   typedef struct
2447   {
2448           uint32_t fftLenReal;                        /**< length of the real FFT. */
2449           uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2450           uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2451           uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2452     const q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2453     const q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
2454     const riscv_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
2455   } riscv_rfft_instance_q31;
2456 
2457   riscv_status riscv_rfft_init_q31(
2458         riscv_rfft_instance_q31 * S,
2459         uint32_t fftLenReal,
2460         uint32_t ifftFlagR,
2461         uint32_t bitReverseFlag);
2462 
2463   void riscv_rfft_q31(
2464   const riscv_rfft_instance_q31 * S,
2465         q31_t * pSrc,
2466         q31_t * pDst);
2467 
2468   /**
2469    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2470    */
2471   typedef struct
2472   {
2473           uint32_t fftLenReal;                        /**< length of the real FFT. */
2474           uint16_t fftLenBy2;                         /**< length of the complex FFT. */
2475           uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2476           uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2477           uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2478     const float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2479     const float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2480           riscv_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
2481   } riscv_rfft_instance_f32;
2482 
2483   riscv_status riscv_rfft_init_f32(
2484         riscv_rfft_instance_f32 * S,
2485         riscv_cfft_radix4_instance_f32 * S_CFFT,
2486         uint32_t fftLenReal,
2487         uint32_t ifftFlagR,
2488         uint32_t bitReverseFlag);
2489 
2490   void riscv_rfft_f32(
2491   const riscv_rfft_instance_f32 * S,
2492         float32_t * pSrc,
2493         float32_t * pDst);
2494 
2495   /**
2496    * @brief Instance structure for the fast floating-point RFFT/RIFFT function (the instance type taken by the riscv_rfft_fast_*_f32 functions).
2497    */
2498 typedef struct
2499   {
2500           riscv_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
2501           uint16_t fftLenRFFT;             /**< length of the real sequence */
2502     const float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
2503   } riscv_rfft_fast_instance_f32 ;
2504 
2505 riscv_status riscv_rfft_fast_init_f32 (
2506          riscv_rfft_fast_instance_f32 * S,
2507          uint16_t fftLen);
2508 
2509 riscv_status riscv_rfft_32_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2510 
2511 riscv_status riscv_rfft_64_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2512 
2513 riscv_status riscv_rfft_128_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2514 
2515 riscv_status riscv_rfft_256_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2516 
2517 riscv_status riscv_rfft_512_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2518 
2519 riscv_status riscv_rfft_1024_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2520 
2521 riscv_status riscv_rfft_2048_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2522 
2523 riscv_status riscv_rfft_4096_fast_init_f32 ( riscv_rfft_fast_instance_f32 * S );
2524 
2525 
2526   void riscv_rfft_fast_f32(
2527         riscv_rfft_fast_instance_f32 * S,
2528         float32_t * p, float32_t * pOut,
2529         uint8_t ifftFlag);
2530 
2531   /**
2532    * @brief Instance structure for the floating-point DCT4/IDCT4 function (initialized by riscv_dct4_init_f32()).
2533    */
2534   typedef struct
2535   {
2536           uint16_t N;                          /**< length of the DCT4. */
2537           uint16_t Nby2;                       /**< half of the length of the DCT4. */
2538           float32_t normalize;                 /**< normalizing factor. */
2539     const float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
2540     const float32_t *pCosFactor;               /**< points to the cosFactor table. */
2541           riscv_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2542           riscv_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
2543   } riscv_dct4_instance_f32;
2544 
2545 
2546   /**
2547    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2548    * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
2549    * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
2550    * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
2551    * @param[in]     N          length of the DCT4.
2552    * @param[in]     Nby2       half of the length of the DCT4.
2553    * @param[in]     normalize  normalizing factor.
2554    * @return      RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2555    */
2556   riscv_status riscv_dct4_init_f32(
2557         riscv_dct4_instance_f32 * S,
2558         riscv_rfft_instance_f32 * S_RFFT,
2559         riscv_cfft_radix4_instance_f32 * S_CFFT,
2560         uint16_t N,
2561         uint16_t Nby2,
2562         float32_t normalize);
2563 
2564 
2565   /**
2566    * @brief Processing function for the floating-point DCT4/IDCT4.
2567    * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure (read-only).
2568    * @param[in]     pState         points to the state buffer.
2569    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2570    */
2571   void riscv_dct4_f32(
2572   const riscv_dct4_instance_f32 * S,
2573         float32_t * pState,
2574         float32_t * pInlineBuffer);
2575 
2576 
2577   /**
2578    * @brief Instance structure for the Q31 DCT4/IDCT4 function (initialized by riscv_dct4_init_q31()).
2579    */
2580   typedef struct
2581   {
2582           uint16_t N;                          /**< length of the DCT4. */
2583           uint16_t Nby2;                       /**< half of the length of the DCT4. */
2584           q31_t normalize;                     /**< normalizing factor. */
2585     const q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
2586     const q31_t *pCosFactor;                   /**< points to the cosFactor table. */
2587           riscv_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2588           riscv_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
2589   } riscv_dct4_instance_q31;
2590 
2591 
2592   /**
2593    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2594    * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
2595    * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
2596    * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
2597    * @param[in]     N          length of the DCT4.
2598    * @param[in]     Nby2       half of the length of the DCT4.
2599    * @param[in]     normalize  normalizing factor.
2600    * @return      RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2601    */
2602   riscv_status riscv_dct4_init_q31(
2603         riscv_dct4_instance_q31 * S,
2604         riscv_rfft_instance_q31 * S_RFFT,
2605         riscv_cfft_radix4_instance_q31 * S_CFFT,
2606         uint16_t N,
2607         uint16_t Nby2,
2608         q31_t normalize);
2609 
2610 
2611   /**
2612    * @brief Processing function for the Q31 DCT4/IDCT4.
2613    * @param[in]     S              points to an instance of the Q31 DCT4/IDCT4 structure (read-only).
2614    * @param[in]     pState         points to the state buffer.
2615    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2616    */
2617   void riscv_dct4_q31(
2618   const riscv_dct4_instance_q31 * S,
2619         q31_t * pState,
2620         q31_t * pInlineBuffer);
2621 
2622 
2623   /**
2624    * @brief Instance structure for the Q15 DCT4/IDCT4 function (initialized by riscv_dct4_init_q15()).
2625    */
2626   typedef struct
2627   {
2628           uint16_t N;                          /**< length of the DCT4. */
2629           uint16_t Nby2;                       /**< half of the length of the DCT4. */
2630           q15_t normalize;                     /**< normalizing factor. */
2631     const q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
2632     const q15_t *pCosFactor;                   /**< points to the cosFactor table. */
2633           riscv_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2634           riscv_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
2635   } riscv_dct4_instance_q15;
2636 
2637 
2638   /**
2639    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2640    * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
2641    * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
2642    * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
2643    * @param[in]     N          length of the DCT4.
2644    * @param[in]     Nby2       half of the length of the DCT4.
2645    * @param[in]     normalize  normalizing factor.
2646    * @return      RISCV_MATH_SUCCESS if initialization is successful or RISCV_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2647    */
2648   riscv_status riscv_dct4_init_q15(
2649         riscv_dct4_instance_q15 * S,
2650         riscv_rfft_instance_q15 * S_RFFT,
2651         riscv_cfft_radix4_instance_q15 * S_CFFT,
2652         uint16_t N,
2653         uint16_t Nby2,
2654         q15_t normalize);
2655 
2656 
2657   /**
2658    * @brief Processing function for the Q15 DCT4/IDCT4.
2659    * @param[in]     S              points to an instance of the Q15 DCT4/IDCT4 structure (read-only).
2660    * @param[in]     pState         points to the state buffer.
2661    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2662    */
2663   void riscv_dct4_q15(
2664   const riscv_dct4_instance_q15 * S,
2665         q15_t * pState,
2666         q15_t * pInlineBuffer);
2667 
2668 
2669   /**
2670    * @brief Element-wise addition of two floating-point vectors.
2671    * @param[in]  pSrcA      points to the first input vector
2672    * @param[in]  pSrcB      points to the second input vector
2673    * @param[out] pDst       points to the output vector
2674    * @param[in]  blockSize  number of samples in each vector
2675    */
2676   void riscv_add_f32(
2677   const float32_t * pSrcA,
2678   const float32_t * pSrcB,
2679         float32_t * pDst,
2680         uint32_t blockSize);
2681 
2682 
2683   /**
2684    * @brief Element-wise addition of two Q7 vectors.
2685    * @param[in]  pSrcA      points to the first input vector
2686    * @param[in]  pSrcB      points to the second input vector
2687    * @param[out] pDst       points to the output vector
2688    * @param[in]  blockSize  number of samples in each vector
2689    */
2690   void riscv_add_q7(
2691   const q7_t * pSrcA,
2692   const q7_t * pSrcB,
2693         q7_t * pDst,
2694         uint32_t blockSize);
2695 
2696 
2697   /**
2698    * @brief Element-wise addition of two Q15 vectors.
2699    * @param[in]  pSrcA      points to the first input vector
2700    * @param[in]  pSrcB      points to the second input vector
2701    * @param[out] pDst       points to the output vector
2702    * @param[in]  blockSize  number of samples in each vector
2703    */
2704   void riscv_add_q15(
2705   const q15_t * pSrcA,
2706   const q15_t * pSrcB,
2707         q15_t * pDst,
2708         uint32_t blockSize);
2709 
2710 
2711   /**
2712    * @brief Element-wise addition of two Q31 vectors.
2713    * @param[in]  pSrcA      points to the first input vector
2714    * @param[in]  pSrcB      points to the second input vector
2715    * @param[out] pDst       points to the output vector
2716    * @param[in]  blockSize  number of samples in each vector
2717    */
2718   void riscv_add_q31(
2719   const q31_t * pSrcA,
2720   const q31_t * pSrcB,
2721         q31_t * pDst,
2722         uint32_t blockSize);
2723 
2724 
2725   /**
2726    * @brief Element-wise subtraction of two floating-point vectors.
2727    * @param[in]  pSrcA      points to the first input vector
2728    * @param[in]  pSrcB      points to the second input vector
2729    * @param[out] pDst       points to the output vector
2730    * @param[in]  blockSize  number of samples in each vector
2731    */
2732   void riscv_sub_f32(
2733   const float32_t * pSrcA,
2734   const float32_t * pSrcB,
2735         float32_t * pDst,
2736         uint32_t blockSize);
2737 
2738 
2739   /**
2740    * @brief Element-wise subtraction of two Q7 vectors.
2741    * @param[in]  pSrcA      points to the first input vector
2742    * @param[in]  pSrcB      points to the second input vector
2743    * @param[out] pDst       points to the output vector
2744    * @param[in]  blockSize  number of samples in each vector
2745    */
2746   void riscv_sub_q7(
2747   const q7_t * pSrcA,
2748   const q7_t * pSrcB,
2749         q7_t * pDst,
2750         uint32_t blockSize);
2751 
2752 
2753   /**
2754    * @brief Element-wise subtraction of two Q15 vectors.
2755    * @param[in]  pSrcA      points to the first input vector
2756    * @param[in]  pSrcB      points to the second input vector
2757    * @param[out] pDst       points to the output vector
2758    * @param[in]  blockSize  number of samples in each vector
2759    */
2760   void riscv_sub_q15(
2761   const q15_t * pSrcA,
2762   const q15_t * pSrcB,
2763         q15_t * pDst,
2764         uint32_t blockSize);
2765 
2766 
2767   /**
2768    * @brief Element-wise subtraction of two Q31 vectors.
2769    * @param[in]  pSrcA      points to the first input vector
2770    * @param[in]  pSrcB      points to the second input vector
2771    * @param[out] pDst       points to the output vector
2772    * @param[in]  blockSize  number of samples in each vector
2773    */
2774   void riscv_sub_q31(
2775   const q31_t * pSrcA,
2776   const q31_t * pSrcB,
2777         q31_t * pDst,
2778         uint32_t blockSize);
2779 
2780 
2781   /**
2782    * @brief Multiplies a floating-point vector by a scalar.
2783    * @param[in]  pSrc       points to the input vector
2784    * @param[in]  scale      scale factor to be applied
2785    * @param[out] pDst       points to the output vector
2786    * @param[in]  blockSize  number of samples in each vector
2787    */
2788   void riscv_scale_f32(
2789   const float32_t * pSrc,
2790         float32_t scale,
2791         float32_t * pDst,
2792         uint32_t blockSize);
2793 
2794 
2795   /**
2796    * @brief Multiplies a Q7 vector by a scalar.
2797    * @param[in]  pSrc        points to the input vector
2798    * @param[in]  scaleFract  fractional portion of the scale value
2799    * @param[in]  shift       number of bits to shift the result by
2800    * @param[out] pDst        points to the output vector
2801    * @param[in]  blockSize   number of samples in each vector
2802    */
2803   void riscv_scale_q7(
2804   const q7_t * pSrc,
2805         q7_t scaleFract,
2806         int8_t shift,
2807         q7_t * pDst,
2808         uint32_t blockSize);
2809 
2810 
2811   /**
2812    * @brief Multiplies a Q15 vector by a scalar.
2813    * @param[in]  pSrc        points to the input vector
2814    * @param[in]  scaleFract  fractional portion of the scale value
2815    * @param[in]  shift       number of bits to shift the result by
2816    * @param[out] pDst        points to the output vector
2817    * @param[in]  blockSize   number of samples in each vector
2818    */
2819   void riscv_scale_q15(
2820   const q15_t * pSrc,
2821         q15_t scaleFract,
2822         int8_t shift,
2823         q15_t * pDst,
2824         uint32_t blockSize);
2825 
2826 
2827   /**
2828    * @brief Multiplies a Q31 vector by a scalar.
2829    * @param[in]  pSrc        points to the input vector
2830    * @param[in]  scaleFract  fractional portion of the scale value
2831    * @param[in]  shift       number of bits to shift the result by
2832    * @param[out] pDst        points to the output vector
2833    * @param[in]  blockSize   number of samples in each vector
2834    */
2835   void riscv_scale_q31(
2836   const q31_t * pSrc,
2837         q31_t scaleFract,
2838         int8_t shift,
2839         q31_t * pDst,
2840         uint32_t blockSize);
2841 
2842 
2843   /**
2844    * @brief Q7 vector absolute value.
2845    * @param[in]  pSrc       points to the input vector
2846    * @param[out] pDst       points to the output vector
2847    * @param[in]  blockSize  number of samples in each vector
2848    */
2849   void riscv_abs_q7(
2850   const q7_t * pSrc,
2851         q7_t * pDst,
2852         uint32_t blockSize);
2853 
2854 
2855   /**
2856    * @brief Floating-point vector absolute value.
2857    * @param[in]  pSrc       points to the input vector
2858    * @param[out] pDst       points to the output vector
2859    * @param[in]  blockSize  number of samples in each vector
2860    */
2861   void riscv_abs_f32(
2862   const float32_t * pSrc,
2863         float32_t * pDst,
2864         uint32_t blockSize);
2865 
2866 
2867   /**
2868    * @brief Q15 vector absolute value.
2869    * @param[in]  pSrc       points to the input vector
2870    * @param[out] pDst       points to the output vector
2871    * @param[in]  blockSize  number of samples in each vector
2872    */
2873   void riscv_abs_q15(
2874   const q15_t * pSrc,
2875         q15_t * pDst,
2876         uint32_t blockSize);
2877 
2878 
2879   /**
2880    * @brief Q31 vector absolute value.
2881    * @param[in]  pSrc       points to the input vector
2882    * @param[out] pDst       points to the output vector
2883    * @param[in]  blockSize  number of samples in each vector
2884    */
2885   void riscv_abs_q31(
2886   const q31_t * pSrc,
2887         q31_t * pDst,
2888         uint32_t blockSize);
2889 
2890 
2891   /**
2892    * @brief Dot product of floating-point vectors.
2893    * @param[in]  pSrcA      points to the first input vector
2894    * @param[in]  pSrcB      points to the second input vector
2895    * @param[in]  blockSize  number of samples in each vector
2896    * @param[out] result     points to the float32_t location where the result is written
2897    */
2898   void riscv_dot_prod_f32(
2899   const float32_t * pSrcA,
2900   const float32_t * pSrcB,
2901         uint32_t blockSize,
2902         float32_t * result);
2903 
2904 
2905   /**
2906    * @brief Dot product of Q7 vectors.
2907    * @param[in]  pSrcA      points to the first input vector
2908    * @param[in]  pSrcB      points to the second input vector
2909    * @param[in]  blockSize  number of samples in each vector
2910    * @param[out] result     points to the q31_t location where the result is written
2911    */
2912   void riscv_dot_prod_q7(
2913   const q7_t * pSrcA,
2914   const q7_t * pSrcB,
2915         uint32_t blockSize,
2916         q31_t * result);
2917 
2918 
2919   /**
2920    * @brief Dot product of Q15 vectors.
2921    * @param[in]  pSrcA      points to the first input vector
2922    * @param[in]  pSrcB      points to the second input vector
2923    * @param[in]  blockSize  number of samples in each vector
2924    * @param[out] result     points to the q63_t location where the result is written
2925    */
2926   void riscv_dot_prod_q15(
2927   const q15_t * pSrcA,
2928   const q15_t * pSrcB,
2929         uint32_t blockSize,
2930         q63_t * result);
2931 
2932 
2933   /**
2934    * @brief Dot product of Q31 vectors.
2935    * @param[in]  pSrcA      points to the first input vector
2936    * @param[in]  pSrcB      points to the second input vector
2937    * @param[in]  blockSize  number of samples in each vector
2938    * @param[out] result     points to the q63_t location where the result is written
2939    */
2940   void riscv_dot_prod_q31(
2941   const q31_t * pSrcA,
2942   const q31_t * pSrcB,
2943         uint32_t blockSize,
2944         q63_t * result);
2945 
2946 
2947   /**
2948    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
2949    * @param[in]  pSrc       points to the input vector
2950    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2951    * @param[out] pDst       points to the output vector
2952    * @param[in]  blockSize  number of samples in each vector
2953    */
2954   void riscv_shift_q7(
2955   const q7_t * pSrc,
2956         int8_t shiftBits,
2957         q7_t * pDst,
2958         uint32_t blockSize);
2959 
2960 
2961   /**
2962    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
2963    * @param[in]  pSrc       points to the input vector
2964    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2965    * @param[out] pDst       points to the output vector
2966    * @param[in]  blockSize  number of samples in each vector
2967    */
2968   void riscv_shift_q15(
2969   const q15_t * pSrc,
2970         int8_t shiftBits,
2971         q15_t * pDst,
2972         uint32_t blockSize);
2973 
2974 
2975   /**
2976    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
2977    * @param[in]  pSrc       points to the input vector
2978    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2979    * @param[out] pDst       points to the output vector
2980    * @param[in]  blockSize  number of samples in each vector
2981    */
2982   void riscv_shift_q31(
2983   const q31_t * pSrc,
2984         int8_t shiftBits,
2985         q31_t * pDst,
2986         uint32_t blockSize);
2987 
2988 
2989   /**
2990    * @brief  Adds a constant offset to a floating-point vector.
2991    * @param[in]  pSrc       points to the input vector
2992    * @param[in]  offset     offset to be added to every sample
2993    * @param[out] pDst       points to the output vector
2994    * @param[in]  blockSize  number of samples in each vector
2995    */
2996   void riscv_offset_f32(
2997   const float32_t * pSrc,
2998         float32_t offset,
2999         float32_t * pDst,
3000         uint32_t blockSize);
3001 
3002 
3003   /**
3004    * @brief  Adds a constant offset to a Q7 vector.
3005    * @param[in]  pSrc       points to the input vector
3006    * @param[in]  offset     offset to be added to every sample
3007    * @param[out] pDst       points to the output vector
3008    * @param[in]  blockSize  number of samples in each vector
3009    */
3010   void riscv_offset_q7(
3011   const q7_t * pSrc,
3012         q7_t offset,
3013         q7_t * pDst,
3014         uint32_t blockSize);
3015 
3016 
3017   /**
3018    * @brief  Adds a constant offset to a Q15 vector.
3019    * @param[in]  pSrc       points to the input vector
3020    * @param[in]  offset     offset to be added to every sample
3021    * @param[out] pDst       points to the output vector
3022    * @param[in]  blockSize  number of samples in each vector
3023    */
3024   void riscv_offset_q15(
3025   const q15_t * pSrc,
3026         q15_t offset,
3027         q15_t * pDst,
3028         uint32_t blockSize);
3029 
3030 
3031   /**
3032    * @brief  Adds a constant offset to a Q31 vector.
3033    * @param[in]  pSrc       points to the input vector
3034    * @param[in]  offset     offset to be added to every sample
3035    * @param[out] pDst       points to the output vector
3036    * @param[in]  blockSize  number of samples in each vector
3037    */
3038   void riscv_offset_q31(
3039   const q31_t * pSrc,
3040         q31_t offset,
3041         q31_t * pDst,
3042         uint32_t blockSize);
3043 
3044 
3045   /**
3046    * @brief  Negates the elements of a floating-point vector.
3047    * @param[in]  pSrc       points to the input vector
3048    * @param[out] pDst       points to the output vector
3049    * @param[in]  blockSize  number of samples in each vector
3050    */
3051   void riscv_negate_f32(
3052   const float32_t * pSrc,
3053         float32_t * pDst,
3054         uint32_t blockSize);
3055 
3056 
3057   /**
3058    * @brief  Negates the elements of a Q7 vector.
3059    * @param[in]  pSrc       points to the input vector
3060    * @param[out] pDst       points to the output vector
3061    * @param[in]  blockSize  number of samples in each vector
3062    */
3063   void riscv_negate_q7(
3064   const q7_t * pSrc,
3065         q7_t * pDst,
3066         uint32_t blockSize);
3067 
3068 
3069   /**
3070    * @brief  Negates the elements of a Q15 vector.
3071    * @param[in]  pSrc       points to the input vector
3072    * @param[out] pDst       points to the output vector
3073    * @param[in]  blockSize  number of samples in each vector
3074    */
3075   void riscv_negate_q15(
3076   const q15_t * pSrc,
3077         q15_t * pDst,
3078         uint32_t blockSize);
3079 
3080 
3081   /**
3082    * @brief  Negates the elements of a Q31 vector.
3083    * @param[in]  pSrc       points to the input vector
3084    * @param[out] pDst       points to the output vector
3085    * @param[in]  blockSize  number of samples in each vector
3086    */
3087   void riscv_negate_q31(
3088   const q31_t * pSrc,
3089         q31_t * pDst,
3090         uint32_t blockSize);
3091 
3092 
3093   /**
3094    * @brief  Copies the elements of a floating-point vector.
3095    * @param[in]  pSrc       points to the input vector
3096    * @param[out] pDst       points to the output vector
3097    * @param[in]  blockSize  number of samples to process
3098    */
3099   void riscv_copy_f32(
3100   const float32_t * pSrc,
3101         float32_t * pDst,
3102         uint32_t blockSize);
3103 
3104 
3105   /**
3106    * @brief  Copies the elements of a Q7 vector.
3107    * @param[in]  pSrc       points to the input vector
3108    * @param[out] pDst       points to the output vector
3109    * @param[in]  blockSize  number of samples to process
3110    */
3111   void riscv_copy_q7(
3112   const q7_t * pSrc,
3113         q7_t * pDst,
3114         uint32_t blockSize);
3115 
3116 
3117   /**
3118    * @brief  Copies the elements of a Q15 vector.
3119    * @param[in]  pSrc       points to the input vector
3120    * @param[out] pDst       points to the output vector
3121    * @param[in]  blockSize  number of samples to process
3122    */
3123   void riscv_copy_q15(
3124   const q15_t * pSrc,
3125         q15_t * pDst,
3126         uint32_t blockSize);
3127 
3128 
3129   /**
3130    * @brief  Copies the elements of a Q31 vector.
3131    * @param[in]  pSrc       points to the input vector
3132    * @param[out] pDst       points to the output vector
3133    * @param[in]  blockSize  number of samples to process
3134    */
3135   void riscv_copy_q31(
3136   const q31_t * pSrc,
3137         q31_t * pDst,
3138         uint32_t blockSize);
3139 
3140 
3141   /**
3142    * @brief  Fills a constant value into a floating-point vector.
3143    * @param[in]  value      input value to be filled
3144    * @param[out] pDst       points to the output vector
3145    * @param[in]  blockSize  number of samples to process
3146    */
3147   void riscv_fill_f32(
3148         float32_t value,
3149         float32_t * pDst,
3150         uint32_t blockSize);
3151 
3152 
3153   /**
3154    * @brief  Fills a constant value into a Q7 vector.
3155    * @param[in]  value      input value to be filled
3156    * @param[out] pDst       points to the output vector
3157    * @param[in]  blockSize  number of samples to process
3158    */
3159   void riscv_fill_q7(
3160         q7_t value,
3161         q7_t * pDst,
3162         uint32_t blockSize);
3163 
3164 
3165   /**
3166    * @brief  Fills a constant value into a Q15 vector.
3167    * @param[in]  value      input value to be filled
3168    * @param[out] pDst       points to the output vector
3169    * @param[in]  blockSize  number of samples to process
3170    */
3171   void riscv_fill_q15(
3172         q15_t value,
3173         q15_t * pDst,
3174         uint32_t blockSize);
3175 
3176 
3177   /**
3178    * @brief  Fills a constant value into a Q31 vector.
3179    * @param[in]  value      input value to be filled
3180    * @param[out] pDst       points to the output vector
3181    * @param[in]  blockSize  number of samples to process
3182    */
3183   void riscv_fill_q31(
3184         q31_t value,
3185         q31_t * pDst,
3186         uint32_t blockSize);
3187 
3188 
3189   /**
3190    * @brief Convolution of floating-point sequences.
3191    * @param[in]  pSrcA    points to the first input sequence.
3192    * @param[in]  srcALen  length of the first input sequence.
3193    * @param[in]  pSrcB    points to the second input sequence.
3194    * @param[in]  srcBLen  length of the second input sequence.
3195    * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
3196    */
3197   void riscv_conv_f32(
3198   const float32_t * pSrcA,
3199         uint32_t srcALen,
3200   const float32_t * pSrcB,
3201         uint32_t srcBLen,
3202         float32_t * pDst);
3203 
3204 
3205   /**
3206    * @brief Convolution of Q15 sequences.
3207    * @param[in]  pSrcA      points to the first input sequence.
3208    * @param[in]  srcALen    length of the first input sequence.
3209    * @param[in]  pSrcB      points to the second input sequence.
3210    * @param[in]  srcBLen    length of the second input sequence.
3211    * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
3212    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3213    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
3214    */
3215   void riscv_conv_opt_q15(
3216   const q15_t * pSrcA,
3217         uint32_t srcALen,
3218   const q15_t * pSrcB,
3219         uint32_t srcBLen,
3220         q15_t * pDst,
3221         q15_t * pScratch1,
3222         q15_t * pScratch2);
3223 
3224 
3225   /**
3226    * @brief Convolution of Q15 sequences.
3227    * @param[in]  pSrcA    points to the first input sequence.
3228    * @param[in]  srcALen  length of the first input sequence.
3229    * @param[in]  pSrcB    points to the second input sequence.
3230    * @param[in]  srcBLen  length of the second input sequence.
3231    * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
3232    */
3233   void riscv_conv_q15(
3234   const q15_t * pSrcA,
3235         uint32_t srcALen,
3236   const q15_t * pSrcB,
3237         uint32_t srcBLen,
3238         q15_t * pDst);
3239 
3240 
3241   /**
3242    * @brief Convolution of Q15 sequences (fast version) for RISC-V Core with DSP enabled.
3243    * @param[in]  pSrcA    points to the first input sequence.
3244    * @param[in]  srcALen  length of the first input sequence.
3245    * @param[in]  pSrcB    points to the second input sequence.
3246    * @param[in]  srcBLen  length of the second input sequence.
3247    * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
3248    */
3249   void riscv_conv_fast_q15(
3250   const q15_t * pSrcA,
3251         uint32_t srcALen,
3252   const q15_t * pSrcB,
3253         uint32_t srcBLen,
3254         q15_t * pDst);
3255 
3256 
3257   /**
3258    * @brief Convolution of Q15 sequences (fast version) for RISC-V Core with DSP enabled.
3259    * @param[in]  pSrcA      points to the first input sequence.
3260    * @param[in]  srcALen    length of the first input sequence.
3261    * @param[in]  pSrcB      points to the second input sequence.
3262    * @param[in]  srcBLen    length of the second input sequence.
3263    * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
3264    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3265    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
3266    */
3267   void riscv_conv_fast_opt_q15(
3268   const q15_t * pSrcA,
3269         uint32_t srcALen,
3270   const q15_t * pSrcB,
3271         uint32_t srcBLen,
3272         q15_t * pDst,
3273         q15_t * pScratch1,
3274         q15_t * pScratch2);
3275 
3276 
3277   /**
3278    * @brief Convolution of Q31 sequences.
3279    * @param[in]  pSrcA    points to the first input sequence.
3280    * @param[in]  srcALen  length of the first input sequence.
3281    * @param[in]  pSrcB    points to the second input sequence.
3282    * @param[in]  srcBLen  length of the second input sequence.
3283    * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
3284    */
3285   void riscv_conv_q31(
3286   const q31_t * pSrcA,
3287         uint32_t srcALen,
3288   const q31_t * pSrcB,
3289         uint32_t srcBLen,
3290         q31_t * pDst);
3291 
3292 
3293   /**
3294    * @brief Convolution of Q31 sequences (fast version) for RISC-V Core with DSP enabled.
3295    * @param[in]  pSrcA    points to the first input sequence.
3296    * @param[in]  srcALen  length of the first input sequence.
3297    * @param[in]  pSrcB    points to the second input sequence.
3298    * @param[in]  srcBLen  length of the second input sequence.
3299    * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
3300    */
3301   void riscv_conv_fast_q31(
3302   const q31_t * pSrcA,
3303         uint32_t srcALen,
3304   const q31_t * pSrcB,
3305         uint32_t srcBLen,
3306         q31_t * pDst);
3307 
3308 
3309   /**
3310    * @brief Convolution of Q7 sequences.
3311    * @param[in]  pSrcA      points to the first input sequence.
3312    * @param[in]  srcALen    length of the first input sequence.
3313    * @param[in]  pSrcB      points to the second input sequence.
3314    * @param[in]  srcBLen    length of the second input sequence.
3315    * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
3316    * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3317    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3318    */
3319   void riscv_conv_opt_q7(
3320   const q7_t * pSrcA,
3321         uint32_t srcALen,
3322   const q7_t * pSrcB,
3323         uint32_t srcBLen,
3324         q7_t * pDst,
3325         q15_t * pScratch1,
3326         q15_t * pScratch2);
3327 
3328 
3329   /**
3330    * @brief Convolution of Q7 sequences.
3331    * @param[in]  pSrcA    points to the first input sequence.
3332    * @param[in]  srcALen  length of the first input sequence.
3333    * @param[in]  pSrcB    points to the second input sequence.
3334    * @param[in]  srcBLen  length of the second input sequence.
3335    * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
3336    */
3337   void riscv_conv_q7(
3338   const q7_t * pSrcA,
3339         uint32_t srcALen,
3340   const q7_t * pSrcB,
3341         uint32_t srcBLen,
3342         q7_t * pDst);
3343 
3344 
3345   /**
3346    * @brief Partial convolution of floating-point sequences.
3347    * @param[in]  pSrcA       points to the first input sequence.
3348    * @param[in]  srcALen     length of the first input sequence.
3349    * @param[in]  pSrcB       points to the second input sequence.
3350    * @param[in]  srcBLen     length of the second input sequence.
3351    * @param[out] pDst        points to the block of output data.
3352    * @param[in]  firstIndex  is the first output sample to start with.
3353    * @param[in]  numPoints   is the number of output points to be computed.
3354    * @return  RISCV_MATH_SUCCESS if the function completed correctly or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3355    */
3356   riscv_status riscv_conv_partial_f32(
3357   const float32_t * pSrcA,
3358         uint32_t srcALen,
3359   const float32_t * pSrcB,
3360         uint32_t srcBLen,
3361         float32_t * pDst,
3362         uint32_t firstIndex,
3363         uint32_t numPoints);
3364 
3365 
3366   /**
3367    * @brief Partial convolution of Q15 sequences (variant that takes caller-supplied scratch buffers).
3368    * @param[in]  pSrcA       points to the first input sequence.
3369    * @param[in]  srcALen     length of the first input sequence.
3370    * @param[in]  pSrcB       points to the second input sequence.
3371    * @param[in]  srcBLen     length of the second input sequence.
3372    * @param[out] pDst        points to the block of output data.
3373    * @param[in]  firstIndex  index of the first output sample to compute.
3374    * @param[in]  numPoints   number of output points to compute.
3375    * @param[in]  pScratch1   points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3376    * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3377    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3378    */
3379   riscv_status riscv_conv_partial_opt_q15(
3380   const q15_t * pSrcA,
3381         uint32_t srcALen,
3382   const q15_t * pSrcB,
3383         uint32_t srcBLen,
3384         q15_t * pDst,
3385         uint32_t firstIndex,
3386         uint32_t numPoints,
3387         q15_t * pScratch1,
3388         q15_t * pScratch2);
3389 
3390 
3391   /**
3392    * @brief Partial convolution of Q15 sequences.
3393    * @param[in]  pSrcA       points to the first input sequence.
3394    * @param[in]  srcALen     length of the first input sequence.
3395    * @param[in]  pSrcB       points to the second input sequence.
3396    * @param[in]  srcBLen     length of the second input sequence.
3397    * @param[out] pDst        points to the block of output data.
3398    * @param[in]  firstIndex  index of the first output sample to compute.
3399    * @param[in]  numPoints   number of output points to compute.
3400    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3401    */
3402   riscv_status riscv_conv_partial_q15(
3403   const q15_t * pSrcA,
3404         uint32_t srcALen,
3405   const q15_t * pSrcB,
3406         uint32_t srcBLen,
3407         q15_t * pDst,
3408         uint32_t firstIndex,
3409         uint32_t numPoints);
3410 
3411 
3412   /**
3413    * @brief Partial convolution of Q15 sequences (fast version) for RISC-V Core with DSP enabled.
3414    * @param[in]  pSrcA       points to the first input sequence.
3415    * @param[in]  srcALen     length of the first input sequence.
3416    * @param[in]  pSrcB       points to the second input sequence.
3417    * @param[in]  srcBLen     length of the second input sequence.
3418    * @param[out] pDst        points to the block of output data.
3419    * @param[in]  firstIndex  index of the first output sample to compute.
3420    * @param[in]  numPoints   number of output points to compute.
3421    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3422    */
3423   riscv_status riscv_conv_partial_fast_q15(
3424   const q15_t * pSrcA,
3425         uint32_t srcALen,
3426   const q15_t * pSrcB,
3427         uint32_t srcBLen,
3428         q15_t * pDst,
3429         uint32_t firstIndex,
3430         uint32_t numPoints);
3431 
3432 
3433   /**
3434    * @brief Partial convolution of Q15 sequences (fast version, taking caller-supplied scratch buffers) for RISC-V Core with DSP enabled.
3435    * @param[in]  pSrcA       points to the first input sequence.
3436    * @param[in]  srcALen     length of the first input sequence.
3437    * @param[in]  pSrcB       points to the second input sequence.
3438    * @param[in]  srcBLen     length of the second input sequence.
3439    * @param[out] pDst        points to the block of output data.
3440    * @param[in]  firstIndex  index of the first output sample to compute.
3441    * @param[in]  numPoints   number of output points to compute.
3442    * @param[in]  pScratch1   points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3443    * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3444    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3445    */
3446   riscv_status riscv_conv_partial_fast_opt_q15(
3447   const q15_t * pSrcA,
3448         uint32_t srcALen,
3449   const q15_t * pSrcB,
3450         uint32_t srcBLen,
3451         q15_t * pDst,
3452         uint32_t firstIndex,
3453         uint32_t numPoints,
3454         q15_t * pScratch1,
3455         q15_t * pScratch2);
3456 
3457 
3458   /**
3459    * @brief Partial convolution of Q31 sequences.
3460    * @param[in]  pSrcA       points to the first input sequence.
3461    * @param[in]  srcALen     length of the first input sequence.
3462    * @param[in]  pSrcB       points to the second input sequence.
3463    * @param[in]  srcBLen     length of the second input sequence.
3464    * @param[out] pDst        points to the block of output data.
3465    * @param[in]  firstIndex  index of the first output sample to compute.
3466    * @param[in]  numPoints   number of output points to compute.
3467    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3468    */
3469   riscv_status riscv_conv_partial_q31(
3470   const q31_t * pSrcA,
3471         uint32_t srcALen,
3472   const q31_t * pSrcB,
3473         uint32_t srcBLen,
3474         q31_t * pDst,
3475         uint32_t firstIndex,
3476         uint32_t numPoints);
3477 
3478 
3479   /**
3480    * @brief Partial convolution of Q31 sequences (fast version) for RISC-V Core with DSP enabled.
3481    * @param[in]  pSrcA       points to the first input sequence.
3482    * @param[in]  srcALen     length of the first input sequence.
3483    * @param[in]  pSrcB       points to the second input sequence.
3484    * @param[in]  srcBLen     length of the second input sequence.
3485    * @param[out] pDst        points to the block of output data.
3486    * @param[in]  firstIndex  index of the first output sample to compute.
3487    * @param[in]  numPoints   number of output points to compute.
3488    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3489    */
3490   riscv_status riscv_conv_partial_fast_q31(
3491   const q31_t * pSrcA,
3492         uint32_t srcALen,
3493   const q31_t * pSrcB,
3494         uint32_t srcBLen,
3495         q31_t * pDst,
3496         uint32_t firstIndex,
3497         uint32_t numPoints);
3498 
3499 
3500   /**
3501    * @brief Partial convolution of Q7 sequences (variant that takes caller-supplied scratch buffers).
3502    * @param[in]  pSrcA       points to the first input sequence.
3503    * @param[in]  srcALen     length of the first input sequence.
3504    * @param[in]  pSrcB       points to the second input sequence.
3505    * @param[in]  srcBLen     length of the second input sequence.
3506    * @param[out] pDst        points to the block of output data.
3507    * @param[in]  firstIndex  index of the first output sample to compute.
3508    * @param[in]  numPoints   number of output points to compute.
3509    * @param[in]  pScratch1   points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3510    * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3511    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3512    */
3513   riscv_status riscv_conv_partial_opt_q7(
3514   const q7_t * pSrcA,
3515         uint32_t srcALen,
3516   const q7_t * pSrcB,
3517         uint32_t srcBLen,
3518         q7_t * pDst,
3519         uint32_t firstIndex,
3520         uint32_t numPoints,
3521         q15_t * pScratch1,
3522         q15_t * pScratch2);
3523 
3524 
3525   /**
3526    * @brief Partial convolution of Q7 sequences.
3527    * @param[in]  pSrcA       points to the first input sequence.
3528    * @param[in]  srcALen     length of the first input sequence.
3529    * @param[in]  pSrcB       points to the second input sequence.
3530    * @param[in]  srcBLen     length of the second input sequence.
3531    * @param[out] pDst        points to the block of output data.
3532    * @param[in]  firstIndex  index of the first output sample to compute.
3533    * @param[in]  numPoints   number of output points to compute.
3534    * @return  RISCV_MATH_SUCCESS if the function completed correctly, or RISCV_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3535    */
3536   riscv_status riscv_conv_partial_q7(
3537   const q7_t * pSrcA,
3538         uint32_t srcALen,
3539   const q7_t * pSrcB,
3540         uint32_t srcBLen,
3541         q7_t * pDst,
3542         uint32_t firstIndex,
3543         uint32_t numPoints);
3544 
3545 
3546   /**
3547    * @brief Instance structure for the Q15 FIR decimator (see riscv_fir_decimate_init_q15()).
3548    */
3549   typedef struct
3550   {
3551           uint8_t M;                  /**< decimation factor. */
3552           uint16_t numTaps;           /**< number of coefficients in the filter. */
3553     const q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
3554           q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3555   } riscv_fir_decimate_instance_q15;
3556 
3557   /**
3558    * @brief Instance structure for the Q31 FIR decimator (see riscv_fir_decimate_init_q31()).
3559    */
3560   typedef struct
3561   {
3562           uint8_t M;                  /**< decimation factor. */
3563           uint16_t numTaps;           /**< number of coefficients in the filter. */
3564     const q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
3565           q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3566   } riscv_fir_decimate_instance_q31;
3567 
3568 /**
3569   @brief Instance structure for the floating-point FIR decimator (see riscv_fir_decimate_init_f32()).
3570  */
3571 typedef struct
3572   {
3573           uint8_t M;                  /**< decimation factor. */
3574           uint16_t numTaps;           /**< number of coefficients in the filter. */
3575     const float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps. */
3576           float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3577   } riscv_fir_decimate_instance_f32;
3578 
3579 
3580 /**
3581   @brief         Processing function for the floating-point FIR decimator.
3582   @param[in]     S         points to an instance of the floating-point FIR decimator structure.
3583   @param[in]     pSrc      points to the block of input data.
3584   @param[out]    pDst      points to the block of output data.
3585   @param[in]     blockSize number of input samples to process.
3586  */
3587 void riscv_fir_decimate_f32(
3588   const riscv_fir_decimate_instance_f32 * S,
3589   const float32_t * pSrc,
3590         float32_t * pDst,
3591         uint32_t blockSize);
3592 
3593 
3594 /**
3595   @brief         Initialization function for the floating-point FIR decimator.
3596   @param[in,out] S          points to an instance of the floating-point FIR decimator structure.
3597   @param[in]     numTaps    number of coefficients in the filter.
3598   @param[in]     M          decimation factor.
3599   @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps).
3600   @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1).
3601   @param[in]     blockSize  number of input samples to process per call.
3602   @return        execution status
3603                    - \ref RISCV_MATH_SUCCESS      : Operation successful
3604                    - \ref RISCV_MATH_LENGTH_ERROR : <code>blockSize</code> is not a multiple of <code>M</code>
3605  */
3606 riscv_status riscv_fir_decimate_init_f32(
3607         riscv_fir_decimate_instance_f32 * S,
3608         uint16_t numTaps,
3609         uint8_t M,
3610   const float32_t * pCoeffs,
3611         float32_t * pState,
3612         uint32_t blockSize);
3613 
3614 
3615   /**
3616    * @brief Processing function for the Q15 FIR decimator.
3617    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
3618    * @param[in]  pSrc       points to the block of input data.
3619    * @param[out] pDst       points to the block of output data.
3620    * @param[in]  blockSize  number of input samples to process per call.
3621    */
3622   void riscv_fir_decimate_q15(
3623   const riscv_fir_decimate_instance_q15 * S,
3624   const q15_t * pSrc,
3625         q15_t * pDst,
3626         uint32_t blockSize);
3627 
3628 
3629   /**
3630    * @brief Processing function for the Q15 FIR decimator (fast variant) for RISC-V Core with DSP enabled.
3631    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
3632    * @param[in]  pSrc       points to the block of input data.
3633    * @param[out] pDst       points to the block of output data.
3634    * @param[in]  blockSize  number of input samples to process per call.
3635    */
3636   void riscv_fir_decimate_fast_q15(
3637   const riscv_fir_decimate_instance_q15 * S,
3638   const q15_t * pSrc,
3639         q15_t * pDst,
3640         uint32_t blockSize);
3641 
3642 
3643   /**
3644    * @brief  Initialization function for the Q15 FIR decimator.
3645    * @param[in,out] S          points to an instance of the Q15 FIR decimator structure.
3646    * @param[in]     numTaps    number of coefficients in the filter.
3647    * @param[in]     M          decimation factor.
3648    * @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps).
3649    * @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1).
3650    * @param[in]     blockSize  number of input samples to process per call.
3651    * @return    RISCV_MATH_SUCCESS if initialization is successful, or RISCV_MATH_LENGTH_ERROR if
3652    * <code>blockSize</code> is not a multiple of <code>M</code>.
3653    */
3654   riscv_status riscv_fir_decimate_init_q15(
3655         riscv_fir_decimate_instance_q15 * S,
3656         uint16_t numTaps,
3657         uint8_t M,
3658   const q15_t * pCoeffs,
3659         q15_t * pState,
3660         uint32_t blockSize);
3661 
3662 
3663   /**
3664    * @brief Processing function for the Q31 FIR decimator.
3665    * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
3666    * @param[in]  pSrc       points to the block of input data.
3667    * @param[out] pDst       points to the block of output data.
3668    * @param[in]  blockSize  number of input samples to process per call.
3669    */
3670   void riscv_fir_decimate_q31(
3671   const riscv_fir_decimate_instance_q31 * S,
3672   const q31_t * pSrc,
3673         q31_t * pDst,
3674         uint32_t blockSize);
3675 
3676   /**
3677    * @brief Processing function for the Q31 FIR decimator (fast variant) for RISC-V Core with DSP enabled.
3678    * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
3679    * @param[in]  pSrc       points to the block of input data.
3680    * @param[out] pDst       points to the block of output data.
3681    * @param[in]  blockSize  number of input samples to process per call.
3682    */
3683   void riscv_fir_decimate_fast_q31(
3684   const riscv_fir_decimate_instance_q31 * S,
3685   const q31_t * pSrc,
3686         q31_t * pDst,
3687         uint32_t blockSize);
3688 
3689 
3690   /**
3691    * @brief  Initialization function for the Q31 FIR decimator.
3692    * @param[in,out] S          points to an instance of the Q31 FIR decimator structure.
3693    * @param[in]     numTaps    number of coefficients in the filter.
3694    * @param[in]     M          decimation factor.
3695    * @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps).
3696    * @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1).
3697    * @param[in]     blockSize  number of input samples to process per call.
3698    * @return    RISCV_MATH_SUCCESS if initialization is successful, or RISCV_MATH_LENGTH_ERROR if
3699    * <code>blockSize</code> is not a multiple of <code>M</code>.
3700    */
3701   riscv_status riscv_fir_decimate_init_q31(
3702         riscv_fir_decimate_instance_q31 * S,
3703         uint16_t numTaps,
3704         uint8_t M,
3705   const q31_t * pCoeffs,
3706         q31_t * pState,
3707         uint32_t blockSize);
3708 
3709 
3710   /**
3711    * @brief Instance structure for the Q15 FIR interpolator (see riscv_fir_interpolate_init_q15()).
3712    */
3713   typedef struct
3714   {
3715         uint8_t L;                      /**< upsample factor. */
3716         uint16_t phaseLength;           /**< length of each polyphase filter component. */
3717   const q15_t *pCoeffs;                 /**< points to the coefficient array, of length L*phaseLength. */
3718         q15_t *pState;                  /**< points to the state variable array, of length blockSize+phaseLength-1. */
3719   } riscv_fir_interpolate_instance_q15;
3720 
3721   /**
3722    * @brief Instance structure for the Q31 FIR interpolator (see riscv_fir_interpolate_init_q31()).
3723    */
3724   typedef struct
3725   {
3726         uint8_t L;                      /**< upsample factor. */
3727         uint16_t phaseLength;           /**< length of each polyphase filter component. */
3728   const q31_t *pCoeffs;                 /**< points to the coefficient array, of length L*phaseLength. */
3729         q31_t *pState;                  /**< points to the state variable array, of length blockSize+phaseLength-1. */
3730   } riscv_fir_interpolate_instance_q31;
3731 
3732   /**
3733    * @brief Instance structure for the floating-point FIR interpolator (see riscv_fir_interpolate_init_f32()).
3734    */
3735   typedef struct
3736   {
3737         uint8_t L;                     /**< upsample factor. */
3738         uint16_t phaseLength;          /**< length of each polyphase filter component. */
3739   const float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
3740         float32_t *pState;             /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (same layout as the Q15/Q31 instances). */
3741   } riscv_fir_interpolate_instance_f32;
3742 
3743 
3744   /**
3745    * @brief Processing function for the Q15 FIR interpolator.
3746    * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
3747    * @param[in]  pSrc       points to the block of input data.
3748    * @param[out] pDst       points to the block of output data.
3749    * @param[in]  blockSize  number of input samples to process in this call.
3750    */
3751   void riscv_fir_interpolate_q15(
3752   const riscv_fir_interpolate_instance_q15 * S,
3753   const q15_t * pSrc,
3754         q15_t * pDst,
3755         uint32_t blockSize);
3756 
3757 
3758   /**
3759    * @brief  Initialization function for the Q15 FIR interpolator.
3760    * @param[in,out] S          points to an instance of the Q15 FIR interpolator structure.
3761    * @param[in]     L          upsample factor.
3762    * @param[in]     numTaps    number of filter coefficients in the filter.
3763    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3764    * @param[in]     pState     points to the state buffer.
3765    * @param[in]     blockSize  number of input samples to process per call.
3766    * @return        RISCV_MATH_SUCCESS if initialization is successful, or RISCV_MATH_LENGTH_ERROR if
3767    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3768    */
3769   riscv_status riscv_fir_interpolate_init_q15(
3770         riscv_fir_interpolate_instance_q15 * S,
3771         uint8_t L,
3772         uint16_t numTaps,
3773   const q15_t * pCoeffs,
3774         q15_t * pState,
3775         uint32_t blockSize);
3776 
3777 
3778   /**
3779    * @brief Processing function for the Q31 FIR interpolator.
3780    * @param[in]  S          points to an instance of the Q31 FIR interpolator structure.
3781    * @param[in]  pSrc       points to the block of input data.
3782    * @param[out] pDst       points to the block of output data.
3783    * @param[in]  blockSize  number of input samples to process per call.
3784    */
3785   void riscv_fir_interpolate_q31(
3786   const riscv_fir_interpolate_instance_q31 * S,
3787   const q31_t * pSrc,
3788         q31_t * pDst,
3789         uint32_t blockSize);
3790 
3791 
3792   /**
3793    * @brief  Initialization function for the Q31 FIR interpolator.
3794    * @param[in,out] S          points to an instance of the Q31 FIR interpolator structure.
3795    * @param[in]     L          upsample factor.
3796    * @param[in]     numTaps    number of filter coefficients in the filter.
3797    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3798    * @param[in]     pState     points to the state buffer.
3799    * @param[in]     blockSize  number of input samples to process per call.
3800    * @return        RISCV_MATH_SUCCESS if initialization is successful, or RISCV_MATH_LENGTH_ERROR if
3801    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3802    */
3803   riscv_status riscv_fir_interpolate_init_q31(
3804         riscv_fir_interpolate_instance_q31 * S,
3805         uint8_t L,
3806         uint16_t numTaps,
3807   const q31_t * pCoeffs,
3808         q31_t * pState,
3809         uint32_t blockSize);
3810 
3811 
3812   /**
3813    * @brief Processing function for the floating-point FIR interpolator.
3814    * @param[in]  S          points to an instance of the floating-point FIR interpolator structure.
3815    * @param[in]  pSrc       points to the block of input data.
3816    * @param[out] pDst       points to the block of output data.
3817    * @param[in]  blockSize  number of input samples to process in this call.
3818    */
3819   void riscv_fir_interpolate_f32(
3820   const riscv_fir_interpolate_instance_f32 * S,
3821   const float32_t * pSrc,
3822         float32_t * pDst,
3823         uint32_t blockSize);
3824 
3825 
3826   /**
3827    * @brief  Initialization function for the floating-point FIR interpolator.
3828    * @param[in,out] S          points to an instance of the floating-point FIR interpolator structure.
3829    * @param[in]     L          upsample factor.
3830    * @param[in]     numTaps    number of filter coefficients in the filter.
3831    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3832    * @param[in]     pState     points to the state buffer.
3833    * @param[in]     blockSize  number of input samples to process per call.
3834    * @return        RISCV_MATH_SUCCESS if initialization is successful, or RISCV_MATH_LENGTH_ERROR if
3835    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3836    */
3837   riscv_status riscv_fir_interpolate_init_f32(
3838         riscv_fir_interpolate_instance_f32 * S,
3839         uint8_t L,
3840         uint16_t numTaps,
3841   const float32_t * pCoeffs,
3842         float32_t * pState,
3843         uint32_t blockSize);
3844 
3845 
3846   /**
3847    * @brief Instance structure for the high precision Q31 Biquad cascade filter (see riscv_biquad_cas_df1_32x64_init_q31()).
3848    */
3849   typedef struct
3850   {
3851           uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3852           q63_t *pState;           /**< points to the array of state variables.  The array is of length 4*numStages. */
3853     const q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3854           uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3855   } riscv_biquad_cas_df1_32x64_ins_q31;
3856 
3857 
3858   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3859    * @param[in]  S          points to an instance of the high precision Q31 Biquad cascade filter structure.
3860    * @param[in]  pSrc       points to the block of input data.
3861    * @param[out] pDst       points to the block of output data.
3862    * @param[in]  blockSize  number of samples to process.
3863    */
3864   void riscv_biquad_cas_df1_32x64_q31(
3865   const riscv_biquad_cas_df1_32x64_ins_q31 * S,
3866         q31_t * pSrc,
3867         q31_t * pDst,
3868         uint32_t blockSize);
3869 
3870 
3871   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3872    * @param[in,out] S          points to an instance of the high precision Q31 Biquad cascade filter structure.
3873    * @param[in]     numStages  number of 2nd order stages in the filter.
3874    * @param[in]     pCoeffs    points to the filter coefficients.
3875    * @param[in]     pState     points to the state buffer.
3876    * @param[in]     postShift  shift to be applied to the output. Varies according to the coefficients format.
3877    */
3878   void riscv_biquad_cas_df1_32x64_init_q31(
3879         riscv_biquad_cas_df1_32x64_ins_q31 * S,
3880         uint8_t numStages,
3881   const q31_t * pCoeffs,
3882         q63_t * pState,
3883         uint8_t postShift);
3884 
3885 
3886   /**
3887    * @brief Instance structure for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3888    */
3889   typedef struct
3890   {
3891           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3892           float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3893     const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3894   } riscv_biquad_cascade_df2T_instance_f32;
3895 
3896   /**
3897    * @brief Instance structure for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
3898    */
3899   typedef struct
3900   {
3901           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3902           float32_t *pState;         /**< points to the array of state variables.  The array is of length 4*numStages. */
3903     const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3904   } riscv_biquad_cascade_stereo_df2T_instance_f32;
3905 
3906   /**
3907    * @brief Instance structure for the float64_t floating-point transposed direct form II Biquad cascade filter.
3908    */
3909   typedef struct
3910   {
3911           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3912           float64_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3913           float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3914   } riscv_biquad_cascade_df2T_instance_f64;
3915 
3916 
3917   /**
3918    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3919    * @param[in]  S          points to an instance of the filter data structure.
3920    * @param[in]  pSrc       points to the block of input data.
3921    * @param[out] pDst       points to the block of output data.
3922    * @param[in]  blockSize  number of samples to process.
3923    */
3924   void riscv_biquad_cascade_df2T_f32(
3925   const riscv_biquad_cascade_df2T_instance_f32 * S,
3926   const float32_t * pSrc,
3927         float32_t * pDst,
3928         uint32_t blockSize);
3929 
3930 
3931   /**
3932    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, 2-channel (stereo) variant.
3933    * @param[in]  S          points to an instance of the stereo filter data structure.
3934    * @param[in]  pSrc       points to the block of input data.
3935    * @param[out] pDst       points to the block of output data.
3936    * @param[in]  blockSize  number of samples to process.
3937    */
3938   void riscv_biquad_cascade_stereo_df2T_f32(
3939   const riscv_biquad_cascade_stereo_df2T_instance_f32 * S,
3940   const float32_t * pSrc,
3941         float32_t * pDst,
3942         uint32_t blockSize);
3943 
3944 
3945   /**
3946    * @brief Processing function for the float64_t floating-point transposed direct form II Biquad cascade filter.
3947    * @param[in]  S          points to an instance of the float64_t filter data structure.
3948    * @param[in]  pSrc       points to the block of input data.
3949    * @param[out] pDst       points to the block of output data.
3950    * @param[in]  blockSize  number of samples to process.
3951    */
3952   void riscv_biquad_cascade_df2T_f64(
3953   const riscv_biquad_cascade_df2T_instance_f64 * S,
3954         float64_t * pSrc,
3955         float64_t * pDst,
3956         uint32_t blockSize);
3957 
3958 
3959   /**
3960    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3961    * @param[in,out] S          points to an instance of the filter data structure.
3962    * @param[in]     numStages  number of 2nd order stages in the filter.
3963    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
3964    * @param[in]     pState     points to the state buffer (array of length 2*numStages).
3965    */
3966   void riscv_biquad_cascade_df2T_init_f32(
3967         riscv_biquad_cascade_df2T_instance_f32 * S,
3968         uint8_t numStages,
3969   const float32_t * pCoeffs,
3970         float32_t * pState);
3971 
3972 
3973   /**
3974    * @brief  Initialization function for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
3975    * @param[in,out] S          points to an instance of the stereo filter data structure.
3976    * @param[in]     numStages  number of 2nd order stages in the filter.
3977    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
3978    * @param[in]     pState     points to the state buffer (array of length 4*numStages).
3979    */
3980   void riscv_biquad_cascade_stereo_df2T_init_f32(
3981         riscv_biquad_cascade_stereo_df2T_instance_f32 * S,
3982         uint8_t numStages,
3983   const float32_t * pCoeffs,
3984         float32_t * pState);
3985 
3986 
3987   /**
3988    * @brief  Initialization function for the float64_t floating-point transposed direct form II Biquad cascade filter.
3989    * @param[in,out] S          points to an instance of the float64_t filter data structure.
3990    * @param[in]     numStages  number of 2nd order stages in the filter.
3991    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
3992    * @param[in]     pState     points to the state buffer (array of length 2*numStages).
3993    */
3994   void riscv_biquad_cascade_df2T_init_f64(
3995         riscv_biquad_cascade_df2T_instance_f64 * S,
3996         uint8_t numStages,
3997         float64_t * pCoeffs,
3998         float64_t * pState);
3999 
4000 
4001   /**
4002    * @brief Instance structure for the Q15 FIR lattice filter (see riscv_fir_lattice_init_q15()).
4003    */
4004   typedef struct
4005   {
4006           uint16_t numStages;                  /**< number of filter stages. */
4007           q15_t *pState;                       /**< points to the state variable array, of length numStages. */
4008     const q15_t *pCoeffs;                      /**< points to the coefficient array, of length numStages. */
4009   } riscv_fir_lattice_instance_q15;
4010 
4011   /**
4012    * @brief Instance structure for the Q31 FIR lattice filter.
4013    */
4014   typedef struct
4015   {
4016           uint16_t numStages;                  /**< number of filter stages. */
4017           q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
4018     const q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
4019   } riscv_fir_lattice_instance_q31;
4020 
4021   /**
4022    * @brief Instance structure for the floating-point FIR lattice filter.
4023    */
4024   typedef struct
4025   {
4026           uint16_t numStages;                  /**< number of filter stages. */
4027           float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
4028     const float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
4029   } riscv_fir_lattice_instance_f32;
4030 
4031 
4032   /**
4033    * @brief Initialization function for the Q15 FIR lattice filter.
4034    * @param[in,out] S          points to an instance of the Q15 FIR lattice structure.
4035    * @param[in]     numStages  number of filter stages.
4036    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4037    * @param[in]     pState     points to the state buffer.  The array is of length numStages.
4038    */
4039   void riscv_fir_lattice_init_q15(
4040         riscv_fir_lattice_instance_q15 * S,
4041         uint16_t numStages,
4042   const q15_t * pCoeffs,
4043         q15_t * pState);
4044 
4045 
4046   /**
4047    * @brief Processing function for the Q15 FIR lattice filter.
4048    * @param[in]  S          points to an instance of the Q15 FIR lattice structure.
4049    * @param[in]  pSrc       points to the block of input data.
4050    * @param[out] pDst       points to the block of output data.
4051    * @param[in]  blockSize  number of samples to process.
4052    */
4053   void riscv_fir_lattice_q15(
4054   const riscv_fir_lattice_instance_q15 * S,
4055   const q15_t * pSrc,
4056         q15_t * pDst,
4057         uint32_t blockSize);
4058 
4059 
4060   /**
4061    * @brief Initialization function for the Q31 FIR lattice filter.
4062    * @param[in,out] S          points to an instance of the Q31 FIR lattice structure.
4063    * @param[in]     numStages  number of filter stages.
4064    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4065    * @param[in]     pState     points to the state buffer.  The array is of length numStages.
4066    */
4067   void riscv_fir_lattice_init_q31(
4068         riscv_fir_lattice_instance_q31 * S,
4069         uint16_t numStages,
4070   const q31_t * pCoeffs,
4071         q31_t * pState);
4072 
4073 
4074   /**
4075    * @brief Processing function for the Q31 FIR lattice filter.
4076    * @param[in]  S          points to an instance of the Q31 FIR lattice structure.
4077    * @param[in]  pSrc       points to the block of input data.
4078    * @param[out] pDst       points to the block of output data.
4079    * @param[in]  blockSize  number of samples to process.
4080    */
4081   void riscv_fir_lattice_q31(
4082   const riscv_fir_lattice_instance_q31 * S,
4083   const q31_t * pSrc,
4084         q31_t * pDst,
4085         uint32_t blockSize);
4086 
4087 
4088 /**
4089  * @brief Initialization function for the floating-point FIR lattice filter.
4090  * @param[in,out] S          points to an instance of the floating-point FIR lattice structure.
4091  * @param[in]     numStages  number of filter stages.
4092  * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4093  * @param[in]     pState     points to the state buffer.  The array is of length numStages.
4094  */
4095   void riscv_fir_lattice_init_f32(
4096         riscv_fir_lattice_instance_f32 * S,
4097         uint16_t numStages,
4098   const float32_t * pCoeffs,
4099         float32_t * pState);
4100 
4101 
4102   /**
4103    * @brief Processing function for the floating-point FIR lattice filter.
4104    * @param[in]  S          points to an instance of the floating-point FIR lattice structure.
4105    * @param[in]  pSrc       points to the block of input data.
4106    * @param[out] pDst       points to the block of output data.
4107    * @param[in]  blockSize  number of samples to process.
4108    */
4109   void riscv_fir_lattice_f32(
4110   const riscv_fir_lattice_instance_f32 * S,
4111   const float32_t * pSrc,
4112         float32_t * pDst,
4113         uint32_t blockSize);
4114 
4115 
4116   /**
4117    * @brief Instance structure for the Q15 IIR lattice filter.
4118    */
4119   typedef struct
4120   {
4121           uint16_t numStages;                  /**< number of stages in the filter. */
4122           q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
4123           q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
4124           q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
4125   } riscv_iir_lattice_instance_q15;
4126 
4127   /**
4128    * @brief Instance structure for the Q31 IIR lattice filter.
4129    */
4130   typedef struct
4131   {
4132           uint16_t numStages;                  /**< number of stages in the filter. */
4133           q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
4134           q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
4135           q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
4136   } riscv_iir_lattice_instance_q31;
4137 
4138   /**
4139    * @brief Instance structure for the floating-point IIR lattice filter.
4140    */
4141   typedef struct
4142   {
4143           uint16_t numStages;                  /**< number of stages in the filter. */
4144           float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
4145           float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
4146           float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
4147   } riscv_iir_lattice_instance_f32;
4148 
4149 
4150   /**
4151    * @brief Processing function for the floating-point IIR lattice filter.
4152    * @param[in]  S          points to an instance of the floating-point IIR lattice structure.
4153    * @param[in]  pSrc       points to the block of input data.
4154    * @param[out] pDst       points to the block of output data.
4155    * @param[in]  blockSize  number of samples to process.
4156    */
4157   void riscv_iir_lattice_f32(
4158   const riscv_iir_lattice_instance_f32 * S,
4159   const float32_t * pSrc,
4160         float32_t * pDst,
4161         uint32_t blockSize);
4162 
4163 
4164   /**
4165    * @brief Initialization function for the floating-point IIR lattice filter.
4166    * @param[in,out] S          points to an instance of the floating-point IIR lattice structure.
4167    * @param[in]     numStages  number of stages in the filter.
4168    * @param[in]     pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
4169    * @param[in]     pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
4170    * @param[in]     pState     points to the state buffer.  The array is of length numStages+blockSize.
4171    * @param[in]     blockSize  number of samples to process.
4172    */
4173   void riscv_iir_lattice_init_f32(
4174         riscv_iir_lattice_instance_f32 * S,
4175         uint16_t numStages,
4176         float32_t * pkCoeffs,
4177         float32_t * pvCoeffs,
4178         float32_t * pState,
4179         uint32_t blockSize);
4180 
4181 
4182   /**
4183    * @brief Processing function for the Q31 IIR lattice filter.
4184    * @param[in]  S          points to an instance of the Q31 IIR lattice structure.
4185    * @param[in]  pSrc       points to the block of input data.
4186    * @param[out] pDst       points to the block of output data.
4187    * @param[in]  blockSize  number of samples to process.
4188    */
4189   void riscv_iir_lattice_q31(
4190   const riscv_iir_lattice_instance_q31 * S,
4191   const q31_t * pSrc,
4192         q31_t * pDst,
4193         uint32_t blockSize);
4194 
4195 
4196   /**
4197    * @brief Initialization function for the Q31 IIR lattice filter.
4198    * @param[in,out] S          points to an instance of the Q31 IIR lattice structure.
4199    * @param[in]     numStages  number of stages in the filter.
4200    * @param[in]     pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
4201    * @param[in]     pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
4202    * @param[in]     pState     points to the state buffer.  The array is of length numStages+blockSize.
4203    * @param[in]     blockSize  number of samples to process.
4204    */
4205   void riscv_iir_lattice_init_q31(
4206         riscv_iir_lattice_instance_q31 * S,
4207         uint16_t numStages,
4208         q31_t * pkCoeffs,
4209         q31_t * pvCoeffs,
4210         q31_t * pState,
4211         uint32_t blockSize);
4212 
4213 
4214   /**
4215    * @brief Processing function for the Q15 IIR lattice filter.
4216    * @param[in]  S          points to an instance of the Q15 IIR lattice structure.
4217    * @param[in]  pSrc       points to the block of input data.
4218    * @param[out] pDst       points to the block of output data.
4219    * @param[in]  blockSize  number of samples to process.
4220    */
4221   void riscv_iir_lattice_q15(
4222   const riscv_iir_lattice_instance_q15 * S,
4223   const q15_t * pSrc,
4224         q15_t * pDst,
4225         uint32_t blockSize);
4226 
4227 
4228 /**
4229  * @brief Initialization function for the Q15 IIR lattice filter.
4230  * @param[in,out] S          points to an instance of the Q15 IIR lattice structure.
4231  * @param[in]     numStages  number of stages in the filter.
4232  * @param[in]     pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
4233  * @param[in]     pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
4234  * @param[in]     pState     points to the state buffer.  The array is of length numStages+blockSize.
4235  * @param[in]     blockSize  number of samples to process per call.
4236  */
4237   void riscv_iir_lattice_init_q15(
4238         riscv_iir_lattice_instance_q15 * S,
4239         uint16_t numStages,
4240         q15_t * pkCoeffs,
4241         q15_t * pvCoeffs,
4242         q15_t * pState,
4243         uint32_t blockSize);
4244 
4245 
4246   /**
4247    * @brief Instance structure for the floating-point LMS filter.
4248    */
4249   typedef struct
4250   {
4251           uint16_t numTaps;    /**< number of coefficients in the filter. */
4252           float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4253           float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4254           float32_t mu;        /**< step size that controls filter coefficient updates. */
4255   } riscv_lms_instance_f32;
4256 
4257 
4258   /**
4259    * @brief Processing function for floating-point LMS filter.
4260    * @param[in]  S          points to an instance of the floating-point LMS filter structure.
4261    * @param[in]  pSrc       points to the block of input data.
4262    * @param[in]  pRef       points to the block of reference data.
4263    * @param[out] pOut       points to the block of output data.
4264    * @param[out] pErr       points to the block of error data.
4265    * @param[in]  blockSize  number of samples to process.
4266    */
4267   void riscv_lms_f32(
4268   const riscv_lms_instance_f32 * S,
4269   const float32_t * pSrc,
4270         float32_t * pRef,
4271         float32_t * pOut,
4272         float32_t * pErr,
4273         uint32_t blockSize);
4274 
4275 
4276   /**
4277    * @brief Initialization function for floating-point LMS filter.
4278    * @param[in,out] S          points to an instance of the floating-point LMS filter structure.
4279    * @param[in]     numTaps    number of filter coefficients.
4280    * @param[in]     pCoeffs    points to the coefficient buffer.
4281    * @param[in]     pState     points to the state buffer.
4282    * @param[in]     mu         step size that controls filter coefficient updates.
4283    * @param[in]     blockSize  number of samples to process.
4284    */
4285   void riscv_lms_init_f32(
4286         riscv_lms_instance_f32 * S,
4287         uint16_t numTaps,
4288         float32_t * pCoeffs,
4289         float32_t * pState,
4290         float32_t mu,
4291         uint32_t blockSize);
4292 
4293 
4294   /**
4295    * @brief Instance structure for the Q15 LMS filter.
4296    */
4297   typedef struct
4298   {
4299           uint16_t numTaps;    /**< number of coefficients in the filter. */
4300           q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4301           q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4302           q15_t mu;            /**< step size that controls filter coefficient updates. */
4303           uint32_t postShift;  /**< bit shift applied to coefficients. */
4304   } riscv_lms_instance_q15;
4305 
4306 
4307   /**
4308    * @brief Initialization function for the Q15 LMS filter.
4309    * @param[in,out] S          points to an instance of the Q15 LMS filter structure.
4310    * @param[in]     numTaps    number of filter coefficients.
4311    * @param[in]     pCoeffs    points to the coefficient buffer.
4312    * @param[in]     pState     points to the state buffer.
4313    * @param[in]     mu         step size that controls filter coefficient updates.
4314    * @param[in]     blockSize  number of samples to process.
4315    * @param[in]     postShift  bit shift applied to coefficients.
4316    */
4317   void riscv_lms_init_q15(
4318         riscv_lms_instance_q15 * S,
4319         uint16_t numTaps,
4320         q15_t * pCoeffs,
4321         q15_t * pState,
4322         q15_t mu,
4323         uint32_t blockSize,
4324         uint32_t postShift);
4325 
4326 
4327   /**
4328    * @brief Processing function for Q15 LMS filter.
4329    * @param[in]  S          points to an instance of the Q15 LMS filter structure.
4330    * @param[in]  pSrc       points to the block of input data.
4331    * @param[in]  pRef       points to the block of reference data.
4332    * @param[out] pOut       points to the block of output data.
4333    * @param[out] pErr       points to the block of error data.
4334    * @param[in]  blockSize  number of samples to process.
4335    */
4336   void riscv_lms_q15(
4337   const riscv_lms_instance_q15 * S,
4338   const q15_t * pSrc,
4339         q15_t * pRef,
4340         q15_t * pOut,
4341         q15_t * pErr,
4342         uint32_t blockSize);
4343 
4344 
4345   /**
4346    * @brief Instance structure for the Q31 LMS filter.
4347    */
4348   typedef struct
4349   {
4350           uint16_t numTaps;    /**< number of coefficients in the filter. */
4351           q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4352           q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4353           q31_t mu;            /**< step size that controls filter coefficient updates. */
4354           uint32_t postShift;  /**< bit shift applied to coefficients. */
4355   } riscv_lms_instance_q31;
4356 
4357 
4358   /**
4359    * @brief Processing function for Q31 LMS filter.
4360    * @param[in]  S          points to an instance of the Q31 LMS filter structure.
4361    * @param[in]  pSrc       points to the block of input data.
4362    * @param[in]  pRef       points to the block of reference data.
4363    * @param[out] pOut       points to the block of output data.
4364    * @param[out] pErr       points to the block of error data.
4365    * @param[in]  blockSize  number of samples to process.
4366    */
4367   void riscv_lms_q31(
4368   const riscv_lms_instance_q31 * S,
4369   const q31_t * pSrc,
4370         q31_t * pRef,
4371         q31_t * pOut,
4372         q31_t * pErr,
4373         uint32_t blockSize);
4374 
4375 
4376   /**
4377    * @brief Initialization function for Q31 LMS filter.
4378    * @param[in,out] S          points to an instance of the Q31 LMS filter structure.
4379    * @param[in]     numTaps    number of filter coefficients.
4380    * @param[in]     pCoeffs    points to the coefficient buffer.
4381    * @param[in]     pState     points to the state buffer.
4382    * @param[in]     mu         step size that controls filter coefficient updates.
4383    * @param[in]     blockSize  number of samples to process.
4384    * @param[in]     postShift  bit shift applied to coefficients.
4385    */
4386   void riscv_lms_init_q31(
4387         riscv_lms_instance_q31 * S,
4388         uint16_t numTaps,
4389         q31_t * pCoeffs,
4390         q31_t * pState,
4391         q31_t mu,
4392         uint32_t blockSize,
4393         uint32_t postShift);
4394 
4395 
4396   /**
4397    * @brief Instance structure for the floating-point normalized LMS filter.
4398    */
4399   typedef struct
4400   {
4401           uint16_t numTaps;     /**< number of coefficients in the filter. */
4402           float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4403           float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4404           float32_t mu;         /**< step size that controls filter coefficient updates. */
4405           float32_t energy;     /**< saves previous frame energy. */
4406           float32_t x0;         /**< saves previous input sample. */
4407   } riscv_lms_norm_instance_f32;
4408 
4409 
4410   /**
4411    * @brief Processing function for floating-point normalized LMS filter.
4412    * @param[in]  S          points to an instance of the floating-point normalized LMS filter structure.
4413    * @param[in]  pSrc       points to the block of input data.
4414    * @param[in]  pRef       points to the block of reference data.
4415    * @param[out] pOut       points to the block of output data.
4416    * @param[out] pErr       points to the block of error data.
4417    * @param[in]  blockSize  number of samples to process.
4418    */
4419   void riscv_lms_norm_f32(
4420         riscv_lms_norm_instance_f32 * S,
4421   const float32_t * pSrc,
4422         float32_t * pRef,
4423         float32_t * pOut,
4424         float32_t * pErr,
4425         uint32_t blockSize);
4426 
4427 
4428   /**
4429    * @brief Initialization function for floating-point normalized LMS filter.
4430    * @param[in,out] S          points to an instance of the floating-point normalized LMS filter structure.
4431    * @param[in]     numTaps    number of filter coefficients.
4432    * @param[in]     pCoeffs    points to the coefficient buffer.
4433    * @param[in]     pState     points to the state buffer.
4434    * @param[in]     mu         step size that controls filter coefficient updates.
4435    * @param[in]     blockSize  number of samples to process.
4436    */
4437   void riscv_lms_norm_init_f32(
4438         riscv_lms_norm_instance_f32 * S,
4439         uint16_t numTaps,
4440         float32_t * pCoeffs,
4441         float32_t * pState,
4442         float32_t mu,
4443         uint32_t blockSize);
4444 
4445 
4446   /**
4447    * @brief Instance structure for the Q31 normalized LMS filter.
4448    */
4449   typedef struct
4450   {
4451           uint16_t numTaps;     /**< number of coefficients in the filter. */
4452           q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4453           q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4454           q31_t mu;             /**< step size that controls filter coefficient updates. */
4455           uint8_t postShift;    /**< bit shift applied to coefficients. */
4456     const q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4457           q31_t energy;         /**< saves previous frame energy. */
4458           q31_t x0;             /**< saves previous input sample. */
4459   } riscv_lms_norm_instance_q31;
4460 
4461 
4462   /**
4463    * @brief Processing function for Q31 normalized LMS filter.
4464    * @param[in]  S          points to an instance of the Q31 normalized LMS filter structure.
4465    * @param[in]  pSrc       points to the block of input data.
4466    * @param[in]  pRef       points to the block of reference data.
4467    * @param[out] pOut       points to the block of output data.
4468    * @param[out] pErr       points to the block of error data.
4469    * @param[in]  blockSize  number of samples to process.
4470    */
4471   void riscv_lms_norm_q31(
4472         riscv_lms_norm_instance_q31 * S,
4473   const q31_t * pSrc,
4474         q31_t * pRef,
4475         q31_t * pOut,
4476         q31_t * pErr,
4477         uint32_t blockSize);
4478 
4479 
4480   /**
4481    * @brief Initialization function for Q31 normalized LMS filter.
4482    * @param[in,out] S          points to an instance of the Q31 normalized LMS filter structure.
4483    * @param[in]     numTaps    number of filter coefficients.
4484    * @param[in]     pCoeffs    points to the coefficient buffer.
4485    * @param[in]     pState     points to the state buffer.
4486    * @param[in]     mu         step size that controls filter coefficient updates.
4487    * @param[in]     blockSize  number of samples to process.
4488    * @param[in]     postShift  bit shift applied to coefficients.
4489    */
4490   void riscv_lms_norm_init_q31(
4491         riscv_lms_norm_instance_q31 * S,
4492         uint16_t numTaps,
4493         q31_t * pCoeffs,
4494         q31_t * pState,
4495         q31_t mu,
4496         uint32_t blockSize,
4497         uint8_t postShift);
4498 
4499 
4500   /**
4501    * @brief Instance structure for the Q15 normalized LMS filter.
4502    */
4503   typedef struct
4504   {
4505           uint16_t numTaps;     /**< number of coefficients in the filter. */
4506           q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4507           q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4508           q15_t mu;             /**< step size that controls filter coefficient updates. */
4509           uint8_t postShift;    /**< bit shift applied to coefficients. */
4510     const q15_t *recipTable;    /**< points to the reciprocal initial value table. */
4511           q15_t energy;         /**< saves previous frame energy. */
4512           q15_t x0;             /**< saves previous input sample. */
4513   } riscv_lms_norm_instance_q15;
4514 
4515 
4516   /**
4517    * @brief Processing function for Q15 normalized LMS filter.
4518    * @param[in]  S          points to an instance of the Q15 normalized LMS filter structure.
4519    * @param[in]  pSrc       points to the block of input data.
4520    * @param[in]  pRef       points to the block of reference data.
4521    * @param[out] pOut       points to the block of output data.
4522    * @param[out] pErr       points to the block of error data.
4523    * @param[in]  blockSize  number of samples to process.
4524    */
4525   void riscv_lms_norm_q15(
4526         riscv_lms_norm_instance_q15 * S,
4527   const q15_t * pSrc,
4528         q15_t * pRef,
4529         q15_t * pOut,
4530         q15_t * pErr,
4531         uint32_t blockSize);
4532 
4533 
4534   /**
4535    * @brief Initialization function for Q15 normalized LMS filter.
4536    * @param[in,out] S          points to an instance of the Q15 normalized LMS filter structure.
4537    * @param[in]     numTaps    number of filter coefficients.
4538    * @param[in]     pCoeffs    points to the coefficient buffer.
4539    * @param[in]     pState     points to the state buffer.
4540    * @param[in]     mu         step size that controls filter coefficient updates.
4541    * @param[in]     blockSize  number of samples to process.
4542    * @param[in]     postShift  bit shift applied to coefficients.
4543    */
4544   void riscv_lms_norm_init_q15(
4545         riscv_lms_norm_instance_q15 * S,
4546         uint16_t numTaps,
4547         q15_t * pCoeffs,
4548         q15_t * pState,
4549         q15_t mu,
4550         uint32_t blockSize,
4551         uint8_t postShift);
4552 
4553 
4554   /**
4555    * @brief Correlation of floating-point sequences.
4556    * @param[in]  pSrcA    points to the first input sequence.
4557    * @param[in]  srcALen  length of the first input sequence.
4558    * @param[in]  pSrcB    points to the second input sequence.
4559    * @param[in]  srcBLen  length of the second input sequence.
4560    * @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4561    */
4562   void riscv_correlate_f32(
4563   const float32_t * pSrcA,
4564         uint32_t srcALen,
4565   const float32_t * pSrcB,
4566         uint32_t srcBLen,
4567         float32_t * pDst);
4568 
4569 
4570 /**
4571  @brief Correlation of Q15 sequences, using a caller-supplied scratch buffer.
4572  @param[in]  pSrcA     points to the first input sequence.
4573  @param[in]  srcALen   length of the first input sequence.
4574  @param[in]  pSrcB     points to the second input sequence.
4575  @param[in]  srcBLen   length of the second input sequence.
4576  @param[out] pDst      points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4577  @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4578 */
4579 void riscv_correlate_opt_q15(
4580   const q15_t * pSrcA,
4581         uint32_t srcALen,
4582   const q15_t * pSrcB,
4583         uint32_t srcBLen,
4584         q15_t * pDst,
4585         q15_t * pScratch);
4586 
4587 
4588 /**
4589   @brief Correlation of Q15 sequences.
4590   @param[in]  pSrcA    points to the first input sequence.
4591   @param[in]  srcALen  length of the first input sequence.
4592   @param[in]  pSrcB    points to the second input sequence.
4593   @param[in]  srcBLen  length of the second input sequence.
4594   @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4595  */
4596   void riscv_correlate_q15(
4597   const q15_t * pSrcA,
4598         uint32_t srcALen,
4599   const q15_t * pSrcB,
4600         uint32_t srcBLen,
4601         q15_t * pDst);
4602 
4603 
4604 /**
4605   @brief         Correlation of Q15 sequences (fast version).
4606   @param[in]     pSrcA      points to the first input sequence.
4607   @param[in]     srcALen    length of the first input sequence.
4608   @param[in]     pSrcB      points to the second input sequence.
4609   @param[in]     srcBLen    length of the second input sequence.
4610   @param[out]    pDst       points to the location where the output result is written, of length 2 * max(srcALen, srcBLen) - 1.
4611   @return        none
4612  */
4613 void riscv_correlate_fast_q15(
4614   const q15_t * pSrcA,
4615         uint32_t srcALen,
4616   const q15_t * pSrcB,
4617         uint32_t srcBLen,
4618         q15_t * pDst);
4619 
4620 /**
4621   @brief Correlation of Q15 sequences (fast version), using a caller-supplied scratch buffer.
4622   @param[in]  pSrcA     points to the first input sequence.
4623   @param[in]  srcALen   length of the first input sequence.
4624   @param[in]  pSrcB     points to the second input sequence.
4625   @param[in]  srcBLen   length of the second input sequence.
4626   @param[out] pDst      points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4627   @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4628  */
4629 void riscv_correlate_fast_opt_q15(
4630   const q15_t * pSrcA,
4631         uint32_t srcALen,
4632   const q15_t * pSrcB,
4633         uint32_t srcBLen,
4634         q15_t * pDst,
4635         q15_t * pScratch);
4636 
4637 
4638   /**
4639    * @brief Correlation of Q31 sequences.
4640    * @param[in]  pSrcA    points to the first input sequence.
4641    * @param[in]  srcALen  length of the first input sequence.
4642    * @param[in]  pSrcB    points to the second input sequence.
4643    * @param[in]  srcBLen  length of the second input sequence.
4644    * @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4645    */
4646   void riscv_correlate_q31(
4647   const q31_t * pSrcA,
4648         uint32_t srcALen,
4649   const q31_t * pSrcB,
4650         uint32_t srcBLen,
4651         q31_t * pDst);
4652 
4653 
4654 /**
4655   @brief Correlation of Q31 sequences (fast version).
4656   @param[in]  pSrcA    points to the first input sequence.
4657   @param[in]  srcALen  length of the first input sequence.
4658   @param[in]  pSrcB    points to the second input sequence.
4659   @param[in]  srcBLen  length of the second input sequence.
4660   @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4661  */
4662 void riscv_correlate_fast_q31(
4663   const q31_t * pSrcA,
4664         uint32_t srcALen,
4665   const q31_t * pSrcB,
4666         uint32_t srcBLen,
4667         q31_t * pDst);
4668 
4669 
4670  /**
4671    * @brief Correlation of Q7 sequences, using caller-supplied scratch buffers.
4672    * @param[in]  pSrcA      points to the first input sequence.
4673    * @param[in]  srcALen    length of the first input sequence.
4674    * @param[in]  pSrcB      points to the second input sequence.
4675    * @param[in]  srcBLen    length of the second input sequence.
4676    * @param[out] pDst       points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4677    * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4678    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4679    */
4680   void riscv_correlate_opt_q7(
4681   const q7_t * pSrcA,
4682         uint32_t srcALen,
4683   const q7_t * pSrcB,
4684         uint32_t srcBLen,
4685         q7_t * pDst,
4686         q15_t * pScratch1,
4687         q15_t * pScratch2);
4688 
4689 
4690   /**
4691    * @brief Correlation of Q7 sequences.
4692    * @param[in]  pSrcA    points to the first input sequence.
4693    * @param[in]  srcALen  length of the first input sequence.
4694    * @param[in]  pSrcB    points to the second input sequence.
4695    * @param[in]  srcBLen  length of the second input sequence.
4696    * @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4697    */
4698   void riscv_correlate_q7(
4699   const q7_t * pSrcA,
4700         uint32_t srcALen,
4701   const q7_t * pSrcB,
4702         uint32_t srcBLen,
4703         q7_t * pDst);
4704 
4705 
4706   /**
4707    * @brief Instance structure for the floating-point sparse FIR filter.
4708    */
4709   typedef struct
4710   {
4711           uint16_t numTaps;             /**< number of coefficients in the filter. */
4712           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4713           float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4714     const float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
4715           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4716           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4717   } riscv_fir_sparse_instance_f32;
4718 
4719   /**
4720    * @brief Instance structure for the Q31 sparse FIR filter.
4721    */
4722   typedef struct
4723   {
4724           uint16_t numTaps;             /**< number of coefficients in the filter. */
4725           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4726           q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4727     const q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4728           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4729           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4730   } riscv_fir_sparse_instance_q31;
4731 
4732   /**
4733    * @brief Instance structure for the Q15 sparse FIR filter.
4734    */
4735   typedef struct
4736   {
4737           uint16_t numTaps;             /**< number of coefficients in the filter. */
4738           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4739           q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4740     const q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4741           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4742           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4743   } riscv_fir_sparse_instance_q15;
4744 
4745   /**
4746    * @brief Instance structure for the Q7 sparse FIR filter.
4747    */
4748   typedef struct
4749   {
4750           uint16_t numTaps;             /**< number of coefficients in the filter. */
4751           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4752           q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4753     const q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
4754           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4755           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4756   } riscv_fir_sparse_instance_q7;
4757 
4758 
4759   /**
4760    * @brief Processing function for the floating-point sparse FIR filter.
4761    * @param[in]  S           points to an instance of the floating-point sparse FIR structure.
4762    * @param[in]  pSrc        points to the block of input data.
4763    * @param[out] pDst        points to the block of output data
4764    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
4765    * @param[in]  blockSize   number of input samples to process per call.
4766    */
4767   void riscv_fir_sparse_f32(
4768         riscv_fir_sparse_instance_f32 * S,
4769   const float32_t * pSrc,
4770         float32_t * pDst,
4771         float32_t * pScratchIn,
4772         uint32_t blockSize);
4773 
4774 
4775   /**
4776    * @brief  Initialization function for the floating-point sparse FIR filter.
4777    * @param[in,out] S          points to an instance of the floating-point sparse FIR structure.
4778    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4779    * @param[in]     pCoeffs    points to the array of filter coefficients.
4780    * @param[in]     pState     points to the state buffer.
4781    * @param[in]     pTapDelay  points to the array of offset times.
4782    * @param[in]     maxDelay   maximum offset time supported.
4783    * @param[in]     blockSize  number of samples that will be processed per block.
4784    */
4785   void riscv_fir_sparse_init_f32(
4786         riscv_fir_sparse_instance_f32 * S,
4787         uint16_t numTaps,
4788   const float32_t * pCoeffs,
4789         float32_t * pState,
4790         int32_t * pTapDelay,
4791         uint16_t maxDelay,
4792         uint32_t blockSize);
4793 
4794 
4795   /**
4796    * @brief Processing function for the Q31 sparse FIR filter.
4797    * @param[in]  S           points to an instance of the Q31 sparse FIR structure.
4798    * @param[in]  pSrc        points to the block of input data.
4799    * @param[out] pDst        points to the block of output data
4800    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
4801    * @param[in]  blockSize   number of input samples to process per call.
4802    */
4803   void riscv_fir_sparse_q31(
4804         riscv_fir_sparse_instance_q31 * S,
4805   const q31_t * pSrc,
4806         q31_t * pDst,
4807         q31_t * pScratchIn,
4808         uint32_t blockSize);
4809 
4810 
4811   /**
4812    * @brief  Initialization function for the Q31 sparse FIR filter.
4813    * @param[in,out] S          points to an instance of the Q31 sparse FIR structure.
4814    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4815    * @param[in]     pCoeffs    points to the array of filter coefficients.
4816    * @param[in]     pState     points to the state buffer.
4817    * @param[in]     pTapDelay  points to the array of offset times.
4818    * @param[in]     maxDelay   maximum offset time supported.
4819    * @param[in]     blockSize  number of samples that will be processed per block.
4820    */
4821   void riscv_fir_sparse_init_q31(
4822         riscv_fir_sparse_instance_q31 * S,
4823         uint16_t numTaps,
4824   const q31_t * pCoeffs,
4825         q31_t * pState,
4826         int32_t * pTapDelay,
4827         uint16_t maxDelay,
4828         uint32_t blockSize);
4829 
4830 
4831   /**
4832    * @brief Processing function for the Q15 sparse FIR filter.
4833    * @param[in]  S            points to an instance of the Q15 sparse FIR structure.
4834    * @param[in]  pSrc         points to the block of input data.
4835    * @param[out] pDst         points to the block of output data
4836    * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
4837    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
4838    * @param[in]  blockSize    number of input samples to process per call.
4839    */
4840   void riscv_fir_sparse_q15(
4841         riscv_fir_sparse_instance_q15 * S,
4842   const q15_t * pSrc,
4843         q15_t * pDst,
4844         q15_t * pScratchIn,
4845         q31_t * pScratchOut,
4846         uint32_t blockSize);
4847 
4848 
4849   /**
4850    * @brief  Initialization function for the Q15 sparse FIR filter.
4851    * @param[in,out] S          points to an instance of the Q15 sparse FIR structure.
4852    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4853    * @param[in]     pCoeffs    points to the array of filter coefficients.
4854    * @param[in]     pState     points to the state buffer.
4855    * @param[in]     pTapDelay  points to the array of offset times.
4856    * @param[in]     maxDelay   maximum offset time supported.
4857    * @param[in]     blockSize  number of samples that will be processed per block.
4858    */
4859   void riscv_fir_sparse_init_q15(
4860         riscv_fir_sparse_instance_q15 * S,
4861         uint16_t numTaps,
4862   const q15_t * pCoeffs,
4863         q15_t * pState,
4864         int32_t * pTapDelay,
4865         uint16_t maxDelay,
4866         uint32_t blockSize);
4867 
4868 
4869   /**
4870    * @brief Processing function for the Q7 sparse FIR filter.
4871    * @param[in]  S            points to an instance of the Q7 sparse FIR structure.
4872    * @param[in]  pSrc         points to the block of input data.
4873    * @param[out] pDst         points to the block of output data
4874    * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
4875    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
4876    * @param[in]  blockSize    number of input samples to process per call.
4877    */
4878   void riscv_fir_sparse_q7(
4879         riscv_fir_sparse_instance_q7 * S,
4880   const q7_t * pSrc,
4881         q7_t * pDst,
4882         q7_t * pScratchIn,
4883         q31_t * pScratchOut,
4884         uint32_t blockSize);
4885 
4886 
4887   /**
4888    * @brief  Initialization function for the Q7 sparse FIR filter.
4889    * @param[in,out] S          points to an instance of the Q7 sparse FIR structure.
4890    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4891    * @param[in]     pCoeffs    points to the array of filter coefficients.
4892    * @param[in]     pState     points to the state buffer.
4893    * @param[in]     pTapDelay  points to the array of offset times.
4894    * @param[in]     maxDelay   maximum offset time supported.
4895    * @param[in]     blockSize  number of samples that will be processed per block.
4896    */
4897   void riscv_fir_sparse_init_q7(
4898         riscv_fir_sparse_instance_q7 * S,
4899         uint16_t numTaps,
4900   const q7_t * pCoeffs,
4901         q7_t * pState,
4902         int32_t * pTapDelay,
4903         uint16_t maxDelay,
4904         uint32_t blockSize);
4905 
4906 
4907   /**
4908    * @brief  Floating-point sin_cos function.
4909    * @param[in]  theta   input value in degrees
4910    * @param[out] pSinVal  points to the processed sine output.
4911    * @param[out] pCosVal  points to the processed cos output.
4912    */
4913   void riscv_sin_cos_f32(
4914         float32_t theta,
4915         float32_t * pSinVal,
4916         float32_t * pCosVal);
4917 
4918 
4919   /**
4920    * @brief  Q31 sin_cos function.
4921    * @param[in]  theta    scaled input value in degrees
4922    * @param[out] pSinVal  points to the processed sine output.
4923    * @param[out] pCosVal  points to the processed cosine output.
4924    */
4925   void riscv_sin_cos_q31(
4926         q31_t theta,
4927         q31_t * pSinVal,
4928         q31_t * pCosVal);
4929 
4930 
4931   /**
4932    * @brief  Floating-point complex conjugate.
4933    * @param[in]  pSrc        points to the input vector
4934    * @param[out] pDst        points to the output vector
4935    * @param[in]  numSamples  number of complex samples in each vector
4936    */
4937   void riscv_cmplx_conj_f32(
4938   const float32_t * pSrc,
4939         float32_t * pDst,
4940         uint32_t numSamples);
4941 
4942   /**
4943    * @brief  Q31 complex conjugate.
4944    * @param[in]  pSrc        points to the input vector
4945    * @param[out] pDst        points to the output vector
4946    * @param[in]  numSamples  number of complex samples in each vector
4947    */
4948   void riscv_cmplx_conj_q31(
4949   const q31_t * pSrc,
4950         q31_t * pDst,
4951         uint32_t numSamples);
4952 
4953 
4954   /**
4955    * @brief  Q15 complex conjugate.
4956    * @param[in]  pSrc        points to the input vector
4957    * @param[out] pDst        points to the output vector
4958    * @param[in]  numSamples  number of complex samples in each vector
4959    */
4960   void riscv_cmplx_conj_q15(
4961   const q15_t * pSrc,
4962         q15_t * pDst,
4963         uint32_t numSamples);
4964 
4965 
4966   /**
4967    * @brief  Floating-point complex magnitude squared
4968    * @param[in]  pSrc        points to the complex input vector
4969    * @param[out] pDst        points to the real output vector
4970    * @param[in]  numSamples  number of complex samples in the input vector
4971    */
4972   void riscv_cmplx_mag_squared_f32(
4973   const float32_t * pSrc,
4974         float32_t * pDst,
4975         uint32_t numSamples);
4976 
4977 
4978   /**
4979    * @brief  Q31 complex magnitude squared
4980    * @param[in]  pSrc        points to the complex input vector
4981    * @param[out] pDst        points to the real output vector
4982    * @param[in]  numSamples  number of complex samples in the input vector
4983    */
4984   void riscv_cmplx_mag_squared_q31(
4985   const q31_t * pSrc,
4986         q31_t * pDst,
4987         uint32_t numSamples);
4988 
4989 
4990   /**
4991    * @brief  Q15 complex magnitude squared
4992    * @param[in]  pSrc        points to the complex input vector
4993    * @param[out] pDst        points to the real output vector
4994    * @param[in]  numSamples  number of complex samples in the input vector
4995    */
4996   void riscv_cmplx_mag_squared_q15(
4997   const q15_t * pSrc,
4998         q15_t * pDst,
4999         uint32_t numSamples);
5000 
5001 
5002  /**
5003    * @ingroup groupController
5004    */
5005 
5006   /**
5007    * @defgroup PID PID Motor Control
5008    *
5009    * A Proportional Integral Derivative (PID) controller is a generic feedback control
5010    * loop mechanism widely used in industrial control systems.
5011    * A PID controller is the most commonly used type of feedback controller.
5012    *
5013    * This set of functions implements (PID) controllers
5014    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
5015    * of data and each call to the function returns a single processed value.
5016    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
5017    * is the input sample value. The functions return the output value.
5018    *
5019    * \par Algorithm:
5020    * <pre>
5021    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
5022    *    A0 = Kp + Ki + Kd
5023    *    A1 = (-Kp ) - (2 * Kd )
5024    *    A2 = Kd
5025    * </pre>
5026    *
5027    * \par
5028    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
5029    *
5030    * \par
5031    * \image html PID.png "Proportional Integral Derivative Controller"
5032    *
5033    * \par
5034    * The PID controller calculates an "error" value as the difference between
5035    * the measured output and the reference input.
5036    * The controller attempts to minimize the error by adjusting the process control inputs.
5037    * The proportional value determines the reaction to the current error,
5038    * the integral value determines the reaction based on the sum of recent errors,
5039    * and the derivative value determines the reaction based on the rate at which the error has been changing.
5040    *
5041    * \par Instance Structure
5042    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
5043    * A separate instance structure must be defined for each PID Controller.
5044    * There are separate instance structure declarations for each of the 3 supported data types.
5045    *
5046    * \par Reset Functions
5047    * There is also an associated reset function for each data type which clears the state array.
5048    *
5049    * \par Initialization Functions
5050    * There is also an associated initialization function for each data type.
5051    * The initialization function performs the following operations:
5052    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
5053    * - Zeros out the values in the state buffer.
5054    *
5055    * \par
5056    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
5057    *
5058    * \par Fixed-Point Behavior
5059    * Care must be taken when using the fixed-point versions of the PID Controller functions.
5060    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
5061    * Refer to the function specific documentation below for usage guidelines.
5062    */
5063 
5064   /**
5065    * @addtogroup PID
5066    * @{
5067    */
5068 
5069   /**
5070    * @brief         Process function for the floating-point PID Control.
5071    * @param[in,out] S   is an instance of the floating-point PID Control structure
5072    * @param[in]     in  input sample to process
5073    * @return        processed output sample.
5074    */
riscv_pid_f32(riscv_pid_instance_f32 * S,float32_t in)5075   __STATIC_FORCEINLINE float32_t riscv_pid_f32(
5076   riscv_pid_instance_f32 * S,
5077   float32_t in)
5078   {
5079     float32_t out;
5080 
5081     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
5082     out = (S->A0 * in) +
5083       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
5084 
5085     /* Update state */
5086     S->state[1] = S->state[0];
5087     S->state[0] = in;
5088     S->state[2] = out;
5089 
5090     /* return to application */
5091     return (out);
5092 
5093   }
5094 
5095 /**
5096   @brief         Process function for the Q31 PID Control.
5097   @param[in,out] S  points to an instance of the Q31 PID Control structure
5098   @param[in]     in  input sample to process
5099   @return        processed output sample.
5100 
5101   \par Scaling and Overflow Behavior
5102          The function is implemented using an internal 64-bit accumulator.
5103          The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
5104          Thus, if the accumulator result overflows it wraps around rather than clip.
5105          In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
5106          After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
5107  */
riscv_pid_q31(riscv_pid_instance_q31 * S,q31_t in)5108 __STATIC_FORCEINLINE q31_t riscv_pid_q31(
5109   riscv_pid_instance_q31 * S,
5110   q31_t in)
5111   {
5112     q63_t acc;
5113     q31_t out;
5114 
5115     /* acc = A0 * x[n]  */
5116     acc = (q63_t) S->A0 * in;
5117 
5118     /* acc += A1 * x[n-1] */
5119     acc += (q63_t) S->A1 * S->state[0];
5120 
5121     /* acc += A2 * x[n-2]  */
5122     acc += (q63_t) S->A2 * S->state[1];
5123 
5124     /* convert output to 1.31 format to add y[n-1] */
5125     out = (q31_t) (acc >> 31U);
5126 
5127     /* out += y[n-1] */
5128     out += S->state[2];
5129 
5130     /* Update state */
5131     S->state[1] = S->state[0];
5132     S->state[0] = in;
5133     S->state[2] = out;
5134 
5135     /* return to application */
5136     return (out);
5137   }
5138 
5139 
5140 /**
5141   @brief         Process function for the Q15 PID Control.
5142   @param[in,out] S   points to an instance of the Q15 PID Control structure
5143   @param[in]     in  input sample to process
5144   @return        processed output sample.
5145 
5146   \par Scaling and Overflow Behavior
5147          The function is implemented using a 64-bit internal accumulator.
5148          Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
5149          The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
5150          There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
5151          After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
5152          Lastly, the accumulator is saturated to yield a result in 1.15 format.
5153  */
riscv_pid_q15(riscv_pid_instance_q15 * S,q15_t in)5154 __STATIC_FORCEINLINE q15_t riscv_pid_q15(
5155   riscv_pid_instance_q15 * S,
5156   q15_t in)
5157   {
5158     q63_t acc;
5159     q15_t out;
5160 
5161 #if defined (RISCV_MATH_DSP)
5162     /* Implementation of PID controller */
5163 
5164     /* acc = A0 * x[n]  */
5165     acc = (q31_t) __RV_KMDA((uint32_t)S->A0, (uint32_t)in);
5166 
5167     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5168     acc = (q63_t)__RV_SMALDA((uint64_t)acc, (uint32_t)S->A1, (uint32_t)read_q15x2 (S->state));
5169 #else
5170     /* acc = A0 * x[n]  */
5171     acc = ((q31_t) S->A0) * in;
5172 
5173     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5174     acc += (q31_t) S->A1 * S->state[0];
5175     acc += (q31_t) S->A2 * S->state[1];
5176 #endif
5177 
5178     /* acc += y[n-1] */
5179     acc += (q31_t) S->state[2] << 15;
5180 
5181     /* saturate the output */
5182     out = (q15_t) (__SSAT((acc >> 15), 16));
5183 
5184     /* Update state */
5185     S->state[1] = S->state[0];
5186     S->state[0] = in;
5187     S->state[2] = out;
5188 
5189     /* return to application */
5190     return (out);
5191   }
5192 
5193   /**
5194    * @} end of PID group
5195    */
5196 
5197 
5198   /**
5199    * @brief Floating-point matrix inverse.
5200    * @param[in]  src   points to the instance of the input floating-point matrix structure.
5201    * @param[out] dst   points to the instance of the output floating-point matrix structure.
5202    * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
5203    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status RISCV_MATH_SINGULAR.
5204    */
5205   riscv_status riscv_mat_inverse_f32(
5206   const riscv_matrix_instance_f32 * src,
5207   riscv_matrix_instance_f32 * dst);
5208 
5209 
5210   /**
5211    * @brief Floating-point (f64) matrix inverse.
5212    * @param[in]  src   points to the instance of the input floating-point matrix structure.
5213    * @param[out] dst   points to the instance of the output floating-point matrix structure.
5214    * @return The function returns RISCV_MATH_SIZE_MISMATCH, if the dimensions do not match.
5215    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status RISCV_MATH_SINGULAR.
5216    */
5217   riscv_status riscv_mat_inverse_f64(
5218   const riscv_matrix_instance_f64 * src,
5219   riscv_matrix_instance_f64 * dst);
5220 
5221 
5222 
5223   /**
5224    * @ingroup groupController
5225    */
5226 
5227   /**
5228    * @defgroup clarke Vector Clarke Transform
5229    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
5230    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
5231    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
5232    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
5233    * \image html clarke.png "Stator current space vector and its components in (a,b)."
5234    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
5235    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
5236    *
5237    * The function operates on a single sample of data and each call to the function returns the processed output.
5238    * The library provides separate functions for Q31 and floating-point data types.
5239    * \par Algorithm
5240    * \image html clarkeFormula.png
5241    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
5242    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
5243    * \par Fixed-Point Behavior
5244    * Care must be taken when using the Q31 version of the Clarke transform.
5245    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5246    * Refer to the function specific documentation below for usage guidelines.
5247    */
5248 
5249   /**
5250    * @addtogroup clarke
5251    * @{
5252    */
5253 
5254   /**
5255    *
5256    * @brief  Floating-point Clarke transform
5257    * @param[in]  Ia       input three-phase coordinate <code>a</code>
5258    * @param[in]  Ib       input three-phase coordinate <code>b</code>
5259    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5260    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5261    * @return        none
5262    */
riscv_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)5263   __STATIC_FORCEINLINE void riscv_clarke_f32(
5264   float32_t Ia,
5265   float32_t Ib,
5266   float32_t * pIalpha,
5267   float32_t * pIbeta)
5268   {
5269     /* Calculate pIalpha using the equation, pIalpha = Ia */
5270     *pIalpha = Ia;
5271 
5272     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5273     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
5274   }
5275 
5276 
5277 /**
5278   @brief  Clarke transform for Q31 version
5279   @param[in]  Ia       input three-phase coordinate <code>a</code>
5280   @param[in]  Ib       input three-phase coordinate <code>b</code>
5281   @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5282   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5283   @return     none
5284 
5285   \par Scaling and Overflow Behavior
5286          The function is implemented using an internal 32-bit accumulator.
5287          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5288          There is saturation on the addition, hence there is no risk of overflow.
5289  */
riscv_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)5290 __STATIC_FORCEINLINE void riscv_clarke_q31(
5291   q31_t Ia,
5292   q31_t Ib,
5293   q31_t * pIalpha,
5294   q31_t * pIbeta)
5295   {
5296     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5297 
5298     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5299     *pIalpha = Ia;
5300 
5301     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5302     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5303 
5304     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5305     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5306 
5307     /* pIbeta is calculated by adding the intermediate products */
5308     *pIbeta = __QADD(product1, product2);
5309   }
5310 
5311   /**
5312    * @} end of clarke group
5313    */
5314 
5315 
5316   /**
5317    * @ingroup groupController
5318    */
5319 
5320   /**
5321    * @defgroup inv_clarke Vector Inverse Clarke Transform
5322    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5323    *
5324    * The function operates on a single sample of data and each call to the function returns the processed output.
5325    * The library provides separate functions for Q31 and floating-point data types.
5326    * \par Algorithm
5327    * \image html clarkeInvFormula.png
5328    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5329    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5330    * \par Fixed-Point Behavior
5331    * Care must be taken when using the Q31 version of the inverse Clarke transform.
5332    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5333    * Refer to the function specific documentation below for usage guidelines.
5334    */
5335 
5336   /**
5337    * @addtogroup inv_clarke
5338    * @{
5339    */
5340 
5341    /**
5342    * @brief  Floating-point Inverse Clarke transform
5343    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
5344    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
5345    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
5346    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
5347    * @return     none
5348    */
riscv_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5349   __STATIC_FORCEINLINE void riscv_inv_clarke_f32(
5350   float32_t Ialpha,
5351   float32_t Ibeta,
5352   float32_t * pIa,
5353   float32_t * pIb)
5354   {
5355     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5356     *pIa = Ialpha;
5357 
5358     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5359     *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
5360   }
5361 
5362 
5363 /**
5364   @brief  Inverse Clarke transform for Q31 version
5365   @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
5366   @param[in]  Ibeta   input two-phase orthogonal vector axis beta
5367   @param[out] pIa     points to output three-phase coordinate <code>a</code>
5368   @param[out] pIb     points to output three-phase coordinate <code>b</code>
5369   @return     none
5370 
5371   \par Scaling and Overflow Behavior
5372          The function is implemented using an internal 32-bit accumulator.
5373          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5374          There is saturation on the subtraction, hence there is no risk of overflow.
5375  */
riscv_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5376 __STATIC_FORCEINLINE void riscv_inv_clarke_q31(
5377   q31_t Ialpha,
5378   q31_t Ibeta,
5379   q31_t * pIa,
5380   q31_t * pIb)
5381   {
5382     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5383 
5384     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5385     *pIa = Ialpha;
5386 
5387     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5388     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5389 
5390     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5391     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5392 
5393     /* pIb is calculated by subtracting the products */
5394     *pIb = __QSUB(product2, product1);
5395   }
5396 
5397   /**
5398    * @} end of inv_clarke group
5399    */
5400 
5401 
5402 
5403   /**
5404    * @ingroup groupController
5405    */
5406 
5407   /**
5408    * @defgroup park Vector Park Transform
5409    *
5410    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5411    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5412    * from the stationary to the moving reference frame and control the spatial relationship between
5413    * the stator vector current and rotor flux vector.
5414    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5415    * current vector and the relationship from the two reference frames:
5416    * \image html park.png "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5417    *
5418    * The function operates on a single sample of data and each call to the function returns the processed output.
5419    * The library provides separate functions for Q31 and floating-point data types.
5420    * \par Algorithm
5421    * \image html parkFormula.png
5422    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5423    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5424    * cosine and sine values of theta (rotor flux position).
5425    * \par Fixed-Point Behavior
5426    * Care must be taken when using the Q31 version of the Park transform.
5427    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5428    * Refer to the function specific documentation below for usage guidelines.
5429    */
5430 
5431   /**
5432    * @addtogroup park
5433    * @{
5434    */
5435 
5436   /**
5437    * @brief Floating-point Park transform
5438    * @param[in]  Ialpha  input two-phase vector coordinate alpha
5439    * @param[in]  Ibeta   input two-phase vector coordinate beta
5440    * @param[out] pId     points to output   rotor reference frame d
5441    * @param[out] pIq     points to output   rotor reference frame q
5442    * @param[in]  sinVal  sine value of rotation angle theta
5443    * @param[in]  cosVal  cosine value of rotation angle theta
5444    * @return     none
5445    *
5446    * The function implements the forward Park transform.
5447    *
5448    */
riscv_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5449   __STATIC_FORCEINLINE void riscv_park_f32(
5450   float32_t Ialpha,
5451   float32_t Ibeta,
5452   float32_t * pId,
5453   float32_t * pIq,
5454   float32_t sinVal,
5455   float32_t cosVal)
5456   {
5457     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5458     *pId = Ialpha * cosVal + Ibeta * sinVal;
5459 
5460     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5461     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5462   }
5463 
5464 
5465 /**
5466   @brief  Park transform for Q31 version
5467   @param[in]  Ialpha  input two-phase vector coordinate alpha
5468   @param[in]  Ibeta   input two-phase vector coordinate beta
5469   @param[out] pId     points to output rotor reference frame d
5470   @param[out] pIq     points to output rotor reference frame q
5471   @param[in]  sinVal  sine value of rotation angle theta
5472   @param[in]  cosVal  cosine value of rotation angle theta
5473   @return     none
5474 
5475   \par Scaling and Overflow Behavior
5476          The function is implemented using an internal 32-bit accumulator.
5477          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5478          There is saturation on the addition and subtraction, hence there is no risk of overflow.
5479  */
riscv_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5480 __STATIC_FORCEINLINE void riscv_park_q31(
5481   q31_t Ialpha,
5482   q31_t Ibeta,
5483   q31_t * pId,
5484   q31_t * pIq,
5485   q31_t sinVal,
5486   q31_t cosVal)
5487   {
5488     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5489     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5490 
5491     /* Intermediate product is calculated by (Ialpha * cosVal) */
5492     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5493 
5494     /* Intermediate product is calculated by (Ibeta * sinVal) */
5495     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5496 
5497 
5498     /* Intermediate product is calculated by (Ialpha * sinVal) */
5499     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5500 
5501     /* Intermediate product is calculated by (Ibeta * cosVal) */
5502     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5503 
5504     /* Calculate pId by adding the two intermediate products 1 and 2 */
5505     *pId = __QADD(product1, product2);
5506 
5507     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5508     *pIq = __QSUB(product4, product3);
5509   }
5510 
5511   /**
5512    * @} end of park group
5513    */
5514 
5515 
5516   /**
5517    * @ingroup groupController
5518    */
5519 
5520   /**
5521    * @defgroup inv_park Vector Inverse Park transform
5522    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5523    *
5524    * The function operates on a single sample of data and each call to the function returns the processed output.
5525    * The library provides separate functions for Q31 and floating-point data types.
5526    * \par Algorithm
5527    * \image html parkInvFormula.png
5528    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5529    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5530    * cosine and sine values of theta (rotor flux position).
5531    * \par Fixed-Point Behavior
5532    * Care must be taken when using the Q31 version of the Inverse Park transform.
5533    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5534    * Refer to the function specific documentation below for usage guidelines.
5535    */
5536 
5537   /**
5538    * @addtogroup inv_park
5539    * @{
5540    */
5541 
5542    /**
5543    * @brief  Floating-point Inverse Park transform
5544    * @param[in]  Id       input coordinate of rotor reference frame d
5545    * @param[in]  Iq       input coordinate of rotor reference frame q
5546    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5547    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5548    * @param[in]  sinVal   sine value of rotation angle theta
5549    * @param[in]  cosVal   cosine value of rotation angle theta
5550    * @return     none
5551    */
riscv_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5552   __STATIC_FORCEINLINE void riscv_inv_park_f32(
5553   float32_t Id,
5554   float32_t Iq,
5555   float32_t * pIalpha,
5556   float32_t * pIbeta,
5557   float32_t sinVal,
5558   float32_t cosVal)
5559   {
5560     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5561     *pIalpha = Id * cosVal - Iq * sinVal;
5562 
5563     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5564     *pIbeta = Id * sinVal + Iq * cosVal;
5565   }
5566 
5567 
5568 /**
5569   @brief  Inverse Park transform for   Q31 version
5570   @param[in]  Id       input coordinate of rotor reference frame d
5571   @param[in]  Iq       input coordinate of rotor reference frame q
5572   @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5573   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5574   @param[in]  sinVal   sine value of rotation angle theta
5575   @param[in]  cosVal   cosine value of rotation angle theta
5576   @return     none
5577 
5578   @par Scaling and Overflow Behavior
5579          The function is implemented using an internal 32-bit accumulator.
5580          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5581          There is saturation on the addition, hence there is no risk of overflow.
5582  */
riscv_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5583 __STATIC_FORCEINLINE void riscv_inv_park_q31(
5584   q31_t Id,
5585   q31_t Iq,
5586   q31_t * pIalpha,
5587   q31_t * pIbeta,
5588   q31_t sinVal,
5589   q31_t cosVal)
5590   {
5591     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5592     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5593 
5594     /* Intermediate product is calculated by (Id * cosVal) */
5595     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5596 
5597     /* Intermediate product is calculated by (Iq * sinVal) */
5598     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5599 
5600 
5601     /* Intermediate product is calculated by (Id * sinVal) */
5602     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5603 
5604     /* Intermediate product is calculated by (Iq * cosVal) */
5605     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5606 
5607     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5608     *pIalpha = __QSUB(product1, product2);
5609 
5610     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5611     *pIbeta = __QADD(product4, product3);
5612   }
5613 
5614   /**
5615    * @} end of Inverse park group
5616    */
5617 
5618 
5619   /**
5620    * @ingroup groupInterpolation
5621    */
5622 
5623   /**
5624    * @defgroup LinearInterpolate Linear Interpolation
5625    *
5626    * Linear interpolation is a method of curve fitting using linear polynomials.
5627    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5628    *
5629    * \par
5630    * \image html LinearInterp.png "Linear interpolation"
5631    *
5632    * \par
5633    * A  Linear Interpolate function calculates an output value(y), for the input(x)
5634    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
5635    *
5636    * \par Algorithm:
5637    * <pre>
5638    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5639    *       where x0, x1 are nearest values of input x
5640    *             y0, y1 are nearest values to output y
5641    * </pre>
5642    *
5643    * \par
5644    * This set of functions implements Linear interpolation process
5645    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5646    * sample of data and each call to the function returns a single processed value.
5647    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
5648    * <code>x</code> is the input sample value. The functions returns the output value.
5649    *
5650    * \par
5651    * if x is outside of the table boundary, Linear interpolation returns first value of the table
5652    * if x is below input range and returns last value of table if x is above range.
5653    */
5654 
5655   /**
5656    * @addtogroup LinearInterpolate
5657    * @{
5658    */
5659 
5660   /**
5661    * @brief  Process function for the floating-point Linear Interpolation Function.
5662    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
5663    * @param[in]     x  input sample to process
5664    * @return y processed output sample.
5665    *
5666    */
riscv_linear_interp_f32(riscv_linear_interp_instance_f32 * S,float32_t x)5667   __STATIC_FORCEINLINE float32_t riscv_linear_interp_f32(
5668   riscv_linear_interp_instance_f32 * S,
5669   float32_t x)
5670   {
5671     float32_t y;
5672     float32_t x0, x1;                            /* Nearest input values */
5673     float32_t y0, y1;                            /* Nearest output values */
5674     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5675     int32_t i;                                   /* Index variable */
5676     float32_t *pYData = S->pYData;               /* pointer to output table */
5677 
5678     /* Calculation of index */
5679     i = (int32_t) ((x - S->x1) / xSpacing);
5680 
5681     if (i < 0)
5682     {
5683       /* Iniatilize output for below specified range as least output value of table */
5684       y = pYData[0];
5685     }
5686     else if ((uint32_t)i >= (S->nValues - 1))
5687     {
5688       /* Iniatilize output for above specified range as last output value of table */
5689       y = pYData[S->nValues - 1];
5690     }
5691     else
5692     {
5693       /* Calculation of nearest input values */
5694       x0 = S->x1 +  i      * xSpacing;
5695       x1 = S->x1 + (i + 1) * xSpacing;
5696 
5697       /* Read of nearest output values */
5698       y0 = pYData[i];
5699       y1 = pYData[i + 1];
5700 
5701       /* Calculation of output */
5702       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5703 
5704     }
5705 
5706     /* returns output value */
5707     return (y);
5708   }
5709 
5710 
5711    /**
5712    *
5713    * @brief  Process function for the Q31 Linear Interpolation Function.
5714    * @param[in] pYData   pointer to Q31 Linear Interpolation table
5715    * @param[in] x        input sample to process
5716    * @param[in] nValues  number of table values
5717    * @return y processed output sample.
5718    *
5719    * \par
5720    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5721    * This function can support maximum of table size 2^12.
5722    *
5723    */
riscv_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5724   __STATIC_FORCEINLINE q31_t riscv_linear_interp_q31(
5725   q31_t * pYData,
5726   q31_t x,
5727   uint32_t nValues)
5728   {
5729     q31_t y;                                     /* output */
5730     q31_t y0, y1;                                /* Nearest output values */
5731     q31_t fract;                                 /* fractional part */
5732     int32_t index;                               /* Index to read nearest output values */
5733 
5734     /* Input is in 12.20 format */
5735     /* 12 bits for the table index */
5736     /* Index value calculation */
5737     index = ((x & (q31_t)0xFFF00000) >> 20);
5738 
5739     if (index >= (int32_t)(nValues - 1))
5740     {
5741       return (pYData[nValues - 1]);
5742     }
5743     else if (index < 0)
5744     {
5745       return (pYData[0]);
5746     }
5747     else
5748     {
5749       /* 20 bits for the fractional part */
5750       /* shift left by 11 to keep fract in 1.31 format */
5751       fract = (x & 0x000FFFFF) << 11;
5752 
5753       /* Read two nearest output values from the index in 1.31(q31) format */
5754       y0 = pYData[index];
5755       y1 = pYData[index + 1];
5756 
5757       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5758       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5759 
5760       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5761       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5762 
5763       /* Convert y to 1.31 format */
5764       return (y << 1U);
5765     }
5766   }
5767 
5768 
5769   /**
5770    *
5771    * @brief  Process function for the Q15 Linear Interpolation Function.
5772    * @param[in] pYData   pointer to Q15 Linear Interpolation table
5773    * @param[in] x        input sample to process
5774    * @param[in] nValues  number of table values
5775    * @return y processed output sample.
5776    *
5777    * \par
5778    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5779    * This function can support maximum of table size 2^12.
5780    *
5781    */
riscv_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5782   __STATIC_FORCEINLINE q15_t riscv_linear_interp_q15(
5783   q15_t * pYData,
5784   q31_t x,
5785   uint32_t nValues)
5786   {
5787     q63_t y;                                     /* output */
5788     q15_t y0, y1;                                /* Nearest output values */
5789     q31_t fract;                                 /* fractional part */
5790     int32_t index;                               /* Index to read nearest output values */
5791 
5792     /* Input is in 12.20 format */
5793     /* 12 bits for the table index */
5794     /* Index value calculation */
5795     index = ((x & (int32_t)0xFFF00000) >> 20);
5796 
5797     if (index >= (int32_t)(nValues - 1))
5798     {
5799       return (pYData[nValues - 1]);
5800     }
5801     else if (index < 0)
5802     {
5803       return (pYData[0]);
5804     }
5805     else
5806     {
5807       /* 20 bits for the fractional part */
5808       /* fract is in 12.20 format */
5809       fract = (x & 0x000FFFFF);
5810 
5811       /* Read two nearest output values from the index */
5812       y0 = pYData[index];
5813       y1 = pYData[index + 1];
5814 
5815       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5816       y = ((q63_t) y0 * (0xFFFFF - fract));
5817 
5818       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5819       y += ((q63_t) y1 * (fract));
5820 
5821       /* convert y to 1.15 format */
5822       return (q15_t) (y >> 20);
5823     }
5824   }
5825 
5826 
5827   /**
5828    *
5829    * @brief  Process function for the Q7 Linear Interpolation Function.
5830    * @param[in] pYData   pointer to Q7 Linear Interpolation table
5831    * @param[in] x        input sample to process
5832    * @param[in] nValues  number of table values
5833    * @return y processed output sample.
5834    *
5835    * \par
5836    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5837    * This function can support maximum of table size 2^12.
5838    */
riscv_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5839   __STATIC_FORCEINLINE q7_t riscv_linear_interp_q7(
5840   q7_t * pYData,
5841   q31_t x,
5842   uint32_t nValues)
5843   {
5844     q31_t y;                                     /* output */
5845     q7_t y0, y1;                                 /* Nearest output values */
5846     q31_t fract;                                 /* fractional part */
5847     uint32_t index;                              /* Index to read nearest output values */
5848 
5849     /* Input is in 12.20 format */
5850     /* 12 bits for the table index */
5851     /* Index value calculation */
5852     if (x < 0)
5853     {
5854       return (pYData[0]);
5855     }
5856     index = (x >> 20) & 0xfff;
5857 
5858     if (index >= (nValues - 1))
5859     {
5860       return (pYData[nValues - 1]);
5861     }
5862     else
5863     {
5864       /* 20 bits for the fractional part */
5865       /* fract is in 12.20 format */
5866       fract = (x & 0x000FFFFF);
5867 
5868       /* Read two nearest output values from the index and are in 1.7(q7) format */
5869       y0 = pYData[index];
5870       y1 = pYData[index + 1];
5871 
5872       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5873       y = ((y0 * (0xFFFFF - fract)));
5874 
5875       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5876       y += (y1 * fract);
5877 
5878       /* convert y to 1.7(q7) format */
5879       return (q7_t) (y >> 20);
5880      }
5881   }
5882 
5883   /**
5884    * @} end of LinearInterpolate group
5885    */
5886 
5887   /**
5888    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5889    * @param[in] x  input value in radians.
5890    * @return  sin(x).
5891    */
5892   float32_t riscv_sin_f32(
5893   float32_t x);
5894 
5895 
5896   /**
5897    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5898    * @param[in] x  Scaled input value in radians.
5899    * @return  sin(x).
5900    */
5901   q31_t riscv_sin_q31(
5902   q31_t x);
5903 
5904 
5905   /**
5906    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5907    * @param[in] x  Scaled input value in radians.
5908    * @return  sin(x).
5909    */
5910   q15_t riscv_sin_q15(
5911   q15_t x);
5912 
5913 
5914   /**
5915    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5916    * @param[in] x  input value in radians.
5917    * @return  cos(x).
5918    */
5919   float32_t riscv_cos_f32(
5920   float32_t x);
5921 
5922 
5923   /**
5924    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5925    * @param[in] x  Scaled input value in radians.
5926    * @return  cos(x).
5927    */
5928   q31_t riscv_cos_q31(
5929   q31_t x);
5930 
5931 
5932   /**
5933    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5934    * @param[in] x  Scaled input value in radians.
5935    * @return  cos(x).
5936    */
5937   q15_t riscv_cos_q15(
5938   q15_t x);
5939 
5940 
5941   /**
5942    * @ingroup groupFastMath
5943    */
5944 
5945 
5946   /**
5947    * @defgroup SQRT Square Root
5948    *
5949    * Computes the square root of a number.
5950    * There are separate functions for Q15, Q31, and floating-point data types.
5951    * The square root function is computed using the Newton-Raphson algorithm.
5952    * This is an iterative algorithm of the form:
5953    * <pre>
5954    *      x1 = x0 - f(x0)/f'(x0)
5955    * </pre>
5956    * where <code>x1</code> is the current estimate,
5957    * <code>x0</code> is the previous estimate, and
5958    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5959    * For the square root function, the algorithm reduces to:
5960    * <pre>
5961    *     x0 = in/2                         [initial guess]
5962    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5963    * </pre>
5964    */
5965 
5966 
5967   /**
5968    * @addtogroup SQRT
5969    * @{
5970    */
5971 
5972 /**
5973   @brief         Floating-point square root function.
5974   @param[in]     in    input value
5975   @param[out]    pOut  square root of input value
5976   @return        execution status
5977                    - \ref RISCV_MATH_SUCCESS        : input value is positive
5978                    - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
5979  */
riscv_sqrt_f32(float32_t in,float32_t * pOut)5980 __STATIC_FORCEINLINE riscv_status riscv_sqrt_f32(
5981   float32_t in,
5982   float32_t * pOut)
5983   {
5984 
5985     if (in >= 0.0f)
5986     {
5987 #if defined ( __riscv_flen )
5988       __ASM volatile("fsqrt.s %0, %1" : "=f"(*pOut) : "f"(in));
5989 #else
5990       *pOut = sqrtf(in);
5991 #endif /*__riscv_flen*/
5992 
5993       return (RISCV_MATH_SUCCESS);
5994     }
5995     else
5996     {
5997       *pOut = 0.0f;
5998       return (RISCV_MATH_ARGUMENT_ERROR);
5999     }
6000   }
6001 
6002 
6003 /**
6004   @brief         Q31 square root function.
6005   @param[in]     in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
6006   @param[out]    pOut  points to square root of input value
6007   @return        execution status
6008                    - \ref RISCV_MATH_SUCCESS        : input value is positive
6009                    - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
6010  */
6011 riscv_status riscv_sqrt_q31(
6012   q31_t in,
6013   q31_t * pOut);
6014 
6015 
6016 /**
6017   @brief         Q15 square root function.
6018   @param[in]     in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF
6019   @param[out]    pOut  points to square root of input value
6020   @return        execution status
6021                    - \ref RISCV_MATH_SUCCESS        : input value is positive
6022                    - \ref RISCV_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
6023  */
6024 riscv_status riscv_sqrt_q15(
6025   q15_t in,
6026   q15_t * pOut);
6027 
6028   /**
6029    * @brief  Vector Floating-point square root function.
6030    * @param[in]  pIn   input vector.
6031    * @param[out] pOut  vector of square roots of input elements.
6032    * @param[in]  len   length of input vector.
6033    * @return none. The prototype returns void, so no status code is reported;
6034    * negative input elements are documented to produce a zero output element.
6035    */
6036   void riscv_vsqrt_f32(
6037   float32_t * pIn,
6038   float32_t * pOut,
6039   uint16_t len);
6040 
6041   void riscv_vsqrt_q31(
6042   q31_t * pIn,
6043   q31_t * pOut,
6044   uint16_t len);
6045 
6046   void riscv_vsqrt_q15(
6047   q15_t * pIn,
6048   q15_t * pOut,
6049   uint16_t len);
6050 
6051   /**
6052    * @} end of SQRT group
6053    */
6054 
6055 
6056   /**
6057    * @brief floating-point Circular write function.
6058    */
riscv_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)6059   __STATIC_FORCEINLINE void riscv_circularWrite_f32(
6060   int32_t * circBuffer,
6061   int32_t L,
6062   uint16_t * writeOffset,
6063   int32_t bufferInc,
6064   const int32_t * src,
6065   int32_t srcInc,
6066   uint32_t blockSize)
6067   {
6068     uint32_t i = 0U;
6069     int32_t wOffset;
6070 
6071     /* Copy the value of Index pointer that points
6072      * to the current location where the input samples to be copied */
6073     wOffset = *writeOffset;
6074 
6075     /* Loop over the blockSize */
6076     i = blockSize;
6077 
6078     while (i > 0U)
6079     {
6080       /* copy the input sample to the circular buffer */
6081       circBuffer[wOffset] = *src;
6082 
6083       /* Update the input pointer */
6084       src += srcInc;
6085 
6086       /* Circularly update wOffset.  Watch out for positive and negative value */
6087       wOffset += bufferInc;
6088       if (wOffset >= L)
6089         wOffset -= L;
6090 
6091       /* Decrement the loop counter */
6092       i--;
6093     }
6094 
6095     /* Update the index pointer */
6096     *writeOffset = (uint16_t)wOffset;
6097   }
6098 
6099 
6100 
6101   /**
6102    * @brief floating-point Circular Read function.
6103    */
riscv_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6104   __STATIC_FORCEINLINE void riscv_circularRead_f32(
6105   int32_t * circBuffer,
6106   int32_t L,
6107   int32_t * readOffset,
6108   int32_t bufferInc,
6109   int32_t * dst,
6110   int32_t * dst_base,
6111   int32_t dst_length,
6112   int32_t dstInc,
6113   uint32_t blockSize)
6114   {
6115     uint32_t i = 0U;
6116     int32_t rOffset;
6117     int32_t* dst_end;
6118 
6119     /* Copy the value of Index pointer that points
6120      * to the current location from where the input samples to be read */
6121     rOffset = *readOffset;
6122     dst_end = dst_base + dst_length;
6123 
6124     /* Loop over the blockSize */
6125     i = blockSize;
6126 
6127     while (i > 0U)
6128     {
6129       /* copy the sample from the circular buffer to the destination buffer */
6130       *dst = circBuffer[rOffset];
6131 
6132       /* Update the input pointer */
6133       dst += dstInc;
6134 
6135       if (dst == dst_end)
6136       {
6137         dst = dst_base;
6138       }
6139 
6140       /* Circularly update rOffset.  Watch out for positive and negative value  */
6141       rOffset += bufferInc;
6142 
6143       if (rOffset >= L)
6144       {
6145         rOffset -= L;
6146       }
6147 
6148       /* Decrement the loop counter */
6149       i--;
6150     }
6151 
6152     /* Update the index pointer */
6153     *readOffset = rOffset;
6154   }
6155 
6156 
6157   /**
6158    * @brief Q15 Circular write function.
6159    */
riscv_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)6160   __STATIC_FORCEINLINE void riscv_circularWrite_q15(
6161   q15_t * circBuffer,
6162   int32_t L,
6163   uint16_t * writeOffset,
6164   int32_t bufferInc,
6165   const q15_t * src,
6166   int32_t srcInc,
6167   uint32_t blockSize)
6168   {
6169     uint32_t i = 0U;
6170     int32_t wOffset;
6171 
6172     /* Copy the value of Index pointer that points
6173      * to the current location where the input samples to be copied */
6174     wOffset = *writeOffset;
6175 
6176     /* Loop over the blockSize */
6177     i = blockSize;
6178 
6179     while (i > 0U)
6180     {
6181       /* copy the input sample to the circular buffer */
6182       circBuffer[wOffset] = *src;
6183 
6184       /* Update the input pointer */
6185       src += srcInc;
6186 
6187       /* Circularly update wOffset.  Watch out for positive and negative value */
6188       wOffset += bufferInc;
6189       if (wOffset >= L)
6190         wOffset -= L;
6191 
6192       /* Decrement the loop counter */
6193       i--;
6194     }
6195 
6196     /* Update the index pointer */
6197     *writeOffset = (uint16_t)wOffset;
6198   }
6199 
6200 
6201   /**
6202    * @brief Q15 Circular Read function.
6203    */
riscv_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6204   __STATIC_FORCEINLINE void riscv_circularRead_q15(
6205   q15_t * circBuffer,
6206   int32_t L,
6207   int32_t * readOffset,
6208   int32_t bufferInc,
6209   q15_t * dst,
6210   q15_t * dst_base,
6211   int32_t dst_length,
6212   int32_t dstInc,
6213   uint32_t blockSize)
6214   {
6215     uint32_t i = 0;
6216     int32_t rOffset;
6217     q15_t* dst_end;
6218 
6219     /* Copy the value of Index pointer that points
6220      * to the current location from where the input samples to be read */
6221     rOffset = *readOffset;
6222 
6223     dst_end = dst_base + dst_length;
6224 
6225     /* Loop over the blockSize */
6226     i = blockSize;
6227 
6228     while (i > 0U)
6229     {
6230       /* copy the sample from the circular buffer to the destination buffer */
6231       *dst = circBuffer[rOffset];
6232 
6233       /* Update the input pointer */
6234       dst += dstInc;
6235 
6236       if (dst == dst_end)
6237       {
6238         dst = dst_base;
6239       }
6240 
6241       /* Circularly update wOffset.  Watch out for positive and negative value */
6242       rOffset += bufferInc;
6243 
6244       if (rOffset >= L)
6245       {
6246         rOffset -= L;
6247       }
6248 
6249       /* Decrement the loop counter */
6250       i--;
6251     }
6252 
6253     /* Update the index pointer */
6254     *readOffset = rOffset;
6255   }
6256 
6257 
6258   /**
6259    * @brief Q7 Circular write function.
6260    */
riscv_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)6261   __STATIC_FORCEINLINE void riscv_circularWrite_q7(
6262   q7_t * circBuffer,
6263   int32_t L,
6264   uint16_t * writeOffset,
6265   int32_t bufferInc,
6266   const q7_t * src,
6267   int32_t srcInc,
6268   uint32_t blockSize)
6269   {
6270     uint32_t i = 0U;
6271     int32_t wOffset;
6272 
6273     /* Copy the value of Index pointer that points
6274      * to the current location where the input samples to be copied */
6275     wOffset = *writeOffset;
6276 
6277     /* Loop over the blockSize */
6278     i = blockSize;
6279 
6280     while (i > 0U)
6281     {
6282       /* copy the input sample to the circular buffer */
6283       circBuffer[wOffset] = *src;
6284 
6285       /* Update the input pointer */
6286       src += srcInc;
6287 
6288       /* Circularly update wOffset.  Watch out for positive and negative value */
6289       wOffset += bufferInc;
6290       if (wOffset >= L)
6291         wOffset -= L;
6292 
6293       /* Decrement the loop counter */
6294       i--;
6295     }
6296 
6297     /* Update the index pointer */
6298     *writeOffset = (uint16_t)wOffset;
6299   }
6300 
6301 
6302   /**
6303    * @brief Q7 Circular Read function.
6304    */
riscv_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6305   __STATIC_FORCEINLINE void riscv_circularRead_q7(
6306   q7_t * circBuffer,
6307   int32_t L,
6308   int32_t * readOffset,
6309   int32_t bufferInc,
6310   q7_t * dst,
6311   q7_t * dst_base,
6312   int32_t dst_length,
6313   int32_t dstInc,
6314   uint32_t blockSize)
6315   {
6316     uint32_t i = 0;
6317     int32_t rOffset;
6318     q7_t* dst_end;
6319 
6320     /* Copy the value of Index pointer that points
6321      * to the current location from where the input samples to be read */
6322     rOffset = *readOffset;
6323 
6324     dst_end = dst_base + dst_length;
6325 
6326     /* Loop over the blockSize */
6327     i = blockSize;
6328 
6329     while (i > 0U)
6330     {
6331       /* copy the sample from the circular buffer to the destination buffer */
6332       *dst = circBuffer[rOffset];
6333 
6334       /* Update the input pointer */
6335       dst += dstInc;
6336 
6337       if (dst == dst_end)
6338       {
6339         dst = dst_base;
6340       }
6341 
6342       /* Circularly update rOffset.  Watch out for positive and negative value */
6343       rOffset += bufferInc;
6344 
6345       if (rOffset >= L)
6346       {
6347         rOffset -= L;
6348       }
6349 
6350       /* Decrement the loop counter */
6351       i--;
6352     }
6353 
6354     /* Update the index pointer */
6355     *readOffset = rOffset;
6356   }
6357 
6358 
6359   /**
6360    * @brief  Sum of the squares of the elements of a Q31 vector.
6361    * @param[in]  pSrc       is input pointer
6362    * @param[in]  blockSize  is the number of samples to process
6363    * @param[out] pResult    is output value.
6364    */
6365   void riscv_power_q31(
6366   const q31_t * pSrc,
6367         uint32_t blockSize,
6368         q63_t * pResult);
6369 
6370 
6371   /**
6372    * @brief  Sum of the squares of the elements of a floating-point vector.
6373    * @param[in]  pSrc       is input pointer
6374    * @param[in]  blockSize  is the number of samples to process
6375    * @param[out] pResult    is output value.
6376    */
6377   void riscv_power_f32(
6378   const float32_t * pSrc,
6379         uint32_t blockSize,
6380         float32_t * pResult);
6381 
6382 
6383   /**
6384    * @brief  Sum of the squares of the elements of a Q15 vector.
6385    * @param[in]  pSrc       is input pointer
6386    * @param[in]  blockSize  is the number of samples to process
6387    * @param[out] pResult    is output value.
6388    */
6389   void riscv_power_q15(
6390   const q15_t * pSrc,
6391         uint32_t blockSize,
6392         q63_t * pResult);
6393 
6394 
6395   /**
6396    * @brief  Sum of the squares of the elements of a Q7 vector.
6397    * @param[in]  pSrc       is input pointer
6398    * @param[in]  blockSize  is the number of samples to process
6399    * @param[out] pResult    is output value.
6400    */
6401   void riscv_power_q7(
6402   const q7_t * pSrc,
6403         uint32_t blockSize,
6404         q31_t * pResult);
6405 
6406 
6407   /**
6408    * @brief  Mean value of a Q7 vector.
6409    * @param[in]  pSrc       is input pointer
6410    * @param[in]  blockSize  is the number of samples to process
6411    * @param[out] pResult    is output value.
6412    */
6413   void riscv_mean_q7(
6414   const q7_t * pSrc,
6415         uint32_t blockSize,
6416         q7_t * pResult);
6417 
6418 
6419   /**
6420    * @brief  Mean value of a Q15 vector.
6421    * @param[in]  pSrc       is input pointer
6422    * @param[in]  blockSize  is the number of samples to process
6423    * @param[out] pResult    is output value.
6424    */
6425   void riscv_mean_q15(
6426   const q15_t * pSrc,
6427         uint32_t blockSize,
6428         q15_t * pResult);
6429 
6430 
6431   /**
6432    * @brief  Mean value of a Q31 vector.
6433    * @param[in]  pSrc       is input pointer
6434    * @param[in]  blockSize  is the number of samples to process
6435    * @param[out] pResult    is output value.
6436    */
6437   void riscv_mean_q31(
6438   const q31_t * pSrc,
6439         uint32_t blockSize,
6440         q31_t * pResult);
6441 
6442 
6443   /**
6444    * @brief  Mean value of a floating-point vector.
6445    * @param[in]  pSrc       is input pointer
6446    * @param[in]  blockSize  is the number of samples to process
6447    * @param[out] pResult    is output value.
6448    */
6449   void riscv_mean_f32(
6450   const float32_t * pSrc,
6451         uint32_t blockSize,
6452         float32_t * pResult);
6453 
6454 
6455   /**
6456    * @brief  Variance of the elements of a floating-point vector.
6457    * @param[in]  pSrc       is input pointer
6458    * @param[in]  blockSize  is the number of samples to process
6459    * @param[out] pResult    is output value.
6460    */
6461   void riscv_var_f32(
6462   const float32_t * pSrc,
6463         uint32_t blockSize,
6464         float32_t * pResult);
6465 
6466 
6467   /**
6468    * @brief  Variance of the elements of a Q31 vector.
6469    * @param[in]  pSrc       is input pointer
6470    * @param[in]  blockSize  is the number of samples to process
6471    * @param[out] pResult    is output value.
6472    */
6473   void riscv_var_q31(
6474   const q31_t * pSrc,
6475         uint32_t blockSize,
6476         q31_t * pResult);
6477 
6478 
6479   /**
6480    * @brief  Variance of the elements of a Q15 vector.
6481    * @param[in]  pSrc       is input pointer
6482    * @param[in]  blockSize  is the number of samples to process
6483    * @param[out] pResult    is output value.
6484    */
6485   void riscv_var_q15(
6486   const q15_t * pSrc,
6487         uint32_t blockSize,
6488         q15_t * pResult);
6489 
6490 
6491   /**
6492    * @brief  Root Mean Square of the elements of a floating-point vector.
6493    * @param[in]  pSrc       is input pointer
6494    * @param[in]  blockSize  is the number of samples to process
6495    * @param[out] pResult    is output value.
6496    */
6497   void riscv_rms_f32(
6498   const float32_t * pSrc,
6499         uint32_t blockSize,
6500         float32_t * pResult);
6501 
6502 
6503   /**
6504    * @brief  Root Mean Square of the elements of a Q31 vector.
6505    * @param[in]  pSrc       is input pointer
6506    * @param[in]  blockSize  is the number of samples to process
6507    * @param[out] pResult    is output value.
6508    */
6509   void riscv_rms_q31(
6510   const q31_t * pSrc,
6511         uint32_t blockSize,
6512         q31_t * pResult);
6513 
6514 
6515   /**
6516    * @brief  Root Mean Square of the elements of a Q15 vector.
6517    * @param[in]  pSrc       is input pointer
6518    * @param[in]  blockSize  is the number of samples to process
6519    * @param[out] pResult    is output value.
6520    */
6521   void riscv_rms_q15(
6522   const q15_t * pSrc,
6523         uint32_t blockSize,
6524         q15_t * pResult);
6525 
6526 
6527   /**
6528    * @brief  Standard deviation of the elements of a floating-point vector.
6529    * @param[in]  pSrc       is input pointer
6530    * @param[in]  blockSize  is the number of samples to process
6531    * @param[out] pResult    is output value.
6532    */
6533   void riscv_std_f32(
6534   const float32_t * pSrc,
6535         uint32_t blockSize,
6536         float32_t * pResult);
6537 
6538 
6539   /**
6540    * @brief  Standard deviation of the elements of a Q31 vector.
6541    * @param[in]  pSrc       is input pointer
6542    * @param[in]  blockSize  is the number of samples to process
6543    * @param[out] pResult    is output value.
6544    */
6545   void riscv_std_q31(
6546   const q31_t * pSrc,
6547         uint32_t blockSize,
6548         q31_t * pResult);
6549 
6550 
6551   /**
6552    * @brief  Standard deviation of the elements of a Q15 vector.
6553    * @param[in]  pSrc       is input pointer
6554    * @param[in]  blockSize  is the number of samples to process
6555    * @param[out] pResult    is output value.
6556    */
6557   void riscv_std_q15(
6558   const q15_t * pSrc,
6559         uint32_t blockSize,
6560         q15_t * pResult);
6561 
6562 
6563   /**
6564    * @brief  Floating-point complex magnitude
6565    * @param[in]  pSrc        points to the complex input vector
6566    * @param[out] pDst        points to the real output vector
6567    * @param[in]  numSamples  number of complex samples in the input vector
6568    */
6569   void riscv_cmplx_mag_f32(
6570   const float32_t * pSrc,
6571         float32_t * pDst,
6572         uint32_t numSamples);
6573 
6574 
6575   /**
6576    * @brief  Q31 complex magnitude
6577    * @param[in]  pSrc        points to the complex input vector
6578    * @param[out] pDst        points to the real output vector
6579    * @param[in]  numSamples  number of complex samples in the input vector
6580    */
6581   void riscv_cmplx_mag_q31(
6582   const q31_t * pSrc,
6583         q31_t * pDst,
6584         uint32_t numSamples);
6585 
6586 
6587   /**
6588    * @brief  Q15 complex magnitude
6589    * @param[in]  pSrc        points to the complex input vector
6590    * @param[out] pDst        points to the real output vector
6591    * @param[in]  numSamples  number of complex samples in the input vector
6592    */
6593   void riscv_cmplx_mag_q15(
6594   const q15_t * pSrc,
6595         q15_t * pDst,
6596         uint32_t numSamples);
6597 
6598 
6599   /**
6600    * @brief  Q15 complex dot product
6601    * @param[in]  pSrcA       points to the first input vector
6602    * @param[in]  pSrcB       points to the second input vector
6603    * @param[in]  numSamples  number of complex samples in each vector
6604    * @param[out] realResult  real part of the result returned here
6605    * @param[out] imagResult  imaginary part of the result returned here
6606    */
6607   void riscv_cmplx_dot_prod_q15(
6608   const q15_t * pSrcA,
6609   const q15_t * pSrcB,
6610         uint32_t numSamples,
6611         q31_t * realResult,
6612         q31_t * imagResult);
6613 
6614 
6615   /**
6616    * @brief  Q31 complex dot product
6617    * @param[in]  pSrcA       points to the first input vector
6618    * @param[in]  pSrcB       points to the second input vector
6619    * @param[in]  numSamples  number of complex samples in each vector
6620    * @param[out] realResult  real part of the result returned here
6621    * @param[out] imagResult  imaginary part of the result returned here
6622    */
6623   void riscv_cmplx_dot_prod_q31(
6624   const q31_t * pSrcA,
6625   const q31_t * pSrcB,
6626         uint32_t numSamples,
6627         q63_t * realResult,
6628         q63_t * imagResult);
6629 
6630 
6631   /**
6632    * @brief  Floating-point complex dot product
6633    * @param[in]  pSrcA       points to the first input vector
6634    * @param[in]  pSrcB       points to the second input vector
6635    * @param[in]  numSamples  number of complex samples in each vector
6636    * @param[out] realResult  real part of the result returned here
6637    * @param[out] imagResult  imaginary part of the result returned here
6638    */
6639   void riscv_cmplx_dot_prod_f32(
6640   const float32_t * pSrcA,
6641   const float32_t * pSrcB,
6642         uint32_t numSamples,
6643         float32_t * realResult,
6644         float32_t * imagResult);
6645 
6646 
6647   /**
6648    * @brief  Q15 complex-by-real multiplication
6649    * @param[in]  pSrcCmplx   points to the complex input vector
6650    * @param[in]  pSrcReal    points to the real input vector
6651    * @param[out] pCmplxDst   points to the complex output vector
6652    * @param[in]  numSamples  number of samples in each vector
6653    */
6654   void riscv_cmplx_mult_real_q15(
6655   const q15_t * pSrcCmplx,
6656   const q15_t * pSrcReal,
6657         q15_t * pCmplxDst,
6658         uint32_t numSamples);
6659 
6660 
6661   /**
6662    * @brief  Q31 complex-by-real multiplication
6663    * @param[in]  pSrcCmplx   points to the complex input vector
6664    * @param[in]  pSrcReal    points to the real input vector
6665    * @param[out] pCmplxDst   points to the complex output vector
6666    * @param[in]  numSamples  number of samples in each vector
6667    */
6668   void riscv_cmplx_mult_real_q31(
6669   const q31_t * pSrcCmplx,
6670   const q31_t * pSrcReal,
6671         q31_t * pCmplxDst,
6672         uint32_t numSamples);
6673 
6674 
6675   /**
6676    * @brief  Floating-point complex-by-real multiplication
6677    * @param[in]  pSrcCmplx   points to the complex input vector
6678    * @param[in]  pSrcReal    points to the real input vector
6679    * @param[out] pCmplxDst   points to the complex output vector
6680    * @param[in]  numSamples  number of samples in each vector
6681    */
6682   void riscv_cmplx_mult_real_f32(
6683   const float32_t * pSrcCmplx,
6684   const float32_t * pSrcReal,
6685         float32_t * pCmplxDst,
6686         uint32_t numSamples);
6687 
6688 
6689   /**
6690    * @brief  Minimum value of a Q7 vector.
6691    * @param[in]  pSrc       is input pointer
6692    * @param[in]  blockSize  is the number of samples to process
6693    * @param[out] result     is output pointer
6694    * @param[out] index      is the array index of the minimum value in the input buffer.
6695    */
6696   void riscv_min_q7(
6697   const q7_t * pSrc,
6698         uint32_t blockSize,
6699         q7_t * result,
6700         uint32_t * index);
6701 
6702 
6703   /**
6704    * @brief  Minimum value of a Q15 vector.
6705    * @param[in]  pSrc       is input pointer
6706    * @param[in]  blockSize  is the number of samples to process
6707    * @param[out] pResult    is output pointer
6708    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
6709    */
6710   void riscv_min_q15(
6711   const q15_t * pSrc,
6712         uint32_t blockSize,
6713         q15_t * pResult,
6714         uint32_t * pIndex);
6715 
6716 
6717   /**
6718    * @brief  Minimum value of a Q31 vector.
6719    * @param[in]  pSrc       is input pointer
6720    * @param[in]  blockSize  is the number of samples to process
6721    * @param[out] pResult    is output pointer
6722    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
6723    */
6724   void riscv_min_q31(
6725   const q31_t * pSrc,
6726         uint32_t blockSize,
6727         q31_t * pResult,
6728         uint32_t * pIndex);
6729 
6730 
6731   /**
6732    * @brief  Minimum value of a floating-point vector.
6733    * @param[in]  pSrc       is input pointer
6734    * @param[in]  blockSize  is the number of samples to process
6735    * @param[out] pResult    is output pointer
6736    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
6737    */
6738   void riscv_min_f32(
6739   const float32_t * pSrc,
6740         uint32_t blockSize,
6741         float32_t * pResult,
6742         uint32_t * pIndex);
6743 
6744 
6745 /**
6746  * @brief Maximum value of a Q7 vector.
6747  * @param[in]  pSrc       points to the input buffer
6748  * @param[in]  blockSize  length of the input vector
6749  * @param[out] pResult    maximum value returned here
6750  * @param[out] pIndex     index of maximum value returned here
6751  */
6752   void riscv_max_q7(
6753   const q7_t * pSrc,
6754         uint32_t blockSize,
6755         q7_t * pResult,
6756         uint32_t * pIndex);
6757 
6758 
6759 /**
6760  * @brief Maximum value of a Q15 vector.
6761  * @param[in]  pSrc       points to the input buffer
6762  * @param[in]  blockSize  length of the input vector
6763  * @param[out] pResult    maximum value returned here
6764  * @param[out] pIndex     index of maximum value returned here
6765  */
6766   void riscv_max_q15(
6767   const q15_t * pSrc,
6768         uint32_t blockSize,
6769         q15_t * pResult,
6770         uint32_t * pIndex);
6771 
6772 
6773 /**
6774  * @brief Maximum value of a Q31 vector.
6775  * @param[in]  pSrc       points to the input buffer
6776  * @param[in]  blockSize  length of the input vector
6777  * @param[out] pResult    maximum value returned here
6778  * @param[out] pIndex     index of maximum value returned here
6779  */
6780   void riscv_max_q31(
6781   const q31_t * pSrc,
6782         uint32_t blockSize,
6783         q31_t * pResult,
6784         uint32_t * pIndex);
6785 
6786 
6787 /**
6788  * @brief Maximum value of a floating-point vector.
6789  * @param[in]  pSrc       points to the input buffer
6790  * @param[in]  blockSize  length of the input vector
6791  * @param[out] pResult    maximum value returned here
6792  * @param[out] pIndex     index of maximum value returned here
6793  */
6794   void riscv_max_f32(
6795   const float32_t * pSrc,
6796         uint32_t blockSize,
6797         float32_t * pResult,
6798         uint32_t * pIndex);
6799 
6800 
6801   /**
6802    * @brief  Q15 complex-by-complex multiplication
6803    * @param[in]  pSrcA       points to the first input vector
6804    * @param[in]  pSrcB       points to the second input vector
6805    * @param[out] pDst        points to the output vector
6806    * @param[in]  numSamples  number of complex samples in each vector
6807    */
6808   void riscv_cmplx_mult_cmplx_q15(
6809   const q15_t * pSrcA,
6810   const q15_t * pSrcB,
6811         q15_t * pDst,
6812         uint32_t numSamples);
6813 
6814 
6815   /**
6816    * @brief  Q31 complex-by-complex multiplication
6817    * @param[in]  pSrcA       points to the first input vector
6818    * @param[in]  pSrcB       points to the second input vector
6819    * @param[out] pDst        points to the output vector
6820    * @param[in]  numSamples  number of complex samples in each vector
6821    */
6822   void riscv_cmplx_mult_cmplx_q31(
6823   const q31_t * pSrcA,
6824   const q31_t * pSrcB,
6825         q31_t * pDst,
6826         uint32_t numSamples);
6827 
6828 
6829   /**
6830    * @brief  Floating-point complex-by-complex multiplication
6831    * @param[in]  pSrcA       points to the first input vector
6832    * @param[in]  pSrcB       points to the second input vector
6833    * @param[out] pDst        points to the output vector
6834    * @param[in]  numSamples  number of complex samples in each vector
6835    */
6836   void riscv_cmplx_mult_cmplx_f32(
6837   const float32_t * pSrcA,
6838   const float32_t * pSrcB,
6839         float32_t * pDst,
6840         uint32_t numSamples);
6841 
6842 
6843   /**
6844    * @brief Converts the elements of the floating-point vector to Q31 vector.
6845    * @param[in]  pSrc       points to the floating-point input vector
6846    * @param[out] pDst       points to the Q31 output vector
6847    * @param[in]  blockSize  length of the input vector
6848    */
6849   void riscv_float_to_q31(
6850   const float32_t * pSrc,
6851         q31_t * pDst,
6852         uint32_t blockSize);
6853 
6854 
6855   /**
6856    * @brief Converts the elements of the floating-point vector to Q15 vector.
6857    * @param[in]  pSrc       points to the floating-point input vector
6858    * @param[out] pDst       points to the Q15 output vector
6859    * @param[in]  blockSize  length of the input vector
6860    */
6861   void riscv_float_to_q15(
6862   const float32_t * pSrc,
6863         q15_t * pDst,
6864         uint32_t blockSize);
6865 
6866 
6867   /**
6868    * @brief Converts the elements of the floating-point vector to Q7 vector.
6869    * @param[in]  pSrc       points to the floating-point input vector
6870    * @param[out] pDst       points to the Q7 output vector
6871    * @param[in]  blockSize  length of the input vector
6872    */
6873   void riscv_float_to_q7(
6874   const float32_t * pSrc,
6875         q7_t * pDst,
6876         uint32_t blockSize);
6877 
6878 
6879   /**
6880    * @brief  Converts the elements of the Q31 vector to floating-point vector.
6881    * @param[in]  pSrc       is input pointer
6882    * @param[out] pDst       is output pointer
6883    * @param[in]  blockSize  is the number of samples to process
6884    */
6885   void riscv_q31_to_float(
6886   const q31_t * pSrc,
6887         float32_t * pDst,
6888         uint32_t blockSize);
6889 
6890 
6891   /**
6892    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6893    * @param[in]  pSrc       is input pointer
6894    * @param[out] pDst       is output pointer
6895    * @param[in]  blockSize  is the number of samples to process
6896    */
6897   void riscv_q31_to_q15(
6898   const q31_t * pSrc,
6899         q15_t * pDst,
6900         uint32_t blockSize);
6901 
6902 
6903   /**
6904    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6905    * @param[in]  pSrc       is input pointer
6906    * @param[out] pDst       is output pointer
6907    * @param[in]  blockSize  is the number of samples to process
6908    */
6909   void riscv_q31_to_q7(
6910   const q31_t * pSrc,
6911         q7_t * pDst,
6912         uint32_t blockSize);
6913 
6914 
6915   /**
6916    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6917    * @param[in]  pSrc       is input pointer
6918    * @param[out] pDst       is output pointer
6919    * @param[in]  blockSize  is the number of samples to process
6920    */
6921   void riscv_q15_to_float(
6922   const q15_t * pSrc,
6923         float32_t * pDst,
6924         uint32_t blockSize);
6925 
6926 
6927   /**
6928    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6929    * @param[in]  pSrc       is input pointer
6930    * @param[out] pDst       is output pointer
6931    * @param[in]  blockSize  is the number of samples to process
6932    */
6933   void riscv_q15_to_q31(
6934   const q15_t * pSrc,
6935         q31_t * pDst,
6936         uint32_t blockSize);
6937 
6938 
6939   /**
6940    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6941    * @param[in]  pSrc       is input pointer
6942    * @param[out] pDst       is output pointer
6943    * @param[in]  blockSize  is the number of samples to process
6944    */
6945   void riscv_q15_to_q7(
6946   const q15_t * pSrc,
6947         q7_t * pDst,
6948         uint32_t blockSize);
6949 
6950 
6951   /**
6952    * @brief  Converts the elements of the Q7 vector to floating-point vector.
6953    * @param[in]  pSrc       is input pointer
6954    * @param[out] pDst       is output pointer
6955    * @param[in]  blockSize  is the number of samples to process
6956    */
6957   void riscv_q7_to_float(
6958   const q7_t * pSrc,
6959         float32_t * pDst,
6960         uint32_t blockSize);
6961 
6962 
6963   /**
6964    * @brief  Converts the elements of the Q7 vector to Q31 vector.
6965    * @param[in]  pSrc       input pointer
6966    * @param[out] pDst       output pointer
6967    * @param[in]  blockSize  number of samples to process
6968    */
6969   void riscv_q7_to_q31(
6970   const q7_t * pSrc,
6971         q31_t * pDst,
6972         uint32_t blockSize);
6973 
6974 
6975   /**
6976    * @brief  Converts the elements of the Q7 vector to Q15 vector.
6977    * @param[in]  pSrc       input pointer
6978    * @param[out] pDst       output pointer
6979    * @param[in]  blockSize  number of samples to process
6980    */
6981   void riscv_q7_to_q15(
6982   const q7_t * pSrc,
6983         q15_t * pDst,
6984         uint32_t blockSize);
6985 
6986 
6987   /**
6988    * @ingroup groupInterpolation
6989    */
6990 
6991   /**
6992    * @defgroup BilinearInterpolate Bilinear Interpolation
6993    *
6994    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6995    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6996    * determines values between the grid points.
6997    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6998    * Bilinear interpolation is often used in image processing to rescale images.
6999    * The NMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
7000    *
7001    * <b>Algorithm</b>
7002    * \par
7003    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
7004    * For floating-point, the instance structure is defined as:
7005    * <pre>
7006    *   typedef struct
7007    *   {
7008    *     uint16_t numRows;
7009    *     uint16_t numCols;
7010    *     float32_t *pData;
7011    * } riscv_bilinear_interp_instance_f32;
7012    * </pre>
7013    *
7014    * \par
7015    * where <code>numRows</code> specifies the number of rows in the table;
7016    * <code>numCols</code> specifies the number of columns in the table;
7017    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
7018    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
7019    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
7020    *
7021    * \par
7022    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
7023    * <pre>
7024    *     XF = floor(x)
7025    *     YF = floor(y)
7026    * </pre>
7027    * \par
7028    * The interpolated output point is computed as:
7029    * <pre>
7030    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
7031    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
7032    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
7033    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
7034    * </pre>
7035    * Note that the coordinates (x, y) contain integer and fractional components.
7036    * The integer components specify which portion of the table to use while the
7037    * fractional components control the interpolation processor.
7038    *
7039    * \par
7040    * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output.
7041    */
7042 
7043 
7044   /**
7045    * @addtogroup BilinearInterpolate
7046    * @{
7047    */
7048 
7049   /**
7050   * @brief  Floating-point bilinear interpolation.
7051   * @param[in,out] S  points to an instance of the interpolation structure.
7052   * @param[in]     X  interpolation coordinate.
7053   * @param[in]     Y  interpolation coordinate.
7054   * @return out interpolated value.
7055   */
riscv_bilinear_interp_f32(const riscv_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)7056   __STATIC_FORCEINLINE float32_t riscv_bilinear_interp_f32(
7057   const riscv_bilinear_interp_instance_f32 * S,
7058   float32_t X,
7059   float32_t Y)
7060   {
7061     float32_t out;
7062     float32_t f00, f01, f10, f11;
7063     float32_t *pData = S->pData;
7064     int32_t xIndex, yIndex, index;
7065     float32_t xdiff, ydiff;
7066     float32_t b1, b2, b3, b4;
7067 
7068     xIndex = (int32_t) X;
7069     yIndex = (int32_t) Y;
7070 
7071     /* Care taken for table outside boundary */
7072     /* Returns zero output when values are outside table boundary */
7073     if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1))
7074     {
7075       return (0);
7076     }
7077 
7078     /* Calculation of index for two nearest points in X-direction */
7079     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
7080 
7081 
7082     /* Read two nearest points in X-direction */
7083     f00 = pData[index];
7084     f01 = pData[index + 1];
7085 
7086     /* Calculation of index for two nearest points in Y-direction */
7087     index = (xIndex - 1) + (yIndex) * S->numCols;
7088 
7089 
7090     /* Read two nearest points in Y-direction */
7091     f10 = pData[index];
7092     f11 = pData[index + 1];
7093 
7094     /* Calculation of intermediate values */
7095     b1 = f00;
7096     b2 = f01 - f00;
7097     b3 = f10 - f00;
7098     b4 = f00 - f01 - f10 + f11;
7099 
7100     /* Calculation of fractional part in X */
7101     xdiff = X - xIndex;
7102 
7103     /* Calculation of fractional part in Y */
7104     ydiff = Y - yIndex;
7105 
7106     /* Calculation of bi-linear interpolated output */
7107     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
7108 
7109     /* return to application */
7110     return (out);
7111   }
7112 
7113 
7114   /**
7115   * @brief  Q31 bilinear interpolation.
7116   * @param[in,out] S  points to an instance of the interpolation structure.
7117   * @param[in]     X  interpolation coordinate in 12.20 format.
7118   * @param[in]     Y  interpolation coordinate in 12.20 format.
7119   * @return out interpolated value.
7120   */
riscv_bilinear_interp_q31(riscv_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)7121   __STATIC_FORCEINLINE q31_t riscv_bilinear_interp_q31(
7122   riscv_bilinear_interp_instance_q31 * S,
7123   q31_t X,
7124   q31_t Y)
7125   {
7126     q31_t out;                                   /* Temporary output */
7127     q31_t acc = 0;                               /* output */
7128     q31_t xfract, yfract;                        /* X, Y fractional parts */
7129     q31_t x1, x2, y1, y2;                        /* Nearest output values */
7130     int32_t rI, cI;                              /* Row and column indices */
7131     q31_t *pYData = S->pData;                    /* pointer to output table values */
7132     uint32_t nCols = S->numCols;                 /* num of rows */
7133 
7134     /* Input is in 12.20 format */
7135     /* 12 bits for the table index */
7136     /* Index value calculation */
7137     rI = ((X & (q31_t)0xFFF00000) >> 20);
7138 
7139     /* Input is in 12.20 format */
7140     /* 12 bits for the table index */
7141     /* Index value calculation */
7142     cI = ((Y & (q31_t)0xFFF00000) >> 20);
7143 
7144     /* Care taken for table outside boundary */
7145     /* Returns zero output when values are outside table boundary */
7146     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7147     {
7148       return (0);
7149     }
7150 
7151     /* 20 bits for the fractional part */
7152     /* shift left xfract by 11 to keep 1.31 format */
7153     xfract = (X & 0x000FFFFF) << 11U;
7154 
7155     /* Read two nearest output values from the index */
7156     x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
7157     x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
7158 
7159     /* 20 bits for the fractional part */
7160     /* shift left yfract by 11 to keep 1.31 format */
7161     yfract = (Y & 0x000FFFFF) << 11U;
7162 
7163     /* Read two nearest output values from the index */
7164     y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
7165     y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
7166 
7167     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
7168     out = ((q31_t) (((q63_t) x1  * (0x7FFFFFFF - xfract)) >> 32));
7169     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
7170 
7171     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
7172     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
7173     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
7174 
7175     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
7176     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
7177     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7178 
7179     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
7180     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
7181     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7182 
7183     /* Convert acc to 1.31(q31) format */
7184     return ((q31_t)(acc << 2));
7185   }
7186 
7187 
7188   /**
7189   * @brief  Q15 bilinear interpolation.
7190   * @param[in,out] S  points to an instance of the interpolation structure.
7191   * @param[in]     X  interpolation coordinate in 12.20 format.
7192   * @param[in]     Y  interpolation coordinate in 12.20 format.
7193   * @return out interpolated value.
7194   */
riscv_bilinear_interp_q15(riscv_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)7195   __STATIC_FORCEINLINE q15_t riscv_bilinear_interp_q15(
7196   riscv_bilinear_interp_instance_q15 * S,
7197   q31_t X,
7198   q31_t Y)
7199   {
7200     q63_t acc = 0;                               /* output */
7201     q31_t out;                                   /* Temporary output */
7202     q15_t x1, x2, y1, y2;                        /* Nearest output values */
7203     q31_t xfract, yfract;                        /* X, Y fractional parts */
7204     int32_t rI, cI;                              /* Row and column indices */
7205     q15_t *pYData = S->pData;                    /* pointer to output table values */
7206     uint32_t nCols = S->numCols;                 /* num of rows */
7207 
7208     /* Input is in 12.20 format */
7209     /* 12 bits for the table index */
7210     /* Index value calculation */
7211     rI = ((X & (q31_t)0xFFF00000) >> 20);
7212 
7213     /* Input is in 12.20 format */
7214     /* 12 bits for the table index */
7215     /* Index value calculation */
7216     cI = ((Y & (q31_t)0xFFF00000) >> 20);
7217 
7218     /* Care taken for table outside boundary */
7219     /* Returns zero output when values are outside table boundary */
7220     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7221     {
7222       return (0);
7223     }
7224 
7225     /* 20 bits for the fractional part */
7226     /* xfract should be in 12.20 format */
7227     xfract = (X & 0x000FFFFF);
7228 
7229     /* Read two nearest output values from the index */
7230     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
7231     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
7232 
7233     /* 20 bits for the fractional part */
7234     /* yfract should be in 12.20 format */
7235     yfract = (Y & 0x000FFFFF);
7236 
7237     /* Read two nearest output values from the index */
7238     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
7239     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
7240 
7241     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
7242 
7243     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
7244     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
7245     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4U);
7246     acc = ((q63_t) out * (0xFFFFF - yfract));
7247 
7248     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
7249     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4U);
7250     acc += ((q63_t) out * (xfract));
7251 
7252     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
7253     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4U);
7254     acc += ((q63_t) out * (yfract));
7255 
7256     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
7257     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U);
7258     acc += ((q63_t) out * (yfract));
7259 
7260     /* acc is in 13.51 format and down shift acc by 36 times */
7261     /* Convert out to 1.15 format */
7262     return ((q15_t)(acc >> 36));
7263   }
7264 
7265 
7266   /**
7267   * @brief  Q7 bilinear interpolation.
7268   * @param[in,out] S  points to an instance of the interpolation structure.
7269   * @param[in]     X  interpolation coordinate in 12.20 format.
7270   * @param[in]     Y  interpolation coordinate in 12.20 format.
7271   * @return out interpolated value.
7272   */
riscv_bilinear_interp_q7(riscv_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)7273   __STATIC_FORCEINLINE q7_t riscv_bilinear_interp_q7(
7274   riscv_bilinear_interp_instance_q7 * S,
7275   q31_t X,
7276   q31_t Y)
7277   {
7278     q63_t acc = 0;                               /* output */
7279     q31_t out;                                   /* Temporary output */
7280     q31_t xfract, yfract;                        /* X, Y fractional parts */
7281     q7_t x1, x2, y1, y2;                         /* Nearest output values */
7282     int32_t rI, cI;                              /* Row and column indices */
7283     q7_t *pYData = S->pData;                     /* pointer to output table values */
7284     uint32_t nCols = S->numCols;                 /* num of rows */
7285 
7286     /* Input is in 12.20 format */
7287     /* 12 bits for the table index */
7288     /* Index value calculation */
7289     rI = ((X & (q31_t)0xFFF00000) >> 20);
7290 
7291     /* Input is in 12.20 format */
7292     /* 12 bits for the table index */
7293     /* Index value calculation */
7294     cI = ((Y & (q31_t)0xFFF00000) >> 20);
7295 
7296     /* Care taken for table outside boundary */
7297     /* Returns zero output when values are outside table boundary */
7298     if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7299     {
7300       return (0);
7301     }
7302 
7303     /* 20 bits for the fractional part */
7304     /* xfract should be in 12.20 format */
7305     xfract = (X & (q31_t)0x000FFFFF);
7306 
7307     /* Read two nearest output values from the index */
7308     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
7309     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
7310 
7311     /* 20 bits for the fractional part */
7312     /* yfract should be in 12.20 format */
7313     yfract = (Y & (q31_t)0x000FFFFF);
7314 
7315     /* Read two nearest output values from the index */
7316     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
7317     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
7318 
7319     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7320     out = ((x1 * (0xFFFFF - xfract)));
7321     acc = (((q63_t) out * (0xFFFFF - yfract)));
7322 
7323     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7324     out = ((x2 * (0xFFFFF - yfract)));
7325     acc += (((q63_t) out * (xfract)));
7326 
7327     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7328     out = ((y1 * (0xFFFFF - xfract)));
7329     acc += (((q63_t) out * (yfract)));
7330 
7331     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7332     out = ((y2 * (yfract)));
7333     acc += (((q63_t) out * (xfract)));
7334 
7335     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7336     return ((q7_t)(acc >> 40));
7337   }
7338 
7339   /**
7340    * @} end of BilinearInterpolate group
7341    */
7342 
7343 
/*
 * 32x32-bit multiply helpers that keep only the upper 32 bits of the 64-bit
 * product.  Each macro assigns its result to the first argument, which must
 * therefore be a modifiable q31_t lvalue.  The "_R" variants add 0x80000000
 * before the shift, i.e. they round instead of truncating.
 *
 * Every argument is parenthesised in the expansion so that expressions such
 * as (p + q) can be passed safely.  Note that `a` is still evaluated more
 * than once in the accumulate/subtract "_R" variants, so it must not have
 * side effects.
 */

/* SMMLAR: a = round_hi32((a << 32) + x * y) */
#define multAcc_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMLSR: a = round_hi32((a << 32) - x * y) */
#define multSub_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMULR: a = round_hi32(x * y) */
#define mult_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)

/* SMMLA: a += hi32(x * y) */
#define multAcc_32x32_keep32(a, x, y) \
    (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMLS: a -= hi32(x * y) */
#define multSub_32x32_keep32(a, x, y) \
    (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMUL: a = hi32(x * y) */
#define mult_32x32_keep32(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) ) >> 32)
7367 
7368 
/*
 * Optimisation-control markers.
 *
 * LOW_OPTIMIZATION_ENTER expands to a GCC-style function attribute that
 * requests the "-O1" optimisation level for the function it is attached to.
 * The remaining three markers expand to nothing here.
 * NOTE(review): the IAR_ONLY_* names suggest they are placeholders for an
 * IAR toolchain variant of this header - confirm against the other ports.
 */
#define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
#define LOW_OPTIMIZATION_EXIT

#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
7374 
7375 
7376 #ifdef   __cplusplus
7377 }
7378 #endif
7379 
7380 
7381 #endif /* _RISCV_MATH_H */
7382 
7383 /**
7384  *
7385  * End of file.
7386  */
7387