• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  * @file     arm_math.h
3  * @brief    Public header file for CMSIS DSP Library
4  * @version  V1.7.0
5  * @date     18. March 2019
6  ******************************************************************************/
7 /*
8  * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
9  *
10  * SPDX-License-Identifier: Apache-2.0
11  *
12  * Licensed under the Apache License, Version 2.0 (the License); you may
13  * not use this file except in compliance with the License.
14  * You may obtain a copy of the License at
15  *
16  * www.apache.org/licenses/LICENSE-2.0
17  *
18  * Unless required by applicable law or agreed to in writing, software
19  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  * See the License for the specific language governing permissions and
22  * limitations under the License.
23  */
24 
25 /**
26    \mainpage CMSIS DSP Software Library
27    *
28    * Introduction
29    * ------------
30    *
31    * This user manual describes the CMSIS DSP software library,
32    * a suite of common signal processing functions for use on Cortex-M and Cortex-A processor
33    * based devices.
34    *
35    * The library is divided into a number of functions each covering a specific category:
36    * - Basic math functions
37    * - Fast math functions
38    * - Complex math functions
39    * - Filtering functions
40    * - Matrix functions
41    * - Transform functions
42    * - Motor control functions
43    * - Statistical functions
44    * - Support functions
45    * - Interpolation functions
46    * - Support Vector Machine functions (SVM)
47    * - Bayes classifier functions
48    * - Distance functions
49    *
50    * The library has generally separate functions for operating on 8-bit integers, 16-bit integers,
51    * 32-bit integers and 32-bit floating-point values.
52    *
53    * Using the Library
54    * ------------
55    *
56    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
57    *
58    * Here is the list of pre-built libraries :
59    * - arm_cortexM7lfdp_math.lib (Cortex-M7, Little endian, Double Precision Floating Point Unit)
60    * - arm_cortexM7bfdp_math.lib (Cortex-M7, Big endian, Double Precision Floating Point Unit)
61    * - arm_cortexM7lfsp_math.lib (Cortex-M7, Little endian, Single Precision Floating Point Unit)
62    * - arm_cortexM7bfsp_math.lib (Cortex-M7, Big endian, Single Precision Floating Point Unit)
63    * - arm_cortexM7l_math.lib (Cortex-M7, Little endian)
64    * - arm_cortexM7b_math.lib (Cortex-M7, Big endian)
65    * - arm_cortexM4lf_math.lib (Cortex-M4, Little endian, Floating Point Unit)
66    * - arm_cortexM4bf_math.lib (Cortex-M4, Big endian, Floating Point Unit)
67    * - arm_cortexM4l_math.lib (Cortex-M4, Little endian)
68    * - arm_cortexM4b_math.lib (Cortex-M4, Big endian)
69    * - arm_cortexM3l_math.lib (Cortex-M3, Little endian)
70    * - arm_cortexM3b_math.lib (Cortex-M3, Big endian)
71    * - arm_cortexM0l_math.lib (Cortex-M0 / Cortex-M0+, Little endian)
72    * - arm_cortexM0b_math.lib (Cortex-M0 / Cortex-M0+, Big endian)
73    * - arm_ARMv8MBLl_math.lib (Armv8-M Baseline, Little endian)
74    * - arm_ARMv8MMLl_math.lib (Armv8-M Mainline, Little endian)
75    * - arm_ARMv8MMLlfsp_math.lib (Armv8-M Mainline, Little endian, Single Precision Floating Point Unit)
76    * - arm_ARMv8MMLld_math.lib (Armv8-M Mainline, Little endian, DSP instructions)
77    * - arm_ARMv8MMLldfsp_math.lib (Armv8-M Mainline, Little endian, DSP instructions, Single Precision Floating Point Unit)
78    *
79    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
80    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The library provides a single
81    * public header file <code>arm_math.h</code> for Cortex-M cores with little endian and big endian. The same header file is used for floating point unit (FPU) variants.
82    *
83    *
84    * Examples
85    * --------
86    *
87    * The library ships with a number of examples which demonstrate how to use the library functions.
88    *
89    * Toolchain Support
90    * ------------
91    *
92    * The library is now tested on Fast Models building with cmake.
93    * Core M0, M7, A5 are tested.
94    *
95    *
96    *
97    * Building the Library
98    * ------------
99    *
100    * The library installer contains a project file to rebuild libraries on MDK toolchain in the <code>CMSIS\\DSP\\Projects\\ARM</code> folder.
101    * - arm_cortexM_math.uvprojx
102    *
103    *
104    * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional preprocessor macros detailed below.
105    *
106    * There is also a work-in-progress CMake build. The README file gives more details.
107    *
108    * Preprocessor Macros
109    * ------------
110    *
111    * Each library project has different preprocessor macros.
112    *
113    * - ARM_MATH_BIG_ENDIAN:
114    *
115    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
116    *
117    * - ARM_MATH_MATRIX_CHECK:
118    *
119    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
120    *
121    * - ARM_MATH_ROUNDING:
122    *
123    * Define macro ARM_MATH_ROUNDING for rounding on support functions
124    *
125    * - ARM_MATH_LOOPUNROLL:
126    *
127    * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
128    *
129    * - ARM_MATH_NEON:
130    *
131    * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions.
132    * It is not enabled by default when Neon is available because performances are
133    * dependent on the compiler and target architecture.
134    *
135    * - ARM_MATH_NEON_EXPERIMENTAL:
136    *
137    * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions
138    * of some DSP functions. Experimental Neon versions currently do not have better
139    * performance than the scalar versions.
140    *
141    * - ARM_MATH_HELIUM:
142    *
143    * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_FLOAT16.
144    *
145    * - ARM_MATH_MVEF:
146    *
147    * Select Helium versions of the f32 algorithms.
148    * It implies ARM_MATH_FLOAT16 and ARM_MATH_MVEI.
149    *
150    * - ARM_MATH_MVEI:
151    *
152    * Select Helium versions of the int and fixed point algorithms.
153    *
154    * - ARM_MATH_FLOAT16:
155    *
156    * Float16 implementations of some algorithms (Requires MVE extension).
157    *
158    * <hr>
159    * CMSIS-DSP in ARM::CMSIS Pack
160    * -----------------------------
161    *
162    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
163    * |File/Folder                      |Content                                                                 |
164    * |---------------------------------|------------------------------------------------------------------------|
165    * |\b CMSIS\\Documentation\\DSP     | This documentation                                                     |
166    * |\b CMSIS\\DSP\\DSP_Lib_TestSuite | DSP_Lib test suite                                                     |
167    * |\b CMSIS\\DSP\\Examples          | Example projects demonstrating the usage of the library functions      |
168    * |\b CMSIS\\DSP\\Include           | DSP_Lib include files                                                  |
169    * |\b CMSIS\\DSP\\Lib               | DSP_Lib binaries                                                       |
170    * |\b CMSIS\\DSP\\Projects          | Projects to rebuild DSP_Lib binaries                                   |
171    * |\b CMSIS\\DSP\\Source            | DSP_Lib source files                                                   |
172    *
173    * <hr>
174    * Revision History of CMSIS-DSP
175    * ------------
176    * Please refer to \ref ChangeLog_pg.
177    */
178 
179 
180 /**
181  * @defgroup groupMath Basic Math Functions
182  */
183 
184 /**
185  * @defgroup groupFastMath Fast Math Functions
186  * This set of functions provides a fast approximation to sine, cosine, and square root.
187  * As compared to most of the other functions in the CMSIS math library, the fast math functions
188  * operate on individual values and not arrays.
189  * There are separate functions for Q15, Q31, and floating-point data.
190  *
191  */
192 
193 /**
194  * @defgroup groupCmplxMath Complex Math Functions
195  * This set of functions operates on complex data vectors.
196  * The data in the complex arrays is stored in an interleaved fashion
197  * (real, imag, real, imag, ...).
198  * In the API functions, the number of samples in a complex array refers
199  * to the number of complex values; the array contains twice this number of
200  * real values.
201  */
202 
203 /**
204  * @defgroup groupFilters Filtering Functions
205  */
206 
207 /**
208  * @defgroup groupMatrix Matrix Functions
209  *
210  * This set of functions provides basic matrix math operations.
211  * The functions operate on matrix data structures.  For example,
212  * the type
213  * definition for the floating-point matrix structure is shown
214  * below:
215  * <pre>
216  *     typedef struct
217  *     {
218  *       uint16_t numRows;     // number of rows of the matrix.
219  *       uint16_t numCols;     // number of columns of the matrix.
220  *       float32_t *pData;     // points to the data of the matrix.
221  *     } arm_matrix_instance_f32;
222  * </pre>
223  * There are similar definitions for Q15 and Q31 data types.
224  *
225  * The structure specifies the size of the matrix and then points to
226  * an array of data.  The array is of size <code>numRows X numCols</code>
227  * and the values are arranged in row order.  That is, the
228  * matrix element (i, j) is stored at:
229  * <pre>
230  *     pData[i*numCols + j]
231  * </pre>
232  *
233  * \par Init Functions
234  * There is an associated initialization function for each type of matrix
235  * data structure.
236  * The initialization function sets the values of the internal structure fields.
237  * Refer to \ref arm_mat_init_f32(), \ref arm_mat_init_q31() and \ref arm_mat_init_q15()
238  * for floating-point, Q31 and Q15 types,  respectively.
239  *
240  * \par
241  * Use of the initialization function is optional. However, if initialization function is used
242  * then the instance structure cannot be placed into a const data section.
243  * To place the instance structure in a const data
244  * section, manually initialize the data structure.  For example:
245  * <pre>
246  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
247  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
248  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
249  * </pre>
250  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
251  * specifies the number of columns, and <code>pData</code> points to the
252  * data array.
253  *
254  * \par Size Checking
255  * By default all of the matrix functions perform size checking on the input and
256  * output matrices. For example, the matrix addition function verifies that the
257  * two input matrices and the output matrix all have the same number of rows and
258  * columns. If the size check fails the functions return:
259  * <pre>
260  *     ARM_MATH_SIZE_MISMATCH
261  * </pre>
262  * Otherwise the functions return
263  * <pre>
264  *     ARM_MATH_SUCCESS
265  * </pre>
266  * There is some overhead associated with this matrix size checking.
267  * The matrix size checking is enabled via the \#define
268  * <pre>
269  *     ARM_MATH_MATRIX_CHECK
270  * </pre>
271  * within the library project settings.  By default this macro is defined
272  * and size checking is enabled. By changing the project settings and
273  * undefining this macro size checking is eliminated and the functions
274  * run a bit faster. With size checking disabled the functions always
275  * return <code>ARM_MATH_SUCCESS</code>.
276  */
277 
278 /**
279  * @defgroup groupTransforms Transform Functions
280  */
281 
282 /**
283  * @defgroup groupController Controller Functions
284  */
285 
286 /**
287  * @defgroup groupStats Statistics Functions
288  */
289 
290 /**
291  * @defgroup groupSupport Support Functions
292  */
293 
294 /**
295  * @defgroup groupInterpolation Interpolation Functions
296  * These functions perform 1- and 2-dimensional interpolation of data.
297  * Linear interpolation is used for 1-dimensional data and
298  * bilinear interpolation is used for 2-dimensional data.
299  */
300 
301 /**
302  * @defgroup groupExamples Examples
303  */
304 
305 /**
306  * @defgroup groupSVM SVM Functions
307  * This set of functions implements SVM classification on 2 classes.
308  * The training must be done from scikit-learn. The parameters can be easily
309  * generated from the scikit-learn object. Some examples are given in
310  * DSP/Testing/PatternGeneration/SVM.py
311  *
312  * If more than 2 classes are needed, the functions in this folder
313  * will have to be used, as building blocks, to do multi-class classification.
314  *
315  * No multi-class classification is provided in this SVM folder.
316  *
317  */
318 
319 
320 /**
321  * @defgroup groupBayes Bayesian estimators
322  *
323  * Implement the naive gaussian Bayes estimator.
324  * The training must be done from scikit-learn.
325  *
326  * The parameters can be easily
327  * generated from the scikit-learn object. Some examples are given in
328  * DSP/Testing/PatternGeneration/Bayes.py
329  */
330 
331 /**
332  * @defgroup groupDistance Distance functions
333  *
334  * Distance functions for use with clustering algorithms.
335  * There are distance functions for float vectors and boolean vectors.
336  *
337  */
338 
339 
340 #ifndef _ARM_MATH_H
341 #define _ARM_MATH_H
342 
343 #ifdef   __cplusplus
344 extern "C"
345 {
346 #endif
347 
348 /* Compiler specific diagnostic adjustment */
349 #if   defined ( __CC_ARM )
350 
351 #elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
352 
353 #elif defined ( __GNUC__ )
354   #pragma GCC diagnostic push
355   #pragma GCC diagnostic ignored "-Wsign-conversion"
356   #pragma GCC diagnostic ignored "-Wconversion"
357   #pragma GCC diagnostic ignored "-Wunused-parameter"
358 
359 #elif defined ( __ICCARM__ )
360 
361 #elif defined ( __TI_ARM__ )
362 
363 #elif defined ( __CSMC__ )
364 
365 #elif defined ( __TASKING__ )
366 
367 #elif defined ( _MSC_VER )
368 
369 #else
370   #error Unknown compiler
371 #endif
372 
373 
374 /* Included for intrinsics definitions */
375 #if defined (_MSC_VER )
376 #include <stdint.h>
377 #define __STATIC_FORCEINLINE static __forceinline
378 #define __STATIC_INLINE static __inline
379 #define __ALIGNED(x) __declspec(align(x))
380 
381 #elif defined (__GNUC_PYTHON__)
382 #include <stdint.h>
383 #define  __ALIGNED(x) __attribute__((aligned(x)))
384 #define __STATIC_FORCEINLINE static __attribute__((inline))
385 #define __STATIC_INLINE static __attribute__((inline))
386 #pragma GCC diagnostic ignored "-Wunused-function"
387 #pragma GCC diagnostic ignored "-Wattributes"
388 
389 #else
390 #include "cmsis_compiler.h"
391 #endif
392 
393 
394 
395 #include <string.h>
396 #include <math.h>
397 #include <float.h>
398 #include <limits.h>
399 
400 
/* Largest finite value of each floating-point type. */
#define F64_MAX   ((float64_t)DBL_MAX)
#define F32_MAX   ((float32_t)FLT_MAX)

#if defined(ARM_MATH_FLOAT16)
/* 65504 is the largest finite IEEE 754 binary16 value. FLT_MAX is far outside
   the half-precision range, so it must not be converted to float16_t.
   NOTE(review): assumes float16_t uses the IEEE binary16 format - confirm. */
#define F16_MAX   ((float16_t)65504.0f)
#endif

/* Most negative finite value of each floating-point type. */
#define F64_MIN   (-DBL_MAX)
#define F32_MIN   (-FLT_MAX)

#if defined(ARM_MATH_FLOAT16)
#define F16_MIN   (-(float16_t)65504.0f)
#endif

/* Largest magnitude of each floating-point type. */
#define F64_ABSMAX   ((float64_t)DBL_MAX)
#define F32_ABSMAX   ((float32_t)FLT_MAX)

#if defined(ARM_MATH_FLOAT16)
#define F16_ABSMAX   ((float16_t)65504.0f)
#endif

/* Smallest magnitude of each floating-point type. */
#define F64_ABSMIN   ((float64_t)0.0)
#define F32_ABSMIN   ((float32_t)0.0)

#if defined(ARM_MATH_FLOAT16)
#define F16_ABSMIN   ((float16_t)0.0)
#endif

/* Extreme values of the fixed-point types, written as two's-complement bit patterns. */
#define Q31_MAX   ((q31_t)(0x7FFFFFFFL))
#define Q15_MAX   ((q15_t)(0x7FFF))
#define Q7_MAX    ((q7_t)(0x7F))
#define Q31_MIN   ((q31_t)(0x80000000L))
#define Q15_MIN   ((q15_t)(0x8000))
#define Q7_MIN    ((q7_t)(0x80))

/* Largest and smallest magnitudes of the fixed-point types. */
#define Q31_ABSMAX   ((q31_t)(0x7FFFFFFFL))
#define Q15_ABSMAX   ((q15_t)(0x7FFF))
#define Q7_ABSMAX    ((q7_t)(0x7F))
#define Q31_ABSMIN   ((q31_t)0)
#define Q15_ABSMIN   ((q15_t)0)
#define Q7_ABSMIN    ((q7_t)0)
442 
443 /* evaluate ARM DSP feature */
444 #if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
445   #define ARM_MATH_DSP                   1
446 #endif
447 
448 #if defined(ARM_MATH_NEON)
449 #include <arm_neon.h>
450 #endif
451 
452 #if defined (ARM_MATH_HELIUM)
453   #define ARM_MATH_MVEF
454   #define ARM_MATH_FLOAT16
455 #endif
456 
457 #if defined (ARM_MATH_MVEF)
458   #define ARM_MATH_MVEI
459   #define ARM_MATH_FLOAT16
460 #endif
461 
462 #if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)
463 #include <arm_mve.h>
464 #endif
465 
466 
467   /**
468    * @brief Macros required for reciprocal calculation in Normalized LMS
469    */
470 
#define DELTA_Q31          ((q31_t)(0x100))
#define DELTA_Q15          ((q15_t)0x5)
#define INDEX_MASK         0x0000003F
#ifndef PI
  /* Single-precision pi; only defined when the including code has not already provided PI. */
  #define PI               3.14159265358979f
#endif

  /**
   * @brief Constants used by the fast-math SINE and COSINE approximations
   */

#define FAST_MATH_TABLE_SIZE  512
#define FAST_MATH_Q31_SHIFT   (32 - 10)
#define FAST_MATH_Q15_SHIFT   (16 - 10)
#define CONTROLLER_Q31_SHIFT  (32 - 9)
#define TABLE_SPACING_Q31     0x400000
#define TABLE_SPACING_Q15     0x80

  /**
   * @brief Constants used by the SINE and COSINE controller functions
   */
  /* 2/360 expressed in 1.31 (q31) format: the range -1 to +1 is split into */
  /* 360 steps, so consecutive steps are 2/360 apart.                        */
#define INPUT_SPACING         0xB60B61

  /**
   * @brief Constants for complex numbers
   */

  /* Number of real values stored per complex value */
  #define CMPLX_DIM 2

  /**
   * @brief Status codes returned by some functions in the library.
   */

  typedef enum
  {
    /** No error */
    ARM_MATH_SUCCESS        =  0,
    /** One or more arguments are incorrect */
    ARM_MATH_ARGUMENT_ERROR = -1,
    /** Length of data buffer is incorrect */
    ARM_MATH_LENGTH_ERROR   = -2,
    /** Size of matrices is not compatible with the operation */
    ARM_MATH_SIZE_MISMATCH  = -3,
    /** Not-a-number (NaN) or infinity is generated */
    ARM_MATH_NANINF         = -4,
    /** Input matrix is singular and cannot be inverted */
    ARM_MATH_SINGULAR       = -5,
    /** Test Failed */
    ARM_MATH_TEST_FAILURE   = -6
  } arm_status;

  /**
   * @brief Fractional 8-bit type, 1.7 format.
   */
  typedef int8_t q7_t;

  /**
   * @brief Fractional 16-bit type, 1.15 format.
   */
  typedef int16_t q15_t;

  /**
   * @brief Fractional 32-bit type, 1.31 format.
   */
  typedef int32_t q31_t;

  /**
   * @brief Fractional 64-bit type, 1.63 format.
   */
  typedef int64_t q63_t;

  /**
   * @brief Floating-point type, 32 bits wide.
   */
  typedef float float32_t;

  /**
   * @brief Floating-point type, 64 bits wide.
   */
  typedef double float64_t;
547 
548   /**
549    * @brief vector types
550    */
#if defined(ARM_MATH_NEON) || defined (ARM_MATH_MVEI)
  /* ---- Single 128-bit fractional vectors ---- */

  /**
   * @brief 128-bit vector of 64-bit fractional values (1.63 format)
   */
  typedef int64x2_t q63x2_t;

  /**
   * @brief 128-bit vector of 32-bit fractional values (1.31 format)
   */
  typedef int32x4_t q31x4_t;

  /**
   * @brief 128-bit vector of 16-bit fractional values (1.15 format), 16-bit alignment
   */
  typedef __ALIGNED(2) int16x8_t q15x8_t;

  /**
   * @brief 128-bit vector of 8-bit fractional values (1.7 format), 8-bit alignment
   */
  typedef __ALIGNED(1) int8x16_t q7x16_t;

  /* ---- Pairs and quadruplets of 128-bit fractional vectors ---- */

  /**
   * @brief Pair of 128-bit vectors of 32-bit fractional values (1.31 format)
   */
  typedef int32x4x2_t q31x4x2_t;

  /**
   * @brief Quadruplet of 128-bit vectors of 32-bit fractional values (1.31 format)
   */
  typedef int32x4x4_t q31x4x4_t;

  /**
   * @brief Pair of 128-bit vectors of 16-bit fractional values (1.15 format)
   */
  typedef int16x8x2_t q15x8x2_t;

  /**
   * @brief Quadruplet of 128-bit vectors of 16-bit fractional values (1.15 format)
   */
  typedef int16x8x4_t q15x8x4_t;

  /**
   * @brief Pair of 128-bit vectors of 8-bit fractional values (1.7 format)
   */
  typedef int8x16x2_t q7x16x2_t;

  /**
   * @brief Quadruplet of 128-bit vectors of 8-bit fractional values (1.7 format)
   */
  typedef int8x16x4_t q7x16x4_t;

  /* ---- 9.23 format ---- */

  /**
   * @brief 32-bit fractional scalar in 9.23 format
   */
  typedef int32_t q23_t;

  /**
   * @brief 128-bit vector of 32-bit fractional values (9.23 format)
   */
  typedef int32x4_t q23x4_t;

  /* ---- 128-bit status vectors ---- */

  /**
   * @brief 128-bit vector of 64-bit status values
   */
  typedef int64x2_t status64x2_t;

  /**
   * @brief 128-bit vector of 32-bit status values
   */
  typedef int32x4_t status32x4_t;

  /**
   * @brief 128-bit vector of 16-bit status values
   */
  typedef int16x8_t status16x8_t;

  /**
   * @brief 128-bit vector of 8-bit status values
   */
  typedef int8x16_t status8x16_t;

#endif
634 
#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/
  /**
   * @brief 128-bit vector of 32-bit floating-point values
   */
  typedef float32x4_t f32x4_t;

  /**
   * @brief Pair of 128-bit vectors of 32-bit floating-point values
   */
  typedef float32x4x2_t f32x4x2_t;

  /**
   * @brief Quadruplet of 128-bit vectors of 32-bit floating-point values
   */
  typedef float32x4x4_t f32x4x4_t;

  /**
   * @brief 128-bit vector viewed either as 32-bit floats or as 32-bit integers
   */
  typedef union _any32x4_t
  {
      float32x4_t     f;
      int32x4_t       i;
  } any32x4_t;

#if defined(ARM_MATH_FLOAT16)
  /**
   * @brief 128-bit vector of 16-bit floating-point values
   */
  typedef __ALIGNED(2) float16x8_t f16x8_t;

  /**
   * @brief Pair of 128-bit vectors of 16-bit floating-point values
   */
  typedef float16x8x2_t f16x8x2_t;

  /**
   * @brief Quadruplet of 128-bit vectors of 16-bit floating-point values
   */
  typedef float16x8x4_t f16x8x4_t;

  /**
   * @brief 128-bit vector viewed either as 16-bit floats or as 16-bit integers
   */
  typedef union _any16x8_t
  {
      float16x8_t     f;
      int16x8_t       i;
  } any16x8_t;
#endif

#endif
691 
#if defined(ARM_MATH_NEON)
  /*
   * Neon-only vector aliases: 64-bit vectors and vector triplets.
   * Each alias must name the Neon type whose lane width, lane count and
   * array length match the alias name (q<bits>x<lanes>[x<count>]_t).
   */

  /**
   * @brief 32-bit fractional 64-bit vector data type in 1.31 format.
   */
  typedef int32x2_t  q31x2_t;

  /**
   * @brief 16-bit fractional 64-bit vector data type in 1.15 format.
   */
  typedef  __ALIGNED(2) int16x4_t q15x4_t;

  /**
   * @brief 8-bit fractional 64-bit vector data type in 1.7 format.
   */
  typedef  __ALIGNED(1) int8x8_t q7x8_t;

  /**
   * @brief 32-bit float 64-bit vector data type.
   */
  typedef float32x2_t  f32x2_t;

#if defined(ARM_MATH_FLOAT16)
  /**
   * @brief 16-bit float 64-bit vector data type.
   */
  typedef  __ALIGNED(2) float16x4_t f16x4_t;
#endif

  /**
   * @brief 32-bit floating-point 128-bit vector triplet data type
   */
  typedef float32x4x3_t f32x4x3_t;

#if defined(ARM_MATH_FLOAT16)
  /**
   * @brief 16-bit floating-point 128-bit vector triplet data type
   */
  typedef float16x8x3_t f16x8x3_t;
#endif

  /**
   * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
   */
  typedef int32x4x3_t q31x4x3_t;

  /**
   * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format
   */
  typedef int16x8x3_t q15x8x3_t;

  /**
   * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format
   */
  typedef int8x16x3_t q7x16x3_t;

  /**
   * @brief 32-bit floating-point 64-bit vector pair data type
   */
  typedef float32x2x2_t f32x2x2_t;

  /**
   * @brief 32-bit floating-point 64-bit vector triplet data type
   */
  typedef float32x2x3_t f32x2x3_t;

  /**
   * @brief 32-bit floating-point 64-bit vector quadruplet data type
   */
  typedef float32x2x4_t f32x2x4_t;

#if defined(ARM_MATH_FLOAT16)
  /**
   * @brief 16-bit floating-point 64-bit vector pair data type
   */
  typedef float16x4x2_t f16x4x2_t;

  /**
   * @brief 16-bit floating-point 64-bit vector triplet data type
   */
  typedef float16x4x3_t f16x4x3_t;

  /**
   * @brief 16-bit floating-point 64-bit vector quadruplet data type
   */
  typedef float16x4x4_t f16x4x4_t;
#endif

  /**
   * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
   */
  typedef int32x2x2_t q31x2x2_t;

  /**
   * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format
   */
  typedef int32x2x3_t q31x2x3_t;

  /**
   * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format
   */
  /* Four 64-bit vectors (int32x2x4_t), not a triplet of 128-bit vectors. */
  typedef int32x2x4_t q31x2x4_t;

  /**
   * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format
   */
  typedef int16x4x2_t q15x4x2_t;

  /**
   * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format
   */
  /* A triplet needs the x3 array type. */
  typedef int16x4x3_t q15x4x3_t;

  /**
   * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format
   */
  /* A quadruplet needs the x4 array type. */
  typedef int16x4x4_t q15x4x4_t;

  /**
   * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format
   */
  typedef int8x8x2_t q7x8x2_t;

  /**
   * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format
   */
  typedef int8x8x3_t q7x8x3_t;

  /**
   * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format
   */
  typedef int8x8x4_t q7x8x4_t;

  /**
   * @brief 32-bit ubiquitous 64-bit vector data type
   */
  typedef union _any32x2_t
  {
      float32x2_t     f;
      int32x2_t       i;
  } any32x2_t;

#if defined(ARM_MATH_FLOAT16)
  /**
   * @brief 16-bit ubiquitous 64-bit vector data type
   */
  typedef union _any16x4_t
  {
      float16x4_t     f;
      int16x4_t       i;
  } any16x4_t;
#endif

  /*
   * The 64-bit status vectors use the 64-bit Neon types (two 32-bit, four
   * 16-bit or eight 8-bit lanes), mirroring the 128-bit status aliases.
   */

  /**
   * @brief 32-bit status 64-bit vector data type.
   */
  typedef int32x2_t status32x2_t;

  /**
   * @brief 16-bit status 64-bit vector data type.
   */
  typedef int16x4_t status16x4_t;

  /**
   * @brief 8-bit status 64-bit vector data type.
   */
  typedef int8x8_t status8x8_t;

#endif
860 
861 
862 
/**
  @brief Access helpers that read or write two 16-bit values as one 32-bit word.
  @deprecated
 */
#if   defined ( __CC_ARM )
  /* Arm Compiler 5: packed access */
  #define __SIMD32_TYPE int32_t __packed
#elif ( defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) ) || defined ( __GNUC__ )
  /* Arm Compiler 6 and GCC-compatible compilers: plain 32-bit word */
  #define __SIMD32_TYPE int32_t
#elif defined ( __ICCARM__ )
  #define __SIMD32_TYPE int32_t __packed
#elif defined ( __TI_ARM__ ) || defined ( __CSMC__ )
  #define __SIMD32_TYPE int32_t
#elif defined ( __TASKING__ )
  #define __SIMD32_TYPE __un(aligned) int32_t
#elif defined(_MSC_VER )
  #define __SIMD32_TYPE int32_t
#else
  #error Unknown compiler
#endif

/* Reinterpret the pointer variable 'addr' itself as a pointer to 32-bit words (lvalue). */
#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
/* Convert the address 'addr' to a pointer to a 32-bit word. */
#define __SIMD32_CONST(addr)  ( (__SIMD32_TYPE * )   (addr))
/* Access the 32-bit word located at address 'addr'. */
#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE * )   (addr))
/* Reinterpret the pointer variable 'addr' itself as a pointer to 64-bit words (lvalue). */
#define __SIMD64(addr)        (*(      int64_t **) & (addr))
891 
/* Unit step: 0 when x <= 0, otherwise 1. The whole expansion is parenthesized
   so the conditional cannot absorb operators next to the macro call. */
#define STEP(x) (((x) <= 0) ? 0 : 1)
/* Square of x. The argument is evaluated twice, so avoid side effects. */
#define SQ(x) ((x) * (x))
894 
895 /* SIMD replacement */
896 
897 
898 /**
899   @brief         Read 2 Q15 from Q15 pointer.
900   @param[in]     pQ15      points to input value
901   @return        Q31 value
902  */
read_q15x2(q15_t * pQ15)903 __STATIC_FORCEINLINE q31_t read_q15x2 (
904   q15_t * pQ15)
905 {
906   q31_t val;
907 
908 #ifdef __ARM_FEATURE_UNALIGNED
909   memcpy (&val, pQ15, 4);
910 #else
911   val = (pQ15[1] << 16) | (pQ15[0] & 0x0FFFF) ;
912 #endif
913 
914   return (val);
915 }
916 
917 /**
918   @brief         Read 2 Q15 from Q15 pointer and increment pointer afterwards.
919   @param[in]     pQ15      points to input value
920   @return        Q31 value
921  */
read_q15x2_ia(q15_t ** pQ15)922 __STATIC_FORCEINLINE q31_t read_q15x2_ia (
923   q15_t ** pQ15)
924 {
925   q31_t val;
926 
927 #ifdef __ARM_FEATURE_UNALIGNED
928   memcpy (&val, *pQ15, 4);
929 #else
930   val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
931 #endif
932 
933  *pQ15 += 2;
934  return (val);
935 }
936 
937 /**
938   @brief         Read 2 Q15 from Q15 pointer and decrement pointer afterwards.
939   @param[in]     pQ15      points to input value
940   @return        Q31 value
941  */
read_q15x2_da(q15_t ** pQ15)942 __STATIC_FORCEINLINE q31_t read_q15x2_da (
943   q15_t ** pQ15)
944 {
945   q31_t val;
946 
947 #ifdef __ARM_FEATURE_UNALIGNED
948   memcpy (&val, *pQ15, 4);
949 #else
950   val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
951 #endif
952 
953   *pQ15 -= 2;
954   return (val);
955 }
956 
957 /**
958   @brief         Write 2 Q15 to Q15 pointer and increment pointer afterwards.
959   @param[in]     pQ15      points to input value
960   @param[in]     value     Q31 value
961   @return        none
962  */
write_q15x2_ia(q15_t ** pQ15,q31_t value)963 __STATIC_FORCEINLINE void write_q15x2_ia (
964   q15_t ** pQ15,
965   q31_t    value)
966 {
967   q31_t val = value;
968 #ifdef __ARM_FEATURE_UNALIGNED
969   memcpy (*pQ15, &val, 4);
970 #else
971   (*pQ15)[0] = (val & 0x0FFFF);
972   (*pQ15)[1] = (val >> 16) & 0x0FFFF;
973 #endif
974 
975  *pQ15 += 2;
976 }
977 
978 /**
979   @brief         Write 2 Q15 to Q15 pointer.
980   @param[in]     pQ15      points to input value
981   @param[in]     value     Q31 value
982   @return        none
983  */
write_q15x2(q15_t * pQ15,q31_t value)984 __STATIC_FORCEINLINE void write_q15x2 (
985   q15_t * pQ15,
986   q31_t   value)
987 {
988   q31_t val = value;
989 
990 #ifdef __ARM_FEATURE_UNALIGNED
991   memcpy (pQ15, &val, 4);
992 #else
993   pQ15[0] = val & 0x0FFFF;
994   pQ15[1] = val >> 16;
995 #endif
996 }
997 
998 
999 /**
1000   @brief         Read 4 Q7 from Q7 pointer and increment pointer afterwards.
1001   @param[in]     pQ7       points to input value
1002   @return        Q31 value
1003  */
read_q7x4_ia(q7_t ** pQ7)1004 __STATIC_FORCEINLINE q31_t read_q7x4_ia (
1005   q7_t ** pQ7)
1006 {
1007   q31_t val;
1008 
1009 
1010 #ifdef __ARM_FEATURE_UNALIGNED
1011   memcpy (&val, *pQ7, 4);
1012 #else
1013   val =(((*pQ7)[3] & 0x0FF) << 24)  | (((*pQ7)[2] & 0x0FF) << 16)  | (((*pQ7)[1] & 0x0FF) << 8)  | ((*pQ7)[0] & 0x0FF);
1014 #endif
1015 
1016   *pQ7 += 4;
1017 
1018   return (val);
1019 }
1020 
1021 /**
1022   @brief         Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
1023   @param[in]     pQ7       points to input value
1024   @return        Q31 value
1025  */
read_q7x4_da(q7_t ** pQ7)1026 __STATIC_FORCEINLINE q31_t read_q7x4_da (
1027   q7_t ** pQ7)
1028 {
1029   q31_t val;
1030 #ifdef __ARM_FEATURE_UNALIGNED
1031   memcpy (&val, *pQ7, 4);
1032 #else
1033   val = ((((*pQ7)[3]) & 0x0FF) << 24) | ((((*pQ7)[2]) & 0x0FF) << 16)   | ((((*pQ7)[1]) & 0x0FF) << 8)  | ((*pQ7)[0] & 0x0FF);
1034 #endif
1035   *pQ7 -= 4;
1036 
1037   return (val);
1038 }
1039 
1040 /**
1041   @brief         Write 4 Q7 to Q7 pointer and increment pointer afterwards.
1042   @param[in]     pQ7       points to input value
1043   @param[in]     value     Q31 value
1044   @return        none
1045  */
write_q7x4_ia(q7_t ** pQ7,q31_t value)1046 __STATIC_FORCEINLINE void write_q7x4_ia (
1047   q7_t ** pQ7,
1048   q31_t   value)
1049 {
1050   q31_t val = value;
1051 #ifdef __ARM_FEATURE_UNALIGNED
1052   memcpy (*pQ7, &val, 4);
1053 #else
1054   (*pQ7)[0] = val & 0x0FF;
1055   (*pQ7)[1] = (val >> 8) & 0x0FF;
1056   (*pQ7)[2] = (val >> 16) & 0x0FF;
1057   (*pQ7)[3] = (val >> 24) & 0x0FF;
1058 
1059 #endif
1060   *pQ7 += 4;
1061 }
1062 
/*

Normally these kinds of definitions live in a compiler file
in Core or Core_A.

The MSVC compiler is a special case, though. The goal here is very specific
to CMSIS-DSP: only to allow the use of this library from other
systems like Python or Matlab.

MSVC is not going to be used to cross-compile to ARM, so having an MSVC
compiler file in Core or Core_A would not make sense.

*/
1076 #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
__CLZ(uint32_t data)1077     __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
1078     {
1079       if (data == 0U) { return 32U; }
1080 
1081       uint32_t count = 0U;
1082       uint32_t mask = 0x80000000U;
1083 
1084       while ((data & mask) == 0U)
1085       {
1086         count += 1U;
1087         mask = mask >> 1U;
1088       }
1089       return count;
1090     }
1091 
__SSAT(int32_t val,uint32_t sat)1092   __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
1093   {
1094     if ((sat >= 1U) && (sat <= 32U))
1095     {
1096       const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
1097       const int32_t min = -1 - max ;
1098       if (val > max)
1099       {
1100         return max;
1101       }
1102       else if (val < min)
1103       {
1104         return min;
1105       }
1106     }
1107     return val;
1108   }
1109 
__USAT(int32_t val,uint32_t sat)1110   __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
1111   {
1112     if (sat <= 31U)
1113     {
1114       const uint32_t max = ((1U << sat) - 1U);
1115       if (val > (int32_t)max)
1116       {
1117         return max;
1118       }
1119       else if (val < 0)
1120       {
1121         return 0U;
1122       }
1123     }
1124     return (uint32_t)val;
1125   }
1126 #endif
1127 
#ifndef ARM_MATH_DSP
  /**
   * @brief Portable replacements for the halfword packing intrinsics.
   *
   * __PKHBT(ARG1, ARG2, ARG3): bits 15..0 of ARG1 combined with bits 31..16 of (ARG2 << ARG3).
   * __PKHTB(ARG1, ARG2, ARG3): bits 31..16 of ARG1 combined with bits 15..0 of (ARG2 >> ARG3).
   *
   * Every argument, including the shift count, is parenthesized so that any
   * expression (for example a conditional) can be passed safely.
   */
  #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0x0000FFFF) | \
                                      (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
  #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0xFFFF0000) | \
                                      (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
#endif
1137 
1138    /**
1139    * @brief definition to pack four 8 bit values.
1140    */
1141 #ifndef ARM_MATH_BIG_ENDIAN
1142   #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
1143                                   (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
1144                                   (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
1145                                   (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
1146 #else
1147   #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
1148                                   (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
1149                                   (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
1150                                   (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
1151 #endif
1152 
1153 
1154   /**
1155    * @brief Clips Q63 to Q31 values.
1156    */
clip_q63_to_q31(q63_t x)1157   __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
1158   q63_t x)
1159   {
1160     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
1161       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
1162   }
1163 
1164   /**
1165    * @brief Clips Q63 to Q15 values.
1166    */
clip_q63_to_q15(q63_t x)1167   __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
1168   q63_t x)
1169   {
1170     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
1171       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
1172   }
1173 
1174   /**
1175    * @brief Clips Q31 to Q7 values.
1176    */
clip_q31_to_q7(q31_t x)1177   __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
1178   q31_t x)
1179   {
1180     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
1181       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
1182   }
1183 
1184   /**
1185    * @brief Clips Q31 to Q15 values.
1186    */
clip_q31_to_q15(q31_t x)1187   __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
1188   q31_t x)
1189   {
1190     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
1191       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
1192   }
1193 
1194   /**
1195    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
1196    */
mult32x64(q63_t x,q31_t y)1197   __STATIC_FORCEINLINE q63_t mult32x64(
1198   q63_t x,
1199   q31_t y)
1200   {
1201     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
1202             (((q63_t) (x >> 32)                * y)      )  );
1203   }
1204 
1205   /**
1206    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
1207    */
arm_recip_q31(q31_t in,q31_t * dst,const q31_t * pRecipTable)1208   __STATIC_FORCEINLINE uint32_t arm_recip_q31(
1209         q31_t in,
1210         q31_t * dst,
1211   const q31_t * pRecipTable)
1212   {
1213     q31_t out;
1214     uint32_t tempVal;
1215     uint32_t index, i;
1216     uint32_t signBits;
1217 
1218     if (in > 0)
1219     {
1220       signBits = ((uint32_t) (__CLZ( in) - 1));
1221     }
1222     else
1223     {
1224       signBits = ((uint32_t) (__CLZ(-in) - 1));
1225     }
1226 
1227     /* Convert input sample to 1.31 format */
1228     in = (in << signBits);
1229 
1230     /* calculation of index for initial approximated Val */
1231     index = (uint32_t)(in >> 24);
1232     index = (index & INDEX_MASK);
1233 
1234     /* 1.31 with exp 1 */
1235     out = pRecipTable[index];
1236 
1237     /* calculation of reciprocal value */
1238     /* running approximation for two iterations */
1239     for (i = 0U; i < 2U; i++)
1240     {
1241       tempVal = (uint32_t) (((q63_t) in * out) >> 31);
1242       tempVal = 0x7FFFFFFFu - tempVal;
1243       /*      1.31 with exp 1 */
1244       /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
1245       out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
1246     }
1247 
1248     /* write output */
1249     *dst = out;
1250 
1251     /* return num of signbits of out = 1/in value */
1252     return (signBits + 1U);
1253   }
1254 
1255 
1256   /**
1257    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
1258    */
arm_recip_q15(q15_t in,q15_t * dst,const q15_t * pRecipTable)1259   __STATIC_FORCEINLINE uint32_t arm_recip_q15(
1260         q15_t in,
1261         q15_t * dst,
1262   const q15_t * pRecipTable)
1263   {
1264     q15_t out = 0;
1265     uint32_t tempVal = 0;
1266     uint32_t index = 0, i = 0;
1267     uint32_t signBits = 0;
1268 
1269     if (in > 0)
1270     {
1271       signBits = ((uint32_t)(__CLZ( in) - 17));
1272     }
1273     else
1274     {
1275       signBits = ((uint32_t)(__CLZ(-in) - 17));
1276     }
1277 
1278     /* Convert input sample to 1.15 format */
1279     in = (in << signBits);
1280 
1281     /* calculation of index for initial approximated Val */
1282     index = (uint32_t)(in >>  8);
1283     index = (index & INDEX_MASK);
1284 
1285     /*      1.15 with exp 1  */
1286     out = pRecipTable[index];
1287 
1288     /* calculation of reciprocal value */
1289     /* running approximation for two iterations */
1290     for (i = 0U; i < 2U; i++)
1291     {
1292       tempVal = (uint32_t) (((q31_t) in * out) >> 15);
1293       tempVal = 0x7FFFu - tempVal;
1294       /*      1.15 with exp 1 */
1295       out = (q15_t) (((q31_t) out * tempVal) >> 14);
1296       /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
1297     }
1298 
1299     /* write output */
1300     *dst = out;
1301 
1302     /* return num of signbits of out = 1/in value */
1303     return (signBits + 1);
1304   }
1305 
1306 /**
1307  * @brief Integer exponentiation
1308  * @param[in]    x           value
1309  * @param[in]    nb          integer exponent >= 1
1310  * @return x^nb
1311  *
1312  */
arm_exponent_f32(float32_t x,int32_t nb)1313 __STATIC_INLINE float32_t arm_exponent_f32(float32_t x, int32_t nb)
1314 {
1315     float32_t r = x;
1316     nb --;
1317     while(nb > 0)
1318     {
1319         r = r * x;
1320         nb--;
1321     }
1322     return(r);
1323 }
1324 
1325 /**
1326  * @brief  64-bit to 32-bit unsigned normalization
1327  * @param[in]  in           is input unsigned long long value
1328  * @param[out] normalized   is the 32-bit normalized value
1329  * @param[out] norm         is norm scale
1330  */
arm_norm_64_to_32u(uint64_t in,int32_t * normalized,int32_t * norm)1331 __STATIC_INLINE  void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm)
1332 {
1333     int32_t     n1;
1334     int32_t     hi = (int32_t) (in >> 32);
1335     int32_t     lo = (int32_t) ((in << 32) >> 32);
1336 
1337     n1 = __CLZ(hi) - 32;
1338     if (!n1)
1339     {
1340         /*
1341          * input fits in 32-bit
1342          */
1343         n1 = __CLZ(lo);
1344         if (!n1)
1345         {
1346             /*
1347              * MSB set, need to scale down by 1
1348              */
1349             *norm = -1;
1350             *normalized = (((uint32_t) lo) >> 1);
1351         } else
1352         {
1353             if (n1 == 32)
1354             {
1355                 /*
1356                  * input is zero
1357                  */
1358                 *norm = 0;
1359                 *normalized = 0;
1360             } else
1361             {
1362                 /*
1363                  * 32-bit normalization
1364                  */
1365                 *norm = n1 - 1;
1366                 *normalized = lo << *norm;
1367             }
1368         }
1369     } else
1370     {
1371         /*
1372          * input fits in 64-bit
1373          */
1374         n1 = 1 - n1;
1375         *norm = -n1;
1376         /*
1377          * 64 bit normalization
1378          */
1379         *normalized = (((uint32_t) lo) >> n1) | (hi << (32 - n1));
1380     }
1381 }
1382 
arm_div_q63_to_q31(q63_t num,q31_t den)1383 __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
1384 {
1385     q31_t   result;
1386     uint64_t   absNum;
1387     int32_t   normalized;
1388     int32_t   norm;
1389 
1390     /*
1391      * if sum fits in 32bits
1392      * avoid costly 64-bit division
1393      */
1394     absNum = num > 0 ? num : -num;
1395     arm_norm_64_to_32u(absNum, &normalized, &norm);
1396     if (norm > 0)
1397         /*
1398          * 32-bit division
1399          */
1400         result = (q31_t) num / den;
1401     else
1402         /*
1403          * 64-bit division
1404          */
1405         result = (q31_t) (num / den);
1406 
1407     return result;
1408 }
1409 
1410 
1411 /*
1412  * @brief C custom defined intrinsic functions
1413  */
1414 #if !defined (ARM_MATH_DSP)
1415 
1416   /*
1417    * @brief C custom defined QADD8
1418    */
__QADD8(uint32_t x,uint32_t y)1419   __STATIC_FORCEINLINE uint32_t __QADD8(
1420   uint32_t x,
1421   uint32_t y)
1422   {
1423     q31_t r, s, t, u;
1424 
1425     r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
1426     s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
1427     t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
1428     u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
1429 
1430     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
1431   }
1432 
1433 
1434   /*
1435    * @brief C custom defined QSUB8
1436    */
__QSUB8(uint32_t x,uint32_t y)1437   __STATIC_FORCEINLINE uint32_t __QSUB8(
1438   uint32_t x,
1439   uint32_t y)
1440   {
1441     q31_t r, s, t, u;
1442 
1443     r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
1444     s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
1445     t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
1446     u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
1447 
1448     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
1449   }
1450 
1451 
1452   /*
1453    * @brief C custom defined QADD16
1454    */
__QADD16(uint32_t x,uint32_t y)1455   __STATIC_FORCEINLINE uint32_t __QADD16(
1456   uint32_t x,
1457   uint32_t y)
1458   {
1459 /*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
1460     q31_t r = 0, s = 0;
1461 
1462     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1463     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1464 
1465     return ((uint32_t)((s << 16) | (r      )));
1466   }
1467 
1468 
1469   /*
1470    * @brief C custom defined SHADD16
1471    */
__SHADD16(uint32_t x,uint32_t y)1472   __STATIC_FORCEINLINE uint32_t __SHADD16(
1473   uint32_t x,
1474   uint32_t y)
1475   {
1476     q31_t r, s;
1477 
1478     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1479     s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1480 
1481     return ((uint32_t)((s << 16) | (r      )));
1482   }
1483 
1484 
1485   /*
1486    * @brief C custom defined QSUB16
1487    */
__QSUB16(uint32_t x,uint32_t y)1488   __STATIC_FORCEINLINE uint32_t __QSUB16(
1489   uint32_t x,
1490   uint32_t y)
1491   {
1492     q31_t r, s;
1493 
1494     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1495     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1496 
1497     return ((uint32_t)((s << 16) | (r      )));
1498   }
1499 
1500 
1501   /*
1502    * @brief C custom defined SHSUB16
1503    */
__SHSUB16(uint32_t x,uint32_t y)1504   __STATIC_FORCEINLINE uint32_t __SHSUB16(
1505   uint32_t x,
1506   uint32_t y)
1507   {
1508     q31_t r, s;
1509 
1510     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1511     s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1512 
1513     return ((uint32_t)((s << 16) | (r      )));
1514   }
1515 
1516 
1517   /*
1518    * @brief C custom defined QASX
1519    */
__QASX(uint32_t x,uint32_t y)1520   __STATIC_FORCEINLINE uint32_t __QASX(
1521   uint32_t x,
1522   uint32_t y)
1523   {
1524     q31_t r, s;
1525 
1526     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1527     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1528 
1529     return ((uint32_t)((s << 16) | (r      )));
1530   }
1531 
1532 
1533   /*
1534    * @brief C custom defined SHASX
1535    */
__SHASX(uint32_t x,uint32_t y)1536   __STATIC_FORCEINLINE uint32_t __SHASX(
1537   uint32_t x,
1538   uint32_t y)
1539   {
1540     q31_t r, s;
1541 
1542     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1543     s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1544 
1545     return ((uint32_t)((s << 16) | (r      )));
1546   }
1547 
1548 
1549   /*
1550    * @brief C custom defined QSAX
1551    */
__QSAX(uint32_t x,uint32_t y)1552   __STATIC_FORCEINLINE uint32_t __QSAX(
1553   uint32_t x,
1554   uint32_t y)
1555   {
1556     q31_t r, s;
1557 
1558     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
1559     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
1560 
1561     return ((uint32_t)((s << 16) | (r      )));
1562   }
1563 
1564 
1565   /*
1566    * @brief C custom defined SHSAX
1567    */
__SHSAX(uint32_t x,uint32_t y)1568   __STATIC_FORCEINLINE uint32_t __SHSAX(
1569   uint32_t x,
1570   uint32_t y)
1571   {
1572     q31_t r, s;
1573 
1574     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1575     s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
1576 
1577     return ((uint32_t)((s << 16) | (r      )));
1578   }
1579 
1580 
1581   /*
1582    * @brief C custom defined SMUSDX
1583    */
__SMUSDX(uint32_t x,uint32_t y)1584   __STATIC_FORCEINLINE uint32_t __SMUSDX(
1585   uint32_t x,
1586   uint32_t y)
1587   {
1588     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
1589                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
1590   }
1591 
1592   /*
1593    * @brief C custom defined SMUADX
1594    */
__SMUADX(uint32_t x,uint32_t y)1595   __STATIC_FORCEINLINE uint32_t __SMUADX(
1596   uint32_t x,
1597   uint32_t y)
1598   {
1599     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1600                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
1601   }
1602 
1603 
1604   /*
1605    * @brief C custom defined QADD
1606    */
__QADD(int32_t x,int32_t y)1607   __STATIC_FORCEINLINE int32_t __QADD(
1608   int32_t x,
1609   int32_t y)
1610   {
1611     return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
1612   }
1613 
1614 
1615   /*
1616    * @brief C custom defined QSUB
1617    */
__QSUB(int32_t x,int32_t y)1618   __STATIC_FORCEINLINE int32_t __QSUB(
1619   int32_t x,
1620   int32_t y)
1621   {
1622     return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
1623   }
1624 
1625 
1626   /*
1627    * @brief C custom defined SMLAD
1628    */
__SMLAD(uint32_t x,uint32_t y,uint32_t sum)1629   __STATIC_FORCEINLINE uint32_t __SMLAD(
1630   uint32_t x,
1631   uint32_t y,
1632   uint32_t sum)
1633   {
1634     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1635                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
1636                        ( ((q31_t)sum    )                                  )   ));
1637   }
1638 
1639 
1640   /*
1641    * @brief C custom defined SMLADX
1642    */
__SMLADX(uint32_t x,uint32_t y,uint32_t sum)1643   __STATIC_FORCEINLINE uint32_t __SMLADX(
1644   uint32_t x,
1645   uint32_t y,
1646   uint32_t sum)
1647   {
1648     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1649                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1650                        ( ((q31_t)sum    )                                  )   ));
1651   }
1652 
1653 
1654   /*
1655    * @brief C custom defined SMLSDX
1656    */
__SMLSDX(uint32_t x,uint32_t y,uint32_t sum)1657   __STATIC_FORCEINLINE uint32_t __SMLSDX(
1658   uint32_t x,
1659   uint32_t y,
1660   uint32_t sum)
1661   {
1662     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
1663                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1664                        ( ((q31_t)sum    )                                  )   ));
1665   }
1666 
1667 
1668   /*
1669    * @brief C custom defined SMLALD
1670    */
__SMLALD(uint32_t x,uint32_t y,uint64_t sum)1671   __STATIC_FORCEINLINE uint64_t __SMLALD(
1672   uint32_t x,
1673   uint32_t y,
1674   uint64_t sum)
1675   {
1676 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
1677     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1678                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
1679                        ( ((q63_t)sum    )                                  )   ));
1680   }
1681 
1682 
1683   /*
1684    * @brief C custom defined SMLALDX
1685    */
__SMLALDX(uint32_t x,uint32_t y,uint64_t sum)1686   __STATIC_FORCEINLINE uint64_t __SMLALDX(
1687   uint32_t x,
1688   uint32_t y,
1689   uint64_t sum)
1690   {
1691 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
1692     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
1693                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
1694                        ( ((q63_t)sum    )                                  )   ));
1695   }
1696 
1697 
1698   /*
1699    * @brief C custom defined SMUAD
1700    */
__SMUAD(uint32_t x,uint32_t y)1701   __STATIC_FORCEINLINE uint32_t __SMUAD(
1702   uint32_t x,
1703   uint32_t y)
1704   {
1705     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
1706                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
1707   }
1708 
1709 
1710   /*
1711    * @brief C custom defined SMUSD
1712    */
__SMUSD(uint32_t x,uint32_t y)1713   __STATIC_FORCEINLINE uint32_t __SMUSD(
1714   uint32_t x,
1715   uint32_t y)
1716   {
1717     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
1718                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
1719   }
1720 
1721 
1722   /*
1723    * @brief C custom defined SXTB16
1724    */
__SXTB16(uint32_t x)1725   __STATIC_FORCEINLINE uint32_t __SXTB16(
1726   uint32_t x)
1727   {
1728     return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
1729                        ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
1730   }
1731 
1732   /*
1733    * @brief C custom defined SMMLA
1734    */
__SMMLA(int32_t x,int32_t y,int32_t sum)1735   __STATIC_FORCEINLINE int32_t __SMMLA(
1736   int32_t x,
1737   int32_t y,
1738   int32_t sum)
1739   {
1740     return (sum + (int32_t) (((int64_t) x * y) >> 32));
1741   }
1742 
1743 #endif /* !defined (ARM_MATH_DSP) */
1744 
1745 
1746   /**
1747    * @brief Instance structure for the Q7 FIR filter.
1748    */
1749   typedef struct
1750   {
1751           uint16_t numTaps;        /**< number of filter coefficients in the filter. */
1752           q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1753     const q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1754   } arm_fir_instance_q7;
1755 
1756   /**
1757    * @brief Instance structure for the Q15 FIR filter.
1758    */
1759   typedef struct
1760   {
1761           uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1762           q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1763     const q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1764   } arm_fir_instance_q15;
1765 
1766   /**
1767    * @brief Instance structure for the Q31 FIR filter.
1768    */
1769   typedef struct
1770   {
1771           uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1772           q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1773     const q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
1774   } arm_fir_instance_q31;
1775 
1776   /**
1777    * @brief Instance structure for the floating-point FIR filter.
1778    */
1779   typedef struct
1780   {
1781           uint16_t numTaps;     /**< number of filter coefficients in the filter. */
1782           float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1783     const float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
1784   } arm_fir_instance_f32;
1785 
1786   /**
1787    * @brief Processing function for the Q7 FIR filter.
1788    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
1789    * @param[in]  pSrc       points to the block of input data.
1790    * @param[out] pDst       points to the block of output data.
1791    * @param[in]  blockSize  number of samples to process.
1792    */
1793   void arm_fir_q7(
1794   const arm_fir_instance_q7 * S,
1795   const q7_t * pSrc,
1796         q7_t * pDst,
1797         uint32_t blockSize);
1798 
1799   /**
1800    * @brief  Initialization function for the Q7 FIR filter.
1801    * @param[in,out] S          points to an instance of the Q7 FIR structure.
1802    * @param[in]     numTaps    Number of filter coefficients in the filter.
1803    * @param[in]     pCoeffs    points to the filter coefficients.
1804    * @param[in]     pState     points to the state buffer.
1805    * @param[in]     blockSize  number of samples that are processed.
1806    */
1807   void arm_fir_init_q7(
1808         arm_fir_instance_q7 * S,
1809         uint16_t numTaps,
1810   const q7_t * pCoeffs,
1811         q7_t * pState,
1812         uint32_t blockSize);
1813 
1814   /**
1815    * @brief Processing function for the Q15 FIR filter.
1816    * @param[in]  S          points to an instance of the Q15 FIR structure.
1817    * @param[in]  pSrc       points to the block of input data.
1818    * @param[out] pDst       points to the block of output data.
1819    * @param[in]  blockSize  number of samples to process.
1820    */
1821   void arm_fir_q15(
1822   const arm_fir_instance_q15 * S,
1823   const q15_t * pSrc,
1824         q15_t * pDst,
1825         uint32_t blockSize);
1826 
1827   /**
1828    * @brief Processing function for the fast Q15 FIR filter (fast version).
1829    * @param[in]  S          points to an instance of the Q15 FIR filter structure.
1830    * @param[in]  pSrc       points to the block of input data.
1831    * @param[out] pDst       points to the block of output data.
1832    * @param[in]  blockSize  number of samples to process.
1833    */
1834   void arm_fir_fast_q15(
1835   const arm_fir_instance_q15 * S,
1836   const q15_t * pSrc,
1837         q15_t * pDst,
1838         uint32_t blockSize);
1839 
1840   /**
1841    * @brief  Initialization function for the Q15 FIR filter.
1842    * @param[in,out] S          points to an instance of the Q15 FIR filter structure.
1843    * @param[in]     numTaps    Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1844    * @param[in]     pCoeffs    points to the filter coefficients.
1845    * @param[in]     pState     points to the state buffer.
1846    * @param[in]     blockSize  number of samples that are processed at a time.
1847    * @return     The function returns either
1848    * <code>ARM_MATH_SUCCESS</code> if initialization was successful or
1849    * <code>ARM_MATH_ARGUMENT_ERROR</code> if <code>numTaps</code> is not a supported value.
1850    */
1851   arm_status arm_fir_init_q15(
1852         arm_fir_instance_q15 * S,
1853         uint16_t numTaps,
1854   const q15_t * pCoeffs,
1855         q15_t * pState,
1856         uint32_t blockSize);
1857 
1858   /**
1859    * @brief Processing function for the Q31 FIR filter.
1860    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
1861    * @param[in]  pSrc       points to the block of input data.
1862    * @param[out] pDst       points to the block of output data.
1863    * @param[in]  blockSize  number of samples to process.
1864    */
1865   void arm_fir_q31(
1866   const arm_fir_instance_q31 * S,
1867   const q31_t * pSrc,
1868         q31_t * pDst,
1869         uint32_t blockSize);
1870 
1871   /**
1872    * @brief Processing function for the fast Q31 FIR filter (fast version).
1873    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
1874    * @param[in]  pSrc       points to the block of input data.
1875    * @param[out] pDst       points to the block of output data.
1876    * @param[in]  blockSize  number of samples to process.
1877    */
1878   void arm_fir_fast_q31(
1879   const arm_fir_instance_q31 * S,
1880   const q31_t * pSrc,
1881         q31_t * pDst,
1882         uint32_t blockSize);
1883 
1884   /**
1885    * @brief  Initialization function for the Q31 FIR filter.
1886    * @param[in,out] S          points to an instance of the Q31 FIR structure.
1887    * @param[in]     numTaps    Number of filter coefficients in the filter.
1888    * @param[in]     pCoeffs    points to the filter coefficients.
1889    * @param[in]     pState     points to the state buffer.
1890    * @param[in]     blockSize  number of samples that are processed at a time.
1891    */
1892   void arm_fir_init_q31(
1893         arm_fir_instance_q31 * S,
1894         uint16_t numTaps,
1895   const q31_t * pCoeffs,
1896         q31_t * pState,
1897         uint32_t blockSize);
1898 
1899   /**
1900    * @brief Processing function for the floating-point FIR filter.
1901    * @param[in]  S          points to an instance of the floating-point FIR structure.
1902    * @param[in]  pSrc       points to the block of input data.
1903    * @param[out] pDst       points to the block of output data.
1904    * @param[in]  blockSize  number of samples to process.
1905    */
1906   void arm_fir_f32(
1907   const arm_fir_instance_f32 * S,
1908   const float32_t * pSrc,
1909         float32_t * pDst,
1910         uint32_t blockSize);
1911 
1912   /**
1913    * @brief  Initialization function for the floating-point FIR filter.
1914    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
1915    * @param[in]     numTaps    Number of filter coefficients in the filter.
1916    * @param[in]     pCoeffs    points to the filter coefficients.
1917    * @param[in]     pState     points to the state buffer.
1918    * @param[in]     blockSize  number of samples that are processed at a time.
1919    */
1920   void arm_fir_init_f32(
1921         arm_fir_instance_f32 * S,
1922         uint16_t numTaps,
1923   const float32_t * pCoeffs,
1924         float32_t * pState,
1925         uint32_t blockSize);
1926 
1927   /**
1928    * @brief Instance structure for the Q15 Biquad cascade filter.
1929    */
1930   typedef struct
1931   {
1932           int8_t numStages;        /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages.  NOTE(review): signed here, while arm_biquad_cascade_df1_init_q15() takes a uint8_t numStages. */
1933           q15_t *pState;           /**< Points to the state buffer.  The array is of length 4*numStages. */
1934     const q15_t *pCoeffs;          /**< Points to the array of filter coefficients.  The array is of length 5*numStages. */
1935           int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
1936   } arm_biquad_casd_df1_inst_q15;
1937 
1938   /**
1939    * @brief Instance structure for the Q31 Biquad cascade filter.
1940    */
1941   typedef struct
1942   {
1943           uint32_t numStages;      /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1944           q31_t *pState;           /**< Points to the state buffer.  The array is of length 4*numStages. */
1945     const q31_t *pCoeffs;          /**< Points to the array of filter coefficients.  The array is of length 5*numStages. */
1946           uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample.  NOTE(review): unsigned here, while arm_biquad_cascade_df1_init_q31() takes an int8_t postShift. */
1947   } arm_biquad_casd_df1_inst_q31;
1948 
1949   /**
1950    * @brief Instance structure for the floating-point Biquad cascade filter.
1951    */
1952   typedef struct
1953   {
1954           uint32_t numStages;      /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1955           float32_t *pState;       /**< Points to the state buffer.  The array is of length 4*numStages. */
1956     const float32_t *pCoeffs;      /**< Points to the array of filter coefficients.  The array is of length 5*numStages. */
1957   } arm_biquad_casd_df1_inst_f32;
1958 
1959 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
1960   /**
1961    * @brief Instance structure for the modified Biquad coefficients required by vectorized code.
1962    */
1963   typedef struct
1964   {
1965       float32_t coeffs[8][4]; /**< Modified coefficients, stored inline as an 8x4 array (32 values), not a pointer.  There is one per stage (presumably one such structure per biquad stage; TODO confirm). */
1966   } arm_biquad_mod_coef_f32;
1967 #endif
1968 
1969   /**
1970    * @brief Processing function for the Q15 Biquad cascade filter.
1971    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
1972    * @param[in]  pSrc       points to the block of input data.
1973    * @param[out] pDst       points to the block of output data.
1974    * @param[in]  blockSize  number of samples to process.
1975    */
1976   void arm_biquad_cascade_df1_q15(
1977   const arm_biquad_casd_df1_inst_q15 * S,
1978   const q15_t * pSrc,
1979         q15_t * pDst,
1980         uint32_t blockSize);
1981 
1982   /**
1983    * @brief  Initialization function for the Q15 Biquad cascade filter.
1984    * @param[in,out] S          points to an instance of the Q15 Biquad cascade structure.
1985    * @param[in]     numStages  number of 2nd order stages in the filter.
1986    * @param[in]     pCoeffs    points to the filter coefficients.
1987    * @param[in]     pState     points to the state buffer.
1988    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
1989    */
1990   void arm_biquad_cascade_df1_init_q15(
1991         arm_biquad_casd_df1_inst_q15 * S,
1992         uint8_t numStages,
1993   const q15_t * pCoeffs,
1994         q15_t * pState,
1995         int8_t postShift);
1996 
1997   /**
1998    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1999    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
2000    * @param[in]  pSrc       points to the block of input data.
2001    * @param[out] pDst       points to the block of output data.
2002    * @param[in]  blockSize  number of samples to process.
2003    */
2004   void arm_biquad_cascade_df1_fast_q15(
2005   const arm_biquad_casd_df1_inst_q15 * S,
2006   const q15_t * pSrc,
2007         q15_t * pDst,
2008         uint32_t blockSize);
2009 
2010   /**
2011    * @brief Processing function for the Q31 Biquad cascade filter
2012    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
2013    * @param[in]  pSrc       points to the block of input data.
2014    * @param[out] pDst       points to the block of output data.
2015    * @param[in]  blockSize  number of samples to process.
2016    */
2017   void arm_biquad_cascade_df1_q31(
2018   const arm_biquad_casd_df1_inst_q31 * S,
2019   const q31_t * pSrc,
2020         q31_t * pDst,
2021         uint32_t blockSize);
2022 
2023   /**
2024    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
2025    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
2026    * @param[in]  pSrc       points to the block of input data.
2027    * @param[out] pDst       points to the block of output data.
2028    * @param[in]  blockSize  number of samples to process.
2029    */
2030   void arm_biquad_cascade_df1_fast_q31(
2031   const arm_biquad_casd_df1_inst_q31 * S,
2032   const q31_t * pSrc,
2033         q31_t * pDst,
2034         uint32_t blockSize);
2035 
2036   /**
2037    * @brief  Initialization function for the Q31 Biquad cascade filter.
2038    * @param[in,out] S          points to an instance of the Q31 Biquad cascade structure.
2039    * @param[in]     numStages  number of 2nd order stages in the filter.
2040    * @param[in]     pCoeffs    points to the filter coefficients.
2041    * @param[in]     pState     points to the state buffer.
2042    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
2043    */
2044   void arm_biquad_cascade_df1_init_q31(
2045         arm_biquad_casd_df1_inst_q31 * S,
2046         uint8_t numStages,
2047   const q31_t * pCoeffs,
2048         q31_t * pState,
2049         int8_t postShift);
2050 
2051   /**
2052    * @brief Processing function for the floating-point Biquad cascade filter.
2053    * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
2054    * @param[in]  pSrc       points to the block of input data.
2055    * @param[out] pDst       points to the block of output data.
2056    * @param[in]  blockSize  number of samples to process.
2057    */
2058   void arm_biquad_cascade_df1_f32(
2059   const arm_biquad_casd_df1_inst_f32 * S,
2060   const float32_t * pSrc,
2061         float32_t * pDst,
2062         uint32_t blockSize);
2063 
2064   /**
2065    * @brief  Initialization functions for the floating-point Biquad cascade filter (MVE-specific and generic variants).
2066    * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
2067    * @param[in]     numStages  number of 2nd order stages in the filter.
2068    * @param[in]     pCoeffs    points to the filter coefficients.
2069    * @param[in]     pCoeffsMod points to the modified filter coefficients (arm_biquad_cascade_df1_mve_init_f32 only). NOTE(review): not const-qualified, so possibly written by the init function; confirm direction.
2070    * @param[in]     pState     points to the state buffer.
2071    */
2072 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
2073   void arm_biquad_cascade_df1_mve_init_f32(
2074       arm_biquad_casd_df1_inst_f32 * S,
2075       uint8_t numStages,
2076       const float32_t * pCoeffs,
2077       arm_biquad_mod_coef_f32 * pCoeffsMod,
2078       float32_t * pState);
2079 #endif
2080   /* Generic variant: declared unconditionally; takes the same parameters as documented above, without pCoeffsMod. */
2081   void arm_biquad_cascade_df1_init_f32(
2082         arm_biquad_casd_df1_inst_f32 * S,
2083         uint8_t numStages,
2084   const float32_t * pCoeffs,
2085         float32_t * pState);
2086 
2087 
2088   /**
2089    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
2090    * @param[in]     pSrcA      points to input vector A
2091    * @param[in]     pSrcB      points to input vector B
2092    * @param[out]    pDst       points to output vector
2093    * @param[in]     blockSize  number of samples in each vector
2094    * @return        none
2095    */
2096   void arm_and_u16(
2097     const uint16_t * pSrcA,
2098     const uint16_t * pSrcB,
2099           uint16_t * pDst,
2100           uint32_t blockSize);
2101 
2102   /**
2103    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
2104    * @param[in]     pSrcA      points to input vector A
2105    * @param[in]     pSrcB      points to input vector B
2106    * @param[out]    pDst       points to output vector
2107    * @param[in]     blockSize  number of samples in each vector
2108    * @return        none
2109    */
2110   void arm_and_u32(
2111     const uint32_t * pSrcA,
2112     const uint32_t * pSrcB,
2113           uint32_t * pDst,
2114           uint32_t blockSize);
2115 
2116   /**
2117    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
2118    * @param[in]     pSrcA      points to input vector A
2119    * @param[in]     pSrcB      points to input vector B
2120    * @param[out]    pDst       points to output vector
2121    * @param[in]     blockSize  number of samples in each vector
2122    * @return        none
2123    */
2124   void arm_and_u8(
2125     const uint8_t * pSrcA,
2126     const uint8_t * pSrcB,
2127           uint8_t * pDst,
2128           uint32_t blockSize);
2129 
2130   /**
2131    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
2132    * @param[in]     pSrcA      points to input vector A
2133    * @param[in]     pSrcB      points to input vector B
2134    * @param[out]    pDst       points to output vector
2135    * @param[in]     blockSize  number of samples in each vector
2136    * @return        none
2137    */
2138   void arm_or_u16(
2139     const uint16_t * pSrcA,
2140     const uint16_t * pSrcB,
2141           uint16_t * pDst,
2142           uint32_t blockSize);
2143 
2144   /**
2145    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
2146    * @param[in]     pSrcA      points to input vector A
2147    * @param[in]     pSrcB      points to input vector B
2148    * @param[out]    pDst       points to output vector
2149    * @param[in]     blockSize  number of samples in each vector
2150    * @return        none
2151    */
2152   void arm_or_u32(
2153     const uint32_t * pSrcA,
2154     const uint32_t * pSrcB,
2155           uint32_t * pDst,
2156           uint32_t blockSize);
2157 
2158   /**
2159    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
2160    * @param[in]     pSrcA      points to input vector A
2161    * @param[in]     pSrcB      points to input vector B
2162    * @param[out]    pDst       points to output vector
2163    * @param[in]     blockSize  number of samples in each vector
2164    * @return        none
2165    */
2166   void arm_or_u8(
2167     const uint8_t * pSrcA,
2168     const uint8_t * pSrcB,
2169           uint8_t * pDst,
2170           uint32_t blockSize);
2171 
2172   /**
2173    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
2174    * @param[in]     pSrc       points to input vector
2175    * @param[out]    pDst       points to output vector
2176    * @param[in]     blockSize  number of samples in each vector
2177    * @return        none
2178    */
2179   void arm_not_u16(
2180     const uint16_t * pSrc,
2181           uint16_t * pDst,
2182           uint32_t blockSize);
2183 
2184   /**
2185    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
2186    * @param[in]     pSrc       points to input vector
2187    * @param[out]    pDst       points to output vector
2188    * @param[in]     blockSize  number of samples in each vector
2189    * @return        none
2190    */
2191   void arm_not_u32(
2192     const uint32_t * pSrc,
2193           uint32_t * pDst,
2194           uint32_t blockSize);
2195 
2196   /**
2197    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
2198    * @param[in]     pSrc       points to input vector
2199    * @param[out]    pDst       points to output vector
2200    * @param[in]     blockSize  number of samples in each vector
2201    * @return        none
2202    */
2203   void arm_not_u8(
2204     const uint8_t * pSrc,
2205           uint8_t * pDst,
2206           uint32_t blockSize);
2207 
2208   /**
2209    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
2210    * @param[in]     pSrcA      points to input vector A
2211    * @param[in]     pSrcB      points to input vector B
2212    * @param[out]    pDst       points to output vector
2213    * @param[in]     blockSize  number of samples in each vector
2214    * @return        none
2215    */
2216   void arm_xor_u16(
2217     const uint16_t * pSrcA,
2218     const uint16_t * pSrcB,
2219           uint16_t * pDst,
2220           uint32_t blockSize);
2221 
2222   /**
2223    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
2224    * @param[in]     pSrcA      points to input vector A
2225    * @param[in]     pSrcB      points to input vector B
2226    * @param[out]    pDst       points to output vector
2227    * @param[in]     blockSize  number of samples in each vector
2228    * @return        none
2229    */
2230   void arm_xor_u32(
2231     const uint32_t * pSrcA,
2232     const uint32_t * pSrcB,
2233           uint32_t * pDst,
2234           uint32_t blockSize);
2235 
2236   /**
2237    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
2238    * @param[in]     pSrcA      points to input vector A
2239    * @param[in]     pSrcB      points to input vector B
2240    * @param[out]    pDst       points to output vector
2241    * @param[in]     blockSize  number of samples in each vector
2242    * @return        none
2243    */
2244   void arm_xor_u8(
2245     const uint8_t * pSrcA,
2246     const uint8_t * pSrcB,
2247           uint8_t * pDst,
2248     uint32_t blockSize);
2249 
2250   /**
2251    * @brief Enum for specifying the sorting algorithm
2252    */
2253   typedef enum
2254   {
2255     ARM_SORT_BITONIC   = 0,
2256              /**< Bitonic sort   */
2257     ARM_SORT_BUBBLE    = 1,
2258              /**< Bubble sort    */
2259     ARM_SORT_HEAP      = 2,
2260              /**< Heap sort      */
2261     ARM_SORT_INSERTION = 3,
2262              /**< Insertion sort */
2263     ARM_SORT_QUICK     = 4,
2264              /**< Quick sort     */
2265     ARM_SORT_SELECTION = 5
2266              /**< Selection sort */
2267   } arm_sort_alg;
2268 
2269   /**
2270    * @brief Enum for specifying the sorting order (direction)
2271    */
2272   typedef enum
2273   {
2274     ARM_SORT_DESCENDING = 0,
2275              /**< Descending order (9 to 0) */
2276     ARM_SORT_ASCENDING = 1
2277              /**< Ascending order (0 to 9) */
2278   } arm_sort_dir;
2279 
2280   /**
2281    * @brief Instance structure for the sorting algorithms.
2282    */
2283   typedef struct
2284   {
2285     arm_sort_alg alg;        /**< Sorting algorithm selected */
2286     arm_sort_dir dir;        /**< Sorting order (direction)  */
2287   } arm_sort_instance_f32;
2288 
2289   /** @brief Floating-point sorting function; the algorithm and order are selected through the instance structure (alg and dir fields).
2290    * @param[in]  S          points to an instance of the sorting structure.
2291    * @param[in]  pSrc       points to the block of input data. NOTE(review): not const-qualified; confirm whether the input may be modified.
2292    * @param[out] pDst       points to the block of output data.
2293    * @param[in]  blockSize  number of samples to process.
2294    */
2295   void arm_sort_f32(
2296     const arm_sort_instance_f32 * S,
2297           float32_t * pSrc,
2298           float32_t * pDst,
2299           uint32_t blockSize);
2300 
2301   /** @brief Initialization function for the floating-point sorting instance structure.
2302    * @param[in,out]  S            points to an instance of the sorting structure.
2303    * @param[in]      alg          Selected algorithm.
2304    * @param[in]      dir          Sorting order.
2305    */
2306   void arm_sort_init_f32(
2307     arm_sort_instance_f32 * S,
2308     arm_sort_alg alg,
2309     arm_sort_dir dir);
2310 
2311   /**
2312    * @brief Instance structure for the floating-point merge sort.
2313    */
2314   typedef struct
2315   {
2316     arm_sort_dir dir;        /**< Sorting order (direction)  */
2317     float32_t * buffer;      /**< Working buffer */
2318   } arm_merge_sort_instance_f32;
2319 
2320   /** @brief Floating-point merge sort function; the order and working buffer come from the instance structure.
2321    * @param[in]      S          points to an instance of the sorting structure.
2322    * @param[in,out]  pSrc       points to the block of input data.
2323    * @param[out]     pDst       points to the block of output data
2324    * @param[in]      blockSize  number of samples to process.
2325    */
2326   void arm_merge_sort_f32(
2327     const arm_merge_sort_instance_f32 * S,
2328           float32_t *pSrc,
2329           float32_t *pDst,
2330           uint32_t blockSize);
2331 
2332   /** @brief Initialization function for the floating-point merge sort instance structure.
2333    * @param[in,out]  S            points to an instance of the sorting structure.
2334    * @param[in]      dir          Sorting order.
2335    * @param[in]      buffer       Working buffer.
2336    */
2337   void arm_merge_sort_init_f32(
2338     arm_merge_sort_instance_f32 * S,
2339     arm_sort_dir dir,
2340     float32_t * buffer);
2341 
2342   /**
2343    * @brief Enum for specifying the cubic spline type
2344    */
2345   typedef enum
2346   {
2347     ARM_SPLINE_NATURAL = 0,           /**< Natural spline */
2348     ARM_SPLINE_PARABOLIC_RUNOUT = 1   /**< Parabolic runout spline */
2349   } arm_spline_type;
2350 
2351   /**
2352    * @brief Instance structure for the floating-point cubic spline interpolation.
2353    */
2354   typedef struct
2355   {
2356     arm_spline_type type;      /**< Type (boundary conditions) */
2357     const float32_t * x;       /**< x values */
2358     const float32_t * y;       /**< y values */
2359     uint32_t n_x;              /**< Number of known data points */
2360     float32_t * coeffs;        /**< Coefficients buffer (b,c, and d) */
2361   } arm_spline_instance_f32;
2362 
2363   /**
2364    * @brief Processing function for the floating-point cubic spline interpolation.
2365    * @param[in]  S          points to an instance of the floating-point spline structure.
2366    * @param[in]  xq         points to the x values of the interpolated data points.
2367    * @param[out] pDst       points to the block of output data.
2368    * @param[in]  blockSize  number of samples of output data.
2369    */
2370   void arm_spline_f32(
2371         arm_spline_instance_f32 * S,
2372   const float32_t * xq,
2373         float32_t * pDst,
2374         uint32_t blockSize);
2375 
2376   /**
2377    * @brief Initialization function for the floating-point cubic spline interpolation.
2378    * @param[in,out] S        points to an instance of the floating-point spline structure.
2379    * @param[in]     type     type of cubic spline interpolation (boundary conditions)
2380    * @param[in]     x        points to the x values of the known data points.
2381    * @param[in]     y        points to the y values of the known data points.
2382    * @param[in]     n        number of known data points.
2383    * @param[in]     coeffs   coefficients array for b, c, and d
2384    * @param[in]     tempBuffer   buffer array for internal computations
2385    */
2386   void arm_spline_init_f32(
2387           arm_spline_instance_f32 * S,
2388           arm_spline_type type,
2389     const float32_t * x,
2390     const float32_t * y,
2391           uint32_t n,
2392           float32_t * coeffs,
2393           float32_t * tempBuffer);
2394 
2395   /**
2396    * @brief Instance structure for the floating-point matrix structure.
2397    */
2398   typedef struct
2399   {
2400     uint16_t numRows;     /**< number of rows of the matrix.     */
2401     uint16_t numCols;     /**< number of columns of the matrix.  */
2402     float32_t *pData;     /**< points to the data of the matrix. */
2403   } arm_matrix_instance_f32;
2404 
2405   /**
2406    * @brief Instance structure for the 64-bit floating-point (float64_t) matrix structure.
2407    */
2408   typedef struct
2409   {
2410     uint16_t numRows;     /**< number of rows of the matrix.     */
2411     uint16_t numCols;     /**< number of columns of the matrix.  */
2412     float64_t *pData;     /**< points to the data of the matrix. */
2413   } arm_matrix_instance_f64;
2414 
2415   /**
2416    * @brief Instance structure for the Q15 matrix structure.
2417    */
2418   typedef struct
2419   {
2420     uint16_t numRows;     /**< number of rows of the matrix.     */
2421     uint16_t numCols;     /**< number of columns of the matrix.  */
2422     q15_t *pData;         /**< points to the data of the matrix. */
2423   } arm_matrix_instance_q15;
2424 
2425   /**
2426    * @brief Instance structure for the Q31 matrix structure.
2427    */
2428   typedef struct
2429   {
2430     uint16_t numRows;     /**< number of rows of the matrix.     */
2431     uint16_t numCols;     /**< number of columns of the matrix.  */
2432     q31_t *pData;         /**< points to the data of the matrix. */
2433   } arm_matrix_instance_q31;
2434 
2435   /**
2436    * @brief Floating-point matrix addition.
2437    * @param[in]  pSrcA  points to the first input matrix structure
2438    * @param[in]  pSrcB  points to the second input matrix structure
2439    * @param[out] pDst   points to output matrix structure
2440    * @return     The function returns either
2441    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2442    */
2443 arm_status arm_mat_add_f32(
2444   const arm_matrix_instance_f32 * pSrcA,
2445   const arm_matrix_instance_f32 * pSrcB,
2446         arm_matrix_instance_f32 * pDst);
2447 
2448   /**
2449    * @brief Q15 matrix addition.
2450    * @param[in]   pSrcA  points to the first input matrix structure
2451    * @param[in]   pSrcB  points to the second input matrix structure
2452    * @param[out]  pDst   points to output matrix structure
2453    * @return     The function returns either
2454    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2455    */
2456 arm_status arm_mat_add_q15(
2457   const arm_matrix_instance_q15 * pSrcA,
2458   const arm_matrix_instance_q15 * pSrcB,
2459         arm_matrix_instance_q15 * pDst);
2460 
2461   /**
2462    * @brief Q31 matrix addition.
2463    * @param[in]  pSrcA  points to the first input matrix structure
2464    * @param[in]  pSrcB  points to the second input matrix structure
2465    * @param[out] pDst   points to output matrix structure
2466    * @return     The function returns either
2467    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2468    */
2469 arm_status arm_mat_add_q31(
2470   const arm_matrix_instance_q31 * pSrcA,
2471   const arm_matrix_instance_q31 * pSrcB,
2472         arm_matrix_instance_q31 * pDst);
2473 
2474   /**
2475    * @brief Floating-point, complex, matrix multiplication.
2476    * @param[in]  pSrcA  points to the first input matrix structure
2477    * @param[in]  pSrcB  points to the second input matrix structure
2478    * @param[out] pDst   points to output matrix structure
2479    * @return     The function returns either
2480    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2481    */
2482 arm_status arm_mat_cmplx_mult_f32(
2483   const arm_matrix_instance_f32 * pSrcA,
2484   const arm_matrix_instance_f32 * pSrcB,
2485         arm_matrix_instance_f32 * pDst);
2486 
2487   /**
2488    * @brief Q15, complex, matrix multiplication.
2489    * @param[in]  pSrcA     points to the first input matrix structure
2490    * @param[in]  pSrcB     points to the second input matrix structure
2491    * @param[out] pDst      points to output matrix structure
2492    * @param[in]  pScratch  points to a q15_t scratch array; presumably used for intermediate results, like pState of arm_mat_mult_q15 (TODO confirm use and required size)
2493    * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2494    */
2495 arm_status arm_mat_cmplx_mult_q15(
2496   const arm_matrix_instance_q15 * pSrcA,
2497   const arm_matrix_instance_q15 * pSrcB,
2498         arm_matrix_instance_q15 * pDst,
2499         q15_t * pScratch);
2500 
2501   /**
2502    * @brief Q31, complex, matrix multiplication.
2503    * @param[in]  pSrcA  points to the first input matrix structure
2504    * @param[in]  pSrcB  points to the second input matrix structure
2505    * @param[out] pDst   points to output matrix structure
2506    * @return     The function returns either
2507    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2508    */
2509 arm_status arm_mat_cmplx_mult_q31(
2510   const arm_matrix_instance_q31 * pSrcA,
2511   const arm_matrix_instance_q31 * pSrcB,
2512         arm_matrix_instance_q31 * pDst);
2513 
2514   /**
2515    * @brief Floating-point matrix transpose.
2516    * @param[in]  pSrc  points to the input matrix
2517    * @param[out] pDst  points to the output matrix
2518    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
2519    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2520    */
2521 arm_status arm_mat_trans_f32(
2522   const arm_matrix_instance_f32 * pSrc,
2523         arm_matrix_instance_f32 * pDst);
2524 
2525   /**
2526    * @brief Q15 matrix transpose.
2527    * @param[in]  pSrc  points to the input matrix
2528    * @param[out] pDst  points to the output matrix
2529    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
2530    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2531    */
2532 arm_status arm_mat_trans_q15(
2533   const arm_matrix_instance_q15 * pSrc,
2534         arm_matrix_instance_q15 * pDst);
2535 
2536   /**
2537    * @brief Q31 matrix transpose.
2538    * @param[in]  pSrc  points to the input matrix
2539    * @param[out] pDst  points to the output matrix
2540    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
2541    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2542    */
2543 arm_status arm_mat_trans_q31(
2544   const arm_matrix_instance_q31 * pSrc,
2545         arm_matrix_instance_q31 * pDst);
2546 
2547   /**
2548    * @brief Floating-point matrix multiplication
2549    * @param[in]  pSrcA  points to the first input matrix structure
2550    * @param[in]  pSrcB  points to the second input matrix structure
2551    * @param[out] pDst   points to output matrix structure
2552    * @return     The function returns either
2553    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2554    */
2555 arm_status arm_mat_mult_f32(
2556   const arm_matrix_instance_f32 * pSrcA,
2557   const arm_matrix_instance_f32 * pSrcB,
2558         arm_matrix_instance_f32 * pDst);
2559 
2560   /**
2561    * @brief Q15 matrix multiplication
2562    * @param[in]  pSrcA   points to the first input matrix structure
2563    * @param[in]  pSrcB   points to the second input matrix structure
2564    * @param[out] pDst    points to output matrix structure
2565    * @param[in]  pState  points to the array for storing intermediate results
2566    * @return     The function returns either
2567    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2568    */
2569 arm_status arm_mat_mult_q15(
2570   const arm_matrix_instance_q15 * pSrcA,
2571   const arm_matrix_instance_q15 * pSrcB,
2572         arm_matrix_instance_q15 * pDst,
2573         q15_t * pState);
2574 
2575   /**
2576    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
2577    * @param[in]  pSrcA   points to the first input matrix structure
2578    * @param[in]  pSrcB   points to the second input matrix structure
2579    * @param[out] pDst    points to output matrix structure
2580    * @param[in]  pState  points to the array for storing intermediate results
2581    * @return     The function returns either
2582    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2583    */
2584 arm_status arm_mat_mult_fast_q15(
2585   const arm_matrix_instance_q15 * pSrcA,
2586   const arm_matrix_instance_q15 * pSrcB,
2587         arm_matrix_instance_q15 * pDst,
2588         q15_t * pState);
2589 
2590   /**
2591    * @brief Q31 matrix multiplication
2592    * @param[in]  pSrcA  points to the first input matrix structure
2593    * @param[in]  pSrcB  points to the second input matrix structure
2594    * @param[out] pDst   points to output matrix structure
2595    * @return     The function returns either
2596    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2597    */
2598 arm_status arm_mat_mult_q31(
2599   const arm_matrix_instance_q31 * pSrcA,
2600   const arm_matrix_instance_q31 * pSrcB,
2601         arm_matrix_instance_q31 * pDst);
2602 
2603   /**
2604    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
2605    * @param[in]  pSrcA  points to the first input matrix structure
2606    * @param[in]  pSrcB  points to the second input matrix structure
2607    * @param[out] pDst   points to output matrix structure
2608    * @return     The function returns either
2609    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2610    */
2611 arm_status arm_mat_mult_fast_q31(
2612   const arm_matrix_instance_q31 * pSrcA,
2613   const arm_matrix_instance_q31 * pSrcB,
2614         arm_matrix_instance_q31 * pDst);
2615 
2616   /**
2617    * @brief Floating-point matrix subtraction
2618    * @param[in]  pSrcA  points to the first input matrix structure
2619    * @param[in]  pSrcB  points to the second input matrix structure
2620    * @param[out] pDst   points to output matrix structure
2621    * @return     The function returns either
2622    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2623    */
2624 arm_status arm_mat_sub_f32(
2625   const arm_matrix_instance_f32 * pSrcA,
2626   const arm_matrix_instance_f32 * pSrcB,
2627         arm_matrix_instance_f32 * pDst);
2628 
2629   /**
2630    * @brief Q15 matrix subtraction
2631    * @param[in]  pSrcA  points to the first input matrix structure
2632    * @param[in]  pSrcB  points to the second input matrix structure
2633    * @param[out] pDst   points to output matrix structure
2634    * @return     The function returns either
2635    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2636    */
2637 arm_status arm_mat_sub_q15(
2638   const arm_matrix_instance_q15 * pSrcA,
2639   const arm_matrix_instance_q15 * pSrcB,
2640         arm_matrix_instance_q15 * pDst);
2641 
2642   /**
2643    * @brief Q31 matrix subtraction
2644    * @param[in]  pSrcA  points to the first input matrix structure
2645    * @param[in]  pSrcB  points to the second input matrix structure
2646    * @param[out] pDst   points to output matrix structure
2647    * @return     The function returns either
2648    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
2649    */
2650 arm_status arm_mat_sub_q31(
2651   const arm_matrix_instance_q31 * pSrcA,
2652   const arm_matrix_instance_q31 * pSrcB,
2653         arm_matrix_instance_q31 * pDst);
2654 
2655   /**
2656    * @brief Floating-point matrix scaling.
2657    * @param[in]  pSrc   points to the input matrix structure
2658    * @param[in]  scale  scale factor
2659    * @param[out] pDst   points to the output matrix structure
2660    * @return     arm_status value: either
2661    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
2662    */
2663 arm_status arm_mat_scale_f32(
2664   const arm_matrix_instance_f32 * pSrc,
2665         float32_t scale,
2666         arm_matrix_instance_f32 * pDst);
2667 
2668   /**
2669    * @brief Q15 matrix scaling.
2670    * @param[in]  pSrc        points to the input matrix structure
2671    * @param[in]  scaleFract  fractional portion of the scale factor
2672    * @param[in]  shift       number of bits to shift the result by
2673    * @param[out] pDst        points to the output matrix structure
2674    * @return     arm_status value: either
2675    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
2676    */
2677 arm_status arm_mat_scale_q15(
2678   const arm_matrix_instance_q15 * pSrc,
2679         q15_t scaleFract,
2680         int32_t shift,
2681         arm_matrix_instance_q15 * pDst);
2682 
2683   /**
2684    * @brief Q31 matrix scaling.
2685    * @param[in]  pSrc        points to the input matrix structure
2686    * @param[in]  scaleFract  fractional portion of the scale factor
2687    * @param[in]  shift       number of bits to shift the result by
2688    * @param[out] pDst        points to the output matrix structure
2689    * @return     arm_status value: either
2690    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
2691    */
2692 arm_status arm_mat_scale_q31(
2693   const arm_matrix_instance_q31 * pSrc,
2694         q31_t scaleFract,
2695         int32_t shift,
2696         arm_matrix_instance_q31 * pDst);
2697 
2698   /**
2699    * @brief  Q31 matrix initialization.
2700    * @param[in,out] S         points to an instance of the Q31 matrix structure.
2701    * @param[in]     nRows     number of rows in the matrix.
2702    * @param[in]     nColumns  number of columns in the matrix.
2703    * @param[in]     pData     points to the matrix data array (q31_t elements).
2704    */
2705 void arm_mat_init_q31(
2706         arm_matrix_instance_q31 * S,
2707         uint16_t nRows,
2708         uint16_t nColumns,
2709         q31_t * pData);
2710 
2711   /**
2712    * @brief  Q15 matrix initialization.
2713    * @param[in,out] S         points to an instance of the Q15 matrix structure.
2714    * @param[in]     nRows     number of rows in the matrix.
2715    * @param[in]     nColumns  number of columns in the matrix.
2716    * @param[in]     pData     points to the matrix data array (q15_t elements).
2717    */
2718 void arm_mat_init_q15(
2719         arm_matrix_instance_q15 * S,
2720         uint16_t nRows,
2721         uint16_t nColumns,
2722         q15_t * pData);
2723 
2724   /**
2725    * @brief  Floating-point matrix initialization.
2726    * @param[in,out] S         points to an instance of the floating-point matrix structure.
2727    * @param[in]     nRows     number of rows in the matrix.
2728    * @param[in]     nColumns  number of columns in the matrix.
2729    * @param[in]     pData     points to the matrix data array (float32_t elements).
2730    */
2731 void arm_mat_init_f32(
2732         arm_matrix_instance_f32 * S,
2733         uint16_t nRows,
2734         uint16_t nColumns,
2735         float32_t * pData);
2736 
2737 
2738   /**
2739    * @brief Instance structure for the Q15 PID Control. The member layout depends on whether ARM_MATH_DSP is defined.
2740    */
2741   typedef struct
2742   {
2743           q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd. */
2744 #if !defined (ARM_MATH_DSP)
2745           q15_t A1;           /**< Derived gain; presumably A1 = -Kp - 2Kd as in the Q31/f32 PID structures - TODO confirm. */
2746           q15_t A2;           /**< Derived gain; presumably A2 = Kd as in the Q31/f32 PID structures - TODO confirm. */
2747 #else
2748           q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd. NOTE(review): looks like both gains share this one 32-bit member; which half holds which is not visible here. */
2749 #endif
2750           q15_t state[3];     /**< The state array of length 3. */
2751           q15_t Kp;           /**< The proportional gain. */
2752           q15_t Ki;           /**< The integral gain. */
2753           q15_t Kd;           /**< The derivative gain. */
2754   } arm_pid_instance_q15;
2755 
2756   /**
2757    * @brief Instance structure for the Q31 PID Control: three derived gains, a 3-element state array and the three user gains.
2758    */
2759   typedef struct
2760   {
2761           q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
2762           q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
2763           q31_t A2;            /**< The derived gain, A2 = Kd. */
2764           q31_t state[3];      /**< The state array of length 3. */
2765           q31_t Kp;            /**< The proportional gain. */
2766           q31_t Ki;            /**< The integral gain. */
2767           q31_t Kd;            /**< The derivative gain. */
2768   } arm_pid_instance_q31;
2769 
2770   /**
2771    * @brief Instance structure for the floating-point PID Control: three derived gains, a 3-element state array and the three user gains.
2772    */
2773   typedef struct
2774   {
2775           float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
2776           float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
2777           float32_t A2;          /**< The derived gain, A2 = Kd. */
2778           float32_t state[3];    /**< The state array of length 3. */
2779           float32_t Kp;          /**< The proportional gain. */
2780           float32_t Ki;          /**< The integral gain. */
2781           float32_t Kd;          /**< The derivative gain. */
2782   } arm_pid_instance_f32;
2783 
2784 
2785 
2786   /**
2787    * @brief  Initialization function for the floating-point PID Control.
2788    * @param[in,out] S               points to an instance of the floating-point PID structure. NOTE(review): presumably Kp/Ki/Kd must already be set in S - confirm.
2789    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
2790    */
2791   void arm_pid_init_f32(
2792         arm_pid_instance_f32 * S,
2793         int32_t resetStateFlag);
2794 
2795 
2796   /**
2797    * @brief  Reset function for the floating-point PID Control.
2798    * @param[in,out] S  points to an instance of the floating-point PID Control structure
2799    */
2800   void arm_pid_reset_f32(
2801         arm_pid_instance_f32 * S);
2802 
2803 
2804   /**
2805    * @brief  Initialization function for the Q31 PID Control.
2806    * @param[in,out] S               points to an instance of the Q31 PID structure.
2807    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
2808    */
2809   void arm_pid_init_q31(
2810         arm_pid_instance_q31 * S,
2811         int32_t resetStateFlag);
2812 
2813 
2814   /**
2815    * @brief  Reset function for the Q31 PID Control.
2816    * @param[in,out] S   points to an instance of the Q31 PID Control structure
2817    */
2818 
2819   void arm_pid_reset_q31(
2820         arm_pid_instance_q31 * S);
2821 
2822 
2823   /**
2824    * @brief  Initialization function for the Q15 PID Control.
2825    * @param[in,out] S               points to an instance of the Q15 PID structure.
2826    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
2827    */
2828   void arm_pid_init_q15(
2829         arm_pid_instance_q15 * S,
2830         int32_t resetStateFlag);
2831 
2832 
2833   /**
2834    * @brief  Reset function for the Q15 PID Control.
2835    * @param[in,out] S  points to an instance of the Q15 PID Control structure
2836    */
2837   void arm_pid_reset_q15(
2838         arm_pid_instance_q15 * S);
2839 
2840 
2841   /**
2842    * @brief Instance structure for the floating-point Linear Interpolate function.
2843    */
2844   typedef struct
2845   {
2846           uint32_t nValues;           /**< nValues; presumably the number of entries in the pYData table - TODO confirm. */
2847           float32_t x1;               /**< x1; presumably the x value belonging to the first table entry - TODO confirm. */
2848           float32_t xSpacing;         /**< xSpacing; presumably the x distance between consecutive table entries - TODO confirm. */
2849           float32_t *pYData;          /**< pointer to the table of Y values */
2850   } arm_linear_interp_instance_f32;
2851 
2852   /**
2853    * @brief Instance structure for the floating-point bilinear interpolation function (table dimensions plus a float32_t data pointer).
2854    */
2855   typedef struct
2856   {
2857           uint16_t numRows;   /**< number of rows in the data table. */
2858           uint16_t numCols;   /**< number of columns in the data table. */
2859           float32_t *pData;   /**< points to the data table. */
2860   } arm_bilinear_interp_instance_f32;
2861 
2862    /**
2863    * @brief Instance structure for the Q31 bilinear interpolation function (table dimensions plus a q31_t data pointer).
2864    */
2865   typedef struct
2866   {
2867           uint16_t numRows;   /**< number of rows in the data table. */
2868           uint16_t numCols;   /**< number of columns in the data table. */
2869           q31_t *pData;       /**< points to the data table. */
2870   } arm_bilinear_interp_instance_q31;
2871 
2872    /**
2873    * @brief Instance structure for the Q15 bilinear interpolation function (table dimensions plus a q15_t data pointer).
2874    */
2875   typedef struct
2876   {
2877           uint16_t numRows;   /**< number of rows in the data table. */
2878           uint16_t numCols;   /**< number of columns in the data table. */
2879           q15_t *pData;       /**< points to the data table. */
2880   } arm_bilinear_interp_instance_q15;
2881 
2882    /**
2883    * @brief Instance structure for the Q7 bilinear interpolation function (table dimensions plus a q7_t data pointer).
2884    */
2885   typedef struct
2886   {
2887           uint16_t numRows;   /**< number of rows in the data table. */
2888           uint16_t numCols;   /**< number of columns in the data table. */
2889           q7_t *pData;        /**< points to the data table. */
2890   } arm_bilinear_interp_instance_q7;
2891 
2892 
2893   /**
2894    * @brief Q7 vector multiplication.
2895    * @param[in]  pSrcA      points to the first input vector
2896    * @param[in]  pSrcB      points to the second input vector
2897    * @param[out] pDst       points to the output vector
2898    * @param[in]  blockSize  number of samples in each vector (applies to pSrcA, pSrcB and pDst)
2899    */
2900   void arm_mult_q7(
2901   const q7_t * pSrcA,
2902   const q7_t * pSrcB,
2903         q7_t * pDst,
2904         uint32_t blockSize);
2905 
2906 
2907   /**
2908    * @brief Q15 vector multiplication.
2909    * @param[in]  pSrcA      points to the first input vector
2910    * @param[in]  pSrcB      points to the second input vector
2911    * @param[out] pDst       points to the output vector
2912    * @param[in]  blockSize  number of samples in each vector (applies to pSrcA, pSrcB and pDst)
2913    */
2914   void arm_mult_q15(
2915   const q15_t * pSrcA,
2916   const q15_t * pSrcB,
2917         q15_t * pDst,
2918         uint32_t blockSize);
2919 
2920 
2921   /**
2922    * @brief Q31 vector multiplication.
2923    * @param[in]  pSrcA      points to the first input vector
2924    * @param[in]  pSrcB      points to the second input vector
2925    * @param[out] pDst       points to the output vector
2926    * @param[in]  blockSize  number of samples in each vector (applies to pSrcA, pSrcB and pDst)
2927    */
2928   void arm_mult_q31(
2929   const q31_t * pSrcA,
2930   const q31_t * pSrcB,
2931         q31_t * pDst,
2932         uint32_t blockSize);
2933 
2934 
2935   /**
2936    * @brief Floating-point vector multiplication.
2937    * @param[in]  pSrcA      points to the first input vector
2938    * @param[in]  pSrcB      points to the second input vector
2939    * @param[out] pDst       points to the output vector
2940    * @param[in]  blockSize  number of samples in each vector (applies to pSrcA, pSrcB and pDst)
2941    */
2942   void arm_mult_f32(
2943   const float32_t * pSrcA,
2944   const float32_t * pSrcB,
2945         float32_t * pDst,
2946         uint32_t blockSize);
2947 
2948 
2949   /**
2950    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
2951    */
2952   typedef struct
2953   {
2954           uint16_t fftLen;                 /**< length of the FFT. */
2955           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2956           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2957     const q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
2958     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2959           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2960           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2961   } arm_cfft_radix2_instance_q15;
2962 
2963 /* Deprecated. Declares the init routine for arm_cfft_radix2_instance_q15; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_q15 - confirm. */
2964   arm_status arm_cfft_radix2_init_q15(
2965         arm_cfft_radix2_instance_q15 * S,
2966         uint16_t fftLen,
2967         uint8_t ifftFlag,
2968         uint8_t bitReverseFlag);
2969 
2970 /* Deprecated. Declares the radix-2 Q15 CFFT/CIFFT routine: S is a read-only instance, pSrc a writable q15_t buffer (presumably transformed in place - confirm). */
2971   void arm_cfft_radix2_q15(
2972   const arm_cfft_radix2_instance_q15 * S,
2973         q15_t * pSrc);
2974 
2975 
2976   /**
2977    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
2978    */
2979   typedef struct
2980   {
2981           uint16_t fftLen;                 /**< length of the FFT. */
2982           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2983           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2984     const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
2985     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2986           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2987           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2988   } arm_cfft_radix4_instance_q15;
2989 
2990 /* Deprecated. Declares the init routine for arm_cfft_radix4_instance_q15; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_q15 - confirm. */
2991   arm_status arm_cfft_radix4_init_q15(
2992         arm_cfft_radix4_instance_q15 * S,
2993         uint16_t fftLen,
2994         uint8_t ifftFlag,
2995         uint8_t bitReverseFlag);
2996 
2997 /* Deprecated. Declares the radix-4 Q15 CFFT/CIFFT routine: S is a read-only instance, pSrc a writable q15_t buffer (presumably transformed in place - confirm). */
2998   void arm_cfft_radix4_q15(
2999   const arm_cfft_radix4_instance_q15 * S,
3000         q15_t * pSrc);
3001 
3002   /**
3003    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
3004    */
3005   typedef struct
3006   {
3007           uint16_t fftLen;                 /**< length of the FFT. */
3008           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
3009           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
3010     const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
3011     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
3012           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3013           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
3014   } arm_cfft_radix2_instance_q31;
3015 
3016 /* Deprecated. Declares the init routine for arm_cfft_radix2_instance_q31; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_q31 - confirm. */
3017   arm_status arm_cfft_radix2_init_q31(
3018         arm_cfft_radix2_instance_q31 * S,
3019         uint16_t fftLen,
3020         uint8_t ifftFlag,
3021         uint8_t bitReverseFlag);
3022 
3023 /* Deprecated. Declares the radix-2 Q31 CFFT/CIFFT routine: S is a read-only instance, pSrc a writable q31_t buffer (presumably transformed in place - confirm). */
3024   void arm_cfft_radix2_q31(
3025   const arm_cfft_radix2_instance_q31 * S,
3026         q31_t * pSrc);
3027 
3028   /**
3029    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
3030    */
3031   typedef struct
3032   {
3033           uint16_t fftLen;                 /**< length of the FFT. */
3034           uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
3035           uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
3036     const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
3037     const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
3038           uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3039           uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
3040   } arm_cfft_radix4_instance_q31;
3041 
3042 /* Deprecated. Declares the radix-4 Q31 CFFT/CIFFT routine: S is a read-only instance, pSrc a writable q31_t buffer (presumably transformed in place - confirm). */
3043   void arm_cfft_radix4_q31(
3044   const arm_cfft_radix4_instance_q31 * S,
3045         q31_t * pSrc);
3046 
3047 /* Deprecated. Declares the init routine for arm_cfft_radix4_instance_q31; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_q31 - confirm. */
3048   arm_status arm_cfft_radix4_init_q31(
3049         arm_cfft_radix4_instance_q31 * S,
3050         uint16_t fftLen,
3051         uint8_t ifftFlag,
3052         uint8_t bitReverseFlag);
3053 
3054   /**
3055    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
3056    */
3057   typedef struct
3058   {
3059           uint16_t fftLen;                   /**< length of the FFT. */
3060           uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
3061           uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
3062     const float32_t *pTwiddle;               /**< points to the twiddle factor table. */
3063     const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
3064           uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3065           uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
3066           float32_t onebyfftLen;             /**< value of 1/fftLen. */
3067   } arm_cfft_radix2_instance_f32;
3068 
3069 /* Deprecated. Declares the init routine for arm_cfft_radix2_instance_f32; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_f32 - confirm. */
3070   arm_status arm_cfft_radix2_init_f32(
3071         arm_cfft_radix2_instance_f32 * S,
3072         uint16_t fftLen,
3073         uint8_t ifftFlag,
3074         uint8_t bitReverseFlag);
3075 
3076 /* Deprecated. Declares the radix-2 floating-point CFFT/CIFFT routine: S is a read-only instance, pSrc a writable float32_t buffer (presumably transformed in place - confirm). */
3077   void arm_cfft_radix2_f32(
3078   const arm_cfft_radix2_instance_f32 * S,
3079         float32_t * pSrc);
3080 
3081   /**
3082    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
3083    */
3084   typedef struct
3085   {
3086           uint16_t fftLen;                   /**< length of the FFT. */
3087           uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
3088           uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
3089     const float32_t *pTwiddle;               /**< points to the twiddle factor table. */
3090     const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
3091           uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3092           uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
3093           float32_t onebyfftLen;             /**< value of 1/fftLen. */
3094   } arm_cfft_radix4_instance_f32;
3095 
3096 /* Deprecated. Declares the init routine for arm_cfft_radix4_instance_f32; the parameter names match the fftLen / ifftFlag / bitReverseFlag members above. NOTE(review): presumably superseded by arm_cfft_f32 - confirm. */
3097   arm_status arm_cfft_radix4_init_f32(
3098         arm_cfft_radix4_instance_f32 * S,
3099         uint16_t fftLen,
3100         uint8_t ifftFlag,
3101         uint8_t bitReverseFlag);
3102 
3103 /* Deprecated. Declares the radix-4 floating-point CFFT/CIFFT routine: S is a read-only instance, pSrc a writable float32_t buffer (presumably transformed in place - confirm). */
3104   void arm_cfft_radix4_f32(
3105   const arm_cfft_radix4_instance_f32 * S,
3106         float32_t * pSrc);
3107 
3108   /**
3109    * @brief Instance structure for the Q15 fixed-point CFFT/CIFFT function. Extra twiddle members exist only when ARM_MATH_MVEI is defined.
3110    */
3111   typedef struct
3112   {
3113           uint16_t fftLen;                   /**< length of the FFT. */
3114     const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
3115     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
3116           uint16_t bitRevLength;             /**< bit reversal table length. */
3117 #if defined(ARM_MATH_MVEI) /* NOTE(review): the trailing backslashes on the member lines below only splice those lines together; they look like leftovers from a macro body - confirm they can be dropped. */
3118    const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */                                                       \
3119    const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */                                                       \
3120    const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */                                                       \
3121    const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */                                                                   \
3122    const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */                                                                   \
3123    const q15_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
3124 #endif
3125   } arm_cfft_instance_q15;
3126 /** @brief Declares the init routine for arm_cfft_instance_q15: takes a writable instance and the FFT length, returns an arm_status. NOTE(review): the supported fftLen values are not visible here - confirm against the implementation. */
3127 arm_status arm_cfft_init_q15(
3128   arm_cfft_instance_q15 * S,
3129   uint16_t fftLen);
3130 /** @brief Declares the Q15 CFFT/CIFFT routine: S is a read-only instance, p1 a writable q15_t buffer. NOTE(review): ifftFlag / bitReverseFlag presumably follow the 0/1 convention documented on the radix-2/radix-4 instance structures - confirm. */
3131 void arm_cfft_q15(
3132     const arm_cfft_instance_q15 * S,
3133           q15_t * p1,
3134           uint8_t ifftFlag,
3135           uint8_t bitReverseFlag);
3136 
3137   /**
3138    * @brief Instance structure for the Q31 fixed-point CFFT/CIFFT function. Extra twiddle members exist only when ARM_MATH_MVEI is defined.
3139    */
3140   typedef struct
3141   {
3142           uint16_t fftLen;                   /**< length of the FFT. */
3143     const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
3144     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
3145           uint16_t bitRevLength;             /**< bit reversal table length. */
3146 #if defined(ARM_MATH_MVEI) /* NOTE(review): the trailing backslashes on the member lines below only splice those lines together; they look like leftovers from a macro body - confirm they can be dropped. */
3147    const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */                                                       \
3148    const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */                                                       \
3149    const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */                                                       \
3150    const q31_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */                                                                   \
3151    const q31_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */                                                                   \
3152    const q31_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
3153 #endif
3154   } arm_cfft_instance_q31;
3155 /** @brief Declares the init routine for arm_cfft_instance_q31: takes a writable instance and the FFT length, returns an arm_status. NOTE(review): the supported fftLen values are not visible here - confirm against the implementation. */
3156 arm_status arm_cfft_init_q31(
3157   arm_cfft_instance_q31 * S,
3158   uint16_t fftLen);
3159 /** @brief Declares the Q31 CFFT/CIFFT routine: S is a read-only instance, p1 a writable q31_t buffer. NOTE(review): ifftFlag / bitReverseFlag presumably follow the 0/1 convention documented on the radix-2/radix-4 instance structures - confirm. */
3160 void arm_cfft_q31(
3161     const arm_cfft_instance_q31 * S,
3162           q31_t * p1,
3163           uint8_t ifftFlag,
3164           uint8_t bitReverseFlag);
3165 
3166   /**
3167    * @brief Instance structure for the floating-point CFFT/CIFFT function. Extra twiddle members exist only when ARM_MATH_MVEF is defined and ARM_MATH_AUTOVECTORIZE is not.
3168    */
3169   typedef struct
3170   {
3171           uint16_t fftLen;                   /**< length of the FFT. */
3172     const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
3173     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
3174           uint16_t bitRevLength;             /**< bit reversal table length. */
3175 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) /* NOTE(review): the trailing backslashes on the member lines below only splice those lines together; they look like leftovers from a macro body - confirm they can be dropped. */
3176    const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */                                                       \
3177    const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */                                                       \
3178    const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */                                                       \
3179    const float32_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */                                                                   \
3180    const float32_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */                                                                   \
3181    const float32_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
3182 #endif
3183   } arm_cfft_instance_f32;
3184 
3185   /** @brief Declares the init routine for arm_cfft_instance_f32: takes a writable instance and the FFT length, returns an arm_status. NOTE(review): the supported fftLen values are not visible here - confirm against the implementation. */
3186   arm_status arm_cfft_init_f32(
3187   arm_cfft_instance_f32 * S,
3188   uint16_t fftLen);
3189   /** @brief Declares the floating-point CFFT/CIFFT routine: S is a read-only instance, p1 a writable float32_t buffer. NOTE(review): ifftFlag / bitReverseFlag presumably follow the 0/1 convention documented on the radix-2/radix-4 instance structures - confirm. */
3190   void arm_cfft_f32(
3191   const arm_cfft_instance_f32 * S,
3192         float32_t * p1,
3193         uint8_t ifftFlag,
3194         uint8_t bitReverseFlag);
3195 
3196 
3197   /**
3198    * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function.
3199    */
3200   typedef struct
3201   {
3202           uint16_t fftLen;                   /**< length of the FFT. */
3203     const float64_t *pTwiddle;         /**< points to the Twiddle factor table. */
3204     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
3205           uint16_t bitRevLength;             /**< bit reversal table length. */
3206   } arm_cfft_instance_f64;
3207   /** @brief Declares the double-precision CFFT/CIFFT routine: S is a read-only instance, p1 a writable float64_t buffer. NOTE(review): ifftFlag / bitReverseFlag presumably follow the 0/1 convention documented on the radix-2/radix-4 instance structures - confirm. */
3208   void arm_cfft_f64(
3209   const arm_cfft_instance_f64 * S,
3210         float64_t * p1,
3211         uint8_t ifftFlag,
3212         uint8_t bitReverseFlag);
3213 
3214   /**
3215    * @brief Instance structure for the Q15 RFFT/RIFFT function. The complex FFT instance is embedded by value when ARM_MATH_MVEI is defined and referenced by pointer otherwise.
3216    */
3217   typedef struct
3218   {
3219           uint32_t fftLenReal;                      /**< length of the real FFT. */
3220           uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
3221           uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
3222           uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3223     const q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
3224     const q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
3225 #if defined(ARM_MATH_MVEI)
3226     arm_cfft_instance_q15 cfftInst;           /**< embedded complex FFT instance (ARM_MATH_MVEI builds). */
3227 #else
3228     const arm_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
3229 #endif
3230   } arm_rfft_instance_q15;
3231   /** @brief Declares the init routine for arm_rfft_instance_q15; returns an arm_status. The flags are passed as uint32_t here although the matching structure members are uint8_t. */
3232   arm_status arm_rfft_init_q15(
3233         arm_rfft_instance_q15 * S,
3234         uint32_t fftLenReal,
3235         uint32_t ifftFlagR,
3236         uint32_t bitReverseFlag);
3237   /** @brief Declares the Q15 RFFT/RIFFT routine: S is a read-only instance; pSrc and pDst are both writable q15_t buffers. NOTE(review): required buffer lengths are not visible here - confirm. */
3238   void arm_rfft_q15(
3239   const arm_rfft_instance_q15 * S,
3240         q15_t * pSrc,
3241         q15_t * pDst);
3242 
3243   /**
3244    * @brief Instance structure for the Q31 RFFT/RIFFT function. The complex FFT instance is embedded by value when ARM_MATH_MVEI is defined and referenced by pointer otherwise.
3245    */
3246   typedef struct
3247   {
3248           uint32_t fftLenReal;                        /**< length of the real FFT. */
3249           uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
3250           uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
3251           uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3252     const q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
3253     const q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
3254 #if defined(ARM_MATH_MVEI)
3255     arm_cfft_instance_q31 cfftInst;             /**< embedded complex FFT instance (ARM_MATH_MVEI builds). */
3256 #else
3257     const arm_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
3258 #endif
3259   } arm_rfft_instance_q31;
3260   /** @brief Declares the init routine for arm_rfft_instance_q31; returns an arm_status. The flags are passed as uint32_t here although the matching structure members are uint8_t. */
3261   arm_status arm_rfft_init_q31(
3262         arm_rfft_instance_q31 * S,
3263         uint32_t fftLenReal,
3264         uint32_t ifftFlagR,
3265         uint32_t bitReverseFlag);
3266   /** @brief Declares the Q31 RFFT/RIFFT routine: S is a read-only instance; pSrc and pDst are both writable q31_t buffers. NOTE(review): required buffer lengths are not visible here - confirm. */
3267   void arm_rfft_q31(
3268   const arm_rfft_instance_q31 * S,
3269         q31_t * pSrc,
3270         q31_t * pDst);
3271 
3272   /**
3273    * @brief Instance structure for the floating-point RFFT/RIFFT function. It references a radix-4 complex FFT instance (arm_cfft_radix4_instance_f32).
3274    */
3275   typedef struct
3276   {
3277           uint32_t fftLenReal;                        /**< length of the real FFT. */
3278           uint16_t fftLenBy2;                         /**< length of the complex FFT. */
3279           uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
3280           uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
3281           uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
3282     const float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
3283     const float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
3284           arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
3285   } arm_rfft_instance_f32;
3286   /** @brief Declares the init routine for arm_rfft_instance_f32; it additionally takes a writable arm_cfft_radix4_instance_f32 (S_CFFT) and returns an arm_status. NOTE(review): S_CFFT is presumably what the pCfft member ends up pointing to - confirm. */
3287   arm_status arm_rfft_init_f32(
3288         arm_rfft_instance_f32 * S,
3289         arm_cfft_radix4_instance_f32 * S_CFFT,
3290         uint32_t fftLenReal,
3291         uint32_t ifftFlagR,
3292         uint32_t bitReverseFlag);
3293   /** @brief Declares the floating-point RFFT/RIFFT routine: S is a read-only instance; pSrc and pDst are both writable float32_t buffers. NOTE(review): required buffer lengths are not visible here - confirm. */
3294   void arm_rfft_f32(
3295   const arm_rfft_instance_f32 * S,
3296         float32_t * pSrc,
3297         float32_t * pDst);
3298 
3299   /**
3300    * @brief Instance structure for the Double Precision Floating-point fast RFFT/RIFFT function (embeds an arm_cfft_instance_f64 by value).
3301    */
3302 typedef struct
3303   {
3304           arm_cfft_instance_f64 Sint;      /**< Internal CFFT structure. */
3305           uint16_t fftLenRFFT;             /**< length of the real sequence */
3306     const float64_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
3307   } arm_rfft_fast_instance_f64 ;
3308 /** @brief Declares the init routine for arm_rfft_fast_instance_f64: takes a writable instance and an FFT length, returns an arm_status. NOTE(review): the supported fftLen values are not visible here - confirm. */
3309 arm_status arm_rfft_fast_init_f64 (
3310          arm_rfft_fast_instance_f64 * S,
3311          uint16_t fftLen);
3312 
3313 /** @brief Declares the double-precision fast RFFT/RIFFT routine with input buffer p and output buffer pOut (both writable float64_t). NOTE(review): S is not const-qualified here, unlike arm_rfft_fast_f32 - confirm whether the instance is modified; ifftFlag presumably selects forward (0) or inverse (1). */
3314 void arm_rfft_fast_f64(
3315     arm_rfft_fast_instance_f64 * S,
3316     float64_t * p, float64_t * pOut,
3317     uint8_t ifftFlag);
3318 
3319 
3320   /**
3321    * @brief Instance structure for the floating-point fast RFFT/RIFFT function (embeds an arm_cfft_instance_f32 by value).
3322    */
3323 typedef struct
3324   {
3325           arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
3326           uint16_t fftLenRFFT;             /**< length of the real sequence */
3327     const float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
3328   } arm_rfft_fast_instance_f32 ;
3329 /** @brief Declares the init routine for arm_rfft_fast_instance_f32: takes a writable instance and an FFT length, returns an arm_status. NOTE(review): the supported fftLen values are not visible here - confirm. */
3330 arm_status arm_rfft_fast_init_f32 (
3331          arm_rfft_fast_instance_f32 * S,
3332          uint16_t fftLen);
3333 
3334   /** @brief Declares the floating-point fast RFFT/RIFFT routine: S is a read-only instance, p the input buffer and pOut the output buffer (both writable float32_t). NOTE(review): ifftFlag presumably selects forward (0) or inverse (1) - confirm. */
3335   void arm_rfft_fast_f32(
3336         const arm_rfft_fast_instance_f32 * S,
3337         float32_t * p, float32_t * pOut,
3338         uint8_t ifftFlag);
3339 
3340   /**
3341    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
3342    */
3343   typedef struct
3344   {
3345           uint16_t N;                          /**< length of the DCT4. */
3346           uint16_t Nby2;                       /**< half of the length of the DCT4. */
3347           float32_t normalize;                 /**< normalizing factor. */
3348     const float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
3349     const float32_t *pCosFactor;               /**< points to the cosFactor table. */
3350           arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
3351           arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
3352   } arm_dct4_instance_f32;
3353 
3354 
3355   /**
3356    * @brief  Initialization function for the floating-point DCT4/IDCT4.
3357    * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
3358    * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
3359    * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
3360    * @param[in]     N          length of the DCT4.
3361    * @param[in]     Nby2       half of the length of the DCT4.
3362    * @param[in]     normalize  normalizing factor.
3363    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
3364    */
3365   arm_status arm_dct4_init_f32(
3366         arm_dct4_instance_f32 * S,
3367         arm_rfft_instance_f32 * S_RFFT,
3368         arm_cfft_radix4_instance_f32 * S_CFFT,
3369         uint16_t N,
3370         uint16_t Nby2,
3371         float32_t normalize);
3372 
3373 
3374   /**
3375    * @brief Processing function for the floating-point DCT4/IDCT4.
3376    * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure (read-only).
3377    * @param[in]     pState         points to the state buffer (passed as a non-const pointer).
3378    * @param[in,out] pInlineBuffer  points to the in-place buffer: holds the input on entry and the output on return.
3379    */
3380   void arm_dct4_f32(
3381   const arm_dct4_instance_f32 * S,
3382         float32_t * pState,
3383         float32_t * pInlineBuffer);
3384 
3385 
3386   /**
3387    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
        *
        * A pointer to this structure is taken by arm_dct4_init_q31() (non-const)
        * and by arm_dct4_q31() (const).
3388    */
3389   typedef struct
3390   {
3391           uint16_t N;                          /**< length of the DCT4. */
3392           uint16_t Nby2;                       /**< half of the length of the DCT4. */
3393           q31_t normalize;                     /**< normalizing factor. */
3394     const q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
3395     const q31_t *pCosFactor;                   /**< points to the cosFactor table. */
3396           arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
3397           arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
3398   } arm_dct4_instance_q31;
3399 
3400 
3401   /**
3402    * @brief  Initialization function for the Q31 DCT4/IDCT4.
3403    * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
3404    * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
3405    * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
3406    * @param[in]     N          length of the DCT4.
3407    * @param[in]     Nby2       half of the length of the DCT4.
3408    * @param[in]     normalize  normalizing factor.
3409    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
3410    */
3411   arm_status arm_dct4_init_q31(
3412         arm_dct4_instance_q31 * S,
3413         arm_rfft_instance_q31 * S_RFFT,
3414         arm_cfft_radix4_instance_q31 * S_CFFT,
3415         uint16_t N,
3416         uint16_t Nby2,
3417         q31_t normalize);
3418 
3419 
3420   /**
3421    * @brief Processing function for the Q31 DCT4/IDCT4.
3422    * @param[in]     S              points to an instance of the Q31 DCT4/IDCT4 structure (read-only).
3423    * @param[in]     pState         points to the state buffer (passed as a non-const pointer).
3424    * @param[in,out] pInlineBuffer  points to the in-place buffer: holds the input on entry and the output on return.
3425    */
3426   void arm_dct4_q31(
3427   const arm_dct4_instance_q31 * S,
3428         q31_t * pState,
3429         q31_t * pInlineBuffer);
3430 
3431 
3432   /**
3433    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
        *
        * A pointer to this structure is taken by arm_dct4_init_q15() (non-const)
        * and by arm_dct4_q15() (const).
3434    */
3435   typedef struct
3436   {
3437           uint16_t N;                          /**< length of the DCT4. */
3438           uint16_t Nby2;                       /**< half of the length of the DCT4. */
3439           q15_t normalize;                     /**< normalizing factor. */
3440     const q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
3441     const q15_t *pCosFactor;                   /**< points to the cosFactor table. */
3442           arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
3443           arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
3444   } arm_dct4_instance_q15;
3445 
3446 
3447   /**
3448    * @brief  Initialization function for the Q15 DCT4/IDCT4.
3449    * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
3450    * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
3451    * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
3452    * @param[in]     N          length of the DCT4.
3453    * @param[in]     Nby2       half of the length of the DCT4.
3454    * @param[in]     normalize  normalizing factor.
3455    * @return      arm_status value: ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
3456    */
3457   arm_status arm_dct4_init_q15(
3458         arm_dct4_instance_q15 * S,
3459         arm_rfft_instance_q15 * S_RFFT,
3460         arm_cfft_radix4_instance_q15 * S_CFFT,
3461         uint16_t N,
3462         uint16_t Nby2,
3463         q15_t normalize);
3464 
3465 
3466   /**
3467    * @brief Processing function for the Q15 DCT4/IDCT4.
3468    * @param[in]     S              points to an instance of the Q15 DCT4/IDCT4 structure (read-only).
3469    * @param[in]     pState         points to the state buffer (passed as a non-const pointer).
3470    * @param[in,out] pInlineBuffer  points to the in-place buffer: holds the input on entry and the output on return.
3471    */
3472   void arm_dct4_q15(
3473   const arm_dct4_instance_q15 * S,
3474         q15_t * pState,
3475         q15_t * pInlineBuffer);
3476 
3477 
3478   /**
3479    * @brief Element-wise addition of two floating-point vectors.
3480    * @param[in]  pSrcA      points to the first input vector (read-only)
3481    * @param[in]  pSrcB      points to the second input vector (read-only)
3482    * @param[out] pDst       points to the output vector that receives the sums
3483    * @param[in]  blockSize  number of samples in each vector
3484    */
3485   void arm_add_f32(
3486   const float32_t * pSrcA,
3487   const float32_t * pSrcB,
3488         float32_t * pDst,
3489         uint32_t blockSize);
3490 
3491 
3492   /**
3493    * @brief Element-wise addition of two Q7 vectors.
3494    * @param[in]  pSrcA      points to the first input vector (read-only)
3495    * @param[in]  pSrcB      points to the second input vector (read-only)
3496    * @param[out] pDst       points to the output vector that receives the sums
3497    * @param[in]  blockSize  number of samples in each vector
3498    */
3499   void arm_add_q7(
3500   const q7_t * pSrcA,
3501   const q7_t * pSrcB,
3502         q7_t * pDst,
3503         uint32_t blockSize);
3504 
3505 
3506   /**
3507    * @brief Element-wise addition of two Q15 vectors.
3508    * @param[in]  pSrcA      points to the first input vector (read-only)
3509    * @param[in]  pSrcB      points to the second input vector (read-only)
3510    * @param[out] pDst       points to the output vector that receives the sums
3511    * @param[in]  blockSize  number of samples in each vector
3512    */
3513   void arm_add_q15(
3514   const q15_t * pSrcA,
3515   const q15_t * pSrcB,
3516         q15_t * pDst,
3517         uint32_t blockSize);
3518 
3519 
3520   /**
3521    * @brief Element-wise addition of two Q31 vectors.
3522    * @param[in]  pSrcA      points to the first input vector (read-only)
3523    * @param[in]  pSrcB      points to the second input vector (read-only)
3524    * @param[out] pDst       points to the output vector that receives the sums
3525    * @param[in]  blockSize  number of samples in each vector
3526    */
3527   void arm_add_q31(
3528   const q31_t * pSrcA,
3529   const q31_t * pSrcB,
3530         q31_t * pDst,
3531         uint32_t blockSize);
3532 
3533 
3534   /**
3535    * @brief Element-wise subtraction of two floating-point vectors.
3536    * @param[in]  pSrcA      points to the first input vector (read-only)
3537    * @param[in]  pSrcB      points to the second input vector (read-only)
3538    * @param[out] pDst       points to the output vector that receives the differences
3539    * @param[in]  blockSize  number of samples in each vector
3540    */
3541   void arm_sub_f32(
3542   const float32_t * pSrcA,
3543   const float32_t * pSrcB,
3544         float32_t * pDst,
3545         uint32_t blockSize);
3546 
3547 
3548   /**
3549    * @brief Element-wise subtraction of two Q7 vectors.
3550    * @param[in]  pSrcA      points to the first input vector (read-only)
3551    * @param[in]  pSrcB      points to the second input vector (read-only)
3552    * @param[out] pDst       points to the output vector that receives the differences
3553    * @param[in]  blockSize  number of samples in each vector
3554    */
3555   void arm_sub_q7(
3556   const q7_t * pSrcA,
3557   const q7_t * pSrcB,
3558         q7_t * pDst,
3559         uint32_t blockSize);
3560 
3561 
3562   /**
3563    * @brief Element-wise subtraction of two Q15 vectors.
3564    * @param[in]  pSrcA      points to the first input vector (read-only)
3565    * @param[in]  pSrcB      points to the second input vector (read-only)
3566    * @param[out] pDst       points to the output vector that receives the differences
3567    * @param[in]  blockSize  number of samples in each vector
3568    */
3569   void arm_sub_q15(
3570   const q15_t * pSrcA,
3571   const q15_t * pSrcB,
3572         q15_t * pDst,
3573         uint32_t blockSize);
3574 
3575 
3576   /**
3577    * @brief Element-wise subtraction of two Q31 vectors.
3578    * @param[in]  pSrcA      points to the first input vector (read-only)
3579    * @param[in]  pSrcB      points to the second input vector (read-only)
3580    * @param[out] pDst       points to the output vector that receives the differences
3581    * @param[in]  blockSize  number of samples in each vector
3582    */
3583   void arm_sub_q31(
3584   const q31_t * pSrcA,
3585   const q31_t * pSrcB,
3586         q31_t * pDst,
3587         uint32_t blockSize);
3588 
3589 
3590   /**
3591    * @brief Multiplies every element of a floating-point vector by a scalar.
3592    * @param[in]  pSrc       points to the input vector (read-only)
3593    * @param[in]  scale      scale factor to be applied
3594    * @param[out] pDst       points to the output vector that receives the scaled samples
3595    * @param[in]  blockSize  number of samples in the vector
3596    */
3597   void arm_scale_f32(
3598   const float32_t * pSrc,
3599         float32_t scale,
3600         float32_t * pDst,
3601         uint32_t blockSize);
3602 
3603 
3604   /**
3605    * @brief Multiplies every element of a Q7 vector by a scalar.
3606    * @param[in]  pSrc        points to the input vector (read-only)
3607    * @param[in]  scaleFract  fractional portion of the scale value (Q7)
3608    * @param[in]  shift       number of bits to shift the result by (passed as a signed int8_t)
3609    * @param[out] pDst        points to the output vector that receives the scaled samples
3610    * @param[in]  blockSize   number of samples in the vector
3611    */
3612   void arm_scale_q7(
3613   const q7_t * pSrc,
3614         q7_t scaleFract,
3615         int8_t shift,
3616         q7_t * pDst,
3617         uint32_t blockSize);
3618 
3619 
3620   /**
3621    * @brief Multiplies every element of a Q15 vector by a scalar.
3622    * @param[in]  pSrc        points to the input vector (read-only)
3623    * @param[in]  scaleFract  fractional portion of the scale value (Q15)
3624    * @param[in]  shift       number of bits to shift the result by (passed as a signed int8_t)
3625    * @param[out] pDst        points to the output vector that receives the scaled samples
3626    * @param[in]  blockSize   number of samples in the vector
3627    */
3628   void arm_scale_q15(
3629   const q15_t * pSrc,
3630         q15_t scaleFract,
3631         int8_t shift,
3632         q15_t * pDst,
3633         uint32_t blockSize);
3634 
3635 
3636   /**
3637    * @brief Multiplies every element of a Q31 vector by a scalar.
3638    * @param[in]  pSrc        points to the input vector (read-only)
3639    * @param[in]  scaleFract  fractional portion of the scale value (Q31)
3640    * @param[in]  shift       number of bits to shift the result by (passed as a signed int8_t)
3641    * @param[out] pDst        points to the output vector that receives the scaled samples
3642    * @param[in]  blockSize   number of samples in the vector
3643    */
3644   void arm_scale_q31(
3645   const q31_t * pSrc,
3646         q31_t scaleFract,
3647         int8_t shift,
3648         q31_t * pDst,
3649         uint32_t blockSize);
3650 
3651 
3652   /**
3653    * @brief Computes the absolute value of each element of a Q7 vector.
3654    * @param[in]  pSrc       points to the input buffer (read-only)
3655    * @param[out] pDst       points to the output buffer
3656    * @param[in]  blockSize  number of samples in each vector
3657    */
3658   void arm_abs_q7(
3659   const q7_t * pSrc,
3660         q7_t * pDst,
3661         uint32_t blockSize);
3662 
3663 
3664   /**
3665    * @brief Computes the absolute value of each element of a floating-point vector.
3666    * @param[in]  pSrc       points to the input buffer (read-only)
3667    * @param[out] pDst       points to the output buffer
3668    * @param[in]  blockSize  number of samples in each vector
3669    */
3670   void arm_abs_f32(
3671   const float32_t * pSrc,
3672         float32_t * pDst,
3673         uint32_t blockSize);
3674 
3675 
3676   /**
3677    * @brief Computes the absolute value of each element of a Q15 vector.
3678    * @param[in]  pSrc       points to the input buffer (read-only)
3679    * @param[out] pDst       points to the output buffer
3680    * @param[in]  blockSize  number of samples in each vector
3681    */
3682   void arm_abs_q15(
3683   const q15_t * pSrc,
3684         q15_t * pDst,
3685         uint32_t blockSize);
3686 
3687 
3688   /**
3689    * @brief Computes the absolute value of each element of a Q31 vector.
3690    * @param[in]  pSrc       points to the input buffer (read-only)
3691    * @param[out] pDst       points to the output buffer
3692    * @param[in]  blockSize  number of samples in each vector
3693    */
3694   void arm_abs_q31(
3695   const q31_t * pSrc,
3696         q31_t * pDst,
3697         uint32_t blockSize);
3698 
3699 
3700   /**
3701    * @brief Dot product of two floating-point vectors.
3702    * @param[in]  pSrcA      points to the first input vector (read-only)
3703    * @param[in]  pSrcB      points to the second input vector (read-only)
3704    * @param[in]  blockSize  number of samples in each vector
3705    * @param[out] result     points to the location where the result is written (float32_t)
3706    */
3707   void arm_dot_prod_f32(
3708   const float32_t * pSrcA,
3709   const float32_t * pSrcB,
3710         uint32_t blockSize,
3711         float32_t * result);
3712 
3713 
3714   /**
3715    * @brief Dot product of two Q7 vectors.
3716    * @param[in]  pSrcA      points to the first input vector (read-only)
3717    * @param[in]  pSrcB      points to the second input vector (read-only)
3718    * @param[in]  blockSize  number of samples in each vector
3719    * @param[out] result     points to the location where the result is written; note the result type is q31_t, not q7_t
3720    */
3721   void arm_dot_prod_q7(
3722   const q7_t * pSrcA,
3723   const q7_t * pSrcB,
3724         uint32_t blockSize,
3725         q31_t * result);
3726 
3727 
3728   /**
3729    * @brief Dot product of two Q15 vectors.
3730    * @param[in]  pSrcA      points to the first input vector (read-only)
3731    * @param[in]  pSrcB      points to the second input vector (read-only)
3732    * @param[in]  blockSize  number of samples in each vector
3733    * @param[out] result     points to the location where the result is written; note the result type is q63_t, not q15_t
3734    */
3735   void arm_dot_prod_q15(
3736   const q15_t * pSrcA,
3737   const q15_t * pSrcB,
3738         uint32_t blockSize,
3739         q63_t * result);
3740 
3741 
3742   /**
3743    * @brief Dot product of two Q31 vectors.
3744    * @param[in]  pSrcA      points to the first input vector (read-only)
3745    * @param[in]  pSrcB      points to the second input vector (read-only)
3746    * @param[in]  blockSize  number of samples in each vector
3747    * @param[out] result     points to the location where the result is written; note the result type is q63_t, not q31_t
3748    */
3749   void arm_dot_prod_q31(
3750   const q31_t * pSrcA,
3751   const q31_t * pSrcB,
3752         uint32_t blockSize,
3753         q63_t * result);
3754 
3755 
3756   /**
3757    * @brief  Shifts each element of a Q7 vector by a specified number of bits.
3758    * @param[in]  pSrc       points to the input vector (read-only)
3759    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
3760    * @param[out] pDst       points to the output vector that receives the shifted samples
3761    * @param[in]  blockSize  number of samples in the vector
3762    */
3763   void arm_shift_q7(
3764   const q7_t * pSrc,
3765         int8_t shiftBits,
3766         q7_t * pDst,
3767         uint32_t blockSize);
3768 
3769 
3770   /**
3771    * @brief  Shifts each element of a Q15 vector by a specified number of bits.
3772    * @param[in]  pSrc       points to the input vector (read-only)
3773    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
3774    * @param[out] pDst       points to the output vector that receives the shifted samples
3775    * @param[in]  blockSize  number of samples in the vector
3776    */
3777   void arm_shift_q15(
3778   const q15_t * pSrc,
3779         int8_t shiftBits,
3780         q15_t * pDst,
3781         uint32_t blockSize);
3782 
3783 
3784   /**
3785    * @brief  Shifts each element of a Q31 vector by a specified number of bits.
3786    * @param[in]  pSrc       points to the input vector (read-only)
3787    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
3788    * @param[out] pDst       points to the output vector that receives the shifted samples
3789    * @param[in]  blockSize  number of samples in the vector
3790    */
3791   void arm_shift_q31(
3792   const q31_t * pSrc,
3793         int8_t shiftBits,
3794         q31_t * pDst,
3795         uint32_t blockSize);
3796 
3797 
3798   /**
3799    * @brief  Adds a constant offset to each element of a floating-point vector.
3800    * @param[in]  pSrc       points to the input vector (read-only)
3801    * @param[in]  offset     the offset to be added
3802    * @param[out] pDst       points to the output vector
3803    * @param[in]  blockSize  number of samples in the vector
3804    */
3805   void arm_offset_f32(
3806   const float32_t * pSrc,
3807         float32_t offset,
3808         float32_t * pDst,
3809         uint32_t blockSize);
3810 
3811 
3812   /**
3813    * @brief  Adds a constant offset to each element of a Q7 vector.
3814    * @param[in]  pSrc       points to the input vector (read-only)
3815    * @param[in]  offset     the offset to be added
3816    * @param[out] pDst       points to the output vector
3817    * @param[in]  blockSize  number of samples in the vector
3818    */
3819   void arm_offset_q7(
3820   const q7_t * pSrc,
3821         q7_t offset,
3822         q7_t * pDst,
3823         uint32_t blockSize);
3824 
3825 
3826   /**
3827    * @brief  Adds a constant offset to each element of a Q15 vector.
3828    * @param[in]  pSrc       points to the input vector (read-only)
3829    * @param[in]  offset     the offset to be added
3830    * @param[out] pDst       points to the output vector
3831    * @param[in]  blockSize  number of samples in the vector
3832    */
3833   void arm_offset_q15(
3834   const q15_t * pSrc,
3835         q15_t offset,
3836         q15_t * pDst,
3837         uint32_t blockSize);
3838 
3839 
3840   /**
3841    * @brief  Adds a constant offset to each element of a Q31 vector.
3842    * @param[in]  pSrc       points to the input vector (read-only)
3843    * @param[in]  offset     the offset to be added
3844    * @param[out] pDst       points to the output vector
3845    * @param[in]  blockSize  number of samples in the vector
3846    */
3847   void arm_offset_q31(
3848   const q31_t * pSrc,
3849         q31_t offset,
3850         q31_t * pDst,
3851         uint32_t blockSize);
3852 
3853 
3854   /**
3855    * @brief  Negates each element of a floating-point vector.
3856    * @param[in]  pSrc       points to the input vector (read-only)
3857    * @param[out] pDst       points to the output vector that receives the negated samples
3858    * @param[in]  blockSize  number of samples in the vector
3859    */
3860   void arm_negate_f32(
3861   const float32_t * pSrc,
3862         float32_t * pDst,
3863         uint32_t blockSize);
3864 
3865 
3866   /**
3867    * @brief  Negates each element of a Q7 vector.
3868    * @param[in]  pSrc       points to the input vector (read-only)
3869    * @param[out] pDst       points to the output vector that receives the negated samples
3870    * @param[in]  blockSize  number of samples in the vector
3871    */
3872   void arm_negate_q7(
3873   const q7_t * pSrc,
3874         q7_t * pDst,
3875         uint32_t blockSize);
3876 
3877 
3878   /**
3879    * @brief  Negates each element of a Q15 vector.
3880    * @param[in]  pSrc       points to the input vector (read-only)
3881    * @param[out] pDst       points to the output vector that receives the negated samples
3882    * @param[in]  blockSize  number of samples in the vector
3883    */
3884   void arm_negate_q15(
3885   const q15_t * pSrc,
3886         q15_t * pDst,
3887         uint32_t blockSize);
3888 
3889 
3890   /**
3891    * @brief  Negates each element of a Q31 vector.
3892    * @param[in]  pSrc       points to the input vector (read-only)
3893    * @param[out] pDst       points to the output vector that receives the negated samples
3894    * @param[in]  blockSize  number of samples in the vector
3895    */
3896   void arm_negate_q31(
3897   const q31_t * pSrc,
3898         q31_t * pDst,
3899         uint32_t blockSize);
3900 
3901 
3902   /**
3903    * @brief  Copies the elements of a floating-point vector.
3904    * @param[in]  pSrc       points to the source vector (read-only)
3905    * @param[out] pDst       points to the destination vector
3906    * @param[in]  blockSize  number of samples to process
3907    */
3908   void arm_copy_f32(
3909   const float32_t * pSrc,
3910         float32_t * pDst,
3911         uint32_t blockSize);
3912 
3913 
3914   /**
3915    * @brief  Copies the elements of a Q7 vector.
3916    * @param[in]  pSrc       points to the source vector (read-only)
3917    * @param[out] pDst       points to the destination vector
3918    * @param[in]  blockSize  number of samples to process
3919    */
3920   void arm_copy_q7(
3921   const q7_t * pSrc,
3922         q7_t * pDst,
3923         uint32_t blockSize);
3924 
3925 
3926   /**
3927    * @brief  Copies the elements of a Q15 vector.
3928    * @param[in]  pSrc       points to the source vector (read-only)
3929    * @param[out] pDst       points to the destination vector
3930    * @param[in]  blockSize  number of samples to process
3931    */
3932   void arm_copy_q15(
3933   const q15_t * pSrc,
3934         q15_t * pDst,
3935         uint32_t blockSize);
3936 
3937 
3938   /**
3939    * @brief  Copies the elements of a Q31 vector.
3940    * @param[in]  pSrc       points to the source vector (read-only)
3941    * @param[out] pDst       points to the destination vector
3942    * @param[in]  blockSize  number of samples to process
3943    */
3944   void arm_copy_q31(
3945   const q31_t * pSrc,
3946         q31_t * pDst,
3947         uint32_t blockSize);
3948 
3949 
3950   /**
3951    * @brief  Fills a floating-point vector with a constant value.
3952    * @param[in]  value      the value to be filled in
3953    * @param[out] pDst       points to the vector to be filled
3954    * @param[in]  blockSize  number of samples to process
3955    */
3956   void arm_fill_f32(
3957         float32_t value,
3958         float32_t * pDst,
3959         uint32_t blockSize);
3960 
3961 
3962   /**
3963    * @brief  Fills a Q7 vector with a constant value.
3964    * @param[in]  value      the value to be filled in
3965    * @param[out] pDst       points to the vector to be filled
3966    * @param[in]  blockSize  number of samples to process
3967    */
3968   void arm_fill_q7(
3969         q7_t value,
3970         q7_t * pDst,
3971         uint32_t blockSize);
3972 
3973 
3974   /**
3975    * @brief  Fills a Q15 vector with a constant value.
3976    * @param[in]  value      the value to be filled in
3977    * @param[out] pDst       points to the vector to be filled
3978    * @param[in]  blockSize  number of samples to process
3979    */
3980   void arm_fill_q15(
3981         q15_t value,
3982         q15_t * pDst,
3983         uint32_t blockSize);
3984 
3985 
3986   /**
3987    * @brief  Fills a Q31 vector with a constant value.
3988    * @param[in]  value      the value to be filled in
3989    * @param[out] pDst       points to the vector to be filled
3990    * @param[in]  blockSize  number of samples to process
3991    */
3992   void arm_fill_q31(
3993         q31_t value,
3994         q31_t * pDst,
3995         uint32_t blockSize);
3996 
3997 
3998 /**
3999  * @brief Convolution of floating-point sequences.
4000  * @param[in]  pSrcA    points to the first input sequence (read-only).
4001  * @param[in]  srcALen  length of the first input sequence.
4002  * @param[in]  pSrcB    points to the second input sequence (read-only).
4003  * @param[in]  srcBLen  length of the second input sequence.
4004  * @param[out] pDst     points to the location where the output result is written; length srcALen+srcBLen-1.
4005  */
4006   void arm_conv_f32(
4007   const float32_t * pSrcA,
4008         uint32_t srcALen,
4009   const float32_t * pSrcB,
4010         uint32_t srcBLen,
4011         float32_t * pDst);
4012 
4013 
4014   /**
4015    * @brief Convolution of Q15 sequences (variant taking caller-supplied scratch buffers).
4016    * @param[in]  pSrcA      points to the first input sequence (read-only).
4017    * @param[in]  srcALen    length of the first input sequence.
4018    * @param[in]  pSrcB      points to the second input sequence (read-only).
4019    * @param[in]  srcBLen    length of the second input sequence.
4020    * @param[out] pDst       points to the block of output data; length srcALen+srcBLen-1.
4021    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4022    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
4023    */
4024   void arm_conv_opt_q15(
4025   const q15_t * pSrcA,
4026         uint32_t srcALen,
4027   const q15_t * pSrcB,
4028         uint32_t srcBLen,
4029         q15_t * pDst,
4030         q15_t * pScratch1,
4031         q15_t * pScratch2);
4032 
4033 
4034 /**
4035  * @brief Convolution of Q15 sequences.
4036  * @param[in]  pSrcA    points to the first input sequence (read-only).
4037  * @param[in]  srcALen  length of the first input sequence.
4038  * @param[in]  pSrcB    points to the second input sequence (read-only).
4039  * @param[in]  srcBLen  length of the second input sequence.
4040  * @param[out] pDst     points to the location where the output result is written; length srcALen+srcBLen-1.
4041  */
4042   void arm_conv_q15(
4043   const q15_t * pSrcA,
4044         uint32_t srcALen,
4045   const q15_t * pSrcB,
4046         uint32_t srcBLen,
4047         q15_t * pDst);
4048 
4049 
4050   /**
4051    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4052    * @param[in]  pSrcA    points to the first input sequence (read-only).
4053    * @param[in]  srcALen  length of the first input sequence.
4054    * @param[in]  pSrcB    points to the second input sequence (read-only).
4055    * @param[in]  srcBLen  length of the second input sequence.
4056    * @param[out] pDst     points to the block of output data; length srcALen+srcBLen-1.
4057    */
4058   void arm_conv_fast_q15(
4059   const q15_t * pSrcA,
4060         uint32_t srcALen,
4061   const q15_t * pSrcB,
4062         uint32_t srcBLen,
4063         q15_t * pDst);
4064 
4065 
4066   /**
4067    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4, taking caller-supplied scratch buffers.
4068    * @param[in]  pSrcA      points to the first input sequence (read-only).
4069    * @param[in]  srcALen    length of the first input sequence.
4070    * @param[in]  pSrcB      points to the second input sequence (read-only).
4071    * @param[in]  srcBLen    length of the second input sequence.
4072    * @param[out] pDst       points to the block of output data; length srcALen+srcBLen-1.
4073    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4074    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
4075    */
4076   void arm_conv_fast_opt_q15(
4077   const q15_t * pSrcA,
4078         uint32_t srcALen,
4079   const q15_t * pSrcB,
4080         uint32_t srcBLen,
4081         q15_t * pDst,
4082         q15_t * pScratch1,
4083         q15_t * pScratch2);
4084 
4085 
4086   /**
4087    * @brief Convolution of Q31 sequences.
4088    * @param[in]  pSrcA    points to the first input sequence (read-only).
4089    * @param[in]  srcALen  length of the first input sequence.
4090    * @param[in]  pSrcB    points to the second input sequence (read-only).
4091    * @param[in]  srcBLen  length of the second input sequence.
4092    * @param[out] pDst     points to the block of output data; length srcALen+srcBLen-1.
4093    */
4094   void arm_conv_q31(
4095   const q31_t * pSrcA,
4096         uint32_t srcALen,
4097   const q31_t * pSrcB,
4098         uint32_t srcBLen,
4099         q31_t * pDst);
4100 
4101 
4102   /**
4103    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4104    * @param[in]  pSrcA    points to the first input sequence (read-only).
4105    * @param[in]  srcALen  length of the first input sequence.
4106    * @param[in]  pSrcB    points to the second input sequence (read-only).
4107    * @param[in]  srcBLen  length of the second input sequence.
4108    * @param[out] pDst     points to the block of output data; length srcALen+srcBLen-1.
4109    */
4110   void arm_conv_fast_q31(
4111   const q31_t * pSrcA,
4112         uint32_t srcALen,
4113   const q31_t * pSrcB,
4114         uint32_t srcBLen,
4115         q31_t * pDst);
4116 
4117 
4118     /**
4119    * @brief Convolution of Q7 sequences (variant taking caller-supplied scratch buffers).
4120    * @param[in]  pSrcA      points to the first input sequence (read-only).
4121    * @param[in]  srcALen    length of the first input sequence.
4122    * @param[in]  pSrcB      points to the second input sequence (read-only).
4123    * @param[in]  srcBLen    length of the second input sequence.
4124    * @param[out] pDst       points to the block of output data; length srcALen+srcBLen-1.
4125    * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4126    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4127    */
4128   void arm_conv_opt_q7(
4129   const q7_t * pSrcA,
4130         uint32_t srcALen,
4131   const q7_t * pSrcB,
4132         uint32_t srcBLen,
4133         q7_t * pDst,
4134         q15_t * pScratch1,
4135         q15_t * pScratch2);
4136 
4137 
4138   /**
4139    * @brief Convolution of Q7 sequences.
4140    * @param[in]  pSrcA    points to the first input sequence (read-only).
4141    * @param[in]  srcALen  length of the first input sequence.
4142    * @param[in]  pSrcB    points to the second input sequence (read-only).
4143    * @param[in]  srcBLen  length of the second input sequence.
4144    * @param[out] pDst     points to the block of output data; length srcALen+srcBLen-1.
4145    */
4146   void arm_conv_q7(
4147   const q7_t * pSrcA,
4148         uint32_t srcALen,
4149   const q7_t * pSrcB,
4150         uint32_t srcBLen,
4151         q7_t * pDst);
4152 
4153 
4154   /**
4155    * @brief Partial convolution of floating-point sequences.
4156    * @param[in]  pSrcA       points to the first input sequence (read-only).
4157    * @param[in]  srcALen     length of the first input sequence.
4158    * @param[in]  pSrcB       points to the second input sequence (read-only).
4159    * @param[in]  srcBLen     length of the second input sequence.
4160    * @param[out] pDst        points to the block of output data.
4161    * @param[in]  firstIndex  is the first output sample to start with.
4162    * @param[in]  numPoints   is the number of output points to be computed.
4163    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4164    */
4165   arm_status arm_conv_partial_f32(
4166   const float32_t * pSrcA,
4167         uint32_t srcALen,
4168   const float32_t * pSrcB,
4169         uint32_t srcBLen,
4170         float32_t * pDst,
4171         uint32_t firstIndex,
4172         uint32_t numPoints);
4173 
4174 
4175   /**
4176    * @brief Partial convolution of Q15 sequences (variant taking caller-supplied scratch buffers).
4177    * @param[in]  pSrcA       points to the first input sequence (read-only).
4178    * @param[in]  srcALen     length of the first input sequence.
4179    * @param[in]  pSrcB       points to the second input sequence (read-only).
4180    * @param[in]  srcBLen     length of the second input sequence.
4181    * @param[out] pDst        points to the block of output data.
4182    * @param[in]  firstIndex  is the first output sample to start with.
4183    * @param[in]  numPoints   is the number of output points to be computed.
4184    * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4185    * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
4186    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4187    */
4188   arm_status arm_conv_partial_opt_q15(
4189   const q15_t * pSrcA,
4190         uint32_t srcALen,
4191   const q15_t * pSrcB,
4192         uint32_t srcBLen,
4193         q15_t * pDst,
4194         uint32_t firstIndex,
4195         uint32_t numPoints,
4196         q15_t * pScratch1,
4197         q15_t * pScratch2);
4198 
4199 
4200   /**
4201    * @brief Partial convolution of Q15 sequences.
4202    * @param[in]  pSrcA       points to the first input sequence.
4203    * @param[in]  srcALen     length of the first input sequence.
4204    * @param[in]  pSrcB       points to the second input sequence.
4205    * @param[in]  srcBLen     length of the second input sequence.
4206    * @param[out] pDst        points to the block of output data.
4207    * @param[in]  firstIndex  is the first output sample to start with.
4208    * @param[in]  numPoints   is the number of output points to be computed.
4209    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4210    */
4211   arm_status arm_conv_partial_q15(
4212   const q15_t * pSrcA,
4213         uint32_t srcALen,
4214   const q15_t * pSrcB,
4215         uint32_t srcBLen,
4216         q15_t * pDst,
4217         uint32_t firstIndex,
4218         uint32_t numPoints);
4219 
4220 
4221   /**
4222    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4223    * @param[in]  pSrcA       points to the first input sequence.
4224    * @param[in]  srcALen     length of the first input sequence.
4225    * @param[in]  pSrcB       points to the second input sequence.
4226    * @param[in]  srcBLen     length of the second input sequence.
4227    * @param[out] pDst        points to the block of output data.
4228    * @param[in]  firstIndex  is the first output sample to start with.
4229    * @param[in]  numPoints   is the number of output points to be computed.
4230    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4231    */
4232   arm_status arm_conv_partial_fast_q15(
4233   const q15_t * pSrcA,
4234         uint32_t srcALen,
4235   const q15_t * pSrcB,
4236         uint32_t srcBLen,
4237         q15_t * pDst,
4238         uint32_t firstIndex,
4239         uint32_t numPoints);
4240 
4241 
4242   /**
4243    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4, using caller-supplied scratch buffers.
4244    * @param[in]  pSrcA       points to the first input sequence.
4245    * @param[in]  srcALen     length of the first input sequence.
4246    * @param[in]  pSrcB       points to the second input sequence.
4247    * @param[in]  srcBLen     length of the second input sequence.
4248    * @param[out] pDst        points to the block of output data.
4249    * @param[in]  firstIndex  is the first output sample to start with.
4250    * @param[in]  numPoints   is the number of output points to be computed.
4251    * @param[in]  pScratch1   points to a scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4252    * @param[in]  pScratch2   points to a scratch buffer of size min(srcALen, srcBLen).
4253    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4254    */
4255   arm_status arm_conv_partial_fast_opt_q15(
4256   const q15_t * pSrcA,
4257         uint32_t srcALen,
4258   const q15_t * pSrcB,
4259         uint32_t srcBLen,
4260         q15_t * pDst,
4261         uint32_t firstIndex,
4262         uint32_t numPoints,
4263         q15_t * pScratch1,
4264         q15_t * pScratch2);
4265 
4266 
4267   /**
4268    * @brief Partial convolution of Q31 sequences.
4269    * @param[in]  pSrcA       points to the first input sequence.
4270    * @param[in]  srcALen     length of the first input sequence.
4271    * @param[in]  pSrcB       points to the second input sequence.
4272    * @param[in]  srcBLen     length of the second input sequence.
4273    * @param[out] pDst        points to the block of output data.
4274    * @param[in]  firstIndex  is the first output sample to start with.
4275    * @param[in]  numPoints   is the number of output points to be computed.
4276    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4277    */
4278   arm_status arm_conv_partial_q31(
4279   const q31_t * pSrcA,
4280         uint32_t srcALen,
4281   const q31_t * pSrcB,
4282         uint32_t srcBLen,
4283         q31_t * pDst,
4284         uint32_t firstIndex,
4285         uint32_t numPoints);
4286 
4287 
4288   /**
4289    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4290    * @param[in]  pSrcA       points to the first input sequence.
4291    * @param[in]  srcALen     length of the first input sequence.
4292    * @param[in]  pSrcB       points to the second input sequence.
4293    * @param[in]  srcBLen     length of the second input sequence.
4294    * @param[out] pDst        points to the block of output data.
4295    * @param[in]  firstIndex  is the first output sample to start with.
4296    * @param[in]  numPoints   is the number of output points to be computed.
4297    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4298    */
4299   arm_status arm_conv_partial_fast_q31(
4300   const q31_t * pSrcA,
4301         uint32_t srcALen,
4302   const q31_t * pSrcB,
4303         uint32_t srcBLen,
4304         q31_t * pDst,
4305         uint32_t firstIndex,
4306         uint32_t numPoints);
4307 
4308 
4309   /**
4310    * @brief Partial convolution of Q7 sequences, using caller-supplied q15_t scratch buffers.
4311    * @param[in]  pSrcA       points to the first input sequence.
4312    * @param[in]  srcALen     length of the first input sequence.
4313    * @param[in]  pSrcB       points to the second input sequence.
4314    * @param[in]  srcBLen     length of the second input sequence.
4315    * @param[out] pDst        points to the block of output data.
4316    * @param[in]  firstIndex  is the first output sample to start with.
4317    * @param[in]  numPoints   is the number of output points to be computed.
4318    * @param[in]  pScratch1   points to a scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4319    * @param[in]  pScratch2   points to a scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4320    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4321    */
4322   arm_status arm_conv_partial_opt_q7(
4323   const q7_t * pSrcA,
4324         uint32_t srcALen,
4325   const q7_t * pSrcB,
4326         uint32_t srcBLen,
4327         q7_t * pDst,
4328         uint32_t firstIndex,
4329         uint32_t numPoints,
4330         q15_t * pScratch1,
4331         q15_t * pScratch2);
4332 
4333 
4334 /**
4335    * @brief Partial convolution of Q7 sequences.
4336    * @param[in]  pSrcA       points to the first input sequence.
4337    * @param[in]  srcALen     length of the first input sequence.
4338    * @param[in]  pSrcB       points to the second input sequence.
4339    * @param[in]  srcBLen     length of the second input sequence.
4340    * @param[out] pDst        points to the block of output data.
4341    * @param[in]  firstIndex  is the first output sample to start with.
4342    * @param[in]  numPoints   is the number of output points to be computed.
4343    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
4344    */
4345   arm_status arm_conv_partial_q7(
4346   const q7_t * pSrcA,
4347         uint32_t srcALen,
4348   const q7_t * pSrcB,
4349         uint32_t srcBLen,
4350         q7_t * pDst,
4351         uint32_t firstIndex,
4352         uint32_t numPoints);
4353 
4354 
4355   /**
4356    * @brief Instance structure for the Q15 FIR decimator.
4357    */
4358   typedef struct
4359   {
4360           uint8_t M;                  /**< decimation factor. */
4361           uint16_t numTaps;           /**< number of coefficients in the filter. */
4362     const q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
4363           q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
4364   } arm_fir_decimate_instance_q15;
4365 
4366   /**
4367    * @brief Instance structure for the Q31 FIR decimator.
4368    */
4369   typedef struct
4370   {
4371           uint8_t M;                  /**< decimation factor. */
4372           uint16_t numTaps;           /**< number of coefficients in the filter. */
4373     const q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
4374           q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
4375   } arm_fir_decimate_instance_q31;
4376 
4377 /**
4378   @brief Instance structure for the floating-point FIR decimator.
4379  */
4380 typedef struct
4381   {
4382           uint8_t M;                  /**< decimation factor. */
4383           uint16_t numTaps;           /**< number of coefficients in the filter. */
4384     const float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps. */
4385           float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
4386   } arm_fir_decimate_instance_f32;
4387 
4388 
4389 /**
4390   @brief         Processing function for the floating-point FIR decimator.
4391   @param[in]     S         points to an instance of the floating-point FIR decimator structure.
4392   @param[in]     pSrc      points to the block of input data.
4393   @param[out]    pDst      points to the block of output data.
4394   @param[in]     blockSize number of input samples to process per call.
4395  */
4396 void arm_fir_decimate_f32(
4397   const arm_fir_decimate_instance_f32 * S,
4398   const float32_t * pSrc,
4399         float32_t * pDst,
4400         uint32_t blockSize);
4401 
4402 
4403 /**
4404   @brief         Initialization function for the floating-point FIR decimator.
4405   @param[in,out] S          points to an instance of the floating-point FIR decimator structure
4406   @param[in]     numTaps    number of coefficients in the filter
4407   @param[in]     M          decimation factor
4408   @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps)
4409   @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1)
4410   @param[in]     blockSize  number of input samples to process per call
4411   @return        execution status
4412                    - \ref ARM_MATH_SUCCESS      : Operation successful
4413                    - \ref ARM_MATH_LENGTH_ERROR : <code>blockSize</code> is not a multiple of <code>M</code>
4414  */
4415 arm_status arm_fir_decimate_init_f32(
4416         arm_fir_decimate_instance_f32 * S,
4417         uint16_t numTaps,
4418         uint8_t M,
4419   const float32_t * pCoeffs,
4420         float32_t * pState,
4421         uint32_t blockSize);
4422 
4423 
4424   /**
4425    * @brief Processing function for the Q15 FIR decimator.
4426    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
4427    * @param[in]  pSrc       points to the block of input data.
4428    * @param[out] pDst       points to the block of output data.
4429    * @param[in]  blockSize  number of input samples to process per call.
4430    */
4431   void arm_fir_decimate_q15(
4432   const arm_fir_decimate_instance_q15 * S,
4433   const q15_t * pSrc,
4434         q15_t * pDst,
4435         uint32_t blockSize);
4436 
4437 
4438   /**
4439    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
4440    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
4441    * @param[in]  pSrc       points to the block of input data.
4442    * @param[out] pDst       points to the block of output data.
4443    * @param[in]  blockSize  number of input samples to process per call.
4444    */
4445   void arm_fir_decimate_fast_q15(
4446   const arm_fir_decimate_instance_q15 * S,
4447   const q15_t * pSrc,
4448         q15_t * pDst,
4449         uint32_t blockSize);
4450 
4451 
4452   /**
4453    * @brief  Initialization function for the Q15 FIR decimator.
4454    * @param[in,out] S          points to an instance of the Q15 FIR decimator structure.
4455    * @param[in]     numTaps    number of coefficients in the filter.
4456    * @param[in]     M          decimation factor.
4457    * @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps).
4458    * @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1).
4459    * @param[in]     blockSize  number of input samples to process per call.
4460    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
4461    * <code>blockSize</code> is not a multiple of <code>M</code>.
4462    */
4463   arm_status arm_fir_decimate_init_q15(
4464         arm_fir_decimate_instance_q15 * S,
4465         uint16_t numTaps,
4466         uint8_t M,
4467   const q15_t * pCoeffs,
4468         q15_t * pState,
4469         uint32_t blockSize);
4470 
4471 
4472   /**
4473    * @brief Processing function for the Q31 FIR decimator.
4474    * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
4475    * @param[in]  pSrc       points to the block of input data.
4476    * @param[out] pDst       points to the block of output data.
4477    * @param[in]  blockSize  number of input samples to process per call.
4478    */
4479   void arm_fir_decimate_q31(
4480   const arm_fir_decimate_instance_q31 * S,
4481   const q31_t * pSrc,
4482         q31_t * pDst,
4483         uint32_t blockSize);
4484 
4485   /**
4486    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
4487    * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
4488    * @param[in]  pSrc       points to the block of input data.
4489    * @param[out] pDst       points to the block of output data.
4490    * @param[in]  blockSize  number of input samples to process per call.
4491    */
4492   void arm_fir_decimate_fast_q31(
4493   const arm_fir_decimate_instance_q31 * S,
4494   const q31_t * pSrc,
4495         q31_t * pDst,
4496         uint32_t blockSize);
4497 
4498 
4499   /**
4500    * @brief  Initialization function for the Q31 FIR decimator.
4501    * @param[in,out] S          points to an instance of the Q31 FIR decimator structure.
4502    * @param[in]     numTaps    number of coefficients in the filter.
4503    * @param[in]     M          decimation factor.
4504    * @param[in]     pCoeffs    points to the filter coefficients (array of length numTaps).
4505    * @param[in]     pState     points to the state buffer (array of length numTaps+blockSize-1).
4506    * @param[in]     blockSize  number of input samples to process per call.
4507    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
4508    * <code>blockSize</code> is not a multiple of <code>M</code>.
4509    */
4510   arm_status arm_fir_decimate_init_q31(
4511         arm_fir_decimate_instance_q31 * S,
4512         uint16_t numTaps,
4513         uint8_t M,
4514   const q31_t * pCoeffs,
4515         q31_t * pState,
4516         uint32_t blockSize);
4517 
4518 
4519   /**
4520    * @brief Instance structure for the Q15 FIR interpolator.
4521    */
4522   typedef struct
4523   {
4524         uint8_t L;                      /**< upsample factor. */
4525         uint16_t phaseLength;           /**< length of each polyphase filter component. */
4526   const q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
4527         q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
4528   } arm_fir_interpolate_instance_q15;
4529 
4530   /**
4531    * @brief Instance structure for the Q31 FIR interpolator.
4532    */
4533   typedef struct
4534   {
4535         uint8_t L;                      /**< upsample factor. */
4536         uint16_t phaseLength;           /**< length of each polyphase filter component. */
4537   const q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
4538         q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
4539   } arm_fir_interpolate_instance_q31;
4540 
4541   /**
4542    * @brief Instance structure for the floating-point FIR interpolator.
4543    */
4544   typedef struct
4545   {
4546         uint8_t L;                     /**< upsample factor. */
4547         uint16_t phaseLength;          /**< length of each polyphase filter component. */
4548   const float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
4549         float32_t *pState;             /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (matching the Q15 and Q31 interpolator instances). */
4550   } arm_fir_interpolate_instance_f32;
4551 
4552 
4553   /**
4554    * @brief Processing function for the Q15 FIR interpolator.
4555    * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
4556    * @param[in]  pSrc       points to the block of input data.
4557    * @param[out] pDst       points to the block of output data.
4558    * @param[in]  blockSize  number of input samples to process per call.
4559    *                        NOTE(review): output presumably holds blockSize*L samples - confirm against the implementation.
4560   void arm_fir_interpolate_q15(
4561   const arm_fir_interpolate_instance_q15 * S,
4562   const q15_t * pSrc,
4563         q15_t * pDst,
4564         uint32_t blockSize);
4565 
4566 
4567   /**
4568    * @brief  Initialization function for the Q15 FIR interpolator.
4569    * @param[in,out] S          points to an instance of the Q15 FIR interpolator structure.
4570    * @param[in]     L          upsample factor.
4571    * @param[in]     numTaps    number of coefficients in the filter.
4572    * @param[in]     pCoeffs    points to the filter coefficient buffer.
4573    * @param[in]     pState     points to the state buffer.
4574    * @param[in]     blockSize  number of input samples to process per call.
4575    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
4576    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
4577    */
4578   arm_status arm_fir_interpolate_init_q15(
4579         arm_fir_interpolate_instance_q15 * S,
4580         uint8_t L,
4581         uint16_t numTaps,
4582   const q15_t * pCoeffs,
4583         q15_t * pState,
4584         uint32_t blockSize);
4585 
4586 
4587   /**
4588    * @brief Processing function for the Q31 FIR interpolator.
4589    * @param[in]  S          points to an instance of the Q31 FIR interpolator structure.
4590    * @param[in]  pSrc       points to the block of input data.
4591    * @param[out] pDst       points to the block of output data.
4592    * @param[in]  blockSize  number of input samples to process per call.
4593    */
4594   void arm_fir_interpolate_q31(
4595   const arm_fir_interpolate_instance_q31 * S,
4596   const q31_t * pSrc,
4597         q31_t * pDst,
4598         uint32_t blockSize);
4599 
4600 
4601   /**
4602    * @brief  Initialization function for the Q31 FIR interpolator.
4603    * @param[in,out] S          points to an instance of the Q31 FIR interpolator structure.
4604    * @param[in]     L          upsample factor.
4605    * @param[in]     numTaps    number of coefficients in the filter.
4606    * @param[in]     pCoeffs    points to the filter coefficient buffer.
4607    * @param[in]     pState     points to the state buffer.
4608    * @param[in]     blockSize  number of input samples to process per call.
4609    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
4610    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
4611    */
4612   arm_status arm_fir_interpolate_init_q31(
4613         arm_fir_interpolate_instance_q31 * S,
4614         uint8_t L,
4615         uint16_t numTaps,
4616   const q31_t * pCoeffs,
4617         q31_t * pState,
4618         uint32_t blockSize);
4619 
4620 
4621   /**
4622    * @brief Processing function for the floating-point FIR interpolator.
4623    * @param[in]  S          points to an instance of the floating-point FIR interpolator structure.
4624    * @param[in]  pSrc       points to the block of input data (float32_t samples).
4625    * @param[out] pDst       points to the block of output data (float32_t samples).
4626    * @param[in]  blockSize  number of input samples to process per call.
4627    */
4628   void arm_fir_interpolate_f32(
4629   const arm_fir_interpolate_instance_f32 * S,
4630   const float32_t * pSrc,
4631         float32_t * pDst,
4632         uint32_t blockSize);
4633 
4634 
4635   /**
4636    * @brief  Initialization function for the floating-point FIR interpolator.
4637    * @param[in,out] S          points to an instance of the floating-point FIR interpolator structure.
4638    * @param[in]     L          upsample factor.
4639    * @param[in]     numTaps    number of coefficients in the filter.
4640    * @param[in]     pCoeffs    points to the filter coefficient buffer.
4641    * @param[in]     pState     points to the state buffer.
4642    * @param[in]     blockSize  number of input samples to process per call.
4643    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
4644    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
4645    */
4646   arm_status arm_fir_interpolate_init_f32(
4647         arm_fir_interpolate_instance_f32 * S,
4648         uint8_t L,
4649         uint16_t numTaps,
4650   const float32_t * pCoeffs,
4651         float32_t * pState,
4652         uint32_t blockSize);
4653 
4654 
4655   /**
4656    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
4657    */
4658   typedef struct
4659   {
4660           uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
4661           q63_t *pState;           /**< points to the array of state variables (q63_t elements).  The array is of length 4*numStages. */
4662     const q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
4663           uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
4664   } arm_biquad_cas_df1_32x64_ins_q31;
4665 
4666 
4667   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
4668    * @param[in]  S          points to an instance of the high precision Q31 Biquad cascade filter structure.
4669    * @param[in]  pSrc       points to the block of input data.
4670    * @param[out] pDst       points to the block of output data.
4671    * @param[in]  blockSize  number of samples to process.
4672    */
4673   void arm_biquad_cas_df1_32x64_q31(
4674   const arm_biquad_cas_df1_32x64_ins_q31 * S,
4675   const q31_t * pSrc,
4676         q31_t * pDst,
4677         uint32_t blockSize);
4678 
4679 
4680   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
4681    * @param[in,out] S          points to an instance of the high precision Q31 Biquad cascade filter structure.
4682    * @param[in]     numStages  number of 2nd order stages in the filter.
4683    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
4684    * @param[in]     pState     points to the state buffer (array of length 4*numStages).
4685    * @param[in]     postShift  shift, in bits, to be applied to the output. Varies according to the coefficients format.
4686    */
4687   void arm_biquad_cas_df1_32x64_init_q31(
4688         arm_biquad_cas_df1_32x64_ins_q31 * S,
4689         uint8_t numStages,
4690   const q31_t * pCoeffs,
4691         q63_t * pState,
4692         uint8_t postShift);
4693 
4694 
4695   /**
4696    * @brief Instance structure for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
4697    */
4698   typedef struct
4699   {
4700           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
4701           float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
4702     const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
4703   } arm_biquad_cascade_df2T_instance_f32;
4704 
4705   /**
4706    * @brief Instance structure for the stereo (2 channels) floating-point transposed direct form II Biquad cascade filter.
4707    */
4708   typedef struct
4709   {
4710           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
4711           float32_t *pState;         /**< points to the array of state variables.  The array is of length 4*numStages. */
4712     const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
4713   } arm_biquad_cascade_stereo_df2T_instance_f32;
4714 
4715   /**
4716    * @brief Instance structure for the floating-point (float64_t) transposed direct form II Biquad cascade filter.
4717    */
4718   typedef struct
4719   {
4720           uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
4721           float64_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
4722     const float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
4723   } arm_biquad_cascade_df2T_instance_f64;
4724 
4725 
4726   /**
4727    * @brief Processing function for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
4728    * @param[in]  S          points to an instance of the filter data structure.
4729    * @param[in]  pSrc       points to the block of input data.
4730    * @param[out] pDst       points to the block of output data.
4731    * @param[in]  blockSize  number of samples to process.
4732    */
4733   void arm_biquad_cascade_df2T_f32(
4734   const arm_biquad_cascade_df2T_instance_f32 * S,
4735   const float32_t * pSrc,
4736         float32_t * pDst,
4737         uint32_t blockSize);
4738 
4739 
4740   /**
4741    * @brief Processing function for the stereo (2 channels) floating-point transposed direct form II Biquad cascade filter.
4742    * @param[in]  S          points to an instance of the stereo filter data structure.
4743    * @param[in]  pSrc       points to the block of input data. NOTE(review): channel layout (presumably interleaved) is not stated here - confirm.
4744    * @param[out] pDst       points to the block of output data.
4745    * @param[in]  blockSize  number of samples to process.
4746    */
4747   void arm_biquad_cascade_stereo_df2T_f32(
4748   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
4749   const float32_t * pSrc,
4750         float32_t * pDst,
4751         uint32_t blockSize);
4752 
4753 
4754   /**
4755    * @brief Processing function for the floating-point (float64_t) transposed direct form II Biquad cascade filter.
4756    * @param[in]  S          points to an instance of the filter data structure.
4757    * @param[in]  pSrc       points to the block of input data.
4758    * @param[out] pDst       points to the block of output data.
4759    * @param[in]  blockSize  number of samples to process.
4760    */
4761   void arm_biquad_cascade_df2T_f64(
4762   const arm_biquad_cascade_df2T_instance_f64 * S,
4763   const float64_t * pSrc,
4764         float64_t * pDst,
4765         uint32_t blockSize);
4766 
4767 
4768 #if defined(ARM_MATH_NEON) /* The prototype below is only declared when ARM_MATH_NEON is defined. */
4769 void arm_biquad_cascade_df2T_compute_coefs_f32( /* NOTE(review): presumably prepares the df2T coefficients for the NEON code path - confirm against the implementation. */
4770   arm_biquad_cascade_df2T_instance_f32 * S, /* points to an instance of the floating-point df2T filter structure (non-const). */
4771   uint8_t numStages, /* stage count; presumably the number of 2nd order stages, as in the init function - confirm. */
4772   float32_t * pCoeffs); /* points to a coefficient buffer; non-const here, unlike the init function - TODO confirm whether it is written. */
4773 #endif /* defined(ARM_MATH_NEON) */
4774   /**
4775    * @brief  Initialization function for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
4776    * @param[in,out] S          points to an instance of the filter data structure.
4777    * @param[in]     numStages  number of 2nd order stages in the filter.
4778    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
4779    * @param[in]     pState     points to the state buffer (array of length 2*numStages).
4780    */
4781   void arm_biquad_cascade_df2T_init_f32(
4782         arm_biquad_cascade_df2T_instance_f32 * S,
4783         uint8_t numStages,
4784   const float32_t * pCoeffs,
4785         float32_t * pState);
4786 
4787 
4788   /**
4789    * @brief  Initialization function for the stereo (2 channels) floating-point transposed direct form II Biquad cascade filter.
4790    * @param[in,out] S          points to an instance of the stereo filter data structure.
4791    * @param[in]     numStages  number of 2nd order stages in the filter.
4792    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
4793    * @param[in]     pState     points to the state buffer (array of length 4*numStages).
4794    */
4795   void arm_biquad_cascade_stereo_df2T_init_f32(
4796         arm_biquad_cascade_stereo_df2T_instance_f32 * S,
4797         uint8_t numStages,
4798   const float32_t * pCoeffs,
4799         float32_t * pState);
4800 
4801 
4802   /**
4803    * @brief  Initialization function for the floating-point (float64_t) transposed direct form II Biquad cascade filter.
4804    * @param[in,out] S          points to an instance of the filter data structure.
4805    * @param[in]     numStages  number of 2nd order stages in the filter.
4806    * @param[in]     pCoeffs    points to the filter coefficients (array of length 5*numStages).
4807    * @param[in]     pState     points to the state buffer (array of length 2*numStages).
4808    */
4809   void arm_biquad_cascade_df2T_init_f64(
4810         arm_biquad_cascade_df2T_instance_f64 * S,
4811         uint8_t numStages,
4812         const float64_t * pCoeffs,
4813         float64_t * pState);
4814 
4815 
4816   /**
4817    * @brief Instance structure for the Q15 FIR lattice filter: stage count plus pointers to the state and coefficient arrays.
4818    */
4819   typedef struct
4820   {
4821           uint16_t numStages;                  /**< number of filter stages. */
4822           q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
4823     const q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
4824   } arm_fir_lattice_instance_q15;
4825 
4826   /**
4827    * @brief Instance structure for the Q31 FIR lattice filter: stage count plus pointers to the state and coefficient arrays.
4828    */
4829   typedef struct
4830   {
4831           uint16_t numStages;                  /**< number of filter stages. */
4832           q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
4833     const q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
4834   } arm_fir_lattice_instance_q31;
4835 
4836   /**
4837    * @brief Instance structure for the floating-point FIR lattice filter: stage count plus pointers to the state and coefficient arrays.
4838    */
4839   typedef struct
4840   {
4841           uint16_t numStages;                  /**< number of filter stages. */
4842           float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
4843     const float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
4844   } arm_fir_lattice_instance_f32;
4845 
4846 
4847   /**
4848    * @brief Initialization function for the Q15 FIR lattice filter.
4849    * @param[in,out] S          points to an instance of the Q15 FIR lattice structure.
4850    * @param[in]     numStages  number of filter stages.
4851    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4852    * @param[in]     pState     points to the state buffer.  The array is of length numStages.
4853    */
4854   void arm_fir_lattice_init_q15(
4855         arm_fir_lattice_instance_q15 * S,
4856         uint16_t numStages,
4857   const q15_t * pCoeffs,
4858         q15_t * pState);
4859 
4860 
4861   /**
4862    * @brief Processing function for the Q15 FIR lattice filter.
4863    * @param[in]  S          points to an instance of the Q15 FIR lattice structure (arm_fir_lattice_instance_q15).
4864    * @param[in]  pSrc       points to the block of input data.
4865    * @param[out] pDst       points to the block of output data.
4866    * @param[in]  blockSize  number of samples to process.
4867    */
4868   void arm_fir_lattice_q15(
4869   const arm_fir_lattice_instance_q15 * S,
4870   const q15_t * pSrc,
4871         q15_t * pDst,
4872         uint32_t blockSize);
4873 
4874 
4875   /**
4876    * @brief Initialization function for the Q31 FIR lattice filter.
4877    * @param[in] S          points to the Q31 FIR lattice instance structure (arm_fir_lattice_instance_q31) to be initialized.
4878    * @param[in] numStages  number of filter stages.
4879    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4880    * @param[in] pState     points to the state buffer.  The array is of length numStages.
4881    */
4882   void arm_fir_lattice_init_q31(
4883         arm_fir_lattice_instance_q31 * S,
4884         uint16_t numStages,
4885   const q31_t * pCoeffs,
4886         q31_t * pState);
4887 
4888 
4889   /**
4890    * @brief Processing function for the Q31 FIR lattice filter.
4891    * @param[in]  S          points to an instance of the Q31 FIR lattice structure (arm_fir_lattice_instance_q31).
4892    * @param[in]  pSrc       points to the block of input data.
4893    * @param[out] pDst       points to the block of output data.
4894    * @param[in]  blockSize  number of samples to process.
4895    */
4896   void arm_fir_lattice_q31(
4897   const arm_fir_lattice_instance_q31 * S,
4898   const q31_t * pSrc,
4899         q31_t * pDst,
4900         uint32_t blockSize);
4901 
4902 
4903 /**
4904  * @brief Initialization function for the floating-point FIR lattice filter.
4905  * @param[in] S          points to the floating-point FIR lattice instance structure (arm_fir_lattice_instance_f32) to be initialized.
4906  * @param[in] numStages  number of filter stages.
4907  * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
4908  * @param[in] pState     points to the state buffer.  The array is of length numStages.
4909  */
4910   void arm_fir_lattice_init_f32(
4911         arm_fir_lattice_instance_f32 * S,
4912         uint16_t numStages,
4913   const float32_t * pCoeffs,
4914         float32_t * pState);
4915 
4916 
4917   /**
4918    * @brief Processing function for the floating-point FIR lattice filter.
4919    * @param[in]  S          points to an instance of the floating-point FIR lattice structure (arm_fir_lattice_instance_f32).
4920    * @param[in]  pSrc       points to the block of input data.
4921    * @param[out] pDst       points to the block of output data.
4922    * @param[in]  blockSize  number of samples to process.
4923    */
4924   void arm_fir_lattice_f32(
4925   const arm_fir_lattice_instance_f32 * S,
4926   const float32_t * pSrc,
4927         float32_t * pDst,
4928         uint32_t blockSize);
4929 
4930 
4931   /**
4932    * @brief Instance structure for the Q15 IIR lattice filter.
4933    */
4934   typedef struct
4935   {
4936           uint16_t numStages;                  /**< number of stages in the filter. */
4937           q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
4938           q15_t *pkCoeffs;                     /**< points to the reflection ("k") coefficient array. The array is of length numStages. */
4939           q15_t *pvCoeffs;                     /**< points to the ladder ("v") coefficient array. The array is of length numStages+1. */
4940   } arm_iir_lattice_instance_q15;
4941 
4942   /**
4943    * @brief Instance structure for the Q31 IIR lattice filter.
4944    */
4945   typedef struct
4946   {
4947           uint16_t numStages;                  /**< number of stages in the filter. */
4948           q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
4949           q31_t *pkCoeffs;                     /**< points to the reflection ("k") coefficient array. The array is of length numStages. */
4950           q31_t *pvCoeffs;                     /**< points to the ladder ("v") coefficient array. The array is of length numStages+1. */
4951   } arm_iir_lattice_instance_q31;
4952 
4953   /**
4954    * @brief Instance structure for the floating-point IIR lattice filter.
4955    */
4956   typedef struct
4957   {
4958           uint16_t numStages;                  /**< number of stages in the filter. */
4959           float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
4960           float32_t *pkCoeffs;                 /**< points to the reflection ("k") coefficient array. The array is of length numStages. */
4961           float32_t *pvCoeffs;                 /**< points to the ladder ("v") coefficient array. The array is of length numStages+1. */
4962   } arm_iir_lattice_instance_f32;
4963 
4964 
4965   /**
4966    * @brief Processing function for the floating-point IIR lattice filter.
4967    * @param[in]  S          points to an instance of the floating-point IIR lattice structure (arm_iir_lattice_instance_f32).
4968    * @param[in]  pSrc       points to the block of input data.
4969    * @param[out] pDst       points to the block of output data.
4970    * @param[in]  blockSize  number of samples to process.
4971    */
4972   void arm_iir_lattice_f32(
4973   const arm_iir_lattice_instance_f32 * S,
4974   const float32_t * pSrc,
4975         float32_t * pDst,
4976         uint32_t blockSize);
4977 
4978 
4979   /**
4980    * @brief Initialization function for the floating-point IIR lattice filter.
4981    * @param[in] S          points to the floating-point IIR lattice instance structure (arm_iir_lattice_instance_f32) to be initialized.
4982    * @param[in] numStages  number of stages in the filter.
4983    * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
4984    * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
4985    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
4986    * @param[in] blockSize  number of samples to process.
4987    */
4988   void arm_iir_lattice_init_f32(
4989         arm_iir_lattice_instance_f32 * S,
4990         uint16_t numStages,
4991         float32_t * pkCoeffs,
4992         float32_t * pvCoeffs,
4993         float32_t * pState,
4994         uint32_t blockSize);
4995 
4996 
4997   /**
4998    * @brief Processing function for the Q31 IIR lattice filter.
4999    * @param[in]  S          points to an instance of the Q31 IIR lattice structure (arm_iir_lattice_instance_q31).
5000    * @param[in]  pSrc       points to the block of input data.
5001    * @param[out] pDst       points to the block of output data.
5002    * @param[in]  blockSize  number of samples to process.
5003    */
5004   void arm_iir_lattice_q31(
5005   const arm_iir_lattice_instance_q31 * S,
5006   const q31_t * pSrc,
5007         q31_t * pDst,
5008         uint32_t blockSize);
5009 
5010 
5011   /**
5012    * @brief Initialization function for the Q31 IIR lattice filter.
5013    * @param[in] S          points to the Q31 IIR lattice instance structure (arm_iir_lattice_instance_q31) to be initialized.
5014    * @param[in] numStages  number of stages in the filter.
5015    * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
5016    * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
5017    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
5018    * @param[in] blockSize  number of samples to process.
5019    */
5020   void arm_iir_lattice_init_q31(
5021         arm_iir_lattice_instance_q31 * S,
5022         uint16_t numStages,
5023         q31_t * pkCoeffs,
5024         q31_t * pvCoeffs,
5025         q31_t * pState,
5026         uint32_t blockSize);
5027 
5028 
5029   /**
5030    * @brief Processing function for the Q15 IIR lattice filter.
5031    * @param[in]  S          points to an instance of the Q15 IIR lattice structure (arm_iir_lattice_instance_q15).
5032    * @param[in]  pSrc       points to the block of input data.
5033    * @param[out] pDst       points to the block of output data.
5034    * @param[in]  blockSize  number of samples to process.
5035    */
5036   void arm_iir_lattice_q15(
5037   const arm_iir_lattice_instance_q15 * S,
5038   const q15_t * pSrc,
5039         q15_t * pDst,
5040         uint32_t blockSize);
5041 
5042 
5043 /**
5044  * @brief Initialization function for the Q15 IIR lattice filter.
5045  * @param[in] S          points to the Q15 IIR lattice instance structure (arm_iir_lattice_instance_q15) to be initialized.
5046  * @param[in] numStages  number of stages in the filter.
5047  * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
5048  * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
5049  * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
5050  * @param[in] blockSize  number of samples to process per call.
5051  */
5052   void arm_iir_lattice_init_q15(
5053         arm_iir_lattice_instance_q15 * S,
5054         uint16_t numStages,
5055         q15_t * pkCoeffs,
5056         q15_t * pvCoeffs,
5057         q15_t * pState,
5058         uint32_t blockSize);
5059 
5060 
5061   /**
5062    * @brief Instance structure for the floating-point LMS filter.
5063    */
5064   typedef struct
5065   {
5066           uint16_t numTaps;    /**< number of coefficients in the filter. */
5067           float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5068           float32_t *pCoeffs;  /**< points to the coefficient array (not const-qualified). The array is of length numTaps. */
5069           float32_t mu;        /**< step size that controls filter coefficient updates. */
5070   } arm_lms_instance_f32;
5071 
5072 
5073   /**
5074    * @brief Processing function for floating-point LMS filter.
5075    * @param[in]  S          points to an instance of the floating-point LMS filter structure (arm_lms_instance_f32).
5076    * @param[in]  pSrc       points to the block of input data.
5077    * @param[in]  pRef       points to the block of reference data.
5078    * @param[out] pOut       points to the block of output data.
5079    * @param[out] pErr       points to the block of error data.
5080    * @param[in]  blockSize  number of samples to process.
5081    */
5082   void arm_lms_f32(
5083   const arm_lms_instance_f32 * S,
5084   const float32_t * pSrc,
5085         float32_t * pRef,
5086         float32_t * pOut,
5087         float32_t * pErr,
5088         uint32_t blockSize);
5089 
5090 
5091   /**
5092    * @brief Initialization function for floating-point LMS filter.
5093    * @param[in] S          points to the floating-point LMS filter instance structure (arm_lms_instance_f32) to be initialized.
5094    * @param[in] numTaps    number of filter coefficients.
5095    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5096    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5097    * @param[in] mu         step size that controls filter coefficient updates.
5098    * @param[in] blockSize  number of samples to process.
5099    */
5100   void arm_lms_init_f32(
5101         arm_lms_instance_f32 * S,
5102         uint16_t numTaps,
5103         float32_t * pCoeffs,
5104         float32_t * pState,
5105         float32_t mu,
5106         uint32_t blockSize);
5107 
5108 
5109   /**
5110    * @brief Instance structure for the Q15 LMS filter.
5111    */
5112   typedef struct
5113   {
5114           uint16_t numTaps;    /**< number of coefficients in the filter. */
5115           q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5116           q15_t *pCoeffs;      /**< points to the coefficient array (not const-qualified). The array is of length numTaps. */
5117           q15_t mu;            /**< step size that controls filter coefficient updates. */
5118           uint32_t postShift;  /**< bit shift applied to coefficients. */
5119   } arm_lms_instance_q15;
5120 
5121 
5122   /**
5123    * @brief Initialization function for the Q15 LMS filter.
5124    * @param[in] S          points to the Q15 LMS filter instance structure (arm_lms_instance_q15) to be initialized.
5125    * @param[in] numTaps    number of filter coefficients.
5126    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5127    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5128    * @param[in] mu         step size that controls filter coefficient updates.
5129    * @param[in] blockSize  number of samples to process.
5130    * @param[in] postShift  bit shift applied to coefficients.
5131    */
5132   void arm_lms_init_q15(
5133         arm_lms_instance_q15 * S,
5134         uint16_t numTaps,
5135         q15_t * pCoeffs,
5136         q15_t * pState,
5137         q15_t mu,
5138         uint32_t blockSize,
5139         uint32_t postShift);
5140 
5141 
5142   /**
5143    * @brief Processing function for Q15 LMS filter.
5144    * @param[in]  S          points to an instance of the Q15 LMS filter structure (arm_lms_instance_q15).
5145    * @param[in]  pSrc       points to the block of input data.
5146    * @param[in]  pRef       points to the block of reference data.
5147    * @param[out] pOut       points to the block of output data.
5148    * @param[out] pErr       points to the block of error data.
5149    * @param[in]  blockSize  number of samples to process.
5150    */
5151   void arm_lms_q15(
5152   const arm_lms_instance_q15 * S,
5153   const q15_t * pSrc,
5154         q15_t * pRef,
5155         q15_t * pOut,
5156         q15_t * pErr,
5157         uint32_t blockSize);
5158 
5159 
5160   /**
5161    * @brief Instance structure for the Q31 LMS filter.
5162    */
5163   typedef struct
5164   {
5165           uint16_t numTaps;    /**< number of coefficients in the filter. */
5166           q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5167           q31_t *pCoeffs;      /**< points to the coefficient array (not const-qualified). The array is of length numTaps. */
5168           q31_t mu;            /**< step size that controls filter coefficient updates. */
5169           uint32_t postShift;  /**< bit shift applied to coefficients. */
5170   } arm_lms_instance_q31;
5171 
5172 
5173   /**
5174    * @brief Processing function for Q31 LMS filter.
5175    * @param[in]  S          points to an instance of the Q31 LMS filter structure (arm_lms_instance_q31).
5176    * @param[in]  pSrc       points to the block of input data.
5177    * @param[in]  pRef       points to the block of reference data.
5178    * @param[out] pOut       points to the block of output data.
5179    * @param[out] pErr       points to the block of error data.
5180    * @param[in]  blockSize  number of samples to process.
5181    */
5182   void arm_lms_q31(
5183   const arm_lms_instance_q31 * S,
5184   const q31_t * pSrc,
5185         q31_t * pRef,
5186         q31_t * pOut,
5187         q31_t * pErr,
5188         uint32_t blockSize);
5189 
5190 
5191   /**
5192    * @brief Initialization function for Q31 LMS filter.
5193    * @param[in] S          points to the Q31 LMS filter instance structure (arm_lms_instance_q31) to be initialized.
5194    * @param[in] numTaps    number of filter coefficients.
5195    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5196    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5197    * @param[in] mu         step size that controls filter coefficient updates.
5198    * @param[in] blockSize  number of samples to process.
5199    * @param[in] postShift  bit shift applied to coefficients.
5200    */
5201   void arm_lms_init_q31(
5202         arm_lms_instance_q31 * S,
5203         uint16_t numTaps,
5204         q31_t * pCoeffs,
5205         q31_t * pState,
5206         q31_t mu,
5207         uint32_t blockSize,
5208         uint32_t postShift);
5209 
5210 
5211   /**
5212    * @brief Instance structure for the floating-point normalized LMS filter.
5213    */
5214   typedef struct
5215   {
5216           uint16_t numTaps;     /**< number of coefficients in the filter. */
5217           float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5218           float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
5219           float32_t mu;         /**< step size that controls filter coefficient updates. */
5220           float32_t energy;     /**< saves previous frame energy. */
5221           float32_t x0;         /**< saves previous input sample. */
5222   } arm_lms_norm_instance_f32;
5223 
5224 
5225   /**
5226    * @brief Processing function for floating-point normalized LMS filter.
5227    * @param[in]  S          points to an instance of the floating-point normalized LMS filter structure (arm_lms_norm_instance_f32; passed as a non-const pointer).
5228    * @param[in]  pSrc       points to the block of input data.
5229    * @param[in]  pRef       points to the block of reference data.
5230    * @param[out] pOut       points to the block of output data.
5231    * @param[out] pErr       points to the block of error data.
5232    * @param[in]  blockSize  number of samples to process.
5233    */
5234   void arm_lms_norm_f32(
5235         arm_lms_norm_instance_f32 * S,
5236   const float32_t * pSrc,
5237         float32_t * pRef,
5238         float32_t * pOut,
5239         float32_t * pErr,
5240         uint32_t blockSize);
5241 
5242 
5243   /**
5244    * @brief Initialization function for floating-point normalized LMS filter.
5245    * @param[in] S          points to the floating-point normalized LMS filter instance structure (arm_lms_norm_instance_f32) to be initialized.
5246    * @param[in] numTaps    number of filter coefficients.
5247    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5248    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5249    * @param[in] mu         step size that controls filter coefficient updates.
5250    * @param[in] blockSize  number of samples to process.
5251    */
5252   void arm_lms_norm_init_f32(
5253         arm_lms_norm_instance_f32 * S,
5254         uint16_t numTaps,
5255         float32_t * pCoeffs,
5256         float32_t * pState,
5257         float32_t mu,
5258         uint32_t blockSize);
5259 
5260 
5261   /**
5262    * @brief Instance structure for the Q31 normalized LMS filter.
5263    */
5264   typedef struct
5265   {
5266           uint16_t numTaps;     /**< number of coefficients in the filter. */
5267           q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5268           q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
5269           q31_t mu;             /**< step size that controls filter coefficient updates. */
5270           uint8_t postShift;    /**< bit shift applied to coefficients. */
5271     const q31_t *recipTable;    /**< points to the reciprocal initial value table (read-only through this pointer). */
5272           q31_t energy;         /**< saves previous frame energy. */
5273           q31_t x0;             /**< saves previous input sample. */
5274   } arm_lms_norm_instance_q31;
5275 
5276 
5277   /**
5278    * @brief Processing function for Q31 normalized LMS filter.
5279    * @param[in]  S          points to an instance of the Q31 normalized LMS filter structure (arm_lms_norm_instance_q31; passed as a non-const pointer).
5280    * @param[in]  pSrc       points to the block of input data.
5281    * @param[in]  pRef       points to the block of reference data.
5282    * @param[out] pOut       points to the block of output data.
5283    * @param[out] pErr       points to the block of error data.
5284    * @param[in]  blockSize  number of samples to process.
5285    */
5286   void arm_lms_norm_q31(
5287         arm_lms_norm_instance_q31 * S,
5288   const q31_t * pSrc,
5289         q31_t * pRef,
5290         q31_t * pOut,
5291         q31_t * pErr,
5292         uint32_t blockSize);
5293 
5294 
5295   /**
5296    * @brief Initialization function for Q31 normalized LMS filter.
5297    * @param[in] S          points to the Q31 normalized LMS filter instance structure (arm_lms_norm_instance_q31) to be initialized.
5298    * @param[in] numTaps    number of filter coefficients.
5299    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5300    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5301    * @param[in] mu         step size that controls filter coefficient updates.
5302    * @param[in] blockSize  number of samples to process.
5303    * @param[in] postShift  bit shift applied to coefficients.
5304    */
5305   void arm_lms_norm_init_q31(
5306         arm_lms_norm_instance_q31 * S,
5307         uint16_t numTaps,
5308         q31_t * pCoeffs,
5309         q31_t * pState,
5310         q31_t mu,
5311         uint32_t blockSize,
5312         uint8_t postShift);
5313 
5314 
5315   /**
5316    * @brief Instance structure for the Q15 normalized LMS filter.
5317    */
5318   typedef struct
5319   {
5320           uint16_t numTaps;     /**< number of coefficients in the filter. */
5321           q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
5322           q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
5323           q15_t mu;             /**< step size that controls filter coefficient updates. */
5324           uint8_t postShift;    /**< bit shift applied to coefficients. */
5325     const q15_t *recipTable;    /**< points to the reciprocal initial value table (read-only through this pointer). */
5326           q15_t energy;         /**< saves previous frame energy. */
5327           q15_t x0;             /**< saves previous input sample. */
5328   } arm_lms_norm_instance_q15;
5329 
5330 
5331   /**
5332    * @brief Processing function for Q15 normalized LMS filter.
5333    * @param[in]  S          points to an instance of the Q15 normalized LMS filter structure (arm_lms_norm_instance_q15; passed as a non-const pointer).
5334    * @param[in]  pSrc       points to the block of input data.
5335    * @param[in]  pRef       points to the block of reference data.
5336    * @param[out] pOut       points to the block of output data.
5337    * @param[out] pErr       points to the block of error data.
5338    * @param[in]  blockSize  number of samples to process.
5339    */
5340   void arm_lms_norm_q15(
5341         arm_lms_norm_instance_q15 * S,
5342   const q15_t * pSrc,
5343         q15_t * pRef,
5344         q15_t * pOut,
5345         q15_t * pErr,
5346         uint32_t blockSize);
5347 
5348 
5349   /**
5350    * @brief Initialization function for Q15 normalized LMS filter.
5351    * @param[in] S          points to the Q15 normalized LMS filter instance structure (arm_lms_norm_instance_q15) to be initialized.
5352    * @param[in] numTaps    number of filter coefficients.
5353    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
5354    * @param[in] pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
5355    * @param[in] mu         step size that controls filter coefficient updates.
5356    * @param[in] blockSize  number of samples to process.
5357    * @param[in] postShift  bit shift applied to coefficients.
5358    */
5359   void arm_lms_norm_init_q15(
5360         arm_lms_norm_instance_q15 * S,
5361         uint16_t numTaps,
5362         q15_t * pCoeffs,
5363         q15_t * pState,
5364         q15_t mu,
5365         uint32_t blockSize,
5366         uint8_t postShift);
5367 
5368 
5369   /**
5370    * @brief Correlation of floating-point sequences.
5371    * @param[in]  pSrcA    points to the first input sequence.
5372    * @param[in]  srcALen  length of the first input sequence.
5373    * @param[in]  pSrcB    points to the second input sequence.
5374    * @param[in]  srcBLen  length of the second input sequence.
5375    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5376    */
5377   void arm_correlate_f32(
5378   const float32_t * pSrcA,
5379         uint32_t srcALen,
5380   const float32_t * pSrcB,
5381         uint32_t srcBLen,
5382         float32_t * pDst);
5383 
5384 
5385 /**
5386  @brief Correlation of Q15 sequences (variant taking a caller-supplied scratch buffer).
5387  @param[in]  pSrcA     points to the first input sequence.
5388  @param[in]  srcALen   length of the first input sequence.
5389  @param[in]  pSrcB     points to the second input sequence.
5390  @param[in]  srcBLen   length of the second input sequence.
5391  @param[out] pDst      points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5392  @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
5393 */
5394 void arm_correlate_opt_q15(
5395   const q15_t * pSrcA,
5396         uint32_t srcALen,
5397   const q15_t * pSrcB,
5398         uint32_t srcBLen,
5399         q15_t * pDst,
5400         q15_t * pScratch);
5401 
5402 
5403 /**
5404   @brief Correlation of Q15 sequences.
5405   @param[in]  pSrcA    points to the first input sequence.
5406   @param[in]  srcALen  length of the first input sequence.
5407   @param[in]  pSrcB    points to the second input sequence.
5408   @param[in]  srcBLen  length of the second input sequence.
5409   @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5410  */
5411   void arm_correlate_q15(
5412   const q15_t * pSrcA,
5413         uint32_t srcALen,
5414   const q15_t * pSrcB,
5415         uint32_t srcBLen,
5416         q15_t * pDst);
5417 
5418 
5419 /**
5420   @brief         Correlation of Q15 sequences (fast version).
5421   @param[in]     pSrcA      points to the first input sequence.
5422   @param[in]     srcALen    length of the first input sequence.
5423   @param[in]     pSrcB      points to the second input sequence.
5424   @param[in]     srcBLen    length of the second input sequence.
5425   @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
5426   @return        none
5427  */
5428 void arm_correlate_fast_q15(
5429   const q15_t * pSrcA,
5430         uint32_t srcALen,
5431   const q15_t * pSrcB,
5432         uint32_t srcBLen,
5433         q15_t * pDst);
5434 
5435 
5436 /**
5437   @brief Correlation of Q15 sequences (fast version taking a caller-supplied scratch buffer).
5438   @param[in]  pSrcA     points to the first input sequence.
5439   @param[in]  srcALen   length of the first input sequence.
5440   @param[in]  pSrcB     points to the second input sequence.
5441   @param[in]  srcBLen   length of the second input sequence.
5442   @param[out] pDst      points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5443   @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
5444  */
5445 void arm_correlate_fast_opt_q15(
5446   const q15_t * pSrcA,
5447         uint32_t srcALen,
5448   const q15_t * pSrcB,
5449         uint32_t srcBLen,
5450         q15_t * pDst,
5451         q15_t * pScratch);
5452 
5453 
5454   /**
5455    * @brief Correlation of Q31 sequences.
5456    * @param[in]  pSrcA    points to the first input sequence.
5457    * @param[in]  srcALen  length of the first input sequence.
5458    * @param[in]  pSrcB    points to the second input sequence.
5459    * @param[in]  srcBLen  length of the second input sequence.
5460    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5461    */
5462   void arm_correlate_q31(
5463   const q31_t * pSrcA,
5464         uint32_t srcALen,
5465   const q31_t * pSrcB,
5466         uint32_t srcBLen,
5467         q31_t * pDst);
5468 
5469 
5470 /**
5471   @brief Correlation of Q31 sequences (fast version).
5472   @param[in]  pSrcA    points to the first input sequence.
5473   @param[in]  srcALen  length of the first input sequence.
5474   @param[in]  pSrcB    points to the second input sequence.
5475   @param[in]  srcBLen  length of the second input sequence.
5476   @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5477  */
5478 void arm_correlate_fast_q31(
5479   const q31_t * pSrcA,
5480         uint32_t srcALen,
5481   const q31_t * pSrcB,
5482         uint32_t srcBLen,
5483         q31_t * pDst);
5484 
5485 
5486  /**
5487    * @brief Correlation of Q7 sequences (variant taking caller-supplied scratch buffers).
5488    * @param[in]  pSrcA      points to the first input sequence.
5489    * @param[in]  srcALen    length of the first input sequence.
5490    * @param[in]  pSrcB      points to the second input sequence.
5491    * @param[in]  srcBLen    length of the second input sequence.
5492    * @param[out] pDst       points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5493    * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
5494    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
5495    */
5496   void arm_correlate_opt_q7(
5497   const q7_t * pSrcA,
5498         uint32_t srcALen,
5499   const q7_t * pSrcB,
5500         uint32_t srcBLen,
5501         q7_t * pDst,
5502         q15_t * pScratch1,
5503         q15_t * pScratch2);
5504 
5505 
5506   /**
5507    * @brief Correlation of Q7 sequences.
5508    * @param[in]  pSrcA    points to the first input sequence.
5509    * @param[in]  srcALen  length of the first input sequence.
5510    * @param[in]  pSrcB    points to the second input sequence.
5511    * @param[in]  srcBLen  length of the second input sequence.
5512    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
5513    */
5514   void arm_correlate_q7(
5515   const q7_t * pSrcA,
5516         uint32_t srcALen,
5517   const q7_t * pSrcB,
5518         uint32_t srcBLen,
5519         q7_t * pDst);
5520 
5521 
5522   /**
5523    * @brief Instance structure for the floating-point sparse FIR filter.
5524    */
5525   typedef struct
5526   {
5527           uint16_t numTaps;             /**< number of coefficients in the filter. */
5528           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
5529           float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
5530     const float32_t *pCoeffs;           /**< points to the coefficient array (read-only through this pointer). The array is of length numTaps. */
5531           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
5532           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
5533   } arm_fir_sparse_instance_f32;
5534 
5535   /**
5536    * @brief Instance structure for the Q31 sparse FIR filter.
5537    */
5538   typedef struct
5539   {
5540           uint16_t numTaps;             /**< number of coefficients in the filter. */
5541           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
5542           q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
5543     const q31_t *pCoeffs;               /**< points to the coefficient array (read-only through this pointer). The array is of length numTaps. */
5544           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
5545           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
5546   } arm_fir_sparse_instance_q31;
5547 
5548   /**
5549    * @brief Instance structure for the Q15 sparse FIR filter.
5550    */
5551   typedef struct
5552   {
5553           uint16_t numTaps;             /**< number of coefficients in the filter. */
5554           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
5555           q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
5556     const q15_t *pCoeffs;               /**< points to the coefficient array (read-only through this pointer). The array is of length numTaps. */
5557           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
5558           int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
5559   } arm_fir_sparse_instance_q15;
5560 
5561   /**
5562    * @brief Instance structure for the Q7 sparse FIR filter: coefficient array, per-tap delay array, and state buffer with its index.
5563    */
5564   typedef struct
5565   {
5566           uint16_t numTaps;             /**< number of coefficients in the filter (length of both pCoeffs and pTapDelay). */
5567           uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
5568           q7_t *pState;                 /**< points to the state buffer array. Documented length is maxDelay+blockSize-1 (TODO confirm against arm_fir_sparse_init_q7). */
5569     const q7_t *pCoeffs;                /**< points to the read-only coefficient array. The array is of length numTaps. */
5570           uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
5571           int32_t *pTapDelay;           /**< points to the array of delay values, one per coefficient.  The array is of length numTaps. */
5572   } arm_fir_sparse_instance_q7;
5573 
5574 
5575   /**
5576    * @brief Processing function for the floating-point sparse FIR filter.
5577    * @param[in]  S           points to an instance of the floating-point sparse FIR structure (passed non-const; TODO confirm whether the call updates it).
5578    * @param[in]  pSrc        points to the block of input data (read-only).
5579    * @param[out] pDst        points to the block of output data.
5580    * @param[in]  pScratchIn  points to a caller-supplied temporary buffer of size blockSize.
5581    * @param[in]  blockSize   number of input samples to process per call.
5582    */
5583   void arm_fir_sparse_f32(
5584         arm_fir_sparse_instance_f32 * S,
5585   const float32_t * pSrc,
5586         float32_t * pDst,
5587         float32_t * pScratchIn,
5588         uint32_t blockSize);
5589 
5590 
5591   /**
5592    * @brief  Initialization function for the floating-point sparse FIR filter.
5593    * @param[in,out] S          points to an instance of the floating-point sparse FIR structure.
5594    * @param[in]     numTaps    number of nonzero coefficients in the filter.
5595    * @param[in]     pCoeffs    points to the array of filter coefficients (read-only).
5596    * @param[in]     pState     points to the state buffer; see the pState member of arm_fir_sparse_instance_f32 for its documented length.
5597    * @param[in]     pTapDelay  points to the array of offset times (int32_t values).
5598    * @param[in]     maxDelay   maximum offset time supported.
5599    * @param[in]     blockSize  number of samples that will be processed per block.
5600    */
5601   void arm_fir_sparse_init_f32(
5602         arm_fir_sparse_instance_f32 * S,
5603         uint16_t numTaps,
5604   const float32_t * pCoeffs,
5605         float32_t * pState,
5606         int32_t * pTapDelay,
5607         uint16_t maxDelay,
5608         uint32_t blockSize);
5609 
5610 
5611   /**
5612    * @brief Processing function for the Q31 sparse FIR filter.
5613    * @param[in]  S           points to an instance of the Q31 sparse FIR structure (passed non-const; TODO confirm whether the call updates it).
5614    * @param[in]  pSrc        points to the block of input data (read-only).
5615    * @param[out] pDst        points to the block of output data.
5616    * @param[in]  pScratchIn  points to a caller-supplied temporary buffer of size blockSize.
5617    * @param[in]  blockSize   number of input samples to process per call.
5618    */
5619   void arm_fir_sparse_q31(
5620         arm_fir_sparse_instance_q31 * S,
5621   const q31_t * pSrc,
5622         q31_t * pDst,
5623         q31_t * pScratchIn,
5624         uint32_t blockSize);
5625 
5626 
5627   /**
5628    * @brief  Initialization function for the Q31 sparse FIR filter.
5629    * @param[in,out] S          points to an instance of the Q31 sparse FIR structure.
5630    * @param[in]     numTaps    number of nonzero coefficients in the filter.
5631    * @param[in]     pCoeffs    points to the array of filter coefficients (read-only).
5632    * @param[in]     pState     points to the state buffer; see the pState member of arm_fir_sparse_instance_q31 for its documented length.
5633    * @param[in]     pTapDelay  points to the array of offset times (int32_t values).
5634    * @param[in]     maxDelay   maximum offset time supported.
5635    * @param[in]     blockSize  number of samples that will be processed per block.
5636    */
5637   void arm_fir_sparse_init_q31(
5638         arm_fir_sparse_instance_q31 * S,
5639         uint16_t numTaps,
5640   const q31_t * pCoeffs,
5641         q31_t * pState,
5642         int32_t * pTapDelay,
5643         uint16_t maxDelay,
5644         uint32_t blockSize);
5645 
5646 
5647   /**
5648    * @brief Processing function for the Q15 sparse FIR filter.
5649    * @param[in]  S            points to an instance of the Q15 sparse FIR structure (passed non-const; TODO confirm whether the call updates it).
5650    * @param[in]  pSrc         points to the block of input data (read-only).
5651    * @param[out] pDst         points to the block of output data.
5652    * @param[in]  pScratchIn   points to a caller-supplied temporary q15_t buffer of size blockSize.
5653    * @param[in]  pScratchOut  points to a caller-supplied temporary q31_t buffer of size blockSize.
5654    * @param[in]  blockSize    number of input samples to process per call.
5655    */
5656   void arm_fir_sparse_q15(
5657         arm_fir_sparse_instance_q15 * S,
5658   const q15_t * pSrc,
5659         q15_t * pDst,
5660         q15_t * pScratchIn,
5661         q31_t * pScratchOut,
5662         uint32_t blockSize);
5663 
5664 
5665   /**
5666    * @brief  Initialization function for the Q15 sparse FIR filter.
5667    * @param[in,out] S          points to an instance of the Q15 sparse FIR structure.
5668    * @param[in]     numTaps    number of nonzero coefficients in the filter.
5669    * @param[in]     pCoeffs    points to the array of filter coefficients (read-only).
5670    * @param[in]     pState     points to the state buffer; see the pState member of arm_fir_sparse_instance_q15 for its documented length.
5671    * @param[in]     pTapDelay  points to the array of offset times (int32_t values).
5672    * @param[in]     maxDelay   maximum offset time supported.
5673    * @param[in]     blockSize  number of samples that will be processed per block.
5674    */
5675   void arm_fir_sparse_init_q15(
5676         arm_fir_sparse_instance_q15 * S,
5677         uint16_t numTaps,
5678   const q15_t * pCoeffs,
5679         q15_t * pState,
5680         int32_t * pTapDelay,
5681         uint16_t maxDelay,
5682         uint32_t blockSize);
5683 
5684 
5685   /**
5686    * @brief Processing function for the Q7 sparse FIR filter.
5687    * @param[in]  S            points to an instance of the Q7 sparse FIR structure (passed non-const; TODO confirm whether the call updates it).
5688    * @param[in]  pSrc         points to the block of input data (read-only).
5689    * @param[out] pDst         points to the block of output data.
5690    * @param[in]  pScratchIn   points to a caller-supplied temporary q7_t buffer of size blockSize.
5691    * @param[in]  pScratchOut  points to a caller-supplied temporary q31_t buffer of size blockSize.
5692    * @param[in]  blockSize    number of input samples to process per call.
5693    */
5694   void arm_fir_sparse_q7(
5695         arm_fir_sparse_instance_q7 * S,
5696   const q7_t * pSrc,
5697         q7_t * pDst,
5698         q7_t * pScratchIn,
5699         q31_t * pScratchOut,
5700         uint32_t blockSize);
5701 
5702 
5703   /**
5704    * @brief  Initialization function for the Q7 sparse FIR filter.
5705    * @param[in,out] S          points to an instance of the Q7 sparse FIR structure.
5706    * @param[in]     numTaps    number of nonzero coefficients in the filter.
5707    * @param[in]     pCoeffs    points to the array of filter coefficients (read-only).
5708    * @param[in]     pState     points to the state buffer; see the pState member of arm_fir_sparse_instance_q7 for its documented length.
5709    * @param[in]     pTapDelay  points to the array of offset times (int32_t values).
5710    * @param[in]     maxDelay   maximum offset time supported.
5711    * @param[in]     blockSize  number of samples that will be processed per block.
5712    */
5713   void arm_fir_sparse_init_q7(
5714         arm_fir_sparse_instance_q7 * S,
5715         uint16_t numTaps,
5716   const q7_t * pCoeffs,
5717         q7_t * pState,
5718         int32_t * pTapDelay,
5719         uint16_t maxDelay,
5720         uint32_t blockSize);
5721 
5722 
5723   /**
5724    * @brief  Floating-point sin_cos function: produces the sine and cosine of one angle through the two output pointers.
5725    * @param[in]  theta    input value in degrees.
5726    * @param[out] pSinVal  points to the processed sine output.
5727    * @param[out] pCosVal  points to the processed cosine output.
5728    */
5729   void arm_sin_cos_f32(
5730         float32_t theta,
5731         float32_t * pSinVal,
5732         float32_t * pCosVal);
5733 
5734 
5735   /**
5736    * @brief  Q31 sin_cos function: produces the sine and cosine of one angle through the two output pointers.
5737    * @param[in]  theta    scaled input value in degrees (the scaling convention is not stated here; TODO confirm against the implementation's documentation).
5738    * @param[out] pSinVal  points to the processed sine output.
5739    * @param[out] pCosVal  points to the processed cosine output.
5740    */
5741   void arm_sin_cos_q31(
5742         q31_t theta,
5743         q31_t * pSinVal,
5744         q31_t * pCosVal);
5745 
5746 
5747   /**
5748    * @brief  Floating-point complex conjugate.
5749    * @param[in]  pSrc        points to the input vector (read-only).
5750    * @param[out] pDst        points to the output vector.
5751    * @param[in]  numSamples  number of complex samples in each vector.
5752    */
5753   void arm_cmplx_conj_f32(
5754   const float32_t * pSrc,
5755         float32_t * pDst,
5756         uint32_t numSamples);
5757 
5758   /**
5759    * @brief  Q31 complex conjugate.
5760    * @param[in]  pSrc        points to the input vector (read-only).
5761    * @param[out] pDst        points to the output vector.
5762    * @param[in]  numSamples  number of complex samples in each vector.
5763    */
5764   void arm_cmplx_conj_q31(
5765   const q31_t * pSrc,
5766         q31_t * pDst,
5767         uint32_t numSamples);
5768 
5769 
5770   /**
5771    * @brief  Q15 complex conjugate.
5772    * @param[in]  pSrc        points to the input vector (read-only).
5773    * @param[out] pDst        points to the output vector.
5774    * @param[in]  numSamples  number of complex samples in each vector.
5775    */
5776   void arm_cmplx_conj_q15(
5777   const q15_t * pSrc,
5778         q15_t * pDst,
5779         uint32_t numSamples);
5780 
5781 
5782   /**
5783    * @brief  Floating-point complex magnitude squared.
5784    * @param[in]  pSrc        points to the complex input vector (read-only).
5785    * @param[out] pDst        points to the real output vector.
5786    * @param[in]  numSamples  number of complex samples in the input vector.
5787    */
5788   void arm_cmplx_mag_squared_f32(
5789   const float32_t * pSrc,
5790         float32_t * pDst,
5791         uint32_t numSamples);
5792 
5793 
5794   /**
5795    * @brief  Q31 complex magnitude squared.
5796    * @param[in]  pSrc        points to the complex input vector (read-only).
5797    * @param[out] pDst        points to the real output vector.
5798    * @param[in]  numSamples  number of complex samples in the input vector.
5799    */
5800   void arm_cmplx_mag_squared_q31(
5801   const q31_t * pSrc,
5802         q31_t * pDst,
5803         uint32_t numSamples);
5804 
5805 
5806   /**
5807    * @brief  Q15 complex magnitude squared.
5808    * @param[in]  pSrc        points to the complex input vector (read-only).
5809    * @param[out] pDst        points to the real output vector.
5810    * @param[in]  numSamples  number of complex samples in the input vector.
5811    */
5812   void arm_cmplx_mag_squared_q15(
5813   const q15_t * pSrc,
5814         q15_t * pDst,
5815         uint32_t numSamples);
5816 
5817 
5818  /**
5819    * @ingroup groupController
5820    */
5821 
5822   /**
5823    * @defgroup PID PID Motor Control
5824    *
5825    * A Proportional Integral Derivative (PID) controller is a generic feedback control
5826    * loop mechanism widely used in industrial control systems.
5827    * A PID controller is the most commonly used type of feedback controller.
5828    *
5829    * This set of functions implements (PID) controllers
5830    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
5831    * of data and each call to the function returns a single processed value.
5832    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
5833    * is the input sample value. The functions return the output value.
5834    *
5835    * \par Algorithm:
5836    * <pre>
5837    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
5838    *    A0 = Kp + Ki + Kd
5839    *    A1 = (-Kp ) - (2 * Kd )
5840    *    A2 = Kd
5841    * </pre>
5842    *
5843    * \par
5844    * where \c Kp is the proportional constant, \c Ki is the integral constant and \c Kd is the derivative constant.
5845    *
5846    * \par
5847    * \image html PID.gif "Proportional Integral Derivative Controller"
5848    *
5849    * \par
5850    * The PID controller calculates an "error" value as the difference between
5851    * the measured output and the reference input.
5852    * The controller attempts to minimize the error by adjusting the process control inputs.
5853    * The proportional value determines the reaction to the current error,
5854    * the integral value determines the reaction based on the sum of recent errors,
5855    * and the derivative value determines the reaction based on the rate at which the error has been changing.
5856    *
5857    * \par Instance Structure
5858    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
5859    * A separate instance structure must be defined for each PID Controller.
5860    * There are separate instance structure declarations for each of the 3 supported data types.
5861    *
5862    * \par Reset Functions
5863    * There is also an associated reset function for each data type which clears the state array.
5864    *
5865    * \par Initialization Functions
5866    * There is also an associated initialization function for each data type.
5867    * The initialization function performs the following operations:
5868    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
5869    * - Zeros out the values in the state buffer.
5870    *
5871    * \par
5872    * The instance structure cannot be placed into a const data section; using the initialization function is recommended.
5873    *
5874    * \par Fixed-Point Behavior
5875    * Care must be taken when using the fixed-point versions of the PID Controller functions.
5876    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
5877    * Refer to the function specific documentation below for usage guidelines.
5878    */
5879 
5880   /**
5881    * @addtogroup PID
5882    * @{
5883    */
5884 
5885   /**
5886    * @brief         Process function for the floating-point PID Control.
5887    * @param[in,out] S   is an instance of the floating-point PID Control structure
5888    * @param[in]     in  input sample to process
5889    * @return        processed output sample.
5890    */
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)5891   __STATIC_FORCEINLINE float32_t arm_pid_f32(
5892   arm_pid_instance_f32 * S,
5893   float32_t in)
5894   {
5895     float32_t out;
5896 
5897     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
5898     out = (S->A0 * in) +
5899       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
5900 
5901     /* Update state */
5902     S->state[1] = S->state[0];
5903     S->state[0] = in;
5904     S->state[2] = out;
5905 
5906     /* return to application */
5907     return (out);
5908 
5909   }
5910 
5911 /**
5912   @brief         Process function for the Q31 PID Control.
5913   @param[in,out] S  points to an instance of the Q31 PID Control structure
5914   @param[in]     in  input sample to process
5915   @return        processed output sample.
5916 
5917   \par Scaling and Overflow Behavior
5918          The function is implemented using an internal 64-bit accumulator.
5919          The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
5920          Thus, if the accumulator result overflows it wraps around rather than clip.
5921          In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
5922          After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
5923  */
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)5924 __STATIC_FORCEINLINE q31_t arm_pid_q31(
5925   arm_pid_instance_q31 * S,
5926   q31_t in)
5927   {
5928     q63_t acc;
5929     q31_t out;
5930 
5931     /* acc = A0 * x[n]  */
5932     acc = (q63_t) S->A0 * in;
5933 
5934     /* acc += A1 * x[n-1] */
5935     acc += (q63_t) S->A1 * S->state[0];
5936 
5937     /* acc += A2 * x[n-2]  */
5938     acc += (q63_t) S->A2 * S->state[1];
5939 
5940     /* convert output to 1.31 format to add y[n-1] */
5941     out = (q31_t) (acc >> 31U);
5942 
5943     /* out += y[n-1] */
5944     out += S->state[2];
5945 
5946     /* Update state */
5947     S->state[1] = S->state[0];
5948     S->state[0] = in;
5949     S->state[2] = out;
5950 
5951     /* return to application */
5952     return (out);
5953   }
5954 
5955 
5956 /**
5957   @brief         Process function for the Q15 PID Control.
5958   @param[in,out] S   points to an instance of the Q15 PID Control structure
5959   @param[in]     in  input sample to process
5960   @return        processed output sample.
5961 
5962   \par Scaling and Overflow Behavior
5963          The function is implemented using a 64-bit internal accumulator.
5964          Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
5965          The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
5966          There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
5967          After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
5968          Lastly, the accumulator is saturated to yield a result in 1.15 format.
5969  */
arm_pid_q15(arm_pid_instance_q15 * S,q15_t in)5970 __STATIC_FORCEINLINE q15_t arm_pid_q15(
5971   arm_pid_instance_q15 * S,
5972   q15_t in)
5973   {
5974     q63_t acc;
5975     q15_t out;
5976 
5977 #if defined (ARM_MATH_DSP)
5978     /* Implementation of PID controller */
5979 
5980     /* acc = A0 * x[n]  */
5981     acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
5982 
5983     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5984     acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc);
5985 #else
5986     /* acc = A0 * x[n]  */
5987     acc = ((q31_t) S->A0) * in;
5988 
5989     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5990     acc += (q31_t) S->A1 * S->state[0];
5991     acc += (q31_t) S->A2 * S->state[1];
5992 #endif
5993 
5994     /* acc += y[n-1] */
5995     acc += (q31_t) S->state[2] << 15;
5996 
5997     /* saturate the output */
5998     out = (q15_t) (__SSAT((q31_t)(acc >> 15), 16));
5999 
6000     /* Update state */
6001     S->state[1] = S->state[0];
6002     S->state[0] = in;
6003     S->state[2] = out;
6004 
6005     /* return to application */
6006     return (out);
6007   }
6008 
6009   /**
6010    * @} end of PID group
6011    */
6012 
6013 
6014   /**
6015    * @brief Floating-point (f32) matrix inverse.
6016    * @param[in]  src   points to the instance of the input floating-point matrix structure (read-only).
6017    * @param[out] dst   points to the instance of the output floating-point matrix structure.
6018    * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
6019    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
6020    */
6021   arm_status arm_mat_inverse_f32(
6022   const arm_matrix_instance_f32 * src,
6023   arm_matrix_instance_f32 * dst);
6024 
6025 
6026   /**
6027    * @brief Floating-point (f64) matrix inverse.
6028    * @param[in]  src   points to the instance of the input f64 floating-point matrix structure (read-only).
6029    * @param[out] dst   points to the instance of the output f64 floating-point matrix structure.
6030    * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
6031    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
6032    */
6033   arm_status arm_mat_inverse_f64(
6034   const arm_matrix_instance_f64 * src,
6035   arm_matrix_instance_f64 * dst);
6036 
6037 
6038 
6039   /**
6040    * @ingroup groupController
6041    */
6042 
6043   /**
6044    * @defgroup clarke Vector Clarke Transform
6045    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
6046    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
6047    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
6048    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
6049    * \image html clarke.gif Stator current space vector and its components in (a,b).
6050    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
6051    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
6052    *
6053    * The function operates on a single sample of data and each call to the function returns the processed output.
6054    * The library provides separate functions for Q31 and floating-point data types.
6055    * \par Algorithm
6056    * \image html clarkeFormula.gif
6057    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
6058    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
6059    * \par Fixed-Point Behavior
6060    * Care must be taken when using the Q31 version of the Clarke transform.
6061    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
6062    * Refer to the function specific documentation below for usage guidelines.
6063    */
6064 
6065   /**
6066    * @addtogroup clarke
6067    * @{
6068    */
6069 
6070   /**
6071    *
6072    * @brief  Floating-point Clarke transform
6073    * @param[in]  Ia       input three-phase coordinate <code>a</code>
6074    * @param[in]  Ib       input three-phase coordinate <code>b</code>
6075    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
6076    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
6077    * @return        none
6078    */
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)6079   __STATIC_FORCEINLINE void arm_clarke_f32(
6080   float32_t Ia,
6081   float32_t Ib,
6082   float32_t * pIalpha,
6083   float32_t * pIbeta)
6084   {
6085     /* Calculate pIalpha using the equation, pIalpha = Ia */
6086     *pIalpha = Ia;
6087 
6088     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
6089     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
6090   }
6091 
6092 
6093 /**
6094   @brief  Clarke transform for Q31 version
6095   @param[in]  Ia       input three-phase coordinate <code>a</code>
6096   @param[in]  Ib       input three-phase coordinate <code>b</code>
6097   @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
6098   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
6099   @return     none
6100 
6101   \par Scaling and Overflow Behavior
6102          The function is implemented using an internal 32-bit accumulator.
6103          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
6104          There is saturation on the addition, hence there is no risk of overflow.
6105  */
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)6106 __STATIC_FORCEINLINE void arm_clarke_q31(
6107   q31_t Ia,
6108   q31_t Ib,
6109   q31_t * pIalpha,
6110   q31_t * pIbeta)
6111   {
6112     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
6113 
6114     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
6115     *pIalpha = Ia;
6116 
6117     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
6118     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
6119 
6120     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
6121     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
6122 
6123     /* pIbeta is calculated by adding the intermediate products */
6124     *pIbeta = __QADD(product1, product2);
6125   }
6126 
6127   /**
6128    * @} end of clarke group
6129    */
6130 
6131 
6132   /**
6133    * @ingroup groupController
6134    */
6135 
6136   /**
6137    * @defgroup inv_clarke Vector Inverse Clarke Transform
6138    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
6139    *
6140    * The function operates on a single sample of data and each call to the function returns the processed output.
6141    * The library provides separate functions for Q31 and floating-point data types.
6142    * \par Algorithm
6143    * \image html clarkeInvFormula.gif
6144    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
6145    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
6146    * \par Fixed-Point Behavior
6147    * Care must be taken when using the Q31 version of the inverse Clarke transform.
6148    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
6149    * Refer to the function specific documentation below for usage guidelines.
6150    */
6151 
6152   /**
6153    * @addtogroup inv_clarke
6154    * @{
6155    */
6156 
6157    /**
6158    * @brief  Floating-point Inverse Clarke transform
6159    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
6160    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
6161    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
6162    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
6163    * @return     none
6164    */
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)6165   __STATIC_FORCEINLINE void arm_inv_clarke_f32(
6166   float32_t Ialpha,
6167   float32_t Ibeta,
6168   float32_t * pIa,
6169   float32_t * pIb)
6170   {
6171     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
6172     *pIa = Ialpha;
6173 
6174     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
6175     *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
6176   }
6177 
6178 
6179 /**
6180   @brief  Inverse Clarke transform for Q31 version
6181   @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
6182   @param[in]  Ibeta   input two-phase orthogonal vector axis beta
6183   @param[out] pIa     points to output three-phase coordinate <code>a</code>
6184   @param[out] pIb     points to output three-phase coordinate <code>b</code>
6185   @return     none
6186 
6187   \par Scaling and Overflow Behavior
6188          The function is implemented using an internal 32-bit accumulator.
6189          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
6190          There is saturation on the subtraction, hence there is no risk of overflow.
6191  */
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)6192 __STATIC_FORCEINLINE void arm_inv_clarke_q31(
6193   q31_t Ialpha,
6194   q31_t Ibeta,
6195   q31_t * pIa,
6196   q31_t * pIb)
6197   {
6198     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
6199 
6200     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
6201     *pIa = Ialpha;
6202 
6203     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
6204     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
6205 
6206     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
6207     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
6208 
6209     /* pIb is calculated by subtracting the products */
6210     *pIb = __QSUB(product2, product1);
6211   }
6212 
6213   /**
6214    * @} end of inv_clarke group
6215    */
6216 
6217 
6218 
6219   /**
6220    * @ingroup groupController
6221    */
6222 
6223   /**
6224    * @defgroup park Vector Park Transform
6225    *
6226    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
6227    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
6228    * from the stationary to the moving reference frame and control the spatial relationship between
6229    * the stator vector current and rotor flux vector.
6230    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
6231    * current vector and the relationship from the two reference frames:
6232    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
6233    *
6234    * The function operates on a single sample of data and each call to the function returns the processed output.
6235    * The library provides separate functions for Q31 and floating-point data types.
6236    * \par Algorithm
6237    * \image html parkFormula.gif
6238    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
6239    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
6240    * cosine and sine values of theta (rotor flux position).
6241    * \par Fixed-Point Behavior
6242    * Care must be taken when using the Q31 version of the Park transform.
6243    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
6244    * Refer to the function specific documentation below for usage guidelines.
6245    */
6246 
6247   /**
6248    * @addtogroup park
6249    * @{
6250    */
6251 
6252   /**
6253    * @brief Floating-point Park transform
6254    * @param[in]  Ialpha  input two-phase vector coordinate alpha
6255    * @param[in]  Ibeta   input two-phase vector coordinate beta
6256    * @param[out] pId     points to output   rotor reference frame d
6257    * @param[out] pIq     points to output   rotor reference frame q
6258    * @param[in]  sinVal  sine value of rotation angle theta
6259    * @param[in]  cosVal  cosine value of rotation angle theta
6260    * @return     none
6261    *
6262    * The function implements the forward Park transform.
6263    *
6264    */
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)6265   __STATIC_FORCEINLINE void arm_park_f32(
6266   float32_t Ialpha,
6267   float32_t Ibeta,
6268   float32_t * pId,
6269   float32_t * pIq,
6270   float32_t sinVal,
6271   float32_t cosVal)
6272   {
6273     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
6274     *pId = Ialpha * cosVal + Ibeta * sinVal;
6275 
6276     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
6277     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
6278   }
6279 
6280 
6281 /**
6282   @brief  Park transform for Q31 version
6283   @param[in]  Ialpha  input two-phase vector coordinate alpha
6284   @param[in]  Ibeta   input two-phase vector coordinate beta
6285   @param[out] pId     points to output rotor reference frame d
6286   @param[out] pIq     points to output rotor reference frame q
6287   @param[in]  sinVal  sine value of rotation angle theta
6288   @param[in]  cosVal  cosine value of rotation angle theta
6289   @return     none
6290 
6291   \par Scaling and Overflow Behavior
6292          The function is implemented using an internal 32-bit accumulator.
6293          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
6294          There is saturation on the addition and subtraction, hence there is no risk of overflow.
6295  */
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)6296 __STATIC_FORCEINLINE void arm_park_q31(
6297   q31_t Ialpha,
6298   q31_t Ibeta,
6299   q31_t * pId,
6300   q31_t * pIq,
6301   q31_t sinVal,
6302   q31_t cosVal)
6303   {
6304     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
6305     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
6306 
6307     /* Intermediate product is calculated by (Ialpha * cosVal) */
6308     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
6309 
6310     /* Intermediate product is calculated by (Ibeta * sinVal) */
6311     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
6312 
6313 
6314     /* Intermediate product is calculated by (Ialpha * sinVal) */
6315     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
6316 
6317     /* Intermediate product is calculated by (Ibeta * cosVal) */
6318     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
6319 
6320     /* Calculate pId by adding the two intermediate products 1 and 2 */
6321     *pId = __QADD(product1, product2);
6322 
6323     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
6324     *pIq = __QSUB(product4, product3);
6325   }
6326 
6327   /**
6328    * @} end of park group
6329    */
6330 
6331 
6332   /**
6333    * @ingroup groupController
6334    */
6335 
6336   /**
6337    * @defgroup inv_park Vector Inverse Park transform
6338    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
6339    *
6340    * The function operates on a single sample of data and each call to the function returns the processed output.
6341    * The library provides separate functions for Q31 and floating-point data types.
6342    * \par Algorithm
6343    * \image html parkInvFormula.gif
6344    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
6345    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
6346    * cosine and sine values of theta (rotor flux position).
6347    * \par Fixed-Point Behavior
6348    * Care must be taken when using the Q31 version of the Inverse Park transform.
6349    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
6350    * Refer to the function specific documentation below for usage guidelines.
6351    */
6352 
6353   /**
6354    * @addtogroup inv_park
6355    * @{
6356    */
6357 
6358    /**
6359    * @brief  Floating-point Inverse Park transform
6360    * @param[in]  Id       input coordinate of rotor reference frame d
6361    * @param[in]  Iq       input coordinate of rotor reference frame q
6362    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
6363    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
6364    * @param[in]  sinVal   sine value of rotation angle theta
6365    * @param[in]  cosVal   cosine value of rotation angle theta
6366    * @return     none
6367    */
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)6368   __STATIC_FORCEINLINE void arm_inv_park_f32(
6369   float32_t Id,
6370   float32_t Iq,
6371   float32_t * pIalpha,
6372   float32_t * pIbeta,
6373   float32_t sinVal,
6374   float32_t cosVal)
6375   {
6376     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
6377     *pIalpha = Id * cosVal - Iq * sinVal;
6378 
6379     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
6380     *pIbeta = Id * sinVal + Iq * cosVal;
6381   }
6382 
6383 
6384 /**
6385   @brief  Inverse Park transform for   Q31 version
6386   @param[in]  Id       input coordinate of rotor reference frame d
6387   @param[in]  Iq       input coordinate of rotor reference frame q
6388   @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
6389   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
6390   @param[in]  sinVal   sine value of rotation angle theta
6391   @param[in]  cosVal   cosine value of rotation angle theta
6392   @return     none
6393 
6394   @par Scaling and Overflow Behavior
6395          The function is implemented using an internal 32-bit accumulator.
6396          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
6397          There is saturation on the addition, hence there is no risk of overflow.
6398  */
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)6399 __STATIC_FORCEINLINE void arm_inv_park_q31(
6400   q31_t Id,
6401   q31_t Iq,
6402   q31_t * pIalpha,
6403   q31_t * pIbeta,
6404   q31_t sinVal,
6405   q31_t cosVal)
6406   {
6407     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
6408     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
6409 
6410     /* Intermediate product is calculated by (Id * cosVal) */
6411     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
6412 
6413     /* Intermediate product is calculated by (Iq * sinVal) */
6414     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
6415 
6416 
6417     /* Intermediate product is calculated by (Id * sinVal) */
6418     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
6419 
6420     /* Intermediate product is calculated by (Iq * cosVal) */
6421     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
6422 
6423     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
6424     *pIalpha = __QSUB(product1, product2);
6425 
6426     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
6427     *pIbeta = __QADD(product4, product3);
6428   }
6429 
6430   /**
6431    * @} end of Inverse park group
6432    */
6433 
6434 
6435   /**
6436    * @ingroup groupInterpolation
6437    */
6438 
6439   /**
6440    * @defgroup LinearInterpolate Linear Interpolation
6441    *
6442    * Linear interpolation is a method of curve fitting using linear polynomials.
6443    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
6444    *
6445    * \par
6446    * \image html LinearInterp.gif "Linear interpolation"
6447    *
6448    * \par
6449    * A  Linear Interpolate function calculates an output value(y), for the input(x)
6450    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
6451    *
6452    * \par Algorithm:
6453    * <pre>
6454    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
6455    *       where x0, x1 are nearest values of input x
6456    *             y0, y1 are nearest values to output y
6457    * </pre>
6458    *
6459    * \par
6460    * This set of functions implements Linear interpolation process
6461    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
6462    * sample of data and each call to the function returns a single processed value.
6463    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
6464    * <code>x</code> is the input sample value. The functions returns the output value.
6465    *
6466    * \par
6467    * If x is outside of the table boundary, linear interpolation returns the first value of the table
6468    * when x is below the input range and the last value of the table when x is above the range.
6469    */
6470 
6471   /**
6472    * @addtogroup LinearInterpolate
6473    * @{
6474    */
6475 
6476   /**
6477    * @brief  Process function for the floating-point Linear Interpolation Function.
6478    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
6479    * @param[in]     x  input sample to process
6480    * @return y processed output sample.
6481    *
6482    */
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)6483   __STATIC_FORCEINLINE float32_t arm_linear_interp_f32(
6484   arm_linear_interp_instance_f32 * S,
6485   float32_t x)
6486   {
6487     float32_t y;
6488     float32_t x0, x1;                            /* Nearest input values */
6489     float32_t y0, y1;                            /* Nearest output values */
6490     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
6491     int32_t i;                                   /* Index variable */
6492     float32_t *pYData = S->pYData;               /* pointer to output table */
6493 
6494     /* Calculation of index */
6495     i = (int32_t) ((x - S->x1) / xSpacing);
6496 
6497     if (i < 0)
6498     {
6499       /* Iniatilize output for below specified range as least output value of table */
6500       y = pYData[0];
6501     }
6502     else if ((uint32_t)i >= (S->nValues - 1))
6503     {
6504       /* Iniatilize output for above specified range as last output value of table */
6505       y = pYData[S->nValues - 1];
6506     }
6507     else
6508     {
6509       /* Calculation of nearest input values */
6510       x0 = S->x1 +  i      * xSpacing;
6511       x1 = S->x1 + (i + 1) * xSpacing;
6512 
6513       /* Read of nearest output values */
6514       y0 = pYData[i];
6515       y1 = pYData[i + 1];
6516 
6517       /* Calculation of output */
6518       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
6519 
6520     }
6521 
6522     /* returns output value */
6523     return (y);
6524   }
6525 
6526 
6527    /**
6528    *
6529    * @brief  Process function for the Q31 Linear Interpolation Function.
6530    * @param[in] pYData   pointer to Q31 Linear Interpolation table
6531    * @param[in] x        input sample to process
6532    * @param[in] nValues  number of table values
6533    * @return y processed output sample.
6534    *
6535    * \par
6536    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
6537    * This function can support maximum of table size 2^12.
6538    *
6539    */
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)6540   __STATIC_FORCEINLINE q31_t arm_linear_interp_q31(
6541   q31_t * pYData,
6542   q31_t x,
6543   uint32_t nValues)
6544   {
6545     q31_t y;                                     /* output */
6546     q31_t y0, y1;                                /* Nearest output values */
6547     q31_t fract;                                 /* fractional part */
6548     int32_t index;                               /* Index to read nearest output values */
6549 
6550     /* Input is in 12.20 format */
6551     /* 12 bits for the table index */
6552     /* Index value calculation */
6553     index = ((x & (q31_t)0xFFF00000) >> 20);
6554 
6555     if (index >= (int32_t)(nValues - 1))
6556     {
6557       return (pYData[nValues - 1]);
6558     }
6559     else if (index < 0)
6560     {
6561       return (pYData[0]);
6562     }
6563     else
6564     {
6565       /* 20 bits for the fractional part */
6566       /* shift left by 11 to keep fract in 1.31 format */
6567       fract = (x & 0x000FFFFF) << 11;
6568 
6569       /* Read two nearest output values from the index in 1.31(q31) format */
6570       y0 = pYData[index];
6571       y1 = pYData[index + 1];
6572 
6573       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
6574       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
6575 
6576       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
6577       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
6578 
6579       /* Convert y to 1.31 format */
6580       return (y << 1U);
6581     }
6582   }
6583 
6584 
6585   /**
6586    *
6587    * @brief  Process function for the Q15 Linear Interpolation Function.
6588    * @param[in] pYData   pointer to Q15 Linear Interpolation table
6589    * @param[in] x        input sample to process
6590    * @param[in] nValues  number of table values
6591    * @return y processed output sample.
6592    *
6593    * \par
6594    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
6595    * This function can support maximum of table size 2^12.
6596    *
6597    */
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)6598   __STATIC_FORCEINLINE q15_t arm_linear_interp_q15(
6599   q15_t * pYData,
6600   q31_t x,
6601   uint32_t nValues)
6602   {
6603     q63_t y;                                     /* output */
6604     q15_t y0, y1;                                /* Nearest output values */
6605     q31_t fract;                                 /* fractional part */
6606     int32_t index;                               /* Index to read nearest output values */
6607 
6608     /* Input is in 12.20 format */
6609     /* 12 bits for the table index */
6610     /* Index value calculation */
6611     index = ((x & (int32_t)0xFFF00000) >> 20);
6612 
6613     if (index >= (int32_t)(nValues - 1))
6614     {
6615       return (pYData[nValues - 1]);
6616     }
6617     else if (index < 0)
6618     {
6619       return (pYData[0]);
6620     }
6621     else
6622     {
6623       /* 20 bits for the fractional part */
6624       /* fract is in 12.20 format */
6625       fract = (x & 0x000FFFFF);
6626 
6627       /* Read two nearest output values from the index */
6628       y0 = pYData[index];
6629       y1 = pYData[index + 1];
6630 
6631       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
6632       y = ((q63_t) y0 * (0xFFFFF - fract));
6633 
6634       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
6635       y += ((q63_t) y1 * (fract));
6636 
6637       /* convert y to 1.15 format */
6638       return (q15_t) (y >> 20);
6639     }
6640   }
6641 
6642 
6643   /**
6644    *
6645    * @brief  Process function for the Q7 Linear Interpolation Function.
6646    * @param[in] pYData   pointer to Q7 Linear Interpolation table
6647    * @param[in] x        input sample to process
6648    * @param[in] nValues  number of table values
6649    * @return y processed output sample.
6650    *
6651    * \par
6652    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
6653    * This function can support maximum of table size 2^12.
6654    */
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)6655   __STATIC_FORCEINLINE q7_t arm_linear_interp_q7(
6656   q7_t * pYData,
6657   q31_t x,
6658   uint32_t nValues)
6659   {
6660     q31_t y;                                     /* output */
6661     q7_t y0, y1;                                 /* Nearest output values */
6662     q31_t fract;                                 /* fractional part */
6663     uint32_t index;                              /* Index to read nearest output values */
6664 
6665     /* Input is in 12.20 format */
6666     /* 12 bits for the table index */
6667     /* Index value calculation */
6668     if (x < 0)
6669     {
6670       return (pYData[0]);
6671     }
6672     index = (x >> 20) & 0xfff;
6673 
6674     if (index >= (nValues - 1))
6675     {
6676       return (pYData[nValues - 1]);
6677     }
6678     else
6679     {
6680       /* 20 bits for the fractional part */
6681       /* fract is in 12.20 format */
6682       fract = (x & 0x000FFFFF);
6683 
6684       /* Read two nearest output values from the index and are in 1.7(q7) format */
6685       y0 = pYData[index];
6686       y1 = pYData[index + 1];
6687 
6688       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
6689       y = ((y0 * (0xFFFFF - fract)));
6690 
6691       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
6692       y += (y1 * fract);
6693 
6694       /* convert y to 1.7(q7) format */
6695       return (q7_t) (y >> 20);
6696      }
6697   }
6698 
6699   /**
6700    * @} end of LinearInterpolate group
6701    */
6702 
6703   /**
6704    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
6705    * @param[in] x  input value in radians.
6706    * @return  sin(x).
6707    */
6708   float32_t arm_sin_f32(
6709   float32_t x);
6710 
6711 
6712   /**
6713    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
6714    * @param[in] x  Scaled input value in radians. NOTE(review): the scaling is set by the implementation, which is not visible from this declaration -- confirm.
6715    * @return  sin(x), as a q31_t value.
6716    */
6717   q31_t arm_sin_q31(
6718   q31_t x);
6719 
6720 
6721   /**
6722    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
6723    * @param[in] x  Scaled input value in radians. NOTE(review): the scaling is set by the implementation, which is not visible from this declaration -- confirm.
6724    * @return  sin(x), as a q15_t value.
6725    */
6726   q15_t arm_sin_q15(
6727   q15_t x);
6728 
6729 
6730   /**
6731    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
6732    * @param[in] x  input value in radians.
6733    * @return  cos(x).
6734    */
6735   float32_t arm_cos_f32(
6736   float32_t x);
6737 
6738 
6739   /**
6740    * @brief  Fast approximation to the trigonometric cosine function for Q31 data.
6741    * @param[in] x  Scaled input value in radians. NOTE(review): the scaling is set by the implementation, which is not visible from this declaration -- confirm.
6742    * @return  cos(x), as a q31_t value.
6743    */
6744   q31_t arm_cos_q31(
6745   q31_t x);
6746 
6747 
6748   /**
6749    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
6750    * @param[in] x  Scaled input value in radians. NOTE(review): the scaling is set by the implementation, which is not visible from this declaration -- confirm.
6751    * @return  cos(x), as a q15_t value.
6752    */
6753   q15_t arm_cos_q15(
6754   q15_t x);
6755 
6756 
6757 /**
6758   @brief         Floating-point vector of log values.
6759   @param[in]     pSrc       points to the input vector (read-only)
6760   @param[out]    pDst       points to the output vector that receives the log values
6761   @param[in]     blockSize  number of samples in each vector
6762   @return        none
6763  */
6764   void arm_vlog_f32(
6765   const float32_t * pSrc,
6766         float32_t * pDst,
6767         uint32_t blockSize);
6768 
6769 /**
6770   @brief         Floating-point vector of exp values.
6771   @param[in]     pSrc       points to the input vector (read-only)
6772   @param[out]    pDst       points to the output vector that receives the exp values
6773   @param[in]     blockSize  number of samples in each vector
6774   @return        none
6775  */
6776   void arm_vexp_f32(
6777   const float32_t * pSrc,
6778         float32_t * pDst,
6779         uint32_t blockSize);
6780 
6781   /**
6782    * @ingroup groupFastMath
6783    */
6784 
6785 
6786   /**
6787    * @defgroup SQRT Square Root
6788    *
6789    * Computes the square root of a number.
6790    * There are separate functions for Q15, Q31, and floating-point data types.
6791    * The square root function is computed using the Newton-Raphson algorithm.
6792    * This is an iterative algorithm of the form:
6793    * <pre>
6794    *      x1 = x0 - f(x0)/f'(x0)
6795    * </pre>
6796    * where <code>x1</code> is the current estimate,
6797    * <code>x0</code> is the previous estimate, and
6798    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
6799    * For the square root function, the algorithm reduces to:
6800    * <pre>
6801    *     x0 = in/2                         [initial guess]
6802    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
6803    * </pre>
6804    */
6805 
6806 
6807   /**
6808    * @addtogroup SQRT
6809    * @{
6810    */
6811 
6812 /**
6813   @brief         Floating-point square root function.
6814   @param[in]     in    input value
6815   @param[out]    pOut  square root of input value
6816   @return        execution status
6817                    - \ref ARM_MATH_SUCCESS        : input value is positive
6818                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
6819  */
arm_sqrt_f32(float32_t in,float32_t * pOut)6820 __STATIC_FORCEINLINE arm_status arm_sqrt_f32(
6821   float32_t in,
6822   float32_t * pOut)
6823   {
6824     if (in >= 0.0f)
6825     {
6826 #if defined ( __CC_ARM )
6827   #if defined __TARGET_FPU_VFP
6828       *pOut = __sqrtf(in);
6829   #else
6830       *pOut = sqrtf(in);
6831   #endif
6832 
6833 #elif defined ( __ICCARM__ )
6834   #if defined __ARMVFP__
6835       __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
6836   #else
6837       *pOut = sqrtf(in);
6838   #endif
6839 
6840 #else
6841       *pOut = sqrtf(in);
6842 #endif
6843 
6844       return (ARM_MATH_SUCCESS);
6845     }
6846     else
6847     {
6848       *pOut = 0.0f;
6849       return (ARM_MATH_ARGUMENT_ERROR);
6850     }
6851   }
6852 
6853 
6854 /**
6855   @brief         Q31 square root function.
6856   @param[in]     in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
6857   @param[out]    pOut  points to the location that receives the square root of the input value
6858   @return        execution status
6859                    - \ref ARM_MATH_SUCCESS        : input value is positive (NOTE(review): how in == 0 is classified is decided by the implementation, not visible here -- confirm)
6860                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
6861  */
6862 arm_status arm_sqrt_q31(
6863   q31_t in,
6864   q31_t * pOut);
6865 
6866 
6867 /**
6868   @brief         Q15 square root function.
6869   @param[in]     in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF
6870   @param[out]    pOut  points to the location that receives the square root of the input value
6871   @return        execution status
6872                    - \ref ARM_MATH_SUCCESS        : input value is positive (NOTE(review): how in == 0 is classified is decided by the implementation, not visible here -- confirm)
6873                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
6874  */
6875 arm_status arm_sqrt_q15(
6876   q15_t in,
6877   q15_t * pOut);
6878 
6879   /**
6880    * @brief  Vector Floating-point square root function.
6881    * @param[in]  pIn   input vector.
6882    * @param[out] pOut  vector of square roots of input elements.
6883    * @param[in]  len   length of input vector.
6884    * @return none. The declared return type is void, so no status code reaches the caller.
6885    * NOTE(review): handling of negative elements is decided by the implementation, not visible here; presumably they produce a zero output -- confirm.
6886    */
6887   void arm_vsqrt_f32(
6888   float32_t * pIn,
6889   float32_t * pOut,
6890   uint16_t len);
6891 
  /**
   * @brief  Vector Q31 square root function.
   * @param[in]  pIn   input vector.
   * @param[out] pOut  output vector; presumably receives the square root of each input element,
   *                   by analogy with arm_vsqrt_f32 -- confirm against the implementation.
   * @param[in]  len   length of input vector.
   * @return none
   */
6892   void arm_vsqrt_q31(
6893   q31_t * pIn,
6894   q31_t * pOut,
6895   uint16_t len);
6896 
  /**
   * @brief  Vector Q15 square root function.
   * @param[in]  pIn   input vector.
   * @param[out] pOut  output vector; presumably receives the square root of each input element,
   *                   by analogy with arm_vsqrt_f32 -- confirm against the implementation.
   * @param[in]  len   length of input vector.
   * @return none
   */
6897   void arm_vsqrt_q15(
6898   q15_t * pIn,
6899   q15_t * pOut,
6900   uint16_t len);
6901 
6902   /**
6903    * @} end of SQRT group
6904    */
6905 
6906 
6907   /**
6908    * @brief floating-point Circular write function.
6909    */
arm_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)6910   __STATIC_FORCEINLINE void arm_circularWrite_f32(
6911   int32_t * circBuffer,
6912   int32_t L,
6913   uint16_t * writeOffset,
6914   int32_t bufferInc,
6915   const int32_t * src,
6916   int32_t srcInc,
6917   uint32_t blockSize)
6918   {
6919     uint32_t i = 0U;
6920     int32_t wOffset;
6921 
6922     /* Copy the value of Index pointer that points
6923      * to the current location where the input samples to be copied */
6924     wOffset = *writeOffset;
6925 
6926     /* Loop over the blockSize */
6927     i = blockSize;
6928 
6929     while (i > 0U)
6930     {
6931       /* copy the input sample to the circular buffer */
6932       circBuffer[wOffset] = *src;
6933 
6934       /* Update the input pointer */
6935       src += srcInc;
6936 
6937       /* Circularly update wOffset.  Watch out for positive and negative value */
6938       wOffset += bufferInc;
6939       if (wOffset >= L)
6940         wOffset -= L;
6941 
6942       /* Decrement the loop counter */
6943       i--;
6944     }
6945 
6946     /* Update the index pointer */
6947     *writeOffset = (uint16_t)wOffset;
6948   }
6949 
6950 
6951 
6952   /**
6953    * @brief floating-point Circular Read function.
6954    */
arm_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6955   __STATIC_FORCEINLINE void arm_circularRead_f32(
6956   int32_t * circBuffer,
6957   int32_t L,
6958   int32_t * readOffset,
6959   int32_t bufferInc,
6960   int32_t * dst,
6961   int32_t * dst_base,
6962   int32_t dst_length,
6963   int32_t dstInc,
6964   uint32_t blockSize)
6965   {
6966     uint32_t i = 0U;
6967     int32_t rOffset;
6968     int32_t* dst_end;
6969 
6970     /* Copy the value of Index pointer that points
6971      * to the current location from where the input samples to be read */
6972     rOffset = *readOffset;
6973     dst_end = dst_base + dst_length;
6974 
6975     /* Loop over the blockSize */
6976     i = blockSize;
6977 
6978     while (i > 0U)
6979     {
6980       /* copy the sample from the circular buffer to the destination buffer */
6981       *dst = circBuffer[rOffset];
6982 
6983       /* Update the input pointer */
6984       dst += dstInc;
6985 
6986       if (dst == dst_end)
6987       {
6988         dst = dst_base;
6989       }
6990 
6991       /* Circularly update rOffset.  Watch out for positive and negative value  */
6992       rOffset += bufferInc;
6993 
6994       if (rOffset >= L)
6995       {
6996         rOffset -= L;
6997       }
6998 
6999       /* Decrement the loop counter */
7000       i--;
7001     }
7002 
7003     /* Update the index pointer */
7004     *readOffset = rOffset;
7005   }
7006 
7007 
7008   /**
7009    * @brief Q15 Circular write function.
7010    */
arm_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)7011   __STATIC_FORCEINLINE void arm_circularWrite_q15(
7012   q15_t * circBuffer,
7013   int32_t L,
7014   uint16_t * writeOffset,
7015   int32_t bufferInc,
7016   const q15_t * src,
7017   int32_t srcInc,
7018   uint32_t blockSize)
7019   {
7020     uint32_t i = 0U;
7021     int32_t wOffset;
7022 
7023     /* Copy the value of Index pointer that points
7024      * to the current location where the input samples to be copied */
7025     wOffset = *writeOffset;
7026 
7027     /* Loop over the blockSize */
7028     i = blockSize;
7029 
7030     while (i > 0U)
7031     {
7032       /* copy the input sample to the circular buffer */
7033       circBuffer[wOffset] = *src;
7034 
7035       /* Update the input pointer */
7036       src += srcInc;
7037 
7038       /* Circularly update wOffset.  Watch out for positive and negative value */
7039       wOffset += bufferInc;
7040       if (wOffset >= L)
7041         wOffset -= L;
7042 
7043       /* Decrement the loop counter */
7044       i--;
7045     }
7046 
7047     /* Update the index pointer */
7048     *writeOffset = (uint16_t)wOffset;
7049   }
7050 
7051 
7052   /**
7053    * @brief Q15 Circular Read function.
7054    */
arm_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)7055   __STATIC_FORCEINLINE void arm_circularRead_q15(
7056   q15_t * circBuffer,
7057   int32_t L,
7058   int32_t * readOffset,
7059   int32_t bufferInc,
7060   q15_t * dst,
7061   q15_t * dst_base,
7062   int32_t dst_length,
7063   int32_t dstInc,
7064   uint32_t blockSize)
7065   {
7066     uint32_t i = 0;
7067     int32_t rOffset;
7068     q15_t* dst_end;
7069 
7070     /* Copy the value of Index pointer that points
7071      * to the current location from where the input samples to be read */
7072     rOffset = *readOffset;
7073 
7074     dst_end = dst_base + dst_length;
7075 
7076     /* Loop over the blockSize */
7077     i = blockSize;
7078 
7079     while (i > 0U)
7080     {
7081       /* copy the sample from the circular buffer to the destination buffer */
7082       *dst = circBuffer[rOffset];
7083 
7084       /* Update the input pointer */
7085       dst += dstInc;
7086 
7087       if (dst == dst_end)
7088       {
7089         dst = dst_base;
7090       }
7091 
7092       /* Circularly update wOffset.  Watch out for positive and negative value */
7093       rOffset += bufferInc;
7094 
7095       if (rOffset >= L)
7096       {
7097         rOffset -= L;
7098       }
7099 
7100       /* Decrement the loop counter */
7101       i--;
7102     }
7103 
7104     /* Update the index pointer */
7105     *readOffset = rOffset;
7106   }
7107 
7108 
7109   /**
7110    * @brief Q7 Circular write function.
7111    */
arm_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)7112   __STATIC_FORCEINLINE void arm_circularWrite_q7(
7113   q7_t * circBuffer,
7114   int32_t L,
7115   uint16_t * writeOffset,
7116   int32_t bufferInc,
7117   const q7_t * src,
7118   int32_t srcInc,
7119   uint32_t blockSize)
7120   {
7121     uint32_t i = 0U;
7122     int32_t wOffset;
7123 
7124     /* Copy the value of Index pointer that points
7125      * to the current location where the input samples to be copied */
7126     wOffset = *writeOffset;
7127 
7128     /* Loop over the blockSize */
7129     i = blockSize;
7130 
7131     while (i > 0U)
7132     {
7133       /* copy the input sample to the circular buffer */
7134       circBuffer[wOffset] = *src;
7135 
7136       /* Update the input pointer */
7137       src += srcInc;
7138 
7139       /* Circularly update wOffset.  Watch out for positive and negative value */
7140       wOffset += bufferInc;
7141       if (wOffset >= L)
7142         wOffset -= L;
7143 
7144       /* Decrement the loop counter */
7145       i--;
7146     }
7147 
7148     /* Update the index pointer */
7149     *writeOffset = (uint16_t)wOffset;
7150   }
7151 
7152 
7153   /**
7154    * @brief Q7 Circular Read function.
7155    */
arm_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)7156   __STATIC_FORCEINLINE void arm_circularRead_q7(
7157   q7_t * circBuffer,
7158   int32_t L,
7159   int32_t * readOffset,
7160   int32_t bufferInc,
7161   q7_t * dst,
7162   q7_t * dst_base,
7163   int32_t dst_length,
7164   int32_t dstInc,
7165   uint32_t blockSize)
7166   {
7167     uint32_t i = 0;
7168     int32_t rOffset;
7169     q7_t* dst_end;
7170 
7171     /* Copy the value of Index pointer that points
7172      * to the current location from where the input samples to be read */
7173     rOffset = *readOffset;
7174 
7175     dst_end = dst_base + dst_length;
7176 
7177     /* Loop over the blockSize */
7178     i = blockSize;
7179 
7180     while (i > 0U)
7181     {
7182       /* copy the sample from the circular buffer to the destination buffer */
7183       *dst = circBuffer[rOffset];
7184 
7185       /* Update the input pointer */
7186       dst += dstInc;
7187 
7188       if (dst == dst_end)
7189       {
7190         dst = dst_base;
7191       }
7192 
7193       /* Circularly update rOffset.  Watch out for positive and negative value */
7194       rOffset += bufferInc;
7195 
7196       if (rOffset >= L)
7197       {
7198         rOffset -= L;
7199       }
7200 
7201       /* Decrement the loop counter */
7202       i--;
7203     }
7204 
7205     /* Update the index pointer */
7206     *readOffset = rOffset;
7207   }
7208 
7209 
7210   /**
7211    * @brief  Sum of the squares of the elements of a Q31 vector.
7212    * @param[in]  pSrc       is input pointer
7213    * @param[in]  blockSize  is the number of samples to process
7214    * @param[out] pResult    is output value.
7215    */
7216   void arm_power_q31(
7217   const q31_t * pSrc,
7218         uint32_t blockSize,
7219         q63_t * pResult);
7220 
7221 
7222   /**
7223    * @brief  Sum of the squares of the elements of a floating-point vector.
7224    * @param[in]  pSrc       is input pointer
7225    * @param[in]  blockSize  is the number of samples to process
7226    * @param[out] pResult    is output value.
7227    */
7228   void arm_power_f32(
7229   const float32_t * pSrc,
7230         uint32_t blockSize,
7231         float32_t * pResult);
7232 
7233 
7234   /**
7235    * @brief  Sum of the squares of the elements of a Q15 vector.
7236    * @param[in]  pSrc       is input pointer
7237    * @param[in]  blockSize  is the number of samples to process
7238    * @param[out] pResult    is output value.
7239    */
7240   void arm_power_q15(
7241   const q15_t * pSrc,
7242         uint32_t blockSize,
7243         q63_t * pResult);
7244 
7245 
7246   /**
7247    * @brief  Sum of the squares of the elements of a Q7 vector.
7248    * @param[in]  pSrc       is input pointer
7249    * @param[in]  blockSize  is the number of samples to process
7250    * @param[out] pResult    is output value.
7251    */
7252   void arm_power_q7(
7253   const q7_t * pSrc,
7254         uint32_t blockSize,
7255         q31_t * pResult);
7256 
7257 
7258   /**
7259    * @brief  Mean value of a Q7 vector.
7260    * @param[in]  pSrc       is input pointer
7261    * @param[in]  blockSize  is the number of samples to process
7262    * @param[out] pResult    is output value.
7263    */
7264   void arm_mean_q7(
7265   const q7_t * pSrc,
7266         uint32_t blockSize,
7267         q7_t * pResult);
7268 
7269 
7270   /**
7271    * @brief  Mean value of a Q15 vector.
7272    * @param[in]  pSrc       is input pointer
7273    * @param[in]  blockSize  is the number of samples to process
7274    * @param[out] pResult    is output value.
7275    */
7276   void arm_mean_q15(
7277   const q15_t * pSrc,
7278         uint32_t blockSize,
7279         q15_t * pResult);
7280 
7281 
7282   /**
7283    * @brief  Mean value of a Q31 vector.
7284    * @param[in]  pSrc       is input pointer
7285    * @param[in]  blockSize  is the number of samples to process
7286    * @param[out] pResult    is output value.
7287    */
7288   void arm_mean_q31(
7289   const q31_t * pSrc,
7290         uint32_t blockSize,
7291         q31_t * pResult);
7292 
7293 
7294   /**
7295    * @brief  Mean value of a floating-point vector.
7296    * @param[in]  pSrc       is input pointer
7297    * @param[in]  blockSize  is the number of samples to process
7298    * @param[out] pResult    is output value.
7299    */
7300   void arm_mean_f32(
7301   const float32_t * pSrc,
7302         uint32_t blockSize,
7303         float32_t * pResult);
7304 
7305 
7306   /**
7307    * @brief  Variance of the elements of a floating-point vector.
7308    * @param[in]  pSrc       is input pointer
7309    * @param[in]  blockSize  is the number of samples to process
7310    * @param[out] pResult    is output value.
7311    */
7312   void arm_var_f32(
7313   const float32_t * pSrc,
7314         uint32_t blockSize,
7315         float32_t * pResult);
7316 
7317 
7318   /**
7319    * @brief  Variance of the elements of a Q31 vector.
7320    * @param[in]  pSrc       is input pointer
7321    * @param[in]  blockSize  is the number of samples to process
7322    * @param[out] pResult    is output value.
7323    */
7324   void arm_var_q31(
7325   const q31_t * pSrc,
7326         uint32_t blockSize,
7327         q31_t * pResult);
7328 
7329 
7330   /**
7331    * @brief  Variance of the elements of a Q15 vector.
7332    * @param[in]  pSrc       is input pointer
7333    * @param[in]  blockSize  is the number of samples to process
7334    * @param[out] pResult    is output value.
7335    */
7336   void arm_var_q15(
7337   const q15_t * pSrc,
7338         uint32_t blockSize,
7339         q15_t * pResult);
7340 
7341 
7342   /**
7343    * @brief  Root Mean Square of the elements of a floating-point vector.
7344    * @param[in]  pSrc       is input pointer
7345    * @param[in]  blockSize  is the number of samples to process
7346    * @param[out] pResult    is output value.
7347    */
7348   void arm_rms_f32(
7349   const float32_t * pSrc,
7350         uint32_t blockSize,
7351         float32_t * pResult);
7352 
7353 
7354   /**
7355    * @brief  Root Mean Square of the elements of a Q31 vector.
7356    * @param[in]  pSrc       is input pointer
7357    * @param[in]  blockSize  is the number of samples to process
7358    * @param[out] pResult    is output value.
7359    */
7360   void arm_rms_q31(
7361   const q31_t * pSrc,
7362         uint32_t blockSize,
7363         q31_t * pResult);
7364 
7365 
7366   /**
7367    * @brief  Root Mean Square of the elements of a Q15 vector.
7368    * @param[in]  pSrc       is input pointer
7369    * @param[in]  blockSize  is the number of samples to process
7370    * @param[out] pResult    is output value.
7371    */
7372   void arm_rms_q15(
7373   const q15_t * pSrc,
7374         uint32_t blockSize,
7375         q15_t * pResult);
7376 
7377 
7378   /**
7379    * @brief  Standard deviation of the elements of a floating-point vector.
7380    * @param[in]  pSrc       is input pointer
7381    * @param[in]  blockSize  is the number of samples to process
7382    * @param[out] pResult    is output value.
7383    */
7384   void arm_std_f32(
7385   const float32_t * pSrc,
7386         uint32_t blockSize,
7387         float32_t * pResult);
7388 
7389 
7390   /**
7391    * @brief  Standard deviation of the elements of a Q31 vector.
7392    * @param[in]  pSrc       is input pointer
7393    * @param[in]  blockSize  is the number of samples to process
7394    * @param[out] pResult    is output value.
7395    */
7396   void arm_std_q31(
7397   const q31_t * pSrc,
7398         uint32_t blockSize,
7399         q31_t * pResult);
7400 
7401 
7402   /**
7403    * @brief  Standard deviation of the elements of a Q15 vector.
7404    * @param[in]  pSrc       is input pointer
7405    * @param[in]  blockSize  is the number of samples to process
7406    * @param[out] pResult    is output value.
7407    */
7408   void arm_std_q15(
7409   const q15_t * pSrc,
7410         uint32_t blockSize,
7411         q15_t * pResult);
7412 
7413 
7414   /**
7415    * @brief  Floating-point complex magnitude
7416    * @param[in]  pSrc        points to the complex input vector
7417    * @param[out] pDst        points to the real output vector
7418    * @param[in]  numSamples  number of complex samples in the input vector
7419    */
7420   void arm_cmplx_mag_f32(
7421   const float32_t * pSrc,
7422         float32_t * pDst,
7423         uint32_t numSamples);
7424 
7425 
7426   /**
7427    * @brief  Q31 complex magnitude
7428    * @param[in]  pSrc        points to the complex input vector
7429    * @param[out] pDst        points to the real output vector
7430    * @param[in]  numSamples  number of complex samples in the input vector
7431    */
7432   void arm_cmplx_mag_q31(
7433   const q31_t * pSrc,
7434         q31_t * pDst,
7435         uint32_t numSamples);
7436 
7437 
7438   /**
7439    * @brief  Q15 complex magnitude
7440    * @param[in]  pSrc        points to the complex input vector
7441    * @param[out] pDst        points to the real output vector
7442    * @param[in]  numSamples  number of complex samples in the input vector
7443    */
7444   void arm_cmplx_mag_q15(
7445   const q15_t * pSrc,
7446         q15_t * pDst,
7447         uint32_t numSamples);
7448 
7449 
7450   /**
7451    * @brief  Q15 complex dot product
7452    * @param[in]  pSrcA       points to the first input vector
7453    * @param[in]  pSrcB       points to the second input vector
7454    * @param[in]  numSamples  number of complex samples in each vector
7455    * @param[out] realResult  real part of the result returned here
7456    * @param[out] imagResult  imaginary part of the result returned here
7457    */
7458   void arm_cmplx_dot_prod_q15(
7459   const q15_t * pSrcA,
7460   const q15_t * pSrcB,
7461         uint32_t numSamples,
7462         q31_t * realResult,
7463         q31_t * imagResult);
7464 
7465 
7466   /**
7467    * @brief  Q31 complex dot product
7468    * @param[in]  pSrcA       points to the first input vector
7469    * @param[in]  pSrcB       points to the second input vector
7470    * @param[in]  numSamples  number of complex samples in each vector
7471    * @param[out] realResult  real part of the result returned here
7472    * @param[out] imagResult  imaginary part of the result returned here
7473    */
7474   void arm_cmplx_dot_prod_q31(
7475   const q31_t * pSrcA,
7476   const q31_t * pSrcB,
7477         uint32_t numSamples,
7478         q63_t * realResult,
7479         q63_t * imagResult);
7480 
7481 
7482   /**
7483    * @brief  Floating-point complex dot product
7484    * @param[in]  pSrcA       points to the first input vector
7485    * @param[in]  pSrcB       points to the second input vector
7486    * @param[in]  numSamples  number of complex samples in each vector
7487    * @param[out] realResult  real part of the result returned here
7488    * @param[out] imagResult  imaginary part of the result returned here
7489    */
7490   void arm_cmplx_dot_prod_f32(
7491   const float32_t * pSrcA,
7492   const float32_t * pSrcB,
7493         uint32_t numSamples,
7494         float32_t * realResult,
7495         float32_t * imagResult);
7496 
7497 
7498   /**
7499    * @brief  Q15 complex-by-real multiplication
7500    * @param[in]  pSrcCmplx   points to the complex input vector
7501    * @param[in]  pSrcReal    points to the real input vector
7502    * @param[out] pCmplxDst   points to the complex output vector
7503    * @param[in]  numSamples  number of samples in each vector
7504    */
7505   void arm_cmplx_mult_real_q15(
7506   const q15_t * pSrcCmplx,
7507   const q15_t * pSrcReal,
7508         q15_t * pCmplxDst,
7509         uint32_t numSamples);
7510 
7511 
7512   /**
7513    * @brief  Q31 complex-by-real multiplication
7514    * @param[in]  pSrcCmplx   points to the complex input vector
7515    * @param[in]  pSrcReal    points to the real input vector
7516    * @param[out] pCmplxDst   points to the complex output vector
7517    * @param[in]  numSamples  number of samples in each vector
7518    */
7519   void arm_cmplx_mult_real_q31(
7520   const q31_t * pSrcCmplx,
7521   const q31_t * pSrcReal,
7522         q31_t * pCmplxDst,
7523         uint32_t numSamples);
7524 
7525 
7526   /**
7527    * @brief  Floating-point complex-by-real multiplication
7528    * @param[in]  pSrcCmplx   points to the complex input vector
7529    * @param[in]  pSrcReal    points to the real input vector
7530    * @param[out] pCmplxDst   points to the complex output vector
7531    * @param[in]  numSamples  number of samples in each vector
7532    */
7533   void arm_cmplx_mult_real_f32(
7534   const float32_t * pSrcCmplx,
7535   const float32_t * pSrcReal,
7536         float32_t * pCmplxDst,
7537         uint32_t numSamples);
7538 
7539 
7540   /**
7541    * @brief  Minimum value of a Q7 vector.
7542    * @param[in]  pSrc       is input pointer
7543    * @param[in]  blockSize  is the number of samples to process
7544    * @param[out] result     is output pointer
7545    * @param[out] index      is the array index of the minimum value in the input buffer.
7546    */
7547   void arm_min_q7(
7548   const q7_t * pSrc,
7549         uint32_t blockSize,
7550         q7_t * result,
7551         uint32_t * index);
7552 
7553 
7554   /**
7555    * @brief  Minimum value of a Q15 vector.
7556    * @param[in]  pSrc       is input pointer
7557    * @param[in]  blockSize  is the number of samples to process
7558    * @param[out] pResult    is output pointer
7559    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
7560    */
7561   void arm_min_q15(
7562   const q15_t * pSrc,
7563         uint32_t blockSize,
7564         q15_t * pResult,
7565         uint32_t * pIndex);
7566 
7567 
7568   /**
7569    * @brief  Minimum value of a Q31 vector.
7570    * @param[in]  pSrc       is input pointer
7571    * @param[in]  blockSize  is the number of samples to process
7572    * @param[out] pResult    is output pointer
7573    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
7574    */
7575   void arm_min_q31(
7576   const q31_t * pSrc,
7577         uint32_t blockSize,
7578         q31_t * pResult,
7579         uint32_t * pIndex);
7580 
7581 
7582   /**
7583    * @brief  Minimum value of a floating-point vector.
7584    * @param[in]  pSrc       is input pointer
7585    * @param[in]  blockSize  is the number of samples to process
7586    * @param[out] pResult    is output pointer
7587    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
7588    */
7589   void arm_min_f32(
7590   const float32_t * pSrc,
7591         uint32_t blockSize,
7592         float32_t * pResult,
7593         uint32_t * pIndex);
7594 
7595 
7596 /**
7597  * @brief Maximum value of a Q7 vector.
7598  * @param[in]  pSrc       points to the input buffer
7599  * @param[in]  blockSize  length of the input vector
7600  * @param[out] pResult    maximum value returned here
7601  * @param[out] pIndex     index of maximum value returned here
7602  */
7603   void arm_max_q7(
7604   const q7_t * pSrc,
7605         uint32_t blockSize,
7606         q7_t * pResult,
7607         uint32_t * pIndex);
7608 
7609 
7610 /**
7611  * @brief Maximum value of a Q15 vector.
7612  * @param[in]  pSrc       points to the input buffer
7613  * @param[in]  blockSize  length of the input vector
7614  * @param[out] pResult    maximum value returned here
7615  * @param[out] pIndex     index of maximum value returned here
7616  */
7617   void arm_max_q15(
7618   const q15_t * pSrc,
7619         uint32_t blockSize,
7620         q15_t * pResult,
7621         uint32_t * pIndex);
7622 
7623 
7624 /**
7625  * @brief Maximum value of a Q31 vector.
7626  * @param[in]  pSrc       points to the input buffer
7627  * @param[in]  blockSize  length of the input vector
7628  * @param[out] pResult    maximum value returned here
7629  * @param[out] pIndex     index of maximum value returned here
7630  */
7631   void arm_max_q31(
7632   const q31_t * pSrc,
7633         uint32_t blockSize,
7634         q31_t * pResult,
7635         uint32_t * pIndex);
7636 
7637 
7638 /**
7639  * @brief Maximum value of a floating-point vector.
7640  * @param[in]  pSrc       points to the input buffer
7641  * @param[in]  blockSize  length of the input vector
7642  * @param[out] pResult    maximum value returned here
7643  * @param[out] pIndex     index of maximum value returned here
7644  */
7645   void arm_max_f32(
7646   const float32_t * pSrc,
7647         uint32_t blockSize,
7648         float32_t * pResult,
7649         uint32_t * pIndex);
7650 
7651   /**
7652     @brief         Maximum value of a floating-point vector, without returning its index.
7653     @param[in]     pSrc       points to the input vector
7654     @param[in]     blockSize  number of samples in input vector
7655     @param[out]    pResult    maximum value returned here
7656     @return        none
7657    */
7658   void arm_max_no_idx_f32(
7659       const float32_t *pSrc,
7660       uint32_t   blockSize,
7661       float32_t *pResult);
7662 
7663   /**
7664    * @brief  Q15 complex-by-complex multiplication
7665    * @param[in]  pSrcA       points to the first input vector
7666    * @param[in]  pSrcB       points to the second input vector
7667    * @param[out] pDst        points to the output vector
7668    * @param[in]  numSamples  number of complex samples in each vector
7669    */
7670   void arm_cmplx_mult_cmplx_q15(
7671   const q15_t * pSrcA,
7672   const q15_t * pSrcB,
7673         q15_t * pDst,
7674         uint32_t numSamples);
7675 
7676 
7677   /**
7678    * @brief  Q31 complex-by-complex multiplication
7679    * @param[in]  pSrcA       points to the first input vector
7680    * @param[in]  pSrcB       points to the second input vector
7681    * @param[out] pDst        points to the output vector
7682    * @param[in]  numSamples  number of complex samples in each vector
7683    */
7684   void arm_cmplx_mult_cmplx_q31(
7685   const q31_t * pSrcA,
7686   const q31_t * pSrcB,
7687         q31_t * pDst,
7688         uint32_t numSamples);
7689 
7690 
7691   /**
7692    * @brief  Floating-point complex-by-complex multiplication
7693    * @param[in]  pSrcA       points to the first input vector
7694    * @param[in]  pSrcB       points to the second input vector
7695    * @param[out] pDst        points to the output vector
7696    * @param[in]  numSamples  number of complex samples in each vector
7697    */
7698   void arm_cmplx_mult_cmplx_f32(
7699   const float32_t * pSrcA,
7700   const float32_t * pSrcB,
7701         float32_t * pDst,
7702         uint32_t numSamples);
7703 
7704 
7705   /**
7706    * @brief Converts the elements of the floating-point vector to Q31 vector.
7707    * @param[in]  pSrc       points to the floating-point input vector
7708    * @param[out] pDst       points to the Q31 output vector
7709    * @param[in]  blockSize  length of the input vector
7710    */
7711   void arm_float_to_q31(
7712   const float32_t * pSrc,
7713         q31_t * pDst,
7714         uint32_t blockSize);
7715 
7716 
7717   /**
7718    * @brief Converts the elements of the floating-point vector to Q15 vector.
7719    * @param[in]  pSrc       points to the floating-point input vector
7720    * @param[out] pDst       points to the Q15 output vector
7721    * @param[in]  blockSize  length of the input vector
7722    */
7723   void arm_float_to_q15(
7724   const float32_t * pSrc,
7725         q15_t * pDst,
7726         uint32_t blockSize);
7727 
7728 
7729   /**
7730    * @brief Converts the elements of the floating-point vector to Q7 vector.
7731    * @param[in]  pSrc       points to the floating-point input vector
7732    * @param[out] pDst       points to the Q7 output vector
7733    * @param[in]  blockSize  length of the input vector
7734    */
7735   void arm_float_to_q7(
7736   const float32_t * pSrc,
7737         q7_t * pDst,
7738         uint32_t blockSize);
7739 
7740 
7741   /**
7742    * @brief  Converts the elements of the Q31 vector to floating-point vector.
7743    * @param[in]  pSrc       is input pointer
7744    * @param[out] pDst       is output pointer
7745    * @param[in]  blockSize  is the number of samples to process
7746    */
7747   void arm_q31_to_float(
7748   const q31_t * pSrc,
7749         float32_t * pDst,
7750         uint32_t blockSize);
7751 
7752 
7753   /**
7754    * @brief  Converts the elements of the Q31 vector to Q15 vector.
7755    * @param[in]  pSrc       is input pointer
7756    * @param[out] pDst       is output pointer
7757    * @param[in]  blockSize  is the number of samples to process
7758    */
7759   void arm_q31_to_q15(
7760   const q31_t * pSrc,
7761         q15_t * pDst,
7762         uint32_t blockSize);
7763 
7764 
7765   /**
7766    * @brief  Converts the elements of the Q31 vector to Q7 vector.
7767    * @param[in]  pSrc       is input pointer
7768    * @param[out] pDst       is output pointer
7769    * @param[in]  blockSize  is the number of samples to process
7770    */
7771   void arm_q31_to_q7(
7772   const q31_t * pSrc,
7773         q7_t * pDst,
7774         uint32_t blockSize);
7775 
7776 
7777   /**
7778    * @brief  Converts the elements of the Q15 vector to floating-point vector.
7779    * @param[in]  pSrc       is input pointer
7780    * @param[out] pDst       is output pointer
7781    * @param[in]  blockSize  is the number of samples to process
7782    */
7783   void arm_q15_to_float(
7784   const q15_t * pSrc,
7785         float32_t * pDst,
7786         uint32_t blockSize);
7787 
7788 
7789   /**
7790    * @brief  Converts the elements of the Q15 vector to Q31 vector.
7791    * @param[in]  pSrc       is input pointer
7792    * @param[out] pDst       is output pointer
7793    * @param[in]  blockSize  is the number of samples to process
7794    */
7795   void arm_q15_to_q31(
7796   const q15_t * pSrc,
7797         q31_t * pDst,
7798         uint32_t blockSize);
7799 
7800 
7801   /**
7802    * @brief  Converts the elements of the Q15 vector to Q7 vector.
7803    * @param[in]  pSrc       is input pointer
7804    * @param[out] pDst       is output pointer
7805    * @param[in]  blockSize  is the number of samples to process
7806    */
7807   void arm_q15_to_q7(
7808   const q15_t * pSrc,
7809         q7_t * pDst,
7810         uint32_t blockSize);
7811 
7812 
7813   /**
7814    * @brief  Converts the elements of the Q7 vector to floating-point vector.
7815    * @param[in]  pSrc       is input pointer
7816    * @param[out] pDst       is output pointer
7817    * @param[in]  blockSize  is the number of samples to process
7818    */
7819   void arm_q7_to_float(
7820   const q7_t * pSrc,
7821         float32_t * pDst,
7822         uint32_t blockSize);
7823 
7824 
7825   /**
7826    * @brief  Converts the elements of the Q7 vector to Q31 vector.
7827    * @param[in]  pSrc       input pointer
7828    * @param[out] pDst       output pointer
7829    * @param[in]  blockSize  number of samples to process
7830    */
7831   void arm_q7_to_q31(
7832   const q7_t * pSrc,
7833         q31_t * pDst,
7834         uint32_t blockSize);
7835 
7836 
7837   /**
7838    * @brief  Converts the elements of the Q7 vector to Q15 vector.
7839    * @param[in]  pSrc       input pointer
7840    * @param[out] pDst       output pointer
7841    * @param[in]  blockSize  number of samples to process
7842    */
7843   void arm_q7_to_q15(
7844   const q7_t * pSrc,
7845         q15_t * pDst,
7846         uint32_t blockSize);
7847 
/**
 * @brief Enumeration used to specify the SVM kernel.
 */
typedef enum
{
    ARM_ML_KERNEL_LINEAR     = 0, /**< Linear kernel */
    ARM_ML_KERNEL_POLYNOMIAL = 1, /**< Polynomial kernel */
    ARM_ML_KERNEL_RBF        = 2, /**< Radial Basis Function kernel */
    ARM_ML_KERNEL_SIGMOID    = 3  /**< Sigmoid kernel */
} arm_ml_kernel_type;
7862 
7863 
7864 /**
7865  * @brief Instance structure for linear SVM prediction function.
7866  */
7867 typedef struct
7868 {
7869   uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
7870   uint32_t        vectorDimension;        /**< Dimension of vector space */
7871   float32_t       intercept;              /**< Intercept */
7872   const float32_t *dualCoefficients;      /**< Dual coefficients */
7873   const float32_t *supportVectors;        /**< Support vectors */
7874   const int32_t   *classes;               /**< The two SVM classes */
7875 } arm_svm_linear_instance_f32;
7876 
7877 
7878 /**
7879  * @brief Instance structure for polynomial SVM prediction function.
7880  */
7881 typedef struct
7882 {
7883   uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
7884   uint32_t        vectorDimension;        /**< Dimension of vector space */
7885   float32_t       intercept;              /**< Intercept */
7886   const float32_t *dualCoefficients;      /**< Dual coefficients */
7887   const float32_t *supportVectors;        /**< Support vectors */
7888   const int32_t   *classes;               /**< The two SVM classes */
7889   int32_t         degree;                 /**< Polynomial degree */
7890   float32_t       coef0;                  /**< Polynomial constant */
7891   float32_t       gamma;                  /**< Gamma factor */
7892 } arm_svm_polynomial_instance_f32;
7893 
7894 /**
7895  * @brief Instance structure for rbf SVM prediction function.
7896  */
7897 typedef struct
7898 {
7899   uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
7900   uint32_t        vectorDimension;        /**< Dimension of vector space */
7901   float32_t       intercept;              /**< Intercept */
7902   const float32_t *dualCoefficients;      /**< Dual coefficients */
7903   const float32_t *supportVectors;        /**< Support vectors */
7904   const int32_t   *classes;               /**< The two SVM classes */
7905   float32_t       gamma;                  /**< Gamma factor */
7906 } arm_svm_rbf_instance_f32;
7907 
7908 /**
7909  * @brief Instance structure for sigmoid SVM prediction function.
7910  */
7911 typedef struct
7912 {
7913   uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
7914   uint32_t        vectorDimension;        /**< Dimension of vector space */
7915   float32_t       intercept;              /**< Intercept */
7916   const float32_t *dualCoefficients;      /**< Dual coefficients */
7917   const float32_t *supportVectors;        /**< Support vectors */
7918   const int32_t   *classes;               /**< The two SVM classes */
7919   float32_t       coef0;                  /**< Independant constant */
7920   float32_t       gamma;                  /**< Gamma factor */
7921 } arm_svm_sigmoid_instance_f32;
7922 
7923 /**
7924  * @brief        SVM linear instance init function
7925  * @param[in]    S                      points to an instance of the linear SVM structure.
7926  * @param[in]    nbOfSupportVectors     Number of support vectors
7927  * @param[in]    vectorDimension        Dimension of vector space
7928  * @param[in]    intercept              Intercept
7929  * @param[in]    dualCoefficients       Array of dual coefficients
7930  * @param[in]    supportVectors         Array of support vectors
7931  * @param[in]    classes                Array of 2 classes ID
7932  * @return none.
7933  *
7934  */
7935 
7936 
7937 void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S,
7938   uint32_t nbOfSupportVectors,
7939   uint32_t vectorDimension,
7940   float32_t intercept,
7941   const float32_t *dualCoefficients,
7942   const float32_t *supportVectors,
7943   const int32_t  *classes);
7944 
7945 /**
7946  * @brief SVM linear prediction
7947  * @param[in]    S          Pointer to an instance of the linear SVM structure.
7948  * @param[in]    in         Pointer to input vector
7949  * @param[out]   pResult    Decision value
7950  * @return none.
7951  *
7952  */
7953 
7954 void arm_svm_linear_predict_f32(const arm_svm_linear_instance_f32 *S,
7955    const float32_t * in,
7956    int32_t * pResult);
7957 
7958 
7959 /**
7960  * @brief        SVM polynomial instance init function
7961  * @param[in]    S                      points to an instance of the polynomial SVM structure.
7962  * @param[in]    nbOfSupportVectors     Number of support vectors
7963  * @param[in]    vectorDimension        Dimension of vector space
7964  * @param[in]    intercept              Intercept
7965  * @param[in]    dualCoefficients       Array of dual coefficients
7966  * @param[in]    supportVectors         Array of support vectors
7967  * @param[in]    classes                Array of 2 classes ID
7968  * @param[in]    degree                 Polynomial degree
7969  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
7970  * @param[in]    gamma                  gamma (scikit-learn terminology)
7971  * @return none.
7972  *
7973  */
7974 
7975 
7976 void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S,
7977   uint32_t nbOfSupportVectors,
7978   uint32_t vectorDimension,
7979   float32_t intercept,
7980   const float32_t *dualCoefficients,
7981   const float32_t *supportVectors,
7982   const int32_t   *classes,
7983   int32_t      degree,
7984   float32_t coef0,
7985   float32_t gamma
7986   );
7987 
7988 /**
7989  * @brief SVM polynomial prediction
7990  * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
7991  * @param[in]    in         Pointer to input vector
7992  * @param[out]   pResult    Decision value
7993  * @return none.
7994  *
7995  */
7996 void arm_svm_polynomial_predict_f32(const arm_svm_polynomial_instance_f32 *S,
7997    const float32_t * in,
7998    int32_t * pResult);
7999 
8000 
8001 /**
8002  * @brief        SVM radial basis function instance init function
8003  * @param[in]    S                      points to an instance of the rbf SVM structure.
8004  * @param[in]    nbOfSupportVectors     Number of support vectors
8005  * @param[in]    vectorDimension        Dimension of vector space
8006  * @param[in]    intercept              Intercept
8007  * @param[in]    dualCoefficients       Array of dual coefficients
8008  * @param[in]    supportVectors         Array of support vectors
8009  * @param[in]    classes                Array of 2 classes ID
8010  * @param[in]    gamma                  gamma (scikit-learn terminology)
8011  * @return none.
8012  *
8013  */
8014 
8015 void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S,
8016   uint32_t nbOfSupportVectors,
8017   uint32_t vectorDimension,
8018   float32_t intercept,
8019   const float32_t *dualCoefficients,
8020   const float32_t *supportVectors,
8021   const int32_t   *classes,
8022   float32_t gamma
8023   );
8024 
8025 /**
8026  * @brief SVM rbf prediction
8027  * @param[in]    S         Pointer to an instance of the rbf SVM structure.
8028  * @param[in]    in        Pointer to input vector
8029  * @param[out]   pResult   decision value
8030  * @return none.
8031  *
8032  */
8033 void arm_svm_rbf_predict_f32(const arm_svm_rbf_instance_f32 *S,
8034    const float32_t * in,
8035    int32_t * pResult);
8036 
8037 /**
8038  * @brief        SVM sigmoid instance init function
8039  * @param[in]    S                      points to an instance of the sigmoid SVM structure.
8040  * @param[in]    nbOfSupportVectors     Number of support vectors
8041  * @param[in]    vectorDimension        Dimension of vector space
8042  * @param[in]    intercept              Intercept
8043  * @param[in]    dualCoefficients       Array of dual coefficients
8044  * @param[in]    supportVectors         Array of support vectors
8045  * @param[in]    classes                Array of 2 classes ID
8046  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
8047  * @param[in]    gamma                  gamma (scikit-learn terminology)
8048  * @return none.
8049  *
8050  */
8051 
8052 void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S,
8053   uint32_t nbOfSupportVectors,
8054   uint32_t vectorDimension,
8055   float32_t intercept,
8056   const float32_t *dualCoefficients,
8057   const float32_t *supportVectors,
8058   const int32_t   *classes,
8059   float32_t coef0,
8060   float32_t gamma
8061   );
8062 
8063 /**
8064  * @brief SVM sigmoid prediction
8065  * @param[in]    S        Pointer to an instance of the sigmoid SVM structure.
8066  * @param[in]    in       Pointer to input vector
8067  * @param[out]   pResult  Decision value
8068  * @return none.
8069  *
8070  */
8071 void arm_svm_sigmoid_predict_f32(const arm_svm_sigmoid_instance_f32 *S,
8072    const float32_t * in,
8073    int32_t * pResult);
8074 
8075 
8076 
8077 /**
8078  * @brief Instance structure for Naive Gaussian Bayesian estimator.
8079  */
8080 typedef struct
8081 {
8082   uint32_t vectorDimension;  /**< Dimension of vector space */
8083   uint32_t numberOfClasses;  /**< Number of different classes  */
8084   const float32_t *theta;          /**< Mean values for the Gaussians */
8085   const float32_t *sigma;          /**< Variances for the Gaussians */
8086   const float32_t *classPriors;    /**< Class prior probabilities */
8087   float32_t epsilon;         /**< Additive value to variances */
8088 } arm_gaussian_naive_bayes_instance_f32;
8089 
8090 /**
8091  * @brief Naive Gaussian Bayesian Estimator
8092  *
8093  * @param[in]  S         points to a naive bayes instance structure
8094  * @param[in]  in        points to the elements of the input vector.
8095  * @param[in]  pBuffer   points to a buffer of length numberOfClasses
8096  * @return The predicted class
8097  *
8098  */
8099 
8100 
8101 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S,
8102    const float32_t * in,
8103    float32_t *pBuffer);
8104 
/**
 * @brief Computation of the LogSumExp
 *
 * In probabilistic computations, the dynamic range of the probability values can be very
 * wide because they come from Gaussian functions.
 * To avoid underflow and overflow issues, the values are represented by their logarithms.
 * In this representation, multiplying the original exp values is easy: their logs are added.
 * Adding the original exp values, however, requires special handling, which is the
 * purpose of the LogSumExp function.
 *
 * If the values are x1...xn, the function computes:
 *
 * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
 * rounding issues are minimised.
 *
 * The maximum xm of the values is extracted and the function computes:
 * xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
 *
 * @param[in]  in          Pointer to an array of input values.
 * @param[in]  blockSize   Number of samples in the input array.
 * @return LogSumExp
 *
 */
8128 
8129 
8130 float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize);
8131 
/**
 * @brief Dot product with log arithmetic
 *
 * The vectors contain the logarithms of the samples.
 *
 * @param[in]       pSrcA points to the first input vector
 * @param[in]       pSrcB points to the second input vector
 * @param[in]       blockSize number of samples in each vector
 * @param[in]       pTmpBuffer temporary buffer of length blockSize
 * @return The log of the dot product.
 *
 */
8144 
8145 
8146 float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
8147   const float32_t * pSrcB,
8148   uint32_t blockSize,
8149   float32_t *pTmpBuffer);
8150 
8151 /**
8152  * @brief Entropy
8153  *
8154  * @param[in]  pSrcA        Array of input values.
8155  * @param[in]  blockSize    Number of samples in the input array.
8156  * @return     Entropy      -Sum(p ln p)
8157  *
8158  */
8159 
8160 
8161 float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize);
8162 
8163 
8164 /**
8165  * @brief Entropy
8166  *
8167  * @param[in]  pSrcA        Array of input values.
8168  * @param[in]  blockSize    Number of samples in the input array.
8169  * @return     Entropy      -Sum(p ln p)
8170  *
8171  */
8172 
8173 
8174 float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize);
8175 
8176 
8177 /**
8178  * @brief Kullback-Leibler
8179  *
8180  * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
8181  * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
8182  * @param[in]  blockSize     Number of samples in the input array.
8183  * @return Kullback-Leibler  Divergence D(A || B)
8184  *
8185  */
8186 float32_t arm_kullback_leibler_f32(const float32_t * pSrcA
8187   ,const float32_t * pSrcB
8188   ,uint32_t blockSize);
8189 
8190 
8191 /**
8192  * @brief Kullback-Leibler
8193  *
8194  * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
8195  * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
8196  * @param[in]  blockSize     Number of samples in the input array.
8197  * @return Kullback-Leibler  Divergence D(A || B)
8198  *
8199  */
8200 float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
8201                 const float64_t * pSrcB,
8202                 uint32_t blockSize);
8203 
8204 
8205 /**
8206  * @brief Weighted sum
8207  *
8208  *
8209  * @param[in]    *in           Array of input values.
8210  * @param[in]    *weigths      Weights
8211  * @param[in]    blockSize     Number of samples in the input array.
8212  * @return Weighted sum
8213  *
8214  */
8215 float32_t arm_weighted_sum_f32(const float32_t *in
8216   , const float32_t *weigths
8217   , uint32_t blockSize);
8218 
8219 
8220 /**
8221  * @brief Barycenter
8222  *
8223  *
8224  * @param[in]    in         List of vectors
8225  * @param[in]    weights    Weights of the vectors
8226  * @param[out]   out        Barycenter
8227  * @param[in]    nbVectors  Number of vectors
8228  * @param[in]    vecDim     Dimension of space (vector dimension)
8229  * @return       None
8230  *
8231  */
8232 void arm_barycenter_f32(const float32_t *in
8233   , const float32_t *weights
8234   , float32_t *out
8235   , uint32_t nbVectors
8236   , uint32_t vecDim);
8237 
8238 /**
8239  * @brief        Euclidean distance between two vectors
8240  * @param[in]    pA         First vector
8241  * @param[in]    pB         Second vector
8242  * @param[in]    blockSize  vector length
8243  * @return distance
8244  *
8245  */
8246 
8247 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8248 
8249 /**
8250  * @brief        Bray-Curtis distance between two vectors
8251  * @param[in]    pA         First vector
8252  * @param[in]    pB         Second vector
8253  * @param[in]    blockSize  vector length
8254  * @return distance
8255  *
8256  */
8257 float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8258 
/**
 * @brief        Canberra distance between two vectors
 *
 * This function may divide by zero when samples pA[i] and pB[i] are both zero.
 * The result of the computation will still be correct, so the division by zero may be
 * ignored.
 *
 * @param[in]    pA         First vector
 * @param[in]    pB         Second vector
 * @param[in]    blockSize  vector length
 * @return distance
 *
 */
8272 float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8273 
8274 
8275 /**
8276  * @brief        Chebyshev distance between two vectors
8277  * @param[in]    pA         First vector
8278  * @param[in]    pB         Second vector
8279  * @param[in]    blockSize  vector length
8280  * @return distance
8281  *
8282  */
8283 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8284 
8285 
8286 /**
8287  * @brief        Cityblock (Manhattan) distance between two vectors
8288  * @param[in]    pA         First vector
8289  * @param[in]    pB         Second vector
8290  * @param[in]    blockSize  vector length
8291  * @return distance
8292  *
8293  */
8294 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8295 
/**
 * @brief        Correlation distance between two vectors
 *
 * The input vectors are modified in place!
 *
 * @param[in,out] pA         First vector (modified by the function)
 * @param[in,out] pB         Second vector (modified by the function)
 * @param[in]     blockSize  vector length
 * @return distance
 *
 */
8307 float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize);
8308 
8309 /**
8310  * @brief        Cosine distance between two vectors
8311  *
8312  * @param[in]    pA         First vector
8313  * @param[in]    pB         Second vector
8314  * @param[in]    blockSize  vector length
8315  * @return distance
8316  *
8317  */
8318 
8319 float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
8320 
/**
 * @brief        Jensen-Shannon distance between two vectors
 *
 * This function assumes that the elements of the second vector are > 0,
 * and 0 only when the corresponding element of the first vector is 0.
 * Otherwise the result of the computation does not make sense
 * and, for speed reasons, the cases returning NaN or Infinity are not
 * managed.
 *
 * When the function computes x log (x / y) with x = 0 and y = 0,
 * it will compute the right value (0) but a division by zero will occur
 * and should be ignored in client code.
 *
 * @param[in]    pA         First vector
 * @param[in]    pB         Second vector
 * @param[in]    blockSize  vector length
 * @return distance
 *
 */
8340 
8341 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB,uint32_t blockSize);
8342 
/**
 * @brief        Minkowski distance between two vectors
 *
 * @param[in]    pA         First vector
 * @param[in]    pB         Second vector
 * @param[in]    order      Norm order (>= 2)
 * @param[in]    blockSize  vector length
 * @return distance
 *
 */
8353 
8354 
8355 
8356 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize);
8357 
/**
 * @brief        Dice distance between two vectors
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @return distance
 *
 */
8368 
8369 
8370 float32_t arm_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8371 
8372 /**
8373  * @brief        Hamming distance between two vectors
8374  *
8375  * @param[in]    pA              First vector of packed booleans
8376  * @param[in]    pB              Second vector of packed booleans
8377  * @param[in]    numberOfBools   Number of booleans
8378  * @return distance
8379  *
8380  */
8381 
8382 float32_t arm_hamming_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8383 
8384 /**
8385  * @brief        Jaccard distance between two vectors
8386  *
8387  * @param[in]    pA              First vector of packed booleans
8388  * @param[in]    pB              Second vector of packed booleans
8389  * @param[in]    numberOfBools   Number of booleans
8390  * @return distance
8391  *
8392  */
8393 
8394 float32_t arm_jaccard_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8395 
8396 /**
8397  * @brief        Kulsinski distance between two vectors
8398  *
8399  * @param[in]    pA              First vector of packed booleans
8400  * @param[in]    pB              Second vector of packed booleans
8401  * @param[in]    numberOfBools   Number of booleans
8402  * @return distance
8403  *
8404  */
8405 
8406 float32_t arm_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8407 
/**
 * @brief        Rogers-Tanimoto distance between two vectors
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @return distance
 *
 */
8417 
8418 float32_t arm_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8419 
8420 /**
8421  * @brief        Russell-Rao distance between two vectors
8422  *
8423  * @param[in]    pA              First vector of packed booleans
8424  * @param[in]    pB              Second vector of packed booleans
8425  * @param[in]    numberOfBools   Number of booleans
8426  * @return distance
8427  *
8428  */
8429 
8430 float32_t arm_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8431 
8432 /**
8433  * @brief        Sokal-Michener distance between two vectors
8434  *
8435  * @param[in]    pA              First vector of packed booleans
8436  * @param[in]    pB              Second vector of packed booleans
8437  * @param[in]    numberOfBools   Number of booleans
8438  * @return distance
8439  *
8440  */
8441 
8442 float32_t arm_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8443 
8444 /**
8445  * @brief        Sokal-Sneath distance between two vectors
8446  *
8447  * @param[in]    pA              First vector of packed booleans
8448  * @param[in]    pB              Second vector of packed booleans
8449  * @param[in]    numberOfBools   Number of booleans
8450  * @return distance
8451  *
8452  */
8453 
8454 float32_t arm_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8455 
8456 /**
8457  * @brief        Yule distance between two vectors
8458  *
8459  * @param[in]    pA              First vector of packed booleans
8460  * @param[in]    pB              Second vector of packed booleans
8461  * @param[in]    numberOfBools   Number of booleans
8462  * @return distance
8463  *
8464  */
8465 
8466 float32_t arm_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
8467 
8468 
8469   /**
8470    * @ingroup groupInterpolation
8471    */
8472 
8473   /**
8474    * @defgroup BilinearInterpolate Bilinear Interpolation
8475    *
8476    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
8477    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
8478    * determines values between the grid points.
8479    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
8480    * Bilinear interpolation is often used in image processing to rescale images.
8481    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
8482    *
8483    * <b>Algorithm</b>
8484    * \par
8485    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
8486    * For floating-point, the instance structure is defined as:
8487    * <pre>
8488    *   typedef struct
8489    *   {
8490    *     uint16_t numRows;
8491    *     uint16_t numCols;
8492    *     float32_t *pData;
8493    * } arm_bilinear_interp_instance_f32;
8494    * </pre>
8495    *
8496    * \par
8497    * where <code>numRows</code> specifies the number of rows in the table;
8498    * <code>numCols</code> specifies the number of columns in the table;
8499    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
   * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
   * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
8502    *
8503    * \par
8504    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
8505    * <pre>
8506    *     XF = floor(x)
8507    *     YF = floor(y)
8508    * </pre>
8509    * \par
8510    * The interpolated output point is computed as:
8511    * <pre>
8512    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
8513    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
8514    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
8515    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
8516    * </pre>
8517    * Note that the coordinates (x, y) contain integer and fractional components.
   * The integer components specify which portion of the table to use while the
   * fractional components control the interpolation process.
8520    *
8521    * \par
8522    * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output.
8523    */
8524 
8525 
8526   /**
8527    * @addtogroup BilinearInterpolate
8528    * @{
8529    */
8530 
8531   /**
8532   * @brief  Floating-point bilinear interpolation.
8533   * @param[in,out] S  points to an instance of the interpolation structure.
8534   * @param[in]     X  interpolation coordinate.
8535   * @param[in]     Y  interpolation coordinate.
8536   * @return out interpolated value.
8537   */
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)8538   __STATIC_FORCEINLINE float32_t arm_bilinear_interp_f32(
8539   const arm_bilinear_interp_instance_f32 * S,
8540   float32_t X,
8541   float32_t Y)
8542   {
8543     float32_t out;
8544     float32_t f00, f01, f10, f11;
8545     float32_t *pData = S->pData;
8546     int32_t xIndex, yIndex, index;
8547     float32_t xdiff, ydiff;
8548     float32_t b1, b2, b3, b4;
8549 
8550     xIndex = (int32_t) X;
8551     yIndex = (int32_t) Y;
8552 
8553     /* Care taken for table outside boundary */
8554     /* Returns zero output when values are outside table boundary */
8555     if (xIndex < 0 || xIndex > (S->numCols - 2) || yIndex < 0 || yIndex > (S->numRows - 2))
8556     {
8557       return (0);
8558     }
8559 
8560     /* Calculation of index for two nearest points in X-direction */
8561     index = (xIndex ) + (yIndex ) * S->numCols;
8562 
8563 
8564     /* Read two nearest points in X-direction */
8565     f00 = pData[index];
8566     f01 = pData[index + 1];
8567 
8568     /* Calculation of index for two nearest points in Y-direction */
8569     index = (xIndex ) + (yIndex+1) * S->numCols;
8570 
8571 
8572     /* Read two nearest points in Y-direction */
8573     f10 = pData[index];
8574     f11 = pData[index + 1];
8575 
8576     /* Calculation of intermediate values */
8577     b1 = f00;
8578     b2 = f01 - f00;
8579     b3 = f10 - f00;
8580     b4 = f00 - f01 - f10 + f11;
8581 
8582     /* Calculation of fractional part in X */
8583     xdiff = X - xIndex;
8584 
8585     /* Calculation of fractional part in Y */
8586     ydiff = Y - yIndex;
8587 
8588     /* Calculation of bi-linear interpolated output */
8589     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
8590 
8591     /* return to application */
8592     return (out);
8593   }
8594 
8595 
8596   /**
8597   * @brief  Q31 bilinear interpolation.
8598   * @param[in,out] S  points to an instance of the interpolation structure.
8599   * @param[in]     X  interpolation coordinate in 12.20 format.
8600   * @param[in]     Y  interpolation coordinate in 12.20 format.
8601   * @return out interpolated value.
8602   */
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)8603   __STATIC_FORCEINLINE q31_t arm_bilinear_interp_q31(
8604   arm_bilinear_interp_instance_q31 * S,
8605   q31_t X,
8606   q31_t Y)
8607   {
8608     q31_t out;                                   /* Temporary output */
8609     q31_t acc = 0;                               /* output */
8610     q31_t xfract, yfract;                        /* X, Y fractional parts */
8611     q31_t x1, x2, y1, y2;                        /* Nearest output values */
8612     int32_t rI, cI;                              /* Row and column indices */
8613     q31_t *pYData = S->pData;                    /* pointer to output table values */
8614     uint32_t nCols = S->numCols;                 /* num of rows */
8615 
8616     /* Input is in 12.20 format */
8617     /* 12 bits for the table index */
8618     /* Index value calculation */
8619     rI = ((X & (q31_t)0xFFF00000) >> 20);
8620 
8621     /* Input is in 12.20 format */
8622     /* 12 bits for the table index */
8623     /* Index value calculation */
8624     cI = ((Y & (q31_t)0xFFF00000) >> 20);
8625 
8626     /* Care taken for table outside boundary */
8627     /* Returns zero output when values are outside table boundary */
8628     if (rI < 0 || rI > (S->numCols - 2) || cI < 0 || cI > (S->numRows - 2))
8629     {
8630       return (0);
8631     }
8632 
8633     /* 20 bits for the fractional part */
8634     /* shift left xfract by 11 to keep 1.31 format */
8635     xfract = (X & 0x000FFFFF) << 11U;
8636 
8637     /* Read two nearest output values from the index */
8638     x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
8639     x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
8640 
8641     /* 20 bits for the fractional part */
8642     /* shift left yfract by 11 to keep 1.31 format */
8643     yfract = (Y & 0x000FFFFF) << 11U;
8644 
8645     /* Read two nearest output values from the index */
8646     y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
8647     y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
8648 
8649     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
8650     out = ((q31_t) (((q63_t) x1  * (0x7FFFFFFF - xfract)) >> 32));
8651     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
8652 
8653     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
8654     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
8655     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
8656 
8657     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
8658     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
8659     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
8660 
8661     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
8662     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
8663     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
8664 
8665     /* Convert acc to 1.31(q31) format */
8666     return ((q31_t)(acc << 2));
8667   }
8668 
8669 
8670   /**
8671   * @brief  Q15 bilinear interpolation.
8672   * @param[in,out] S  points to an instance of the interpolation structure.
8673   * @param[in]     X  interpolation coordinate in 12.20 format.
8674   * @param[in]     Y  interpolation coordinate in 12.20 format.
8675   * @return out interpolated value.
8676   */
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)8677   __STATIC_FORCEINLINE q15_t arm_bilinear_interp_q15(
8678   arm_bilinear_interp_instance_q15 * S,
8679   q31_t X,
8680   q31_t Y)
8681   {
8682     q63_t acc = 0;                               /* output */
8683     q31_t out;                                   /* Temporary output */
8684     q15_t x1, x2, y1, y2;                        /* Nearest output values */
8685     q31_t xfract, yfract;                        /* X, Y fractional parts */
8686     int32_t rI, cI;                              /* Row and column indices */
8687     q15_t *pYData = S->pData;                    /* pointer to output table values */
8688     uint32_t nCols = S->numCols;                 /* num of rows */
8689 
8690     /* Input is in 12.20 format */
8691     /* 12 bits for the table index */
8692     /* Index value calculation */
8693     rI = ((X & (q31_t)0xFFF00000) >> 20);
8694 
8695     /* Input is in 12.20 format */
8696     /* 12 bits for the table index */
8697     /* Index value calculation */
8698     cI = ((Y & (q31_t)0xFFF00000) >> 20);
8699 
8700     /* Care taken for table outside boundary */
8701     /* Returns zero output when values are outside table boundary */
8702     if (rI < 0 || rI > (S->numCols - 2) || cI < 0 || cI > (S->numRows - 2))
8703     {
8704       return (0);
8705     }
8706 
8707     /* 20 bits for the fractional part */
8708     /* xfract should be in 12.20 format */
8709     xfract = (X & 0x000FFFFF);
8710 
8711     /* Read two nearest output values from the index */
8712     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
8713     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
8714 
8715     /* 20 bits for the fractional part */
8716     /* yfract should be in 12.20 format */
8717     yfract = (Y & 0x000FFFFF);
8718 
8719     /* Read two nearest output values from the index */
8720     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
8721     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
8722 
8723     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
8724 
8725     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
8726     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
8727     out = (q31_t) (((q63_t) x1 * (0x0FFFFF - xfract)) >> 4U);
8728     acc = ((q63_t) out * (0x0FFFFF - yfract));
8729 
8730     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
8731     out = (q31_t) (((q63_t) x2 * (0x0FFFFF - yfract)) >> 4U);
8732     acc += ((q63_t) out * (xfract));
8733 
8734     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
8735     out = (q31_t) (((q63_t) y1 * (0x0FFFFF - xfract)) >> 4U);
8736     acc += ((q63_t) out * (yfract));
8737 
8738     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
8739     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U);
8740     acc += ((q63_t) out * (yfract));
8741 
8742     /* acc is in 13.51 format and down shift acc by 36 times */
8743     /* Convert out to 1.15 format */
8744     return ((q15_t)(acc >> 36));
8745   }
8746 
8747 
8748   /**
8749   * @brief  Q7 bilinear interpolation.
8750   * @param[in,out] S  points to an instance of the interpolation structure.
8751   * @param[in]     X  interpolation coordinate in 12.20 format.
8752   * @param[in]     Y  interpolation coordinate in 12.20 format.
8753   * @return out interpolated value.
8754   */
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)8755   __STATIC_FORCEINLINE q7_t arm_bilinear_interp_q7(
8756   arm_bilinear_interp_instance_q7 * S,
8757   q31_t X,
8758   q31_t Y)
8759   {
8760     q63_t acc = 0;                               /* output */
8761     q31_t out;                                   /* Temporary output */
8762     q31_t xfract, yfract;                        /* X, Y fractional parts */
8763     q7_t x1, x2, y1, y2;                         /* Nearest output values */
8764     int32_t rI, cI;                              /* Row and column indices */
8765     q7_t *pYData = S->pData;                     /* pointer to output table values */
8766     uint32_t nCols = S->numCols;                 /* num of rows */
8767 
8768     /* Input is in 12.20 format */
8769     /* 12 bits for the table index */
8770     /* Index value calculation */
8771     rI = ((X & (q31_t)0xFFF00000) >> 20);
8772 
8773     /* Input is in 12.20 format */
8774     /* 12 bits for the table index */
8775     /* Index value calculation */
8776     cI = ((Y & (q31_t)0xFFF00000) >> 20);
8777 
8778     /* Care taken for table outside boundary */
8779     /* Returns zero output when values are outside table boundary */
8780     if (rI < 0 || rI > (S->numCols - 2) || cI < 0 || cI > (S->numRows - 2))
8781     {
8782       return (0);
8783     }
8784 
8785     /* 20 bits for the fractional part */
8786     /* xfract should be in 12.20 format */
8787     xfract = (X & (q31_t)0x000FFFFF);
8788 
8789     /* Read two nearest output values from the index */
8790     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
8791     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
8792 
8793     /* 20 bits for the fractional part */
8794     /* yfract should be in 12.20 format */
8795     yfract = (Y & (q31_t)0x000FFFFF);
8796 
8797     /* Read two nearest output values from the index */
8798     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
8799     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
8800 
8801     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
8802     out = ((x1 * (0xFFFFF - xfract)));
8803     acc = (((q63_t) out * (0xFFFFF - yfract)));
8804 
8805     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
8806     out = ((x2 * (0xFFFFF - yfract)));
8807     acc += (((q63_t) out * (xfract)));
8808 
8809     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
8810     out = ((y1 * (0xFFFFF - xfract)));
8811     acc += (((q63_t) out * (yfract)));
8812 
8813     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
8814     out = ((y2 * (yfract)));
8815     acc += (((q63_t) out * (xfract)));
8816 
8817     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
8818     return ((q7_t)(acc >> 40));
8819   }
8820 
8821   /**
8822    * @} end of BilinearInterpolate group
8823    */
8824 
8825 
/*
 * 32x32-bit multiply helpers keeping the upper 32 bits of the 64-bit product.
 * 'a' must be a modifiable q31_t lvalue; 'x' and 'y' may be any expressions
 * (every argument is parenthesised in the expansion).
 * The _R variants add 0x80000000 before dropping the low 32 bits (rounding)
 * and evaluate 'a' twice, so 'a' must not have side effects.
 * The accumulator is scaled with a multiplication by 2^32 instead of '<< 32'
 * because left-shifting a negative signed value is undefined behaviour.
 */

/* SMMLAR */
#define multAcc_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) * 0x100000000LL) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

/* SMMLSR */
#define multSub_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) * 0x100000000LL) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

/* SMMULR */
#define mult_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) ((((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

/* SMMLA */
#define multAcc_32x32_keep32(a, x, y) \
    ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))

/* SMMLS */
#define multSub_32x32_keep32(a, x, y) \
    ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))

/* SMMUL */
#define mult_32x32_keep32(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y)) >> 32))
8849 
8850 
/*
 * Optimization-control markers.
 *   LOW_OPTIMIZATION_ENTER / IAR_ONLY_LOW_OPTIMIZATION_ENTER:
 *       place directly above a function definition.
 *   LOW_OPTIMIZATION_EXIT / IAR_ONLY_LOW_OPTIMIZATION_EXIT:
 *       place directly after the end of that function definition.
 * The branches are tested in order, so the first matching toolchain wins.
 * No marker is defined when no branch matches.
 */
#if defined(__CC_ARM)
  /* Optimization is only lowered when __ARM_ARCH_7EM__ is defined */
  #if defined(__ARM_ARCH_7EM__)
    #define LOW_OPTIMIZATION_ENTER  _Pragma("push") _Pragma("O1")
    #define LOW_OPTIMIZATION_EXIT   _Pragma("pop")
  #else
    #define LOW_OPTIMIZATION_ENTER
    #define LOW_OPTIMIZATION_EXIT
  #endif
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)
  /* Per-function attribute, so nothing has to be restored on exit */
  #define LOW_OPTIMIZATION_ENTER  __attribute__(( optimize("-O1") ))
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__)
  /* Both ENTER markers lower the optimization when __ARM_ARCH_7EM__ is defined */
  #if defined(__ARM_ARCH_7EM__)
    #define LOW_OPTIMIZATION_ENTER           _Pragma("optimize=low")
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER  _Pragma("optimize=low")
  #else
    #define LOW_OPTIMIZATION_ENTER
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #endif
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__TI_ARM__) || defined(__CSMC__) || defined(__TASKING__) || \
      defined(_MSC_VER) || defined(__GNUC_PYTHON__)
  /* All markers expand to nothing for these toolchains */
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
#endif
8935 
8936 
8937 
/* Compiler specific diagnostic adjustment */
/* Only the __GNUC__ branch has something to undo: it pops the GCC diagnostic state. */
/* The branches are tested in order, so the first two conditions win over __GNUC__.  */
#if defined(__CC_ARM) || (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050))
  /* nothing to restore */

#elif defined(__GNUC__)
#pragma GCC diagnostic pop

#elif defined(__ICCARM__) || defined(__TI_ARM__) || defined(__CSMC__) || \
      defined(__TASKING__) || defined(_MSC_VER)
  /* nothing to restore */

#else
  #error Unknown compiler
#endif
8959 
8960 #ifdef   __cplusplus
8961 }
8962 #endif
8963 
8964 
8965 #endif /* _ARM_MATH_H */
8966 
8967 /**
8968  *
8969  * End of file.
8970  */
8971