1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mse_q7.c
4 * Description: Mean square error between two Q7 vectors
5 *
6 * $Date: 04 April 2022
7 * $Revision: V1.10.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @defgroup MSE Mean Square Error
37
38 Calculates the mean square error between two vectors.
39
40 */
41
42 /**
43 @addtogroup MSE
44 @{
45 */
46
47 /**
48 @brief Mean square error between two Q7 vectors.
49 @param[in] pSrcA points to the first input vector
50 @param[in] pSrcB points to the second input vector
51 @param[in] blockSize number of samples in input vector
52 @param[out] pResult mean square error
53 @return none
54 */
55 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation.
 *
 * Samples are processed 16 at a time.  Both inputs are shifted right by one
 * bit, subtracted with saturation, and the lane-wise squares of the
 * difference are accumulated into a q31_t sum.  The stored result is
 * (sum / blockSize) >> 5, saturated to 8 bits.
 *
 * An empty vector (blockSize == 0) stores 0 in *pResult instead of dividing
 * by zero.
 */
void arm_mse_q7(
    const q7_t * pSrcA,
    const q7_t * pSrcB,
          uint32_t blockSize,
          q7_t * pResult)
{
    uint32_t  blkCnt;           /* Loop counter */
    q7x16_t vecSrcA,vecSrcB;    /* Current 16-lane input vectors */
    q31_t sum = 0;              /* Accumulated squared differences */

    /* Nothing to average: avoid the division by zero below. */
    if (blockSize == 0U)
    {
        *pResult = 0;
        return;
    }

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        /* Halve both inputs before taking the difference */
        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);
        /*
         * Square each lane and add all lanes to the running sum
         */
        sum = vmladavaq(sum, vecSrcA, vecSrcA);

        blkCnt--;
        pSrcA += 16;
        pSrcB += 16;
    }

    /*
     * Tail: remaining (blockSize % 16) samples
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        /* Predicate enabling only the first blkCnt byte lanes */
        mve_pred16_t p0 = vctp8q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the active lanes are read, so the
         * load does not run past the end of the input buffers.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);

        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);

        /* Only the active lanes contribute to the sum */
        sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
    }

    /* Mean of the squared differences, scaled down and saturated to 8 bits */
    *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
}
107 #else
/*
 * Scalar implementation (unrolled by four when ARM_MATH_LOOPUNROLL is
 * defined).
 *
 * For every sample pair, both inputs are shifted right by one bit, the
 * difference is saturated to 8 bits, and its square is added to a q31_t
 * accumulator.  The stored result is (sum / blockSize) >> 5, saturated to
 * 8 bits.
 *
 * An empty vector (blockSize == 0) stores 0 in *pResult instead of dividing
 * by zero.
 */
void arm_mse_q7(
    const q7_t * pSrcA,
    const q7_t * pSrcB,
          uint32_t blockSize,
          q7_t * pResult)
{
        uint32_t blkCnt;                               /* Loop counter */
        q31_t sum = 0;                                 /* Accumulated squared differences */
        q7_t inA,inB;                                  /* Temporary variables to store input values */

  /* Nothing to average: avoid the division by zero below. */
  if (blockSize == 0U)
  {
    *pResult = 0;
    return;
  }

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* Sample 1: halve inputs, saturating difference, accumulate square */
    inA = *pSrcA++ >> 1;
    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

    /* Sample 2 */
    inA = *pSrcA++ >> 1;
    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

    /* Sample 3 */
    inA = *pSrcA++ >> 1;
    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

    /* Sample 4 */
    inA = *pSrcA++ >> 1;
    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Halve both inputs before taking the difference */
    inA = *pSrcA++ >> 1;
    inB = *pSrcB++ >> 1;

    /* Saturating 8-bit difference, then accumulate its square */
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store result in q7 format: mean of squares, scaled down and saturated */
  *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
}
175 #endif /* defined(ARM_MATH_MVEI) */
176
177 /**
178 @} end of MSE group
179 */
180