• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_solve_upper_triangular_f32.c
4  * Description:  Solve linear system UT X = A with UT upper triangular matrix
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 
32 /**
33   @ingroup groupMatrix
34  */
35 
36 
37 /**
38   @addtogroup MatrixInv
39   @{
40  */
41 
42 /**
43    * @brief Solve UT . X = A where UT is an upper triangular matrix
44    * @param[in]  ut  The upper triangular matrix
45    * @param[in]  a  The matrix a
46    * @param[out] dst The solution X of UT . X = A
47    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
48   */
49 
50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)54   arm_status arm_mat_solve_upper_triangular_f32(
55   const arm_matrix_instance_f32 * ut,
56   const arm_matrix_instance_f32 * a,
57   arm_matrix_instance_f32 * dst)
58   {
59 arm_status status;                             /* status of matrix inverse */
60 
61 
62 #ifdef ARM_MATH_MATRIX_CHECK
63 
64   /* Check for matrix mismatch condition */
65   if ((ut->numRows != ut->numCols) ||
66       (a->numRows != a->numCols) ||
67       (ut->numRows != a->numRows)   )
68   {
69     /* Set status as ARM_MATH_SIZE_MISMATCH */
70     status = ARM_MATH_SIZE_MISMATCH;
71   }
72   else
73 
74 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
75 
76   {
77 
78     int i,j,k,n;
79 
80     n = dst->numRows;
81 
82     float32_t *pX = dst->pData;
83     float32_t *pUT = ut->pData;
84     float32_t *pA = a->pData;
85 
86     float32_t *ut_row;
87     float32_t *a_col;
88 
89     float32_t invUT;
90 
91     f32x4_t vecA;
92     f32x4_t vecX;
93 
94     for(i=n-1; i >= 0 ; i--)
95     {
96       for(j=0; j+3 < n; j +=4)
97       {
98             vecA = vld1q_f32(&pA[i * n + j]);
99 
100             for(k=n-1; k > i; k--)
101             {
102                 vecX = vld1q_f32(&pX[n*k+j]);
103                 vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
104             }
105 
106             if (pUT[n*i + i]==0.0f)
107             {
108               return(ARM_MATH_SINGULAR);
109             }
110 
111             invUT = 1.0f / pUT[n*i + i];
112             vecA = vmulq(vecA,vdupq_n_f32(invUT));
113 
114 
115             vst1q(&pX[i*n+j],vecA);
116       }
117 
118       for(; j < n; j ++)
119       {
120             a_col = &pA[j];
121 
122             ut_row = &pUT[n*i];
123 
124             float32_t tmp=a_col[i * n];
125 
126             for(k=n-1; k > i; k--)
127             {
128                 tmp -= ut_row[k] * pX[n*k+j];
129             }
130 
131             if (ut_row[i]==0.0f)
132             {
133               return(ARM_MATH_SINGULAR);
134             }
135             tmp = tmp / ut_row[i];
136             pX[i*n+j] = tmp;
137        }
138 
139     }
140     status = ARM_MATH_SUCCESS;
141 
142   }
143 
144 
145   /* Return to application */
146   return (status);
147 }
148 
149 #else
150 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)151   arm_status arm_mat_solve_upper_triangular_f32(
152   const arm_matrix_instance_f32 * ut,
153   const arm_matrix_instance_f32 * a,
154   arm_matrix_instance_f32 * dst)
155   {
156 arm_status status;                             /* status of matrix inverse */
157 
158 
159 #ifdef ARM_MATH_MATRIX_CHECK
160 
161   /* Check for matrix mismatch condition */
162   if ((ut->numRows != ut->numCols) ||
163       (a->numRows != a->numCols) ||
164       (ut->numRows != a->numRows)   )
165   {
166     /* Set status as ARM_MATH_SIZE_MISMATCH */
167     status = ARM_MATH_SIZE_MISMATCH;
168   }
169   else
170 
171 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
172 
173   {
174 
175     int i,j,k,n;
176 
177     n = dst->numRows;
178 
179     float32_t *pX = dst->pData;
180     float32_t *pUT = ut->pData;
181     float32_t *pA = a->pData;
182 
183     float32_t *ut_row;
184     float32_t *a_col;
185 
186     float32_t invUT;
187 
188     f32x4_t vecA;
189     f32x4_t vecX;
190 
191     for(i=n-1; i >= 0 ; i--)
192     {
193       for(j=0; j+3 < n; j +=4)
194       {
195             vecA = vld1q_f32(&pA[i * n + j]);
196 
197             for(k=n-1; k > i; k--)
198             {
199                 vecX = vld1q_f32(&pX[n*k+j]);
200                 vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
201             }
202 
203             if (pUT[n*i + i]==0.0f)
204             {
205               return(ARM_MATH_SINGULAR);
206             }
207 
208             invUT = 1.0f / pUT[n*i + i];
209             vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));
210 
211 
212             vst1q_f32(&pX[i*n+j],vecA);
213       }
214 
215       for(; j < n; j ++)
216       {
217             a_col = &pA[j];
218 
219             ut_row = &pUT[n*i];
220 
221             float32_t tmp=a_col[i * n];
222 
223             for(k=n-1; k > i; k--)
224             {
225                 tmp -= ut_row[k] * pX[n*k+j];
226             }
227 
228             if (ut_row[i]==0.0f)
229             {
230               return(ARM_MATH_SINGULAR);
231             }
232             tmp = tmp / ut_row[i];
233             pX[i*n+j] = tmp;
234        }
235 
236     }
237     status = ARM_MATH_SUCCESS;
238 
239   }
240 
241 
242   /* Return to application */
243   return (status);
244 }
245 
246 #else
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)247   arm_status arm_mat_solve_upper_triangular_f32(
248   const arm_matrix_instance_f32 * ut,
249   const arm_matrix_instance_f32 * a,
250   arm_matrix_instance_f32 * dst)
251   {
252 arm_status status;                             /* status of matrix inverse */
253 
254 
255 #ifdef ARM_MATH_MATRIX_CHECK
256 
257   /* Check for matrix mismatch condition */
258   if ((ut->numRows != ut->numCols) ||
259       (a->numRows != a->numCols) ||
260       (ut->numRows != a->numRows)   )
261   {
262     /* Set status as ARM_MATH_SIZE_MISMATCH */
263     status = ARM_MATH_SIZE_MISMATCH;
264   }
265   else
266 
267 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
268 
269   {
270 
271     int i,j,k,n;
272 
273     n = dst->numRows;
274 
275     float32_t *pX = dst->pData;
276     float32_t *pUT = ut->pData;
277     float32_t *pA = a->pData;
278 
279     float32_t *ut_row;
280     float32_t *a_col;
281 
282     for(j=0; j < n; j ++)
283     {
284        a_col = &pA[j];
285 
286        for(i=n-1; i >= 0 ; i--)
287        {
288             ut_row = &pUT[n*i];
289 
290             float32_t tmp=a_col[i * n];
291 
292             for(k=n-1; k > i; k--)
293             {
294                 tmp -= ut_row[k] * pX[n*k+j];
295             }
296 
297             if (ut_row[i]==0.0f)
298             {
299               return(ARM_MATH_SINGULAR);
300             }
301             tmp = tmp / ut_row[i];
302             pX[i*n+j] = tmp;
303        }
304 
305     }
306     status = ARM_MATH_SUCCESS;
307 
308   }
309 
310 
311   /* Return to application */
312   return (status);
313 }
314 #endif /* #if defined(ARM_MATH_NEON) */
315 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
316 
317 /**
318   @} end of MatrixInv group
319  */
320