1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include <limits>
45
46 #if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
47 #pragma float_control(precise, on)
48 #endif
49
50 namespace cv
51 {
52
LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)53 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
54 {
55 return hal::LU(A, astep, m, b, bstep, n);
56 }
57
LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)58 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
59 {
60 return hal::LU(A, astep, m, b, bstep, n);
61 }
62
Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)63 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
64 {
65 return hal::Cholesky(A, astep, m, b, bstep, n);
66 }
67
Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)68 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
69 {
70 return hal::Cholesky(A, astep, m, b, bstep, n);
71 }
72
hypot(_Tp a,_Tp b)73 template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
74 {
75 a = std::abs(a);
76 b = std::abs(b);
77 if( a > b )
78 {
79 b /= a;
80 return a*std::sqrt(1 + b*b);
81 }
82 if( b > 0 )
83 {
84 a /= b;
85 return b*std::sqrt(1 + a*a);
86 }
87 return 0;
88 }
89
90
91 template<typename _Tp> bool
JacobiImpl_(_Tp * A,size_t astep,_Tp * W,_Tp * V,size_t vstep,int n,uchar * buf)92 JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
93 {
94 const _Tp eps = std::numeric_limits<_Tp>::epsilon();
95 int i, j, k, m;
96
97 astep /= sizeof(A[0]);
98 if( V )
99 {
100 vstep /= sizeof(V[0]);
101 for( i = 0; i < n; i++ )
102 {
103 for( j = 0; j < n; j++ )
104 V[i*vstep + j] = (_Tp)0;
105 V[i*vstep + i] = (_Tp)1;
106 }
107 }
108
109 int iters, maxIters = n*n*30;
110
111 int* indR = (int*)alignPtr(buf, sizeof(int));
112 int* indC = indR + n;
113 _Tp mv = (_Tp)0;
114
115 for( k = 0; k < n; k++ )
116 {
117 W[k] = A[(astep + 1)*k];
118 if( k < n - 1 )
119 {
120 for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
121 {
122 _Tp val = std::abs(A[astep*k+i]);
123 if( mv < val )
124 mv = val, m = i;
125 }
126 indR[k] = m;
127 }
128 if( k > 0 )
129 {
130 for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
131 {
132 _Tp val = std::abs(A[astep*i+k]);
133 if( mv < val )
134 mv = val, m = i;
135 }
136 indC[k] = m;
137 }
138 }
139
140 if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
141 {
142 // find index (k,l) of pivot p
143 for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
144 {
145 _Tp val = std::abs(A[astep*i + indR[i]]);
146 if( mv < val )
147 mv = val, k = i;
148 }
149 int l = indR[k];
150 for( i = 1; i < n; i++ )
151 {
152 _Tp val = std::abs(A[astep*indC[i] + i]);
153 if( mv < val )
154 mv = val, k = indC[i], l = i;
155 }
156
157 _Tp p = A[astep*k + l];
158 if( std::abs(p) <= eps )
159 break;
160 _Tp y = (_Tp)((W[l] - W[k])*0.5);
161 _Tp t = std::abs(y) + hypot(p, y);
162 _Tp s = hypot(p, t);
163 _Tp c = t/s;
164 s = p/s; t = (p/t)*p;
165 if( y < 0 )
166 s = -s, t = -t;
167 A[astep*k + l] = 0;
168
169 W[k] -= t;
170 W[l] += t;
171
172 _Tp a0, b0;
173
174 #undef rotate
175 #define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
176
177 // rotate rows and columns k and l
178 for( i = 0; i < k; i++ )
179 rotate(A[astep*i+k], A[astep*i+l]);
180 for( i = k+1; i < l; i++ )
181 rotate(A[astep*k+i], A[astep*i+l]);
182 for( i = l+1; i < n; i++ )
183 rotate(A[astep*k+i], A[astep*l+i]);
184
185 // rotate eigenvectors
186 if( V )
187 for( i = 0; i < n; i++ )
188 rotate(V[vstep*k+i], V[vstep*l+i]);
189
190 #undef rotate
191
192 for( j = 0; j < 2; j++ )
193 {
194 int idx = j == 0 ? k : l;
195 if( idx < n - 1 )
196 {
197 for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
198 {
199 _Tp val = std::abs(A[astep*idx+i]);
200 if( mv < val )
201 mv = val, m = i;
202 }
203 indR[idx] = m;
204 }
205 if( idx > 0 )
206 {
207 for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
208 {
209 _Tp val = std::abs(A[astep*i+idx]);
210 if( mv < val )
211 mv = val, m = i;
212 }
213 indC[idx] = m;
214 }
215 }
216 }
217
218 // sort eigenvalues & eigenvectors
219 for( k = 0; k < n-1; k++ )
220 {
221 m = k;
222 for( i = k+1; i < n; i++ )
223 {
224 if( W[m] < W[i] )
225 m = i;
226 }
227 if( k != m )
228 {
229 std::swap(W[m], W[k]);
230 if( V )
231 for( i = 0; i < n; i++ )
232 std::swap(V[vstep*m + i], V[vstep*k + i]);
233 }
234 }
235
236 return true;
237 }
238
Jacobi(float * S,size_t sstep,float * e,float * E,size_t estep,int n,uchar * buf)239 static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
240 {
241 return JacobiImpl_(S, sstep, e, E, estep, n, buf);
242 }
243
Jacobi(double * S,size_t sstep,double * e,double * E,size_t estep,int n,uchar * buf)244 static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
245 {
246 return JacobiImpl_(S, sstep, e, E, estep, n, buf);
247 }
248
249
250 template<typename T> struct VBLAS
251 {
dotcv::VBLAS252 int dot(const T*, const T*, int, T*) const { return 0; }
givenscv::VBLAS253 int givens(T*, T*, int, T, T) const { return 0; }
givensxcv::VBLAS254 int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
255 };
256
257 #if CV_SSE2
dot(const float * a,const float * b,int n,float * result) const258 template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
259 {
260 if( n < 8 )
261 return 0;
262 int k = 0;
263 __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps();
264 for( ; k <= n - 8; k += 8 )
265 {
266 __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4);
267 __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4);
268
269 s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0));
270 s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1));
271 }
272 s0 = _mm_add_ps(s0, s1);
273 float sbuf[4];
274 _mm_storeu_ps(sbuf, s0);
275 *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3];
276 return k;
277 }
278
279
givens(float * a,float * b,int n,float c,float s) const280 template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
281 {
282 if( n < 4 )
283 return 0;
284 int k = 0;
285 __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
286 for( ; k <= n - 4; k += 4 )
287 {
288 __m128 a0 = _mm_load_ps(a + k);
289 __m128 b0 = _mm_load_ps(b + k);
290 __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
291 __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
292 _mm_store_ps(a + k, t0);
293 _mm_store_ps(b + k, t1);
294 }
295 return k;
296 }
297
298
givensx(float * a,float * b,int n,float c,float s,float * anorm,float * bnorm) const299 template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
300 float* anorm, float* bnorm) const
301 {
302 if( n < 4 )
303 return 0;
304 int k = 0;
305 __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
306 __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps();
307 for( ; k <= n - 4; k += 4 )
308 {
309 __m128 a0 = _mm_load_ps(a + k);
310 __m128 b0 = _mm_load_ps(b + k);
311 __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
312 __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
313 _mm_store_ps(a + k, t0);
314 _mm_store_ps(b + k, t1);
315 sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0));
316 sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1));
317 }
318 float abuf[4], bbuf[4];
319 _mm_storeu_ps(abuf, sa);
320 _mm_storeu_ps(bbuf, sb);
321 *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3];
322 *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3];
323 return k;
324 }
325
326
dot(const double * a,const double * b,int n,double * result) const327 template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
328 {
329 if( n < 4 )
330 return 0;
331 int k = 0;
332 __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd();
333 for( ; k <= n - 4; k += 4 )
334 {
335 __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2);
336 __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2);
337
338 s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0));
339 s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1));
340 }
341 s0 = _mm_add_pd(s0, s1);
342 double sbuf[2];
343 _mm_storeu_pd(sbuf, s0);
344 *result = sbuf[0] + sbuf[1];
345 return k;
346 }
347
348
givens(double * a,double * b,int n,double c,double s) const349 template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
350 {
351 int k = 0;
352 __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
353 for( ; k <= n - 2; k += 2 )
354 {
355 __m128d a0 = _mm_load_pd(a + k);
356 __m128d b0 = _mm_load_pd(b + k);
357 __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
358 __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
359 _mm_store_pd(a + k, t0);
360 _mm_store_pd(b + k, t1);
361 }
362 return k;
363 }
364
365
givensx(double * a,double * b,int n,double c,double s,double * anorm,double * bnorm) const366 template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
367 double* anorm, double* bnorm) const
368 {
369 int k = 0;
370 __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
371 __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd();
372 for( ; k <= n - 2; k += 2 )
373 {
374 __m128d a0 = _mm_load_pd(a + k);
375 __m128d b0 = _mm_load_pd(b + k);
376 __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
377 __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
378 _mm_store_pd(a + k, t0);
379 _mm_store_pd(b + k, t1);
380 sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0));
381 sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1));
382 }
383 double abuf[2], bbuf[2];
384 _mm_storeu_pd(abuf, sa);
385 _mm_storeu_pd(bbuf, sb);
386 *anorm = abuf[0] + abuf[1];
387 *bnorm = bbuf[0] + bbuf[1];
388 return k;
389 }
390 #endif
391
392 template<typename _Tp> void
JacobiSVDImpl_(_Tp * At,size_t astep,_Tp * _W,_Tp * Vt,size_t vstep,int m,int n,int n1,double minval,_Tp eps)393 JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
394 int m, int n, int n1, double minval, _Tp eps)
395 {
396 VBLAS<_Tp> vblas;
397 AutoBuffer<double> Wbuf(n);
398 double* W = Wbuf;
399 int i, j, k, iter, max_iter = std::max(m, 30);
400 _Tp c, s;
401 double sd;
402 astep /= sizeof(At[0]);
403 vstep /= sizeof(Vt[0]);
404
405 for( i = 0; i < n; i++ )
406 {
407 for( k = 0, sd = 0; k < m; k++ )
408 {
409 _Tp t = At[i*astep + k];
410 sd += (double)t*t;
411 }
412 W[i] = sd;
413
414 if( Vt )
415 {
416 for( k = 0; k < n; k++ )
417 Vt[i*vstep + k] = 0;
418 Vt[i*vstep + i] = 1;
419 }
420 }
421
422 for( iter = 0; iter < max_iter; iter++ )
423 {
424 bool changed = false;
425
426 for( i = 0; i < n-1; i++ )
427 for( j = i+1; j < n; j++ )
428 {
429 _Tp *Ai = At + i*astep, *Aj = At + j*astep;
430 double a = W[i], p = 0, b = W[j];
431
432 for( k = 0; k < m; k++ )
433 p += (double)Ai[k]*Aj[k];
434
435 if( std::abs(p) <= eps*std::sqrt((double)a*b) )
436 continue;
437
438 p *= 2;
439 double beta = a - b, gamma = hypot((double)p, beta);
440 if( beta < 0 )
441 {
442 double delta = (gamma - beta)*0.5;
443 s = (_Tp)std::sqrt(delta/gamma);
444 c = (_Tp)(p/(gamma*s*2));
445 }
446 else
447 {
448 c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
449 s = (_Tp)(p/(gamma*c*2));
450 }
451
452 a = b = 0;
453 for( k = 0; k < m; k++ )
454 {
455 _Tp t0 = c*Ai[k] + s*Aj[k];
456 _Tp t1 = -s*Ai[k] + c*Aj[k];
457 Ai[k] = t0; Aj[k] = t1;
458
459 a += (double)t0*t0; b += (double)t1*t1;
460 }
461 W[i] = a; W[j] = b;
462
463 changed = true;
464
465 if( Vt )
466 {
467 _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
468 k = vblas.givens(Vi, Vj, n, c, s);
469
470 for( ; k < n; k++ )
471 {
472 _Tp t0 = c*Vi[k] + s*Vj[k];
473 _Tp t1 = -s*Vi[k] + c*Vj[k];
474 Vi[k] = t0; Vj[k] = t1;
475 }
476 }
477 }
478 if( !changed )
479 break;
480 }
481
482 for( i = 0; i < n; i++ )
483 {
484 for( k = 0, sd = 0; k < m; k++ )
485 {
486 _Tp t = At[i*astep + k];
487 sd += (double)t*t;
488 }
489 W[i] = std::sqrt(sd);
490 }
491
492 for( i = 0; i < n-1; i++ )
493 {
494 j = i;
495 for( k = i+1; k < n; k++ )
496 {
497 if( W[j] < W[k] )
498 j = k;
499 }
500 if( i != j )
501 {
502 std::swap(W[i], W[j]);
503 if( Vt )
504 {
505 for( k = 0; k < m; k++ )
506 std::swap(At[i*astep + k], At[j*astep + k]);
507
508 for( k = 0; k < n; k++ )
509 std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
510 }
511 }
512 }
513
514 for( i = 0; i < n; i++ )
515 _W[i] = (_Tp)W[i];
516
517 if( !Vt )
518 return;
519
520 RNG rng(0x12345678);
521 for( i = 0; i < n1; i++ )
522 {
523 sd = i < n ? W[i] : 0;
524
525 for( int ii = 0; ii < 100 && sd <= minval; ii++ )
526 {
527 // if we got a zero singular value, then in order to get the corresponding left singular vector
528 // we generate a random vector, project it to the previously computed left singular vectors,
529 // subtract the projection and normalize the difference.
530 const _Tp val0 = (_Tp)(1./m);
531 for( k = 0; k < m; k++ )
532 {
533 _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
534 At[i*astep + k] = val;
535 }
536 for( iter = 0; iter < 2; iter++ )
537 {
538 for( j = 0; j < i; j++ )
539 {
540 sd = 0;
541 for( k = 0; k < m; k++ )
542 sd += At[i*astep + k]*At[j*astep + k];
543 _Tp asum = 0;
544 for( k = 0; k < m; k++ )
545 {
546 _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
547 At[i*astep + k] = t;
548 asum += std::abs(t);
549 }
550 asum = asum > eps*100 ? 1/asum : 0;
551 for( k = 0; k < m; k++ )
552 At[i*astep + k] *= asum;
553 }
554 }
555 sd = 0;
556 for( k = 0; k < m; k++ )
557 {
558 _Tp t = At[i*astep + k];
559 sd += (double)t*t;
560 }
561 sd = std::sqrt(sd);
562 }
563
564 s = (_Tp)(sd > minval ? 1/sd : 0.);
565 for( k = 0; k < m; k++ )
566 At[i*astep + k] *= s;
567 }
568 }
569
570
JacobiSVD(float * At,size_t astep,float * W,float * Vt,size_t vstep,int m,int n,int n1=-1)571 static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
572 {
573 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
574 }
575
JacobiSVD(double * At,size_t astep,double * W,double * Vt,size_t vstep,int m,int n,int n1=-1)576 static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
577 {
578 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
579 }
580
581 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
582 template<typename T1, typename T2, typename T3> static void
MatrAXPY(int m,int n,const T1 * x,int dx,const T2 * a,int inca,T3 * y,int dy)583 MatrAXPY( int m, int n, const T1* x, int dx,
584 const T2* a, int inca, T3* y, int dy )
585 {
586 int i;
587 for( i = 0; i < m; i++, x += dx, y += dy )
588 {
589 T2 s = a[i*inca];
590 int j = 0;
591 #if CV_ENABLE_UNROLLED
592 for(; j <= n - 4; j += 4 )
593 {
594 T3 t0 = (T3)(y[j] + s*x[j]);
595 T3 t1 = (T3)(y[j+1] + s*x[j+1]);
596 y[j] = t0;
597 y[j+1] = t1;
598 t0 = (T3)(y[j+2] + s*x[j+2]);
599 t1 = (T3)(y[j+3] + s*x[j+3]);
600 y[j+2] = t0;
601 y[j+3] = t1;
602 }
603 #endif
604 for( ; j < n; j++ )
605 y[j] = (T3)(y[j] + s*x[j]);
606 }
607 }
608
609 template<typename T> static void
SVBkSbImpl_(int m,int n,const T * w,int incw,const T * u,int ldu,bool uT,const T * v,int ldv,bool vT,const T * b,int ldb,int nb,T * x,int ldx,double * buffer,T eps)610 SVBkSbImpl_( int m, int n, const T* w, int incw,
611 const T* u, int ldu, bool uT,
612 const T* v, int ldv, bool vT,
613 const T* b, int ldb, int nb,
614 T* x, int ldx, double* buffer, T eps )
615 {
616 double threshold = 0;
617 int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
618 int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
619 int i, j, nm = std::min(m, n);
620
621 if( !b )
622 nb = m;
623
624 for( i = 0; i < n; i++ )
625 for( j = 0; j < nb; j++ )
626 x[i*ldx + j] = 0;
627
628 for( i = 0; i < nm; i++ )
629 threshold += w[i*incw];
630 threshold *= eps;
631
632 // v * inv(w) * uT * b
633 for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
634 {
635 double wi = w[i*incw];
636 if( (double)std::abs(wi) <= threshold )
637 continue;
638 wi = 1/wi;
639
640 if( nb == 1 )
641 {
642 double s = 0;
643 if( b )
644 for( j = 0; j < m; j++ )
645 s += u[j*udelta1]*b[j*ldb];
646 else
647 s = u[0];
648 s *= wi;
649
650 for( j = 0; j < n; j++ )
651 x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
652 }
653 else
654 {
655 if( b )
656 {
657 for( j = 0; j < nb; j++ )
658 buffer[j] = 0;
659 MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
660 for( j = 0; j < nb; j++ )
661 buffer[j] *= wi;
662 }
663 else
664 {
665 for( j = 0; j < nb; j++ )
666 buffer[j] = u[j*udelta1]*wi;
667 }
668 MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
669 }
670 }
671 }
672
673 static void
SVBkSb(int m,int n,const float * w,size_t wstep,const float * u,size_t ustep,bool uT,const float * v,size_t vstep,bool vT,const float * b,size_t bstep,int nb,float * x,size_t xstep,uchar * buffer)674 SVBkSb( int m, int n, const float* w, size_t wstep,
675 const float* u, size_t ustep, bool uT,
676 const float* v, size_t vstep, bool vT,
677 const float* b, size_t bstep, int nb,
678 float* x, size_t xstep, uchar* buffer )
679 {
680 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
681 u, (int)(ustep/sizeof(u[0])), uT,
682 v, (int)(vstep/sizeof(v[0])), vT,
683 b, (int)(bstep/sizeof(b[0])), nb,
684 x, (int)(xstep/sizeof(x[0])),
685 (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
686 }
687
688 static void
SVBkSb(int m,int n,const double * w,size_t wstep,const double * u,size_t ustep,bool uT,const double * v,size_t vstep,bool vT,const double * b,size_t bstep,int nb,double * x,size_t xstep,uchar * buffer)689 SVBkSb( int m, int n, const double* w, size_t wstep,
690 const double* u, size_t ustep, bool uT,
691 const double* v, size_t vstep, bool vT,
692 const double* b, size_t bstep, int nb,
693 double* x, size_t xstep, uchar* buffer )
694 {
695 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
696 u, (int)(ustep/sizeof(u[0])), uT,
697 v, (int)(vstep/sizeof(v[0])), vT,
698 b, (int)(bstep/sizeof(b[0])), nb,
699 x, (int)(xstep/sizeof(x[0])),
700 (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
701 }
702
703 }
704
705 /****************************************************************************************\
706 * Determinant of the matrix *
707 \****************************************************************************************/
708
709 #define det2(m) ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
710 #define det3(m) (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) - \
711 m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \
712 m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
713
determinant(InputArray _mat)714 double cv::determinant( InputArray _mat )
715 {
716 Mat mat = _mat.getMat();
717 double result = 0;
718 int type = mat.type(), rows = mat.rows;
719 size_t step = mat.step;
720 const uchar* m = mat.ptr();
721
722 CV_Assert( !mat.empty() );
723 CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
724
725 #define Mf(y, x) ((float*)(m + y*step))[x]
726 #define Md(y, x) ((double*)(m + y*step))[x]
727
728 if( type == CV_32F )
729 {
730 if( rows == 2 )
731 result = det2(Mf);
732 else if( rows == 3 )
733 result = det3(Mf);
734 else if( rows == 1 )
735 result = Mf(0,0);
736 else
737 {
738 size_t bufSize = rows*rows*sizeof(float);
739 AutoBuffer<uchar> buffer(bufSize);
740 Mat a(rows, rows, CV_32F, (uchar*)buffer);
741 mat.copyTo(a);
742
743 result = hal::LU(a.ptr<float>(), a.step, rows, 0, 0, 0);
744 if( result )
745 {
746 for( int i = 0; i < rows; i++ )
747 result *= a.at<float>(i,i);
748 result = 1./result;
749 }
750 }
751 }
752 else
753 {
754 if( rows == 2 )
755 result = det2(Md);
756 else if( rows == 3 )
757 result = det3(Md);
758 else if( rows == 1 )
759 result = Md(0,0);
760 else
761 {
762 size_t bufSize = rows*rows*sizeof(double);
763 AutoBuffer<uchar> buffer(bufSize);
764 Mat a(rows, rows, CV_64F, (uchar*)buffer);
765 mat.copyTo(a);
766
767 result = hal::LU(a.ptr<double>(), a.step, rows, 0, 0, 0);
768 if( result )
769 {
770 for( int i = 0; i < rows; i++ )
771 result *= a.at<double>(i,i);
772 result = 1./result;
773 }
774 }
775 }
776
777 #undef Mf
778 #undef Md
779
780 return result;
781 }
782
783 /****************************************************************************************\
784 * Inverse (or pseudo-inverse) of a matrix *
785 \****************************************************************************************/
786
787 #define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
788 #define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
789 #define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
790 #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
791
invert(InputArray _src,OutputArray _dst,int method)792 double cv::invert( InputArray _src, OutputArray _dst, int method )
793 {
794 bool result = false;
795 Mat src = _src.getMat();
796 int type = src.type();
797
798 CV_Assert(type == CV_32F || type == CV_64F);
799
800 size_t esz = CV_ELEM_SIZE(type);
801 int m = src.rows, n = src.cols;
802
803 if( method == DECOMP_SVD )
804 {
805 int nm = std::min(m, n);
806
807 AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
808 uchar* buf = alignPtr((uchar*)_buf, (int)esz);
809 Mat u(m, nm, type, buf);
810 Mat w(nm, 1, type, u.ptr() + m*nm*esz);
811 Mat vt(nm, n, type, w.ptr() + nm*esz);
812
813 SVD::compute(src, w, u, vt);
814 SVD::backSubst(w, u, vt, Mat(), _dst);
815 return type == CV_32F ?
816 (w.ptr<float>()[0] >= FLT_EPSILON ?
817 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
818 (w.ptr<double>()[0] >= DBL_EPSILON ?
819 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
820 }
821
822 CV_Assert( m == n );
823
824 if( method == DECOMP_EIG )
825 {
826 AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
827 uchar* buf = alignPtr((uchar*)_buf, (int)esz);
828 Mat u(n, n, type, buf);
829 Mat w(n, 1, type, u.ptr() + n*n*esz);
830 Mat vt(n, n, type, w.ptr() + n*esz);
831
832 eigen(src, w, vt);
833 transpose(vt, u);
834 SVD::backSubst(w, u, vt, Mat(), _dst);
835 return type == CV_32F ?
836 (w.ptr<float>()[0] >= FLT_EPSILON ?
837 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
838 (w.ptr<double>()[0] >= DBL_EPSILON ?
839 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
840 }
841
842 CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
843
844 _dst.create( n, n, type );
845 Mat dst = _dst.getMat();
846
847 if( n <= 3 )
848 {
849 const uchar* srcdata = src.ptr();
850 uchar* dstdata = dst.ptr();
851 size_t srcstep = src.step;
852 size_t dststep = dst.step;
853
854 if( n == 2 )
855 {
856 if( type == CV_32FC1 )
857 {
858 double d = det2(Sf);
859 if( d != 0. )
860 {
861 result = true;
862 d = 1./d;
863
864 #if CV_SSE2
865 if(USE_SSE2)
866 {
867 __m128 zero = _mm_setzero_ps();
868 __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1)
869 __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1)
870 __m128 s0 = _mm_or_ps(t0, t1);
871 __m128 det =_mm_set1_ps((float)d);
872 s0 = _mm_mul_ps(s0, det);
873 static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0};
874 __m128 pattern = _mm_load_ps((const float*)inv);
875 s0 = _mm_xor_ps(s0, pattern);//==-1*s0
876 s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3));
877 _mm_storel_pi((__m64*)dstdata, s0);
878 _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0);
879 }
880 else
881 #endif
882 {
883 double t0, t1;
884 t0 = Sf(0,0)*d;
885 t1 = Sf(1,1)*d;
886 Df(1,1) = (float)t0;
887 Df(0,0) = (float)t1;
888 t0 = -Sf(0,1)*d;
889 t1 = -Sf(1,0)*d;
890 Df(0,1) = (float)t0;
891 Df(1,0) = (float)t1;
892 }
893
894 }
895 }
896 else
897 {
898 double d = det2(Sd);
899 if( d != 0. )
900 {
901 result = true;
902 d = 1./d;
903 #if CV_SSE2
904 if(USE_SSE2)
905 {
906 __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1)
907 __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1)
908 __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal
909 __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal
910 __m128d det = _mm_load1_pd((const double*)&d);
911 sm = _mm_mul_pd(sm, det);
912
913 static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80};
914 __m128d pattern = _mm_load1_pd((double*)inv);
915 ss = _mm_mul_pd(ss, det);
916 ss = _mm_xor_pd(ss, pattern);//==-1*ss
917
918 s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1));
919 s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1));
920 _mm_storeu_pd((double*)dstdata, s0);
921 _mm_storeu_pd((double*)(dstdata+dststep), s1);
922 }
923 else
924 #endif
925 {
926 double t0, t1;
927 t0 = Sd(0,0)*d;
928 t1 = Sd(1,1)*d;
929 Dd(1,1) = t0;
930 Dd(0,0) = t1;
931 t0 = -Sd(0,1)*d;
932 t1 = -Sd(1,0)*d;
933 Dd(0,1) = t0;
934 Dd(1,0) = t1;
935 }
936 }
937 }
938 }
939 else if( n == 3 )
940 {
941 if( type == CV_32FC1 )
942 {
943 double d = det3(Sf);
944
945 if( d != 0. )
946 {
947 double t[12];
948
949 result = true;
950 d = 1./d;
951 t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
952 t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
953 t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
954
955 t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
956 t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
957 t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
958
959 t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
960 t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
961 t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
962
963 Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
964 Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
965 Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
966 }
967 }
968 else
969 {
970 double d = det3(Sd);
971 if( d != 0. )
972 {
973 result = true;
974 d = 1./d;
975 double t[9];
976
977 t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
978 t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
979 t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
980
981 t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
982 t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
983 t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
984
985 t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
986 t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
987 t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;
988
989 Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
990 Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
991 Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
992 }
993 }
994 }
995 else
996 {
997 assert( n == 1 );
998
999 if( type == CV_32FC1 )
1000 {
1001 double d = Sf(0,0);
1002 if( d != 0. )
1003 {
1004 result = true;
1005 Df(0,0) = (float)(1./d);
1006 }
1007 }
1008 else
1009 {
1010 double d = Sd(0,0);
1011 if( d != 0. )
1012 {
1013 result = true;
1014 Dd(0,0) = 1./d;
1015 }
1016 }
1017 }
1018 if( !result )
1019 dst = Scalar(0);
1020 return result;
1021 }
1022
1023 int elem_size = CV_ELEM_SIZE(type);
1024 AutoBuffer<uchar> buf(n*n*elem_size);
1025 Mat src1(n, n, type, (uchar*)buf);
1026 src.copyTo(src1);
1027 setIdentity(dst);
1028
1029 if( method == DECOMP_LU && type == CV_32F )
1030 result = hal::LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
1031 else if( method == DECOMP_LU && type == CV_64F )
1032 result = hal::LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
1033 else if( method == DECOMP_CHOLESKY && type == CV_32F )
1034 result = hal::Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
1035 else
1036 result = hal::Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
1037
1038 if( !result )
1039 dst = Scalar(0);
1040
1041 return result;
1042 }
1043
1044
1045
1046 /****************************************************************************************\
1047 * Solving a linear system *
1048 \****************************************************************************************/
1049
solve(InputArray _src,InputArray _src2arg,OutputArray _dst,int method)1050 bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
1051 {
1052 bool result = true;
1053 Mat src = _src.getMat(), _src2 = _src2arg.getMat();
1054 int type = src.type();
1055 bool is_normal = (method & DECOMP_NORMAL) != 0;
1056
1057 CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );
1058
1059 method &= ~DECOMP_NORMAL;
1060 CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
1061 is_normal || src.rows == src.cols );
1062
1063 // check case of a single equation and small matrix
1064 if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
1065 src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
1066 {
1067 _dst.create( src.cols, _src2.cols, src.type() );
1068 Mat dst = _dst.getMat();
1069
1070 #define bf(y) ((float*)(bdata + y*src2step))[0]
1071 #define bd(y) ((double*)(bdata + y*src2step))[0]
1072
1073 const uchar* srcdata = src.ptr();
1074 const uchar* bdata = _src2.ptr();
1075 uchar* dstdata = dst.ptr();
1076 size_t srcstep = src.step;
1077 size_t src2step = _src2.step;
1078 size_t dststep = dst.step;
1079
1080 if( src.rows == 2 )
1081 {
1082 if( type == CV_32FC1 )
1083 {
1084 double d = det2(Sf);
1085 if( d != 0. )
1086 {
1087 double t;
1088 d = 1./d;
1089 t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
1090 Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
1091 Df(0,0) = (float)t;
1092 }
1093 else
1094 result = false;
1095 }
1096 else
1097 {
1098 double d = det2(Sd);
1099 if( d != 0. )
1100 {
1101 double t;
1102 d = 1./d;
1103 t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
1104 Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
1105 Dd(0,0) = t;
1106 }
1107 else
1108 result = false;
1109 }
1110 }
1111 else if( src.rows == 3 )
1112 {
1113 if( type == CV_32FC1 )
1114 {
1115 double d = det3(Sf);
1116 if( d != 0. )
1117 {
1118 float t[3];
1119 d = 1./d;
1120
1121 t[0] = (float)(d*
1122 (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
1123 Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
1124 Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
1125
1126 t[1] = (float)(d*
1127 (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
1128 bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
1129 Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
1130
1131 t[2] = (float)(d*
1132 (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
1133 Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
1134 bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
1135
1136 Df(0,0) = t[0];
1137 Df(1,0) = t[1];
1138 Df(2,0) = t[2];
1139 }
1140 else
1141 result = false;
1142 }
1143 else
1144 {
1145 double d = det3(Sd);
1146 if( d != 0. )
1147 {
1148 double t[9];
1149
1150 d = 1./d;
1151
1152 t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
1153 (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
1154 (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;
1155
1156 t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
1157 (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
1158 (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;
1159
1160 t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
1161 (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
1162 (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;
1163
1164 Dd(0,0) = t[0];
1165 Dd(1,0) = t[1];
1166 Dd(2,0) = t[2];
1167 }
1168 else
1169 result = false;
1170 }
1171 }
1172 else
1173 {
1174 assert( src.rows == 1 );
1175
1176 if( type == CV_32FC1 )
1177 {
1178 double d = Sf(0,0);
1179 if( d != 0. )
1180 Df(0,0) = (float)(bf(0)/d);
1181 else
1182 result = false;
1183 }
1184 else
1185 {
1186 double d = Sd(0,0);
1187 if( d != 0. )
1188 Dd(0,0) = (bd(0)/d);
1189 else
1190 result = false;
1191 }
1192 }
1193 return result;
1194 }
1195
1196 if( method == DECOMP_QR )
1197 method = DECOMP_SVD;
1198
1199 int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
1200 size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
1201 size_t vstep = alignSize(n*esz, 16);
1202 size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
1203 AutoBuffer<uchar> buffer;
1204
1205 Mat src2 = _src2;
1206 _dst.create( src.cols, src2.cols, src.type() );
1207 Mat dst = _dst.getMat();
1208
1209 if( m < n )
1210 CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );
1211
1212 if( m == n )
1213 is_normal = false;
1214 else if( is_normal )
1215 {
1216 m_ = n;
1217 if( method == DECOMP_SVD )
1218 method = DECOMP_EIG;
1219 }
1220
1221 size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
1222 bufsize += asize + 32;
1223
1224 if( is_normal )
1225 bufsize += n*nb*esz;
1226
1227 if( method == DECOMP_SVD || method == DECOMP_EIG )
1228 bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
1229
1230 buffer.allocate(bufsize);
1231 uchar* ptr = alignPtr((uchar*)buffer, 16);
1232
1233 Mat a(m_, n, type, ptr, astep);
1234
1235 if( is_normal )
1236 mulTransposed(src, a, true);
1237 else if( method != DECOMP_SVD )
1238 src.copyTo(a);
1239 else
1240 {
1241 a = Mat(n, m_, type, ptr, astep);
1242 transpose(src, a);
1243 }
1244 ptr += asize;
1245
1246 if( !is_normal )
1247 {
1248 if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1249 src2.copyTo(dst);
1250 }
1251 else
1252 {
1253 // a'*b
1254 if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1255 gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
1256 else
1257 {
1258 Mat tmp(n, nb, type, ptr);
1259 ptr += n*nb*esz;
1260 gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
1261 src2 = tmp;
1262 }
1263 }
1264
1265 if( method == DECOMP_LU )
1266 {
1267 if( type == CV_32F )
1268 result = hal::LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
1269 else
1270 result = hal::LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
1271 }
1272 else if( method == DECOMP_CHOLESKY )
1273 {
1274 if( type == CV_32F )
1275 result = hal::Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
1276 else
1277 result = hal::Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
1278 }
1279 else
1280 {
1281 ptr = alignPtr(ptr, 16);
1282 Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
1283 ptr += n*(vstep + esz);
1284
1285 if( method == DECOMP_EIG )
1286 {
1287 if( type == CV_32F )
1288 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
1289 else
1290 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1291 u = v;
1292 }
1293 else
1294 {
1295 if( type == CV_32F )
1296 JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
1297 else
1298 JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
1299 u = a;
1300 }
1301
1302 if( type == CV_32F )
1303 {
1304 SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
1305 v.ptr<float>(), v.step, true, src2.ptr<float>(),
1306 src2.step, nb, dst.ptr<float>(), dst.step, ptr);
1307 }
1308 else
1309 {
1310 SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
1311 v.ptr<double>(), v.step, true, src2.ptr<double>(),
1312 src2.step, nb, dst.ptr<double>(), dst.step, ptr);
1313 }
1314 result = true;
1315 }
1316
1317 if( !result )
1318 dst = Scalar(0);
1319
1320 return result;
1321 }
1322
1323
1324 /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
1325
eigen(InputArray _src,OutputArray _evals,OutputArray _evects)1326 bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
1327 {
1328 Mat src = _src.getMat();
1329 int type = src.type();
1330 int n = src.rows;
1331
1332 CV_Assert( src.rows == src.cols );
1333 CV_Assert (type == CV_32F || type == CV_64F);
1334
1335 Mat v;
1336 if( _evects.needed() )
1337 {
1338 _evects.create(n, n, type);
1339 v = _evects.getMat();
1340 }
1341
1342 size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
1343 AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
1344 uchar* ptr = alignPtr((uchar*)buf, 16);
1345 Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
1346 ptr += astep*n + elemSize*n;
1347 src.copyTo(a);
1348 bool ok = type == CV_32F ?
1349 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
1350 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1351
1352 w.copyTo(_evals);
1353 return ok;
1354 }
1355
1356 namespace cv
1357 {
1358
_SVDcompute(InputArray _aarr,OutputArray _w,OutputArray _u,OutputArray _vt,int flags)1359 static void _SVDcompute( InputArray _aarr, OutputArray _w,
1360 OutputArray _u, OutputArray _vt, int flags )
1361 {
1362 Mat src = _aarr.getMat();
1363 int m = src.rows, n = src.cols;
1364 int type = src.type();
1365 bool compute_uv = _u.needed() || _vt.needed();
1366 bool full_uv = (flags & SVD::FULL_UV) != 0;
1367
1368 CV_Assert( type == CV_32F || type == CV_64F );
1369
1370 if( flags & SVD::NO_UV )
1371 {
1372 _u.release();
1373 _vt.release();
1374 compute_uv = full_uv = false;
1375 }
1376
1377 bool at = false;
1378 if( m < n )
1379 {
1380 std::swap(m, n);
1381 at = true;
1382 }
1383
1384 int urows = full_uv ? m : n;
1385 size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
1386 AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
1387 uchar* buf = alignPtr((uchar*)_buf, 16);
1388 Mat temp_a(n, m, type, buf, astep);
1389 Mat temp_w(n, 1, type, buf + urows*astep);
1390 Mat temp_u(urows, m, type, buf, astep), temp_v;
1391
1392 if( compute_uv )
1393 temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
1394
1395 if( urows > n )
1396 temp_u = Scalar::all(0);
1397
1398 if( !at )
1399 transpose(src, temp_a);
1400 else
1401 src.copyTo(temp_a);
1402
1403 if( type == CV_32F )
1404 {
1405 JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
1406 temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
1407 }
1408 else
1409 {
1410 JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
1411 temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
1412 }
1413 temp_w.copyTo(_w);
1414 if( compute_uv )
1415 {
1416 if( !at )
1417 {
1418 if( _u.needed() )
1419 transpose(temp_u, _u);
1420 if( _vt.needed() )
1421 temp_v.copyTo(_vt);
1422 }
1423 else
1424 {
1425 if( _u.needed() )
1426 transpose(temp_v, _u);
1427 if( _vt.needed() )
1428 temp_u.copyTo(_vt);
1429 }
1430 }
1431 }
1432
1433
compute(InputArray a,OutputArray w,OutputArray u,OutputArray vt,int flags)1434 void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
1435 {
1436 _SVDcompute(a, w, u, vt, flags);
1437 }
1438
compute(InputArray a,OutputArray w,int flags)1439 void SVD::compute( InputArray a, OutputArray w, int flags )
1440 {
1441 _SVDcompute(a, w, noArray(), noArray(), flags);
1442 }
1443
backSubst(InputArray _w,InputArray _u,InputArray _vt,InputArray _rhs,OutputArray _dst)1444 void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
1445 InputArray _rhs, OutputArray _dst )
1446 {
1447 Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
1448 int type = w.type(), esz = (int)w.elemSize();
1449 int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
1450 size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
1451 AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
1452 CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
1453 CV_Assert( u.cols >= nm && vt.rows >= nm &&
1454 (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
1455 CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
1456
1457 _dst.create( n, nb, type );
1458 Mat dst = _dst.getMat();
1459 if( type == CV_32F )
1460 SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
1461 vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
1462 dst.ptr<float>(), dst.step, buffer);
1463 else if( type == CV_64F )
1464 SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
1465 vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
1466 dst.ptr<double>(), dst.step, buffer);
1467 else
1468 CV_Error( CV_StsUnsupportedFormat, "" );
1469 }
1470
1471
operator ()(InputArray a,int flags)1472 SVD& SVD::operator ()(InputArray a, int flags)
1473 {
1474 _SVDcompute(a, w, u, vt, flags);
1475 return *this;
1476 }
1477
1478
backSubst(InputArray rhs,OutputArray dst) const1479 void SVD::backSubst( InputArray rhs, OutputArray dst ) const
1480 {
1481 backSubst( w, u, vt, rhs, dst );
1482 }
1483
1484 }
1485
1486
SVDecomp(InputArray src,OutputArray w,OutputArray u,OutputArray vt,int flags)1487 void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
1488 {
1489 SVD::compute(src, w, u, vt, flags);
1490 }
1491
SVBackSubst(InputArray w,InputArray u,InputArray vt,InputArray rhs,OutputArray dst)1492 void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
1493 {
1494 SVD::backSubst(w, u, vt, rhs, dst);
1495 }
1496
1497
1498 CV_IMPL double
cvDet(const CvArr * arr)1499 cvDet( const CvArr* arr )
1500 {
1501 if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
1502 {
1503 CvMat* mat = (CvMat*)arr;
1504 int type = CV_MAT_TYPE(mat->type);
1505 int rows = mat->rows;
1506 uchar* m = mat->data.ptr;
1507 int step = mat->step;
1508 CV_Assert( rows == mat->cols );
1509
1510 #define Mf(y, x) ((float*)(m + y*step))[x]
1511 #define Md(y, x) ((double*)(m + y*step))[x]
1512
1513 if( type == CV_32F )
1514 {
1515 if( rows == 2 )
1516 return det2(Mf);
1517 if( rows == 3 )
1518 return det3(Mf);
1519 }
1520 else if( type == CV_64F )
1521 {
1522 if( rows == 2 )
1523 return det2(Md);
1524 if( rows == 3 )
1525 return det3(Md);
1526 }
1527 }
1528 return cv::determinant(cv::cvarrToMat(arr));
1529 }
1530
1531
1532 CV_IMPL double
cvInvert(const CvArr * srcarr,CvArr * dstarr,int method)1533 cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
1534 {
1535 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1536
1537 CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
1538 return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1539 method == CV_SVD ? cv::DECOMP_SVD :
1540 method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
1541 }
1542
1543
1544 CV_IMPL int
cvSolve(const CvArr * Aarr,const CvArr * barr,CvArr * xarr,int method)1545 cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
1546 {
1547 cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);
1548
1549 CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
1550 bool is_normal = (method & CV_NORMAL) != 0;
1551 method &= ~CV_NORMAL;
1552 return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1553 method == CV_SVD ? cv::DECOMP_SVD :
1554 method == CV_SVD_SYM ? cv::DECOMP_EIG :
1555 A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
1556 }
1557
1558
1559 CV_IMPL void
cvEigenVV(CvArr * srcarr,CvArr * evectsarr,CvArr * evalsarr,double,int,int)1560 cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
1561 int, int )
1562 {
1563 cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
1564 if( evectsarr )
1565 {
1566 cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
1567 eigen(src, evals, evects);
1568 if( evects0.data != evects.data )
1569 {
1570 const uchar* p = evects0.ptr();
1571 evects.convertTo(evects0, evects0.type());
1572 CV_Assert( p == evects0.ptr() );
1573 }
1574 }
1575 else
1576 eigen(src, evals);
1577 if( evals0.data != evals.data )
1578 {
1579 const uchar* p = evals0.ptr();
1580 if( evals0.size() == evals.size() )
1581 evals.convertTo(evals0, evals0.type());
1582 else if( evals0.type() == evals.type() )
1583 cv::transpose(evals, evals0);
1584 else
1585 cv::Mat(evals.t()).convertTo(evals0, evals0.type());
1586 CV_Assert( p == evals0.ptr() );
1587 }
1588 }
1589
1590
1591 CV_IMPL void
cvSVD(CvArr * aarr,CvArr * warr,CvArr * uarr,CvArr * varr,int flags)1592 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
1593 {
1594 cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
1595 int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);
1596
1597 CV_Assert( w.type() == type &&
1598 (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
1599 w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );
1600
1601 cv::SVD svd;
1602
1603 if( w.size() == cv::Size(nm, 1) )
1604 svd.w = cv::Mat(nm, 1, type, w.ptr() );
1605 else if( w.isContinuous() )
1606 svd.w = w;
1607
1608 if( uarr )
1609 {
1610 u = cv::cvarrToMat(uarr);
1611 CV_Assert( u.type() == type );
1612 svd.u = u;
1613 }
1614
1615 if( varr )
1616 {
1617 v = cv::cvarrToMat(varr);
1618 CV_Assert( v.type() == type );
1619 svd.vt = v;
1620 }
1621
1622 svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
1623 ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
1624 ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
1625 svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));
1626
1627 if( !u.empty() )
1628 {
1629 if( flags & CV_SVD_U_T )
1630 cv::transpose( svd.u, u );
1631 else if( u.data != svd.u.data )
1632 {
1633 CV_Assert( u.size() == svd.u.size() );
1634 svd.u.copyTo(u);
1635 }
1636 }
1637
1638 if( !v.empty() )
1639 {
1640 if( !(flags & CV_SVD_V_T) )
1641 cv::transpose( svd.vt, v );
1642 else if( v.data != svd.vt.data )
1643 {
1644 CV_Assert( v.size() == svd.vt.size() );
1645 svd.vt.copyTo(v);
1646 }
1647 }
1648
1649 if( w.data != svd.w.data )
1650 {
1651 if( w.size() == svd.w.size() )
1652 svd.w.copyTo(w);
1653 else
1654 {
1655 w = cv::Scalar(0);
1656 cv::Mat wd = w.diag();
1657 svd.w.copyTo(wd);
1658 }
1659 }
1660 }
1661
1662
1663 CV_IMPL void
cvSVBkSb(const CvArr * warr,const CvArr * uarr,const CvArr * varr,const CvArr * rhsarr,CvArr * dstarr,int flags)1664 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
1665 const CvArr* varr, const CvArr* rhsarr,
1666 CvArr* dstarr, int flags )
1667 {
1668 cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
1669 v = cv::cvarrToMat(varr), rhs,
1670 dst = cv::cvarrToMat(dstarr), dst0 = dst;
1671 if( flags & CV_SVD_U_T )
1672 {
1673 cv::Mat tmp;
1674 transpose(u, tmp);
1675 u = tmp;
1676 }
1677 if( !(flags & CV_SVD_V_T) )
1678 {
1679 cv::Mat tmp;
1680 transpose(v, tmp);
1681 v = tmp;
1682 }
1683 if( rhsarr )
1684 rhs = cv::cvarrToMat(rhsarr);
1685
1686 cv::SVD::backSubst(w, u, v, rhs, dst);
1687 CV_Assert( dst.data == dst0.data );
1688 }
1689