1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 #include <limits> 45 46 #if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700 47 #pragma float_control(precise, on) 48 #endif 49 50 namespace cv 51 { 52 LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)53 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n) 54 { 55 return hal::LU(A, astep, m, b, bstep, n); 56 } 57 LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)58 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n) 59 { 60 return hal::LU(A, astep, m, b, bstep, n); 61 } 62 Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)63 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n) 64 { 65 return hal::Cholesky(A, astep, m, b, bstep, n); 66 } 67 Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)68 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n) 69 { 70 return hal::Cholesky(A, astep, m, b, bstep, n); 71 } 72 hypot(_Tp a,_Tp b)73 template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b) 74 { 75 a = std::abs(a); 76 b = std::abs(b); 77 if( a > b ) 78 { 79 b /= a; 80 return a*std::sqrt(1 + b*b); 81 } 82 if( b > 0 ) 83 { 84 a /= b; 85 return b*std::sqrt(1 + a*a); 86 } 87 return 0; 88 } 89 90 91 template<typename _Tp> bool JacobiImpl_(_Tp * A,size_t astep,_Tp * W,_Tp * V,size_t vstep,int n,uchar * buf)92 JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf ) 93 { 94 const _Tp eps = std::numeric_limits<_Tp>::epsilon(); 95 int i, j, k, m; 96 97 astep /= sizeof(A[0]); 98 if( V ) 99 { 100 vstep /= sizeof(V[0]); 101 for( i = 0; i < n; i++ ) 102 { 103 for( j = 0; j < n; j++ ) 104 V[i*vstep + j] = (_Tp)0; 105 V[i*vstep + i] = (_Tp)1; 106 } 107 } 108 109 int iters, maxIters = n*n*30; 110 111 int* indR = (int*)alignPtr(buf, sizeof(int)); 112 int* indC = indR + n; 113 _Tp mv = (_Tp)0; 114 115 for( k = 0; k < n; k++ ) 116 { 117 W[k] = A[(astep + 1)*k]; 118 if( k < n - 1 ) 119 { 120 for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ ) 121 { 122 _Tp val = std::abs(A[astep*k+i]); 123 if( mv < val ) 124 mv = val, m = i; 125 } 126 indR[k] = m; 127 } 128 if( k > 0 ) 129 { 130 for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ ) 131 { 132 _Tp val = std::abs(A[astep*i+k]); 133 if( mv < val ) 134 mv = val, m = i; 135 } 136 indC[k] = m; 137 } 138 } 139 140 if( n > 1 ) for( iters = 0; iters < maxIters; iters++ ) 141 { 142 // find index (k,l) of pivot p 143 for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ ) 144 { 145 _Tp val = std::abs(A[astep*i + indR[i]]); 146 if( mv < val ) 147 mv = val, k = i; 148 } 149 int l = indR[k]; 150 for( i = 1; i < n; i++ ) 151 { 152 _Tp val = std::abs(A[astep*indC[i] + i]); 153 if( mv < val ) 154 mv = val, k = indC[i], l = i; 155 } 156 157 _Tp p = A[astep*k + l]; 158 if( std::abs(p) <= eps ) 159 break; 160 _Tp y = (_Tp)((W[l] - W[k])*0.5); 161 _Tp t = std::abs(y) + hypot(p, y); 162 _Tp s = hypot(p, t); 163 _Tp c = t/s; 164 s = p/s; t = (p/t)*p; 165 if( y < 0 ) 166 s = -s, t = -t; 167 A[astep*k + l] = 0; 168 169 W[k] -= t; 170 W[l] += t; 171 172 _Tp a0, b0; 173 174 #undef rotate 175 #define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c 176 177 // rotate rows and columns k and l 178 for( i = 0; i < k; i++ ) 179 rotate(A[astep*i+k], A[astep*i+l]); 180 for( i = k+1; i < l; i++ ) 181 rotate(A[astep*k+i], A[astep*i+l]); 182 for( i = l+1; i < n; i++ ) 183 rotate(A[astep*k+i], A[astep*l+i]); 184 185 // rotate eigenvectors 186 if( V ) 187 for( i = 0; i < n; i++ ) 188 rotate(V[vstep*k+i], V[vstep*l+i]); 189 190 #undef rotate 191 192 for( j = 0; j < 2; j++ ) 193 { 194 int idx = j == 0 ? k : l; 195 if( idx < n - 1 ) 196 { 197 for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ ) 198 { 199 _Tp val = std::abs(A[astep*idx+i]); 200 if( mv < val ) 201 mv = val, m = i; 202 } 203 indR[idx] = m; 204 } 205 if( idx > 0 ) 206 { 207 for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ ) 208 { 209 _Tp val = std::abs(A[astep*i+idx]); 210 if( mv < val ) 211 mv = val, m = i; 212 } 213 indC[idx] = m; 214 } 215 } 216 } 217 218 // sort eigenvalues & eigenvectors 219 for( k = 0; k < n-1; k++ ) 220 { 221 m = k; 222 for( i = k+1; i < n; i++ ) 223 { 224 if( W[m] < W[i] ) 225 m = i; 226 } 227 if( k != m ) 228 { 229 std::swap(W[m], W[k]); 230 if( V ) 231 for( i = 0; i < n; i++ ) 232 std::swap(V[vstep*m + i], V[vstep*k + i]); 233 } 234 } 235 236 return true; 237 } 238 Jacobi(float * S,size_t sstep,float * e,float * E,size_t estep,int n,uchar * buf)239 static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf ) 240 { 241 return JacobiImpl_(S, sstep, e, E, estep, n, buf); 242 } 243 Jacobi(double * S,size_t sstep,double * e,double * E,size_t estep,int n,uchar * buf)244 static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf ) 245 { 246 return JacobiImpl_(S, sstep, e, E, estep, n, buf); 247 } 248 249 250 template<typename T> struct VBLAS 251 { dotcv::VBLAS252 int dot(const T*, const T*, int, T*) const { return 0; } givenscv::VBLAS253 int givens(T*, T*, int, T, T) const { return 0; } givensxcv::VBLAS254 int givensx(T*, T*, int, T, T, T*, T*) const { return 0; } 255 }; 256 257 #if CV_SSE2 dot(const float * a,const float * b,int n,float * result) const258 template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const 259 { 260 if( n < 8 ) 261 return 0; 262 int k = 0; 263 __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps(); 264 for( ; k <= n - 8; k += 8 ) 265 { 266 __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4); 267 __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4); 268 269 s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0)); 270 s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1)); 271 } 272 s0 = _mm_add_ps(s0, s1); 273 float sbuf[4]; 274 _mm_storeu_ps(sbuf, s0); 275 *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3]; 276 return k; 277 } 278 279 givens(float * a,float * b,int n,float c,float s) const280 template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const 281 { 282 if( n < 4 ) 283 return 0; 284 int k = 0; 285 __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s); 286 for( ; k <= n - 4; k += 4 ) 287 { 288 __m128 a0 = _mm_load_ps(a + k); 289 __m128 b0 = _mm_load_ps(b + k); 290 __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4)); 291 __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4)); 292 _mm_store_ps(a + k, t0); 293 _mm_store_ps(b + k, t1); 294 } 295 return k; 296 } 297 298 givensx(float * a,float * b,int n,float c,float s,float * anorm,float * bnorm) const299 template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s, 300 float* anorm, float* bnorm) const 301 { 302 if( n < 4 ) 303 return 0; 304 int k = 0; 305 __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s); 306 __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps(); 307 for( ; k <= n - 4; k += 4 ) 308 { 309 __m128 a0 = _mm_load_ps(a + k); 310 __m128 b0 = _mm_load_ps(b + k); 311 __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4)); 312 __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4)); 313 _mm_store_ps(a + k, t0); 314 _mm_store_ps(b + k, t1); 315 sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0)); 316 sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1)); 317 } 318 float abuf[4], bbuf[4]; 319 _mm_storeu_ps(abuf, sa); 320 _mm_storeu_ps(bbuf, sb); 321 *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3]; 322 *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3]; 323 return k; 324 } 325 326 dot(const double * a,const double * b,int n,double * result) const327 template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const 328 { 329 if( n < 4 ) 330 return 0; 331 int k = 0; 332 __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd(); 333 for( ; k <= n - 4; k += 4 ) 334 { 335 __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2); 336 __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2); 337 338 s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0)); 339 s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1)); 340 } 341 s0 = _mm_add_pd(s0, s1); 342 double sbuf[2]; 343 _mm_storeu_pd(sbuf, s0); 344 *result = sbuf[0] + sbuf[1]; 345 return k; 346 } 347 348 givens(double * a,double * b,int n,double c,double s) const349 template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const 350 { 351 int k = 0; 352 __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s); 353 for( ; k <= n - 2; k += 2 ) 354 { 355 __m128d a0 = _mm_load_pd(a + k); 356 __m128d b0 = _mm_load_pd(b + k); 357 __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2)); 358 __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2)); 359 _mm_store_pd(a + k, t0); 360 _mm_store_pd(b + k, t1); 361 } 362 return k; 363 } 364 365 givensx(double * a,double * b,int n,double c,double s,double * anorm,double * bnorm) const366 template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s, 367 double* anorm, double* bnorm) const 368 { 369 int k = 0; 370 __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s); 371 __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd(); 372 for( ; k <= n - 2; k += 2 ) 373 { 374 __m128d a0 = _mm_load_pd(a + k); 375 __m128d b0 = _mm_load_pd(b + k); 376 __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2)); 377 __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2)); 378 _mm_store_pd(a + k, t0); 379 _mm_store_pd(b + k, t1); 380 sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0)); 381 sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1)); 382 } 383 double abuf[2], bbuf[2]; 384 _mm_storeu_pd(abuf, sa); 385 _mm_storeu_pd(bbuf, sb); 386 *anorm = abuf[0] + abuf[1]; 387 *bnorm = bbuf[0] + bbuf[1]; 388 return k; 389 } 390 #endif 391 392 template<typename _Tp> void JacobiSVDImpl_(_Tp * At,size_t astep,_Tp * _W,_Tp * Vt,size_t vstep,int m,int n,int n1,double minval,_Tp eps)393 JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, 394 int m, int n, int n1, double minval, _Tp eps) 395 { 396 VBLAS<_Tp> vblas; 397 AutoBuffer<double> Wbuf(n); 398 double* W = Wbuf; 399 int i, j, k, iter, max_iter = std::max(m, 30); 400 _Tp c, s; 401 double sd; 402 astep /= sizeof(At[0]); 403 vstep /= sizeof(Vt[0]); 404 405 for( i = 0; i < n; i++ ) 406 { 407 for( k = 0, sd = 0; k < m; k++ ) 408 { 409 _Tp t = At[i*astep + k]; 410 sd += (double)t*t; 411 } 412 W[i] = sd; 413 414 if( Vt ) 415 { 416 for( k = 0; k < n; k++ ) 417 Vt[i*vstep + k] = 0; 418 Vt[i*vstep + i] = 1; 419 } 420 } 421 422 for( iter = 0; iter < max_iter; iter++ ) 423 { 424 bool changed = false; 425 426 for( i = 0; i < n-1; i++ ) 427 for( j = i+1; j < n; j++ ) 428 { 429 _Tp *Ai = At + i*astep, *Aj = At + j*astep; 430 double a = W[i], p = 0, b = W[j]; 431 432 for( k = 0; k < m; k++ ) 433 p += (double)Ai[k]*Aj[k]; 434 435 if( std::abs(p) <= eps*std::sqrt((double)a*b) ) 436 continue; 437 438 p *= 2; 439 double beta = a - b, gamma = hypot((double)p, beta); 440 if( beta < 0 ) 441 { 442 double delta = (gamma - beta)*0.5; 443 s = (_Tp)std::sqrt(delta/gamma); 444 c = (_Tp)(p/(gamma*s*2)); 445 } 446 else 447 { 448 c = (_Tp)std::sqrt((gamma + beta)/(gamma*2)); 449 s = (_Tp)(p/(gamma*c*2)); 450 } 451 452 a = b = 0; 453 for( k = 0; k < m; k++ ) 454 { 455 _Tp t0 = c*Ai[k] + s*Aj[k]; 456 _Tp t1 = -s*Ai[k] + c*Aj[k]; 457 Ai[k] = t0; Aj[k] = t1; 458 459 a += (double)t0*t0; b += (double)t1*t1; 460 } 461 W[i] = a; W[j] = b; 462 463 changed = true; 464 465 if( Vt ) 466 { 467 _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep; 468 k = vblas.givens(Vi, Vj, n, c, s); 469 470 for( ; k < n; k++ ) 471 { 472 _Tp t0 = c*Vi[k] + s*Vj[k]; 473 _Tp t1 = -s*Vi[k] + c*Vj[k]; 474 Vi[k] = t0; Vj[k] = t1; 475 } 476 } 477 } 478 if( !changed ) 479 break; 480 } 481 482 for( i = 0; i < n; i++ ) 483 { 484 for( k = 0, sd = 0; k < m; k++ ) 485 { 486 _Tp t = At[i*astep + k]; 487 sd += (double)t*t; 488 } 489 W[i] = std::sqrt(sd); 490 } 491 492 for( i = 0; i < n-1; i++ ) 493 { 494 j = i; 495 for( k = i+1; k < n; k++ ) 496 { 497 if( W[j] < W[k] ) 498 j = k; 499 } 500 if( i != j ) 501 { 502 std::swap(W[i], W[j]); 503 if( Vt ) 504 { 505 for( k = 0; k < m; k++ ) 506 std::swap(At[i*astep + k], At[j*astep + k]); 507 508 for( k = 0; k < n; k++ ) 509 std::swap(Vt[i*vstep + k], Vt[j*vstep + k]); 510 } 511 } 512 } 513 514 for( i = 0; i < n; i++ ) 515 _W[i] = (_Tp)W[i]; 516 517 if( !Vt ) 518 return; 519 520 RNG rng(0x12345678); 521 for( i = 0; i < n1; i++ ) 522 { 523 sd = i < n ? W[i] : 0; 524 525 for( int ii = 0; ii < 100 && sd <= minval; ii++ ) 526 { 527 // if we got a zero singular value, then in order to get the corresponding left singular vector 528 // we generate a random vector, project it to the previously computed left singular vectors, 529 // subtract the projection and normalize the difference. 530 const _Tp val0 = (_Tp)(1./m); 531 for( k = 0; k < m; k++ ) 532 { 533 _Tp val = (rng.next() & 256) != 0 ? val0 : -val0; 534 At[i*astep + k] = val; 535 } 536 for( iter = 0; iter < 2; iter++ ) 537 { 538 for( j = 0; j < i; j++ ) 539 { 540 sd = 0; 541 for( k = 0; k < m; k++ ) 542 sd += At[i*astep + k]*At[j*astep + k]; 543 _Tp asum = 0; 544 for( k = 0; k < m; k++ ) 545 { 546 _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]); 547 At[i*astep + k] = t; 548 asum += std::abs(t); 549 } 550 asum = asum > eps*100 ? 1/asum : 0; 551 for( k = 0; k < m; k++ ) 552 At[i*astep + k] *= asum; 553 } 554 } 555 sd = 0; 556 for( k = 0; k < m; k++ ) 557 { 558 _Tp t = At[i*astep + k]; 559 sd += (double)t*t; 560 } 561 sd = std::sqrt(sd); 562 } 563 564 s = (_Tp)(sd > minval ? 1/sd : 0.); 565 for( k = 0; k < m; k++ ) 566 At[i*astep + k] *= s; 567 } 568 } 569 570 JacobiSVD(float * At,size_t astep,float * W,float * Vt,size_t vstep,int m,int n,int n1=-1)571 static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1) 572 { 573 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2); 574 } 575 JacobiSVD(double * At,size_t astep,double * W,double * Vt,size_t vstep,int m,int n,int n1=-1)576 static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1) 577 { 578 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10); 579 } 580 581 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */ 582 template<typename T1, typename T2, typename T3> static void MatrAXPY(int m,int n,const T1 * x,int dx,const T2 * a,int inca,T3 * y,int dy)583 MatrAXPY( int m, int n, const T1* x, int dx, 584 const T2* a, int inca, T3* y, int dy ) 585 { 586 int i; 587 for( i = 0; i < m; i++, x += dx, y += dy ) 588 { 589 T2 s = a[i*inca]; 590 int j = 0; 591 #if CV_ENABLE_UNROLLED 592 for(; j <= n - 4; j += 4 ) 593 { 594 T3 t0 = (T3)(y[j] + s*x[j]); 595 T3 t1 = (T3)(y[j+1] + s*x[j+1]); 596 y[j] = t0; 597 y[j+1] = t1; 598 t0 = (T3)(y[j+2] + s*x[j+2]); 599 t1 = (T3)(y[j+3] + s*x[j+3]); 600 y[j+2] = t0; 601 y[j+3] = t1; 602 } 603 #endif 604 for( ; j < n; j++ ) 605 y[j] = (T3)(y[j] + s*x[j]); 606 } 607 } 608 609 template<typename T> static void SVBkSbImpl_(int m,int n,const T * w,int incw,const T * u,int ldu,bool uT,const T * v,int ldv,bool vT,const T * b,int ldb,int nb,T * x,int ldx,double * buffer,T eps)610 SVBkSbImpl_( int m, int n, const T* w, int incw, 611 const T* u, int ldu, bool uT, 612 const T* v, int ldv, bool vT, 613 const T* b, int ldb, int nb, 614 T* x, int ldx, double* buffer, T eps ) 615 { 616 double threshold = 0; 617 int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu; 618 int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv; 619 int i, j, nm = std::min(m, n); 620 621 if( !b ) 622 nb = m; 623 624 for( i = 0; i < n; i++ ) 625 for( j = 0; j < nb; j++ ) 626 x[i*ldx + j] = 0; 627 628 for( i = 0; i < nm; i++ ) 629 threshold += w[i*incw]; 630 threshold *= eps; 631 632 // v * inv(w) * uT * b 633 for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 ) 634 { 635 double wi = w[i*incw]; 636 if( (double)std::abs(wi) <= threshold ) 637 continue; 638 wi = 1/wi; 639 640 if( nb == 1 ) 641 { 642 double s = 0; 643 if( b ) 644 for( j = 0; j < m; j++ ) 645 s += u[j*udelta1]*b[j*ldb]; 646 else 647 s = u[0]; 648 s *= wi; 649 650 for( j = 0; j < n; j++ ) 651 x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]); 652 } 653 else 654 { 655 if( b ) 656 { 657 for( j = 0; j < nb; j++ ) 658 buffer[j] = 0; 659 MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 ); 660 for( j = 0; j < nb; j++ ) 661 buffer[j] *= wi; 662 } 663 else 664 { 665 for( j = 0; j < nb; j++ ) 666 buffer[j] = u[j*udelta1]*wi; 667 } 668 MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx ); 669 } 670 } 671 } 672 673 static void SVBkSb(int m,int n,const float * w,size_t wstep,const float * u,size_t ustep,bool uT,const float * v,size_t vstep,bool vT,const float * b,size_t bstep,int nb,float * x,size_t xstep,uchar * buffer)674 SVBkSb( int m, int n, const float* w, size_t wstep, 675 const float* u, size_t ustep, bool uT, 676 const float* v, size_t vstep, bool vT, 677 const float* b, size_t bstep, int nb, 678 float* x, size_t xstep, uchar* buffer ) 679 { 680 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1, 681 u, (int)(ustep/sizeof(u[0])), uT, 682 v, (int)(vstep/sizeof(v[0])), vT, 683 b, (int)(bstep/sizeof(b[0])), nb, 684 x, (int)(xstep/sizeof(x[0])), 685 (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) ); 686 } 687 688 static void SVBkSb(int m,int n,const double * w,size_t wstep,const double * u,size_t ustep,bool uT,const double * v,size_t vstep,bool vT,const double * b,size_t bstep,int nb,double * x,size_t xstep,uchar * buffer)689 SVBkSb( int m, int n, const double* w, size_t wstep, 690 const double* u, size_t ustep, bool uT, 691 const double* v, size_t vstep, bool vT, 692 const double* b, size_t bstep, int nb, 693 double* x, size_t xstep, uchar* buffer ) 694 { 695 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1, 696 u, (int)(ustep/sizeof(u[0])), uT, 697 v, (int)(vstep/sizeof(v[0])), vT, 698 b, (int)(bstep/sizeof(b[0])), nb, 699 x, (int)(xstep/sizeof(x[0])), 700 (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 ); 701 } 702 703 } 704 705 /****************************************************************************************\ 706 * Determinant of the matrix * 707 \****************************************************************************************/ 708 709 #define det2(m) ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0)) 710 #define det3(m) (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) - \ 711 m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \ 712 m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0))) 713 determinant(InputArray _mat)714 double cv::determinant( InputArray _mat ) 715 { 716 Mat mat = _mat.getMat(); 717 double result = 0; 718 int type = mat.type(), rows = mat.rows; 719 size_t step = mat.step; 720 const uchar* m = mat.ptr(); 721 722 CV_Assert( !mat.empty() ); 723 CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F)); 724 725 #define Mf(y, x) ((float*)(m + y*step))[x] 726 #define Md(y, x) ((double*)(m + y*step))[x] 727 728 if( type == CV_32F ) 729 { 730 if( rows == 2 ) 731 result = det2(Mf); 732 else if( rows == 3 ) 733 result = det3(Mf); 734 else if( rows == 1 ) 735 result = Mf(0,0); 736 else 737 { 738 size_t bufSize = rows*rows*sizeof(float); 739 AutoBuffer<uchar> buffer(bufSize); 740 Mat a(rows, rows, CV_32F, (uchar*)buffer); 741 mat.copyTo(a); 742 743 result = hal::LU(a.ptr<float>(), a.step, rows, 0, 0, 0); 744 if( result ) 745 { 746 for( int i = 0; i < rows; i++ ) 747 result *= a.at<float>(i,i); 748 result = 1./result; 749 } 750 } 751 } 752 else 753 { 754 if( rows == 2 ) 755 result = det2(Md); 756 else if( rows == 3 ) 757 result = det3(Md); 758 else if( rows == 1 ) 759 result = Md(0,0); 760 else 761 { 762 size_t bufSize = rows*rows*sizeof(double); 763 AutoBuffer<uchar> buffer(bufSize); 764 Mat a(rows, rows, CV_64F, (uchar*)buffer); 765 mat.copyTo(a); 766 767 result = hal::LU(a.ptr<double>(), a.step, rows, 0, 0, 0); 768 if( result ) 769 { 770 for( int i = 0; i < rows; i++ ) 771 result *= a.at<double>(i,i); 772 result = 1./result; 773 } 774 } 775 } 776 777 #undef Mf 778 #undef Md 779 780 return result; 781 } 782 783 /****************************************************************************************\ 784 * Inverse (or pseudo-inverse) of a matrix * 785 \****************************************************************************************/ 786 787 #define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x] 788 #define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x] 789 #define Df( y, x ) ((float*)(dstdata + y*dststep))[x] 790 #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x] 791 invert(InputArray _src,OutputArray _dst,int method)792 double cv::invert( InputArray _src, OutputArray _dst, int method ) 793 { 794 bool result = false; 795 Mat src = _src.getMat(); 796 int type = src.type(); 797 798 CV_Assert(type == CV_32F || type == CV_64F); 799 800 size_t esz = CV_ELEM_SIZE(type); 801 int m = src.rows, n = src.cols; 802 803 if( method == DECOMP_SVD ) 804 { 805 int nm = std::min(m, n); 806 807 AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double)); 808 uchar* buf = alignPtr((uchar*)_buf, (int)esz); 809 Mat u(m, nm, type, buf); 810 Mat w(nm, 1, type, u.ptr() + m*nm*esz); 811 Mat vt(nm, n, type, w.ptr() + nm*esz); 812 813 SVD::compute(src, w, u, vt); 814 SVD::backSubst(w, u, vt, Mat(), _dst); 815 return type == CV_32F ? 816 (w.ptr<float>()[0] >= FLT_EPSILON ? 817 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) : 818 (w.ptr<double>()[0] >= DBL_EPSILON ? 819 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0); 820 } 821 822 CV_Assert( m == n ); 823 824 if( method == DECOMP_EIG ) 825 { 826 AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double)); 827 uchar* buf = alignPtr((uchar*)_buf, (int)esz); 828 Mat u(n, n, type, buf); 829 Mat w(n, 1, type, u.ptr() + n*n*esz); 830 Mat vt(n, n, type, w.ptr() + n*esz); 831 832 eigen(src, w, vt); 833 transpose(vt, u); 834 SVD::backSubst(w, u, vt, Mat(), _dst); 835 return type == CV_32F ? 836 (w.ptr<float>()[0] >= FLT_EPSILON ? 837 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) : 838 (w.ptr<double>()[0] >= DBL_EPSILON ? 839 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0); 840 } 841 842 CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY ); 843 844 _dst.create( n, n, type ); 845 Mat dst = _dst.getMat(); 846 847 if( n <= 3 ) 848 { 849 const uchar* srcdata = src.ptr(); 850 uchar* dstdata = dst.ptr(); 851 size_t srcstep = src.step; 852 size_t dststep = dst.step; 853 854 if( n == 2 ) 855 { 856 if( type == CV_32FC1 ) 857 { 858 double d = det2(Sf); 859 if( d != 0. ) 860 { 861 result = true; 862 d = 1./d; 863 864 #if CV_SSE2 865 if(USE_SSE2) 866 { 867 __m128 zero = _mm_setzero_ps(); 868 __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1) 869 __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1) 870 __m128 s0 = _mm_or_ps(t0, t1); 871 __m128 det =_mm_set1_ps((float)d); 872 s0 = _mm_mul_ps(s0, det); 873 static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0}; 874 __m128 pattern = _mm_load_ps((const float*)inv); 875 s0 = _mm_xor_ps(s0, pattern);//==-1*s0 876 s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3)); 877 _mm_storel_pi((__m64*)dstdata, s0); 878 _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0); 879 } 880 else 881 #endif 882 { 883 double t0, t1; 884 t0 = Sf(0,0)*d; 885 t1 = Sf(1,1)*d; 886 Df(1,1) = (float)t0; 887 Df(0,0) = (float)t1; 888 t0 = -Sf(0,1)*d; 889 t1 = -Sf(1,0)*d; 890 Df(0,1) = (float)t0; 891 Df(1,0) = (float)t1; 892 } 893 894 } 895 } 896 else 897 { 898 double d = det2(Sd); 899 if( d != 0. ) 900 { 901 result = true; 902 d = 1./d; 903 #if CV_SSE2 904 if(USE_SSE2) 905 { 906 __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1) 907 __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1) 908 __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal 909 __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal 910 __m128d det = _mm_load1_pd((const double*)&d); 911 sm = _mm_mul_pd(sm, det); 912 913 static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80}; 914 __m128d pattern = _mm_load1_pd((double*)inv); 915 ss = _mm_mul_pd(ss, det); 916 ss = _mm_xor_pd(ss, pattern);//==-1*ss 917 918 s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1)); 919 s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1)); 920 _mm_storeu_pd((double*)dstdata, s0); 921 _mm_storeu_pd((double*)(dstdata+dststep), s1); 922 } 923 else 924 #endif 925 { 926 double t0, t1; 927 t0 = Sd(0,0)*d; 928 t1 = Sd(1,1)*d; 929 Dd(1,1) = t0; 930 Dd(0,0) = t1; 931 t0 = -Sd(0,1)*d; 932 t1 = -Sd(1,0)*d; 933 Dd(0,1) = t0; 934 Dd(1,0) = t1; 935 } 936 } 937 } 938 } 939 else if( n == 3 ) 940 { 941 if( type == CV_32FC1 ) 942 { 943 double d = det3(Sf); 944 945 if( d != 0. ) 946 { 947 double t[12]; 948 949 result = true; 950 d = 1./d; 951 t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d); 952 t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d); 953 t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d); 954 955 t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d); 956 t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d); 957 t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d); 958 959 t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d); 960 t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d); 961 t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d); 962 963 Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2]; 964 Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5]; 965 Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8]; 966 } 967 } 968 else 969 { 970 double d = det3(Sd); 971 if( d != 0. ) 972 { 973 result = true; 974 d = 1./d; 975 double t[9]; 976 977 t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d; 978 t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d; 979 t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d; 980 981 t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d; 982 t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d; 983 t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d; 984 985 t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d; 986 t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d; 987 t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d; 988 989 Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2]; 990 Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5]; 991 Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8]; 992 } 993 } 994 } 995 else 996 { 997 assert( n == 1 ); 998 999 if( type == CV_32FC1 ) 1000 { 1001 double d = Sf(0,0); 1002 if( d != 0. ) 1003 { 1004 result = true; 1005 Df(0,0) = (float)(1./d); 1006 } 1007 } 1008 else 1009 { 1010 double d = Sd(0,0); 1011 if( d != 0. ) 1012 { 1013 result = true; 1014 Dd(0,0) = 1./d; 1015 } 1016 } 1017 } 1018 if( !result ) 1019 dst = Scalar(0); 1020 return result; 1021 } 1022 1023 int elem_size = CV_ELEM_SIZE(type); 1024 AutoBuffer<uchar> buf(n*n*elem_size); 1025 Mat src1(n, n, type, (uchar*)buf); 1026 src.copyTo(src1); 1027 setIdentity(dst); 1028 1029 if( method == DECOMP_LU && type == CV_32F ) 1030 result = hal::LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0; 1031 else if( method == DECOMP_LU && type == CV_64F ) 1032 result = hal::LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0; 1033 else if( method == DECOMP_CHOLESKY && type == CV_32F ) 1034 result = hal::Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n); 1035 else 1036 result = hal::Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n); 1037 1038 if( !result ) 1039 dst = Scalar(0); 1040 1041 return result; 1042 } 1043 1044 1045 1046 /****************************************************************************************\ 1047 * Solving a linear system * 1048 \****************************************************************************************/ 1049 solve(InputArray _src,InputArray _src2arg,OutputArray _dst,int method)1050 bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method ) 1051 { 1052 bool result = true; 1053 Mat src = _src.getMat(), _src2 = _src2arg.getMat(); 1054 int type = src.type(); 1055 bool is_normal = (method & DECOMP_NORMAL) != 0; 1056 1057 CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) ); 1058 1059 method &= ~DECOMP_NORMAL; 1060 CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) || 1061 is_normal || src.rows == src.cols ); 1062 1063 // check case of a single equation and small matrix 1064 if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal && 1065 src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 ) 1066 { 1067 _dst.create( src.cols, _src2.cols, src.type() ); 1068 Mat dst = _dst.getMat(); 1069 1070 #define bf(y) ((float*)(bdata + y*src2step))[0] 1071 #define bd(y) ((double*)(bdata + y*src2step))[0] 1072 1073 const uchar* srcdata = src.ptr(); 1074 const uchar* bdata = _src2.ptr(); 1075 uchar* dstdata = dst.ptr(); 1076 size_t srcstep = src.step; 1077 size_t src2step = _src2.step; 1078 size_t dststep = dst.step; 1079 1080 if( src.rows == 2 ) 1081 { 1082 if( type == CV_32FC1 ) 1083 { 1084 double d = det2(Sf); 1085 if( d != 0. ) 1086 { 1087 double t; 1088 d = 1./d; 1089 t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d); 1090 Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d); 1091 Df(0,0) = (float)t; 1092 } 1093 else 1094 result = false; 1095 } 1096 else 1097 { 1098 double d = det2(Sd); 1099 if( d != 0. ) 1100 { 1101 double t; 1102 d = 1./d; 1103 t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d; 1104 Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d; 1105 Dd(0,0) = t; 1106 } 1107 else 1108 result = false; 1109 } 1110 } 1111 else if( src.rows == 3 ) 1112 { 1113 if( type == CV_32FC1 ) 1114 { 1115 double d = det3(Sf); 1116 if( d != 0. ) 1117 { 1118 float t[3]; 1119 d = 1./d; 1120 1121 t[0] = (float)(d* 1122 (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) - 1123 Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) + 1124 Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2)))); 1125 1126 t[1] = (float)(d* 1127 (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) - 1128 bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) + 1129 Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)))); 1130 1131 t[2] = (float)(d* 1132 (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) - 1133 Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) + 1134 bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0)))); 1135 1136 Df(0,0) = t[0]; 1137 Df(1,0) = t[1]; 1138 Df(2,0) = t[2]; 1139 } 1140 else 1141 result = false; 1142 } 1143 else 1144 { 1145 double d = det3(Sd); 1146 if( d != 0. ) 1147 { 1148 double t[9]; 1149 1150 d = 1./d; 1151 1152 t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) + 1153 (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) + 1154 (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d; 1155 1156 t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) + 1157 (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) + 1158 (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d; 1159 1160 t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) + 1161 (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) + 1162 (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d; 1163 1164 Dd(0,0) = t[0]; 1165 Dd(1,0) = t[1]; 1166 Dd(2,0) = t[2]; 1167 } 1168 else 1169 result = false; 1170 } 1171 } 1172 else 1173 { 1174 assert( src.rows == 1 ); 1175 1176 if( type == CV_32FC1 ) 1177 { 1178 double d = Sf(0,0); 1179 if( d != 0. ) 1180 Df(0,0) = (float)(bf(0)/d); 1181 else 1182 result = false; 1183 } 1184 else 1185 { 1186 double d = Sd(0,0); 1187 if( d != 0. ) 1188 Dd(0,0) = (bd(0)/d); 1189 else 1190 result = false; 1191 } 1192 } 1193 return result; 1194 } 1195 1196 if( method == DECOMP_QR ) 1197 method = DECOMP_SVD; 1198 1199 int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols; 1200 size_t esz = CV_ELEM_SIZE(type), bufsize = 0; 1201 size_t vstep = alignSize(n*esz, 16); 1202 size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep; 1203 AutoBuffer<uchar> buffer; 1204 1205 Mat src2 = _src2; 1206 _dst.create( src.cols, src2.cols, src.type() ); 1207 Mat dst = _dst.getMat(); 1208 1209 if( m < n ) 1210 CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" ); 1211 1212 if( m == n ) 1213 is_normal = false; 1214 else if( is_normal ) 1215 { 1216 m_ = n; 1217 if( method == DECOMP_SVD ) 1218 method = DECOMP_EIG; 1219 } 1220 1221 size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m); 1222 bufsize += asize + 32; 1223 1224 if( is_normal ) 1225 bufsize += n*nb*esz; 1226 1227 if( method == DECOMP_SVD || method == DECOMP_EIG ) 1228 bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32; 1229 1230 buffer.allocate(bufsize); 1231 uchar* ptr = alignPtr((uchar*)buffer, 16); 1232 1233 Mat a(m_, n, type, ptr, astep); 1234 1235 if( is_normal ) 1236 mulTransposed(src, a, true); 1237 else if( method != DECOMP_SVD ) 1238 src.copyTo(a); 1239 else 1240 { 1241 a = Mat(n, m_, type, ptr, astep); 1242 transpose(src, a); 1243 } 1244 ptr += asize; 1245 1246 if( !is_normal ) 1247 { 1248 if( method == DECOMP_LU || method == DECOMP_CHOLESKY ) 1249 src2.copyTo(dst); 1250 } 1251 else 1252 { 1253 // a'*b 1254 if( method == DECOMP_LU || method == DECOMP_CHOLESKY ) 1255 gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T ); 1256 else 1257 { 1258 Mat tmp(n, nb, type, ptr); 1259 ptr += n*nb*esz; 1260 gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T ); 1261 src2 = tmp; 1262 } 1263 } 1264 1265 if( method == DECOMP_LU ) 1266 { 1267 if( type == CV_32F ) 1268 result = hal::LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0; 1269 else 1270 result = hal::LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0; 1271 } 1272 else if( method == DECOMP_CHOLESKY ) 1273 { 1274 if( type == CV_32F ) 1275 result = hal::Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb); 1276 else 1277 result = hal::Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb); 1278 } 1279 else 1280 { 1281 ptr = alignPtr(ptr, 16); 1282 Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u; 1283 ptr += n*(vstep + esz); 1284 1285 if( method == DECOMP_EIG ) 1286 { 1287 if( type == CV_32F ) 1288 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr); 1289 else 1290 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr); 1291 u = v; 1292 } 1293 else 1294 { 1295 if( type == CV_32F ) 1296 JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n); 1297 else 1298 JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n); 1299 u = a; 1300 } 1301 1302 if( type == CV_32F ) 1303 { 1304 SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true, 1305 v.ptr<float>(), v.step, true, src2.ptr<float>(), 1306 src2.step, nb, dst.ptr<float>(), dst.step, ptr); 1307 } 1308 else 1309 { 1310 SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true, 1311 v.ptr<double>(), v.step, true, src2.ptr<double>(), 1312 src2.step, nb, dst.ptr<double>(), dst.step, ptr); 1313 } 1314 result = true; 1315 } 1316 1317 if( !result ) 1318 dst = Scalar(0); 1319 1320 return result; 1321 } 1322 1323 1324 /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix /////////////// 1325 eigen(InputArray _src,OutputArray _evals,OutputArray _evects)1326 bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) 1327 { 1328 Mat src = _src.getMat(); 1329 int type = src.type(); 1330 int n = src.rows; 1331 1332 CV_Assert( src.rows == src.cols ); 1333 CV_Assert (type == CV_32F || type == CV_64F); 1334 1335 Mat v; 1336 if( _evects.needed() ) 1337 { 1338 _evects.create(n, n, type); 1339 v = _evects.getMat(); 1340 } 1341 1342 size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16); 1343 AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32); 1344 uchar* ptr = alignPtr((uchar*)buf, 16); 1345 Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n); 1346 ptr += astep*n + elemSize*n; 1347 src.copyTo(a); 1348 bool ok = type == CV_32F ? 1349 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) : 1350 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr); 1351 1352 w.copyTo(_evals); 1353 return ok; 1354 } 1355 1356 namespace cv 1357 { 1358 _SVDcompute(InputArray _aarr,OutputArray _w,OutputArray _u,OutputArray _vt,int flags)1359 static void _SVDcompute( InputArray _aarr, OutputArray _w, 1360 OutputArray _u, OutputArray _vt, int flags ) 1361 { 1362 Mat src = _aarr.getMat(); 1363 int m = src.rows, n = src.cols; 1364 int type = src.type(); 1365 bool compute_uv = _u.needed() || _vt.needed(); 1366 bool full_uv = (flags & SVD::FULL_UV) != 0; 1367 1368 CV_Assert( type == CV_32F || type == CV_64F ); 1369 1370 if( flags & SVD::NO_UV ) 1371 { 1372 _u.release(); 1373 _vt.release(); 1374 compute_uv = full_uv = false; 1375 } 1376 1377 bool at = false; 1378 if( m < n ) 1379 { 1380 std::swap(m, n); 1381 at = true; 1382 } 1383 1384 int urows = full_uv ? m : n; 1385 size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16); 1386 AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32); 1387 uchar* buf = alignPtr((uchar*)_buf, 16); 1388 Mat temp_a(n, m, type, buf, astep); 1389 Mat temp_w(n, 1, type, buf + urows*astep); 1390 Mat temp_u(urows, m, type, buf, astep), temp_v; 1391 1392 if( compute_uv ) 1393 temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep); 1394 1395 if( urows > n ) 1396 temp_u = Scalar::all(0); 1397 1398 if( !at ) 1399 transpose(src, temp_a); 1400 else 1401 src.copyTo(temp_a); 1402 1403 if( type == CV_32F ) 1404 { 1405 JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(), 1406 temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0); 1407 } 1408 else 1409 { 1410 JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(), 1411 temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0); 1412 } 1413 temp_w.copyTo(_w); 1414 if( compute_uv ) 1415 { 1416 if( !at ) 1417 { 1418 if( _u.needed() ) 1419 transpose(temp_u, _u); 1420 if( _vt.needed() ) 1421 temp_v.copyTo(_vt); 1422 } 1423 else 1424 { 1425 if( _u.needed() ) 1426 transpose(temp_v, _u); 1427 if( _vt.needed() ) 1428 temp_u.copyTo(_vt); 1429 } 1430 } 1431 } 1432 1433 compute(InputArray a,OutputArray w,OutputArray u,OutputArray vt,int flags)1434 void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags ) 1435 { 1436 _SVDcompute(a, w, u, vt, flags); 1437 } 1438 compute(InputArray a,OutputArray w,int flags)1439 void SVD::compute( InputArray a, OutputArray w, int flags ) 1440 { 1441 _SVDcompute(a, w, noArray(), noArray(), flags); 1442 } 1443 backSubst(InputArray _w,InputArray _u,InputArray _vt,InputArray _rhs,OutputArray _dst)1444 void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt, 1445 InputArray _rhs, OutputArray _dst ) 1446 { 1447 Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat(); 1448 int type = w.type(), esz = (int)w.elemSize(); 1449 int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n); 1450 size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz; 1451 AutoBuffer<uchar> buffer(nb*sizeof(double) + 16); 1452 CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data ); 1453 CV_Assert( u.cols >= nm && vt.rows >= nm && 1454 (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) ); 1455 CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) ); 1456 1457 _dst.create( n, nb, type ); 1458 Mat dst = _dst.getMat(); 1459 if( type == CV_32F ) 1460 SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false, 1461 vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb, 1462 dst.ptr<float>(), dst.step, buffer); 1463 else if( type == CV_64F ) 1464 SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false, 1465 vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb, 1466 dst.ptr<double>(), dst.step, buffer); 1467 else 1468 CV_Error( CV_StsUnsupportedFormat, "" ); 1469 } 1470 1471 operator ()(InputArray a,int flags)1472 SVD& SVD::operator ()(InputArray a, int flags) 1473 { 1474 _SVDcompute(a, w, u, vt, flags); 1475 return *this; 1476 } 1477 1478 backSubst(InputArray rhs,OutputArray dst) const1479 void SVD::backSubst( InputArray rhs, OutputArray dst ) const 1480 { 1481 backSubst( w, u, vt, rhs, dst ); 1482 } 1483 1484 } 1485 1486 SVDecomp(InputArray src,OutputArray w,OutputArray u,OutputArray vt,int flags)1487 void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags) 1488 { 1489 SVD::compute(src, w, u, vt, flags); 1490 } 1491 SVBackSubst(InputArray w,InputArray u,InputArray vt,InputArray rhs,OutputArray dst)1492 void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst) 1493 { 1494 SVD::backSubst(w, u, vt, rhs, dst); 1495 } 1496 1497 1498 CV_IMPL double cvDet(const CvArr * arr)1499 cvDet( const CvArr* arr ) 1500 { 1501 if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 ) 1502 { 1503 CvMat* mat = (CvMat*)arr; 1504 int type = CV_MAT_TYPE(mat->type); 1505 int rows = mat->rows; 1506 uchar* m = mat->data.ptr; 1507 int step = mat->step; 1508 CV_Assert( rows == mat->cols ); 1509 1510 #define Mf(y, x) ((float*)(m + y*step))[x] 1511 #define Md(y, x) ((double*)(m + y*step))[x] 1512 1513 if( type == CV_32F ) 1514 { 1515 if( rows == 2 ) 1516 return det2(Mf); 1517 if( rows == 3 ) 1518 return det3(Mf); 1519 } 1520 else if( type == CV_64F ) 1521 { 1522 if( rows == 2 ) 1523 return det2(Md); 1524 if( rows == 3 ) 1525 return det3(Md); 1526 } 1527 } 1528 return cv::determinant(cv::cvarrToMat(arr)); 1529 } 1530 1531 1532 CV_IMPL double cvInvert(const CvArr * srcarr,CvArr * dstarr,int method)1533 cvInvert( const CvArr* srcarr, CvArr* dstarr, int method ) 1534 { 1535 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 1536 1537 CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows ); 1538 return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY : 1539 method == CV_SVD ? cv::DECOMP_SVD : 1540 method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU ); 1541 } 1542 1543 1544 CV_IMPL int cvSolve(const CvArr * Aarr,const CvArr * barr,CvArr * xarr,int method)1545 cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method ) 1546 { 1547 cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr); 1548 1549 CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols ); 1550 bool is_normal = (method & CV_NORMAL) != 0; 1551 method &= ~CV_NORMAL; 1552 return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY : 1553 method == CV_SVD ? cv::DECOMP_SVD : 1554 method == CV_SVD_SYM ? cv::DECOMP_EIG : 1555 A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) ); 1556 } 1557 1558 1559 CV_IMPL void cvEigenVV(CvArr * srcarr,CvArr * evectsarr,CvArr * evalsarr,double,int,int)1560 cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double, 1561 int, int ) 1562 { 1563 cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0; 1564 if( evectsarr ) 1565 { 1566 cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0; 1567 eigen(src, evals, evects); 1568 if( evects0.data != evects.data ) 1569 { 1570 const uchar* p = evects0.ptr(); 1571 evects.convertTo(evects0, evects0.type()); 1572 CV_Assert( p == evects0.ptr() ); 1573 } 1574 } 1575 else 1576 eigen(src, evals); 1577 if( evals0.data != evals.data ) 1578 { 1579 const uchar* p = evals0.ptr(); 1580 if( evals0.size() == evals.size() ) 1581 evals.convertTo(evals0, evals0.type()); 1582 else if( evals0.type() == evals.type() ) 1583 cv::transpose(evals, evals0); 1584 else 1585 cv::Mat(evals.t()).convertTo(evals0, evals0.type()); 1586 CV_Assert( p == evals0.ptr() ); 1587 } 1588 } 1589 1590 1591 CV_IMPL void cvSVD(CvArr * aarr,CvArr * warr,CvArr * uarr,CvArr * varr,int flags)1592 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags ) 1593 { 1594 cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v; 1595 int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n); 1596 1597 CV_Assert( w.type() == type && 1598 (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) || 1599 w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) ); 1600 1601 cv::SVD svd; 1602 1603 if( w.size() == cv::Size(nm, 1) ) 1604 svd.w = cv::Mat(nm, 1, type, w.ptr() ); 1605 else if( w.isContinuous() ) 1606 svd.w = w; 1607 1608 if( uarr ) 1609 { 1610 u = cv::cvarrToMat(uarr); 1611 CV_Assert( u.type() == type ); 1612 svd.u = u; 1613 } 1614 1615 if( varr ) 1616 { 1617 v = cv::cvarrToMat(varr); 1618 CV_Assert( v.type() == type ); 1619 svd.vt = v; 1620 } 1621 1622 svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) | 1623 ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) | 1624 ((m != n && (svd.u.size() == cv::Size(mn, mn) || 1625 svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0)); 1626 1627 if( !u.empty() ) 1628 { 1629 if( flags & CV_SVD_U_T ) 1630 cv::transpose( svd.u, u ); 1631 else if( u.data != svd.u.data ) 1632 { 1633 CV_Assert( u.size() == svd.u.size() ); 1634 svd.u.copyTo(u); 1635 } 1636 } 1637 1638 if( !v.empty() ) 1639 { 1640 if( !(flags & CV_SVD_V_T) ) 1641 cv::transpose( svd.vt, v ); 1642 else if( v.data != svd.vt.data ) 1643 { 1644 CV_Assert( v.size() == svd.vt.size() ); 1645 svd.vt.copyTo(v); 1646 } 1647 } 1648 1649 if( w.data != svd.w.data ) 1650 { 1651 if( w.size() == svd.w.size() ) 1652 svd.w.copyTo(w); 1653 else 1654 { 1655 w = cv::Scalar(0); 1656 cv::Mat wd = w.diag(); 1657 svd.w.copyTo(wd); 1658 } 1659 } 1660 } 1661 1662 1663 CV_IMPL void cvSVBkSb(const CvArr * warr,const CvArr * uarr,const CvArr * varr,const CvArr * rhsarr,CvArr * dstarr,int flags)1664 cvSVBkSb( const CvArr* warr, const CvArr* uarr, 1665 const CvArr* varr, const CvArr* rhsarr, 1666 CvArr* dstarr, int flags ) 1667 { 1668 cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr), 1669 v = cv::cvarrToMat(varr), rhs, 1670 dst = cv::cvarrToMat(dstarr), dst0 = dst; 1671 if( flags & CV_SVD_U_T ) 1672 { 1673 cv::Mat tmp; 1674 transpose(u, tmp); 1675 u = tmp; 1676 } 1677 if( !(flags & CV_SVD_V_T) ) 1678 { 1679 cv::Mat tmp; 1680 transpose(v, tmp); 1681 v = tmp; 1682 } 1683 if( rhsarr ) 1684 rhs = cv::cvarrToMat(rhsarr); 1685 1686 cv::SVD::backSubst(w, u, v, rhs, dst); 1687 CV_Assert( dst.data == dst0.data ); 1688 } 1689