• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*M///////////////////////////////////////////////////////////////////////////////////////
2  //
3  //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4  //
5  //  By downloading, copying, installing or using the software you agree to this license.
6  //  If you do not agree to this license, do not download, install,
7  //  copy or use the software.
8  //
9  //
10  //                           License Agreement
11  //                For Open Source Computer Vision Library
12  //
13  // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14  // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15  // Third party copyrights are property of their respective owners.
16  //
17  // Redistribution and use in source and binary forms, with or without modification,
18  // are permitted provided that the following conditions are met:
19  //
20  //   * Redistribution's of source code must retain the above copyright notice,
21  //     this list of conditions and the following disclaimer.
22  //
23  //   * Redistribution's in binary form must reproduce the above copyright notice,
24  //     this list of conditions and the following disclaimer in the documentation
25  //     and/or other materials provided with the distribution.
26  //
27  //   * The name of the copyright holders may not be used to endorse or promote products
28  //     derived from this software without specific prior written permission.
29  //
30  // This software is provided by the copyright holders and contributors "as is" and
31  // any express or implied warranties, including, but not limited to, the implied
32  // warranties of merchantability and fitness for a particular purpose are disclaimed.
33  // In no event shall the Intel Corporation or contributors be liable for any direct,
34  // indirect, incidental, special, exemplary, or consequential damages
35  // (including, but not limited to, procurement of substitute goods or services;
36  // loss of use, data, or profits; or business interruption) however caused
37  // and on any theory of liability, whether in contract, strict liability,
38  // or tort (including negligence or otherwise) arising in any way out of
39  // the use of this software, even if advised of the possibility of such damage.
40  //
41  //M*/
42  
43  #include "precomp.hpp"
44  #include <limits>
45  
46  #if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
47  #pragma float_control(precise, on)
48  #endif
49  
50  namespace cv
51  {
52  
LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)53  int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
54  {
55      return hal::LU(A, astep, m, b, bstep, n);
56  }
57  
LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)58  int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
59  {
60      return hal::LU(A, astep, m, b, bstep, n);
61  }
62  
Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)63  bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
64  {
65      return hal::Cholesky(A, astep, m, b, bstep, n);
66  }
67  
Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)68  bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
69  {
70      return hal::Cholesky(A, astep, m, b, bstep, n);
71  }
72  
hypot(_Tp a,_Tp b)73  template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
74  {
75      a = std::abs(a);
76      b = std::abs(b);
77      if( a > b )
78      {
79          b /= a;
80          return a*std::sqrt(1 + b*b);
81      }
82      if( b > 0 )
83      {
84          a /= b;
85          return b*std::sqrt(1 + a*a);
86      }
87      return 0;
88  }
89  
90  
91  template<typename _Tp> bool
JacobiImpl_(_Tp * A,size_t astep,_Tp * W,_Tp * V,size_t vstep,int n,uchar * buf)92  JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
93  {
94      const _Tp eps = std::numeric_limits<_Tp>::epsilon();
95      int i, j, k, m;
96  
97      astep /= sizeof(A[0]);
98      if( V )
99      {
100          vstep /= sizeof(V[0]);
101          for( i = 0; i < n; i++ )
102          {
103              for( j = 0; j < n; j++ )
104                  V[i*vstep + j] = (_Tp)0;
105              V[i*vstep + i] = (_Tp)1;
106          }
107      }
108  
109      int iters, maxIters = n*n*30;
110  
111      int* indR = (int*)alignPtr(buf, sizeof(int));
112      int* indC = indR + n;
113      _Tp mv = (_Tp)0;
114  
115      for( k = 0; k < n; k++ )
116      {
117          W[k] = A[(astep + 1)*k];
118          if( k < n - 1 )
119          {
120              for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
121              {
122                  _Tp val = std::abs(A[astep*k+i]);
123                  if( mv < val )
124                      mv = val, m = i;
125              }
126              indR[k] = m;
127          }
128          if( k > 0 )
129          {
130              for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
131              {
132                  _Tp val = std::abs(A[astep*i+k]);
133                  if( mv < val )
134                      mv = val, m = i;
135              }
136              indC[k] = m;
137          }
138      }
139  
140      if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
141      {
142          // find index (k,l) of pivot p
143          for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
144          {
145              _Tp val = std::abs(A[astep*i + indR[i]]);
146              if( mv < val )
147                  mv = val, k = i;
148          }
149          int l = indR[k];
150          for( i = 1; i < n; i++ )
151          {
152              _Tp val = std::abs(A[astep*indC[i] + i]);
153              if( mv < val )
154                  mv = val, k = indC[i], l = i;
155          }
156  
157          _Tp p = A[astep*k + l];
158          if( std::abs(p) <= eps )
159              break;
160          _Tp y = (_Tp)((W[l] - W[k])*0.5);
161          _Tp t = std::abs(y) + hypot(p, y);
162          _Tp s = hypot(p, t);
163          _Tp c = t/s;
164          s = p/s; t = (p/t)*p;
165          if( y < 0 )
166              s = -s, t = -t;
167          A[astep*k + l] = 0;
168  
169          W[k] -= t;
170          W[l] += t;
171  
172          _Tp a0, b0;
173  
174  #undef rotate
175  #define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
176  
177          // rotate rows and columns k and l
178          for( i = 0; i < k; i++ )
179              rotate(A[astep*i+k], A[astep*i+l]);
180          for( i = k+1; i < l; i++ )
181              rotate(A[astep*k+i], A[astep*i+l]);
182          for( i = l+1; i < n; i++ )
183              rotate(A[astep*k+i], A[astep*l+i]);
184  
185          // rotate eigenvectors
186          if( V )
187              for( i = 0; i < n; i++ )
188                  rotate(V[vstep*k+i], V[vstep*l+i]);
189  
190  #undef rotate
191  
192          for( j = 0; j < 2; j++ )
193          {
194              int idx = j == 0 ? k : l;
195              if( idx < n - 1 )
196              {
197                  for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
198                  {
199                      _Tp val = std::abs(A[astep*idx+i]);
200                      if( mv < val )
201                          mv = val, m = i;
202                  }
203                  indR[idx] = m;
204              }
205              if( idx > 0 )
206              {
207                  for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
208                  {
209                      _Tp val = std::abs(A[astep*i+idx]);
210                      if( mv < val )
211                          mv = val, m = i;
212                  }
213                  indC[idx] = m;
214              }
215          }
216      }
217  
218      // sort eigenvalues & eigenvectors
219      for( k = 0; k < n-1; k++ )
220      {
221          m = k;
222          for( i = k+1; i < n; i++ )
223          {
224              if( W[m] < W[i] )
225                  m = i;
226          }
227          if( k != m )
228          {
229              std::swap(W[m], W[k]);
230              if( V )
231                  for( i = 0; i < n; i++ )
232                      std::swap(V[vstep*m + i], V[vstep*k + i]);
233          }
234      }
235  
236      return true;
237  }
238  
Jacobi(float * S,size_t sstep,float * e,float * E,size_t estep,int n,uchar * buf)239  static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
240  {
241      return JacobiImpl_(S, sstep, e, E, estep, n, buf);
242  }
243  
Jacobi(double * S,size_t sstep,double * e,double * E,size_t estep,int n,uchar * buf)244  static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
245  {
246      return JacobiImpl_(S, sstep, e, E, estep, n, buf);
247  }
248  
249  
250  template<typename T> struct VBLAS
251  {
dotcv::VBLAS252      int dot(const T*, const T*, int, T*) const { return 0; }
givenscv::VBLAS253      int givens(T*, T*, int, T, T) const { return 0; }
givensxcv::VBLAS254      int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
255  };
256  
257  #if CV_SSE2
dot(const float * a,const float * b,int n,float * result) const258  template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
259  {
260      if( n < 8 )
261          return 0;
262      int k = 0;
263      __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps();
264      for( ; k <= n - 8; k += 8 )
265      {
266          __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4);
267          __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4);
268  
269          s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0));
270          s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1));
271      }
272      s0 = _mm_add_ps(s0, s1);
273      float sbuf[4];
274      _mm_storeu_ps(sbuf, s0);
275      *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3];
276      return k;
277  }
278  
279  
givens(float * a,float * b,int n,float c,float s) const280  template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
281  {
282      if( n < 4 )
283          return 0;
284      int k = 0;
285      __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
286      for( ; k <= n - 4; k += 4 )
287      {
288          __m128 a0 = _mm_load_ps(a + k);
289          __m128 b0 = _mm_load_ps(b + k);
290          __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
291          __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
292          _mm_store_ps(a + k, t0);
293          _mm_store_ps(b + k, t1);
294      }
295      return k;
296  }
297  
298  
givensx(float * a,float * b,int n,float c,float s,float * anorm,float * bnorm) const299  template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
300                                               float* anorm, float* bnorm) const
301  {
302      if( n < 4 )
303          return 0;
304      int k = 0;
305      __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
306      __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps();
307      for( ; k <= n - 4; k += 4 )
308      {
309          __m128 a0 = _mm_load_ps(a + k);
310          __m128 b0 = _mm_load_ps(b + k);
311          __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
312          __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
313          _mm_store_ps(a + k, t0);
314          _mm_store_ps(b + k, t1);
315          sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0));
316          sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1));
317      }
318      float abuf[4], bbuf[4];
319      _mm_storeu_ps(abuf, sa);
320      _mm_storeu_ps(bbuf, sb);
321      *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3];
322      *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3];
323      return k;
324  }
325  
326  
dot(const double * a,const double * b,int n,double * result) const327  template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
328  {
329      if( n < 4 )
330          return 0;
331      int k = 0;
332      __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd();
333      for( ; k <= n - 4; k += 4 )
334      {
335          __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2);
336          __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2);
337  
338          s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0));
339          s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1));
340      }
341      s0 = _mm_add_pd(s0, s1);
342      double sbuf[2];
343      _mm_storeu_pd(sbuf, s0);
344      *result = sbuf[0] + sbuf[1];
345      return k;
346  }
347  
348  
givens(double * a,double * b,int n,double c,double s) const349  template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
350  {
351      int k = 0;
352      __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
353      for( ; k <= n - 2; k += 2 )
354      {
355          __m128d a0 = _mm_load_pd(a + k);
356          __m128d b0 = _mm_load_pd(b + k);
357          __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
358          __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
359          _mm_store_pd(a + k, t0);
360          _mm_store_pd(b + k, t1);
361      }
362      return k;
363  }
364  
365  
givensx(double * a,double * b,int n,double c,double s,double * anorm,double * bnorm) const366  template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
367                                                double* anorm, double* bnorm) const
368  {
369      int k = 0;
370      __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
371      __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd();
372      for( ; k <= n - 2; k += 2 )
373      {
374          __m128d a0 = _mm_load_pd(a + k);
375          __m128d b0 = _mm_load_pd(b + k);
376          __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
377          __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
378          _mm_store_pd(a + k, t0);
379          _mm_store_pd(b + k, t1);
380          sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0));
381          sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1));
382      }
383      double abuf[2], bbuf[2];
384      _mm_storeu_pd(abuf, sa);
385      _mm_storeu_pd(bbuf, sb);
386      *anorm = abuf[0] + abuf[1];
387      *bnorm = bbuf[0] + bbuf[1];
388      return k;
389  }
390  #endif
391  
392  template<typename _Tp> void
JacobiSVDImpl_(_Tp * At,size_t astep,_Tp * _W,_Tp * Vt,size_t vstep,int m,int n,int n1,double minval,_Tp eps)393  JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
394                 int m, int n, int n1, double minval, _Tp eps)
395  {
396      VBLAS<_Tp> vblas;
397      AutoBuffer<double> Wbuf(n);
398      double* W = Wbuf;
399      int i, j, k, iter, max_iter = std::max(m, 30);
400      _Tp c, s;
401      double sd;
402      astep /= sizeof(At[0]);
403      vstep /= sizeof(Vt[0]);
404  
405      for( i = 0; i < n; i++ )
406      {
407          for( k = 0, sd = 0; k < m; k++ )
408          {
409              _Tp t = At[i*astep + k];
410              sd += (double)t*t;
411          }
412          W[i] = sd;
413  
414          if( Vt )
415          {
416              for( k = 0; k < n; k++ )
417                  Vt[i*vstep + k] = 0;
418              Vt[i*vstep + i] = 1;
419          }
420      }
421  
422      for( iter = 0; iter < max_iter; iter++ )
423      {
424          bool changed = false;
425  
426          for( i = 0; i < n-1; i++ )
427              for( j = i+1; j < n; j++ )
428              {
429                  _Tp *Ai = At + i*astep, *Aj = At + j*astep;
430                  double a = W[i], p = 0, b = W[j];
431  
432                  for( k = 0; k < m; k++ )
433                      p += (double)Ai[k]*Aj[k];
434  
435                  if( std::abs(p) <= eps*std::sqrt((double)a*b) )
436                      continue;
437  
438                  p *= 2;
439                  double beta = a - b, gamma = hypot((double)p, beta);
440                  if( beta < 0 )
441                  {
442                      double delta = (gamma - beta)*0.5;
443                      s = (_Tp)std::sqrt(delta/gamma);
444                      c = (_Tp)(p/(gamma*s*2));
445                  }
446                  else
447                  {
448                      c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
449                      s = (_Tp)(p/(gamma*c*2));
450                  }
451  
452                  a = b = 0;
453                  for( k = 0; k < m; k++ )
454                  {
455                      _Tp t0 = c*Ai[k] + s*Aj[k];
456                      _Tp t1 = -s*Ai[k] + c*Aj[k];
457                      Ai[k] = t0; Aj[k] = t1;
458  
459                      a += (double)t0*t0; b += (double)t1*t1;
460                  }
461                  W[i] = a; W[j] = b;
462  
463                  changed = true;
464  
465                  if( Vt )
466                  {
467                      _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
468                      k = vblas.givens(Vi, Vj, n, c, s);
469  
470                      for( ; k < n; k++ )
471                      {
472                          _Tp t0 = c*Vi[k] + s*Vj[k];
473                          _Tp t1 = -s*Vi[k] + c*Vj[k];
474                          Vi[k] = t0; Vj[k] = t1;
475                      }
476                  }
477              }
478          if( !changed )
479              break;
480      }
481  
482      for( i = 0; i < n; i++ )
483      {
484          for( k = 0, sd = 0; k < m; k++ )
485          {
486              _Tp t = At[i*astep + k];
487              sd += (double)t*t;
488          }
489          W[i] = std::sqrt(sd);
490      }
491  
492      for( i = 0; i < n-1; i++ )
493      {
494          j = i;
495          for( k = i+1; k < n; k++ )
496          {
497              if( W[j] < W[k] )
498                  j = k;
499          }
500          if( i != j )
501          {
502              std::swap(W[i], W[j]);
503              if( Vt )
504              {
505                  for( k = 0; k < m; k++ )
506                      std::swap(At[i*astep + k], At[j*astep + k]);
507  
508                  for( k = 0; k < n; k++ )
509                      std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
510              }
511          }
512      }
513  
514      for( i = 0; i < n; i++ )
515          _W[i] = (_Tp)W[i];
516  
517      if( !Vt )
518          return;
519  
520      RNG rng(0x12345678);
521      for( i = 0; i < n1; i++ )
522      {
523          sd = i < n ? W[i] : 0;
524  
525          for( int ii = 0; ii < 100 && sd <= minval; ii++ )
526          {
527              // if we got a zero singular value, then in order to get the corresponding left singular vector
528              // we generate a random vector, project it to the previously computed left singular vectors,
529              // subtract the projection and normalize the difference.
530              const _Tp val0 = (_Tp)(1./m);
531              for( k = 0; k < m; k++ )
532              {
533                  _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
534                  At[i*astep + k] = val;
535              }
536              for( iter = 0; iter < 2; iter++ )
537              {
538                  for( j = 0; j < i; j++ )
539                  {
540                      sd = 0;
541                      for( k = 0; k < m; k++ )
542                          sd += At[i*astep + k]*At[j*astep + k];
543                      _Tp asum = 0;
544                      for( k = 0; k < m; k++ )
545                      {
546                          _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
547                          At[i*astep + k] = t;
548                          asum += std::abs(t);
549                      }
550                      asum = asum > eps*100 ? 1/asum : 0;
551                      for( k = 0; k < m; k++ )
552                          At[i*astep + k] *= asum;
553                  }
554              }
555              sd = 0;
556              for( k = 0; k < m; k++ )
557              {
558                  _Tp t = At[i*astep + k];
559                  sd += (double)t*t;
560              }
561              sd = std::sqrt(sd);
562          }
563  
564          s = (_Tp)(sd > minval ? 1/sd : 0.);
565          for( k = 0; k < m; k++ )
566              At[i*astep + k] *= s;
567      }
568  }
569  
570  
JacobiSVD(float * At,size_t astep,float * W,float * Vt,size_t vstep,int m,int n,int n1=-1)571  static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
572  {
573      JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
574  }
575  
JacobiSVD(double * At,size_t astep,double * W,double * Vt,size_t vstep,int m,int n,int n1=-1)576  static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
577  {
578      JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
579  }
580  
581  /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
582  template<typename T1, typename T2, typename T3> static void
MatrAXPY(int m,int n,const T1 * x,int dx,const T2 * a,int inca,T3 * y,int dy)583  MatrAXPY( int m, int n, const T1* x, int dx,
584           const T2* a, int inca, T3* y, int dy )
585  {
586      int i;
587      for( i = 0; i < m; i++, x += dx, y += dy )
588      {
589          T2 s = a[i*inca];
590          int j = 0;
591           #if CV_ENABLE_UNROLLED
592          for(; j <= n - 4; j += 4 )
593          {
594              T3 t0 = (T3)(y[j]   + s*x[j]);
595              T3 t1 = (T3)(y[j+1] + s*x[j+1]);
596              y[j]   = t0;
597              y[j+1] = t1;
598              t0 = (T3)(y[j+2] + s*x[j+2]);
599              t1 = (T3)(y[j+3] + s*x[j+3]);
600              y[j+2] = t0;
601              y[j+3] = t1;
602          }
603          #endif
604          for( ; j < n; j++ )
605              y[j] = (T3)(y[j] + s*x[j]);
606      }
607  }
608  
609  template<typename T> static void
SVBkSbImpl_(int m,int n,const T * w,int incw,const T * u,int ldu,bool uT,const T * v,int ldv,bool vT,const T * b,int ldb,int nb,T * x,int ldx,double * buffer,T eps)610  SVBkSbImpl_( int m, int n, const T* w, int incw,
611         const T* u, int ldu, bool uT,
612         const T* v, int ldv, bool vT,
613         const T* b, int ldb, int nb,
614         T* x, int ldx, double* buffer, T eps )
615  {
616      double threshold = 0;
617      int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
618      int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
619      int i, j, nm = std::min(m, n);
620  
621      if( !b )
622          nb = m;
623  
624      for( i = 0; i < n; i++ )
625          for( j = 0; j < nb; j++ )
626              x[i*ldx + j] = 0;
627  
628      for( i = 0; i < nm; i++ )
629          threshold += w[i*incw];
630      threshold *= eps;
631  
632      // v * inv(w) * uT * b
633      for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
634      {
635          double wi = w[i*incw];
636          if( (double)std::abs(wi) <= threshold )
637              continue;
638          wi = 1/wi;
639  
640          if( nb == 1 )
641          {
642              double s = 0;
643              if( b )
644                  for( j = 0; j < m; j++ )
645                      s += u[j*udelta1]*b[j*ldb];
646              else
647                  s = u[0];
648              s *= wi;
649  
650              for( j = 0; j < n; j++ )
651                  x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
652          }
653          else
654          {
655              if( b )
656              {
657                  for( j = 0; j < nb; j++ )
658                      buffer[j] = 0;
659                  MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
660                  for( j = 0; j < nb; j++ )
661                      buffer[j] *= wi;
662              }
663              else
664              {
665                  for( j = 0; j < nb; j++ )
666                      buffer[j] = u[j*udelta1]*wi;
667              }
668              MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
669          }
670      }
671  }
672  
673  static void
SVBkSb(int m,int n,const float * w,size_t wstep,const float * u,size_t ustep,bool uT,const float * v,size_t vstep,bool vT,const float * b,size_t bstep,int nb,float * x,size_t xstep,uchar * buffer)674  SVBkSb( int m, int n, const float* w, size_t wstep,
675          const float* u, size_t ustep, bool uT,
676          const float* v, size_t vstep, bool vT,
677          const float* b, size_t bstep, int nb,
678          float* x, size_t xstep, uchar* buffer )
679  {
680      SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
681                  u, (int)(ustep/sizeof(u[0])), uT,
682                  v, (int)(vstep/sizeof(v[0])), vT,
683                  b, (int)(bstep/sizeof(b[0])), nb,
684                  x, (int)(xstep/sizeof(x[0])),
685                  (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
686  }
687  
688  static void
SVBkSb(int m,int n,const double * w,size_t wstep,const double * u,size_t ustep,bool uT,const double * v,size_t vstep,bool vT,const double * b,size_t bstep,int nb,double * x,size_t xstep,uchar * buffer)689  SVBkSb( int m, int n, const double* w, size_t wstep,
690         const double* u, size_t ustep, bool uT,
691         const double* v, size_t vstep, bool vT,
692         const double* b, size_t bstep, int nb,
693         double* x, size_t xstep, uchar* buffer )
694  {
695      SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
696                  u, (int)(ustep/sizeof(u[0])), uT,
697                  v, (int)(vstep/sizeof(v[0])), vT,
698                  b, (int)(bstep/sizeof(b[0])), nb,
699                  x, (int)(xstep/sizeof(x[0])),
700                  (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
701  }
702  
703  }
704  
705  /****************************************************************************************\
706  *                                 Determinant of the matrix                              *
707  \****************************************************************************************/
708  
709  #define det2(m)   ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
710  #define det3(m)   (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) -  \
711                     m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) +  \
712                     m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
713  
determinant(InputArray _mat)714  double cv::determinant( InputArray _mat )
715  {
716      Mat mat = _mat.getMat();
717      double result = 0;
718      int type = mat.type(), rows = mat.rows;
719      size_t step = mat.step;
720      const uchar* m = mat.ptr();
721  
722      CV_Assert( !mat.empty() );
723      CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
724  
725      #define Mf(y, x) ((float*)(m + y*step))[x]
726      #define Md(y, x) ((double*)(m + y*step))[x]
727  
728      if( type == CV_32F )
729      {
730          if( rows == 2 )
731              result = det2(Mf);
732          else if( rows == 3 )
733              result = det3(Mf);
734          else if( rows == 1 )
735              result = Mf(0,0);
736          else
737          {
738              size_t bufSize = rows*rows*sizeof(float);
739              AutoBuffer<uchar> buffer(bufSize);
740              Mat a(rows, rows, CV_32F, (uchar*)buffer);
741              mat.copyTo(a);
742  
743              result = hal::LU(a.ptr<float>(), a.step, rows, 0, 0, 0);
744              if( result )
745              {
746                  for( int i = 0; i < rows; i++ )
747                      result *= a.at<float>(i,i);
748                  result = 1./result;
749              }
750          }
751      }
752      else
753      {
754          if( rows == 2 )
755              result = det2(Md);
756          else if( rows == 3 )
757              result = det3(Md);
758          else if( rows == 1 )
759              result = Md(0,0);
760          else
761          {
762              size_t bufSize = rows*rows*sizeof(double);
763              AutoBuffer<uchar> buffer(bufSize);
764              Mat a(rows, rows, CV_64F, (uchar*)buffer);
765              mat.copyTo(a);
766  
767              result = hal::LU(a.ptr<double>(), a.step, rows, 0, 0, 0);
768              if( result )
769              {
770                  for( int i = 0; i < rows; i++ )
771                      result *= a.at<double>(i,i);
772                  result = 1./result;
773              }
774          }
775      }
776  
777      #undef Mf
778      #undef Md
779  
780      return result;
781  }
782  
783  /****************************************************************************************\
784  *                          Inverse (or pseudo-inverse) of a matrix                       *
785  \****************************************************************************************/
786  
787  #define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
788  #define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
789  #define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
790  #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
791  
invert(InputArray _src,OutputArray _dst,int method)792  double cv::invert( InputArray _src, OutputArray _dst, int method )
793  {
794      bool result = false;
795      Mat src = _src.getMat();
796      int type = src.type();
797  
798      CV_Assert(type == CV_32F || type == CV_64F);
799  
800      size_t esz = CV_ELEM_SIZE(type);
801      int m = src.rows, n = src.cols;
802  
803      if( method == DECOMP_SVD )
804      {
805          int nm = std::min(m, n);
806  
807          AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
808          uchar* buf = alignPtr((uchar*)_buf, (int)esz);
809          Mat u(m, nm, type, buf);
810          Mat w(nm, 1, type, u.ptr() + m*nm*esz);
811          Mat vt(nm, n, type, w.ptr() + nm*esz);
812  
813          SVD::compute(src, w, u, vt);
814          SVD::backSubst(w, u, vt, Mat(), _dst);
815          return type == CV_32F ?
816              (w.ptr<float>()[0] >= FLT_EPSILON ?
817               w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
818              (w.ptr<double>()[0] >= DBL_EPSILON ?
819               w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
820      }
821  
822      CV_Assert( m == n );
823  
824      if( method == DECOMP_EIG )
825      {
826          AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
827          uchar* buf = alignPtr((uchar*)_buf, (int)esz);
828          Mat u(n, n, type, buf);
829          Mat w(n, 1, type, u.ptr() + n*n*esz);
830          Mat vt(n, n, type, w.ptr() + n*esz);
831  
832          eigen(src, w, vt);
833          transpose(vt, u);
834          SVD::backSubst(w, u, vt, Mat(), _dst);
835          return type == CV_32F ?
836          (w.ptr<float>()[0] >= FLT_EPSILON ?
837           w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
838          (w.ptr<double>()[0] >= DBL_EPSILON ?
839           w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
840      }
841  
842      CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
843  
844      _dst.create( n, n, type );
845      Mat dst = _dst.getMat();
846  
847      if( n <= 3 )
848      {
849          const uchar* srcdata = src.ptr();
850          uchar* dstdata = dst.ptr();
851          size_t srcstep = src.step;
852          size_t dststep = dst.step;
853  
854          if( n == 2 )
855          {
856              if( type == CV_32FC1 )
857              {
858                  double d = det2(Sf);
859                  if( d != 0. )
860                  {
861                      result = true;
862                      d = 1./d;
863  
864                      #if CV_SSE2
865                          if(USE_SSE2)
866                          {
867                              __m128 zero = _mm_setzero_ps();
868                              __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1)
869                              __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1)
870                              __m128 s0 = _mm_or_ps(t0, t1);
871                              __m128 det =_mm_set1_ps((float)d);
872                              s0 =  _mm_mul_ps(s0, det);
873                              static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0};
874                              __m128 pattern = _mm_load_ps((const float*)inv);
875                              s0 = _mm_xor_ps(s0, pattern);//==-1*s0
876                              s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3));
877                              _mm_storel_pi((__m64*)dstdata, s0);
878                              _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0);
879                          }
880                          else
881                      #endif
882                          {
883                          double t0, t1;
884                          t0 = Sf(0,0)*d;
885                          t1 = Sf(1,1)*d;
886                          Df(1,1) = (float)t0;
887                          Df(0,0) = (float)t1;
888                          t0 = -Sf(0,1)*d;
889                          t1 = -Sf(1,0)*d;
890                          Df(0,1) = (float)t0;
891                          Df(1,0) = (float)t1;
892                          }
893  
894                  }
895              }
896              else
897              {
898                  double d = det2(Sd);
899                  if( d != 0. )
900                  {
901                      result = true;
902                      d = 1./d;
903                      #if CV_SSE2
904                          if(USE_SSE2)
905                          {
906                              __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1)
907                              __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1)
908                              __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal
909                              __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal
910                              __m128d det = _mm_load1_pd((const double*)&d);
911                              sm =  _mm_mul_pd(sm, det);
912  
913                              static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80};
914                              __m128d pattern = _mm_load1_pd((double*)inv);
915                              ss = _mm_mul_pd(ss, det);
916                              ss = _mm_xor_pd(ss, pattern);//==-1*ss
917  
918                              s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1));
919                              s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1));
920                              _mm_storeu_pd((double*)dstdata, s0);
921                              _mm_storeu_pd((double*)(dstdata+dststep), s1);
922                          }
923                          else
924                      #endif
925                          {
926                              double t0, t1;
927                              t0 = Sd(0,0)*d;
928                              t1 = Sd(1,1)*d;
929                              Dd(1,1) = t0;
930                              Dd(0,0) = t1;
931                              t0 = -Sd(0,1)*d;
932                              t1 = -Sd(1,0)*d;
933                              Dd(0,1) = t0;
934                              Dd(1,0) = t1;
935                          }
936                  }
937              }
938          }
939          else if( n == 3 )
940          {
941              if( type == CV_32FC1 )
942              {
943                  double d = det3(Sf);
944  
945                  if( d != 0. )
946                  {
947                      double t[12];
948  
949                      result = true;
950                      d = 1./d;
951                      t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
952                      t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
953                      t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
954  
955                      t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
956                      t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
957                      t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
958  
959                      t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
960                      t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
961                      t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
962  
963                      Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
964                      Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
965                      Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
966                  }
967              }
968              else
969              {
970                  double d = det3(Sd);
971                  if( d != 0. )
972                  {
973                      result = true;
974                      d = 1./d;
975                      double t[9];
976  
977                      t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
978                      t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
979                      t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
980  
981                      t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
982                      t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
983                      t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
984  
985                      t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
986                      t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
987                      t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;
988  
989                      Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
990                      Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
991                      Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
992                  }
993              }
994          }
995          else
996          {
997              assert( n == 1 );
998  
999              if( type == CV_32FC1 )
1000              {
1001                  double d = Sf(0,0);
1002                  if( d != 0. )
1003                  {
1004                      result = true;
1005                      Df(0,0) = (float)(1./d);
1006                  }
1007              }
1008              else
1009              {
1010                  double d = Sd(0,0);
1011                  if( d != 0. )
1012                  {
1013                      result = true;
1014                      Dd(0,0) = 1./d;
1015                  }
1016              }
1017          }
1018          if( !result )
1019              dst = Scalar(0);
1020          return result;
1021      }
1022  
1023     int elem_size = CV_ELEM_SIZE(type);
1024      AutoBuffer<uchar> buf(n*n*elem_size);
1025      Mat src1(n, n, type, (uchar*)buf);
1026      src.copyTo(src1);
1027      setIdentity(dst);
1028  
1029      if( method == DECOMP_LU && type == CV_32F )
1030          result = hal::LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
1031      else if( method == DECOMP_LU && type == CV_64F )
1032          result = hal::LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
1033      else if( method == DECOMP_CHOLESKY && type == CV_32F )
1034          result = hal::Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
1035      else
1036          result = hal::Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
1037  
1038      if( !result )
1039          dst = Scalar(0);
1040  
1041      return result;
1042  }
1043  
1044  
1045  
1046  /****************************************************************************************\
1047  *                              Solving a linear system                                   *
1048  \****************************************************************************************/
1049  
solve(InputArray _src,InputArray _src2arg,OutputArray _dst,int method)1050  bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
1051  {
1052      bool result = true;
1053      Mat src = _src.getMat(), _src2 = _src2arg.getMat();
1054      int type = src.type();
1055      bool is_normal = (method & DECOMP_NORMAL) != 0;
1056  
1057      CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );
1058  
1059      method &= ~DECOMP_NORMAL;
1060      CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
1061          is_normal || src.rows == src.cols );
1062  
1063      // check case of a single equation and small matrix
1064      if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
1065          src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
1066      {
1067          _dst.create( src.cols, _src2.cols, src.type() );
1068          Mat dst = _dst.getMat();
1069  
1070          #define bf(y) ((float*)(bdata + y*src2step))[0]
1071          #define bd(y) ((double*)(bdata + y*src2step))[0]
1072  
1073          const uchar* srcdata = src.ptr();
1074          const uchar* bdata = _src2.ptr();
1075          uchar* dstdata = dst.ptr();
1076          size_t srcstep = src.step;
1077          size_t src2step = _src2.step;
1078          size_t dststep = dst.step;
1079  
1080          if( src.rows == 2 )
1081          {
1082              if( type == CV_32FC1 )
1083              {
1084                  double d = det2(Sf);
1085                  if( d != 0. )
1086                  {
1087                      double t;
1088                      d = 1./d;
1089                      t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
1090                      Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
1091                      Df(0,0) = (float)t;
1092                  }
1093                  else
1094                      result = false;
1095              }
1096              else
1097              {
1098                  double d = det2(Sd);
1099                  if( d != 0. )
1100                  {
1101                      double t;
1102                      d = 1./d;
1103                      t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
1104                      Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
1105                      Dd(0,0) = t;
1106                  }
1107                  else
1108                      result = false;
1109              }
1110          }
1111          else if( src.rows == 3 )
1112          {
1113              if( type == CV_32FC1 )
1114              {
1115                  double d = det3(Sf);
1116                  if( d != 0. )
1117                  {
1118                      float t[3];
1119                      d = 1./d;
1120  
1121                      t[0] = (float)(d*
1122                             (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
1123                              Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
1124                              Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
1125  
1126                      t[1] = (float)(d*
1127                             (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
1128                              bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
1129                              Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
1130  
1131                      t[2] = (float)(d*
1132                             (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
1133                              Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
1134                              bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
1135  
1136                      Df(0,0) = t[0];
1137                      Df(1,0) = t[1];
1138                      Df(2,0) = t[2];
1139                  }
1140                  else
1141                      result = false;
1142              }
1143              else
1144              {
1145                  double d = det3(Sd);
1146                  if( d != 0. )
1147                  {
1148                      double t[9];
1149  
1150                      d = 1./d;
1151  
1152                      t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
1153                              (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
1154                              (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;
1155  
1156                      t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
1157                              (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
1158                              (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;
1159  
1160                      t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
1161                              (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
1162                              (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;
1163  
1164                      Dd(0,0) = t[0];
1165                      Dd(1,0) = t[1];
1166                      Dd(2,0) = t[2];
1167                  }
1168                  else
1169                      result = false;
1170              }
1171          }
1172          else
1173          {
1174              assert( src.rows == 1 );
1175  
1176              if( type == CV_32FC1 )
1177              {
1178                  double d = Sf(0,0);
1179                  if( d != 0. )
1180                      Df(0,0) = (float)(bf(0)/d);
1181                  else
1182                      result = false;
1183              }
1184              else
1185              {
1186                  double d = Sd(0,0);
1187                  if( d != 0. )
1188                      Dd(0,0) = (bd(0)/d);
1189                  else
1190                      result = false;
1191              }
1192          }
1193          return result;
1194      }
1195  
1196      if( method == DECOMP_QR )
1197          method = DECOMP_SVD;
1198  
1199      int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
1200      size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
1201      size_t vstep = alignSize(n*esz, 16);
1202      size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
1203      AutoBuffer<uchar> buffer;
1204  
1205      Mat src2 = _src2;
1206      _dst.create( src.cols, src2.cols, src.type() );
1207      Mat dst = _dst.getMat();
1208  
1209      if( m < n )
1210          CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );
1211  
1212      if( m == n )
1213          is_normal = false;
1214      else if( is_normal )
1215      {
1216          m_ = n;
1217          if( method == DECOMP_SVD )
1218              method = DECOMP_EIG;
1219      }
1220  
1221      size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
1222      bufsize += asize + 32;
1223  
1224      if( is_normal )
1225          bufsize += n*nb*esz;
1226  
1227      if( method == DECOMP_SVD || method == DECOMP_EIG )
1228          bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
1229  
1230      buffer.allocate(bufsize);
1231      uchar* ptr = alignPtr((uchar*)buffer, 16);
1232  
1233      Mat a(m_, n, type, ptr, astep);
1234  
1235      if( is_normal )
1236          mulTransposed(src, a, true);
1237      else if( method != DECOMP_SVD )
1238          src.copyTo(a);
1239      else
1240      {
1241          a = Mat(n, m_, type, ptr, astep);
1242          transpose(src, a);
1243      }
1244      ptr += asize;
1245  
1246      if( !is_normal )
1247      {
1248          if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1249              src2.copyTo(dst);
1250      }
1251      else
1252      {
1253          // a'*b
1254          if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1255              gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
1256          else
1257          {
1258              Mat tmp(n, nb, type, ptr);
1259              ptr += n*nb*esz;
1260              gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
1261              src2 = tmp;
1262          }
1263      }
1264  
1265      if( method == DECOMP_LU )
1266      {
1267          if( type == CV_32F )
1268              result = hal::LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
1269          else
1270              result = hal::LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
1271      }
1272      else if( method == DECOMP_CHOLESKY )
1273      {
1274          if( type == CV_32F )
1275              result = hal::Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
1276          else
1277              result = hal::Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
1278      }
1279      else
1280      {
1281          ptr = alignPtr(ptr, 16);
1282          Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
1283          ptr += n*(vstep + esz);
1284  
1285          if( method == DECOMP_EIG )
1286          {
1287              if( type == CV_32F )
1288                  Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
1289              else
1290                  Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1291              u = v;
1292          }
1293          else
1294          {
1295              if( type == CV_32F )
1296                  JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
1297              else
1298                  JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
1299              u = a;
1300          }
1301  
1302          if( type == CV_32F )
1303          {
1304              SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
1305                     v.ptr<float>(), v.step, true, src2.ptr<float>(),
1306                     src2.step, nb, dst.ptr<float>(), dst.step, ptr);
1307          }
1308          else
1309          {
1310              SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
1311                     v.ptr<double>(), v.step, true, src2.ptr<double>(),
1312                     src2.step, nb, dst.ptr<double>(), dst.step, ptr);
1313          }
1314          result = true;
1315      }
1316  
1317      if( !result )
1318          dst = Scalar(0);
1319  
1320      return result;
1321  }
1322  
1323  
1324  /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
1325  
eigen(InputArray _src,OutputArray _evals,OutputArray _evects)1326  bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
1327  {
1328      Mat src = _src.getMat();
1329      int type = src.type();
1330      int n = src.rows;
1331  
1332      CV_Assert( src.rows == src.cols );
1333      CV_Assert (type == CV_32F || type == CV_64F);
1334  
1335      Mat v;
1336      if( _evects.needed() )
1337      {
1338          _evects.create(n, n, type);
1339          v = _evects.getMat();
1340      }
1341  
1342      size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
1343      AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
1344      uchar* ptr = alignPtr((uchar*)buf, 16);
1345      Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
1346      ptr += astep*n + elemSize*n;
1347      src.copyTo(a);
1348      bool ok = type == CV_32F ?
1349          Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
1350          Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1351  
1352      w.copyTo(_evals);
1353      return ok;
1354  }
1355  
1356  namespace cv
1357  {
1358  
_SVDcompute(InputArray _aarr,OutputArray _w,OutputArray _u,OutputArray _vt,int flags)1359  static void _SVDcompute( InputArray _aarr, OutputArray _w,
1360                           OutputArray _u, OutputArray _vt, int flags )
1361  {
1362      Mat src = _aarr.getMat();
1363      int m = src.rows, n = src.cols;
1364      int type = src.type();
1365      bool compute_uv = _u.needed() || _vt.needed();
1366      bool full_uv = (flags & SVD::FULL_UV) != 0;
1367  
1368      CV_Assert( type == CV_32F || type == CV_64F );
1369  
1370      if( flags & SVD::NO_UV )
1371      {
1372          _u.release();
1373          _vt.release();
1374          compute_uv = full_uv = false;
1375      }
1376  
1377      bool at = false;
1378      if( m < n )
1379      {
1380          std::swap(m, n);
1381          at = true;
1382      }
1383  
1384      int urows = full_uv ? m : n;
1385      size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
1386      AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
1387      uchar* buf = alignPtr((uchar*)_buf, 16);
1388      Mat temp_a(n, m, type, buf, astep);
1389      Mat temp_w(n, 1, type, buf + urows*astep);
1390      Mat temp_u(urows, m, type, buf, astep), temp_v;
1391  
1392      if( compute_uv )
1393          temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
1394  
1395      if( urows > n )
1396          temp_u = Scalar::all(0);
1397  
1398      if( !at )
1399          transpose(src, temp_a);
1400      else
1401          src.copyTo(temp_a);
1402  
1403      if( type == CV_32F )
1404      {
1405          JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
1406                temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
1407      }
1408      else
1409      {
1410          JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
1411                temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
1412      }
1413      temp_w.copyTo(_w);
1414      if( compute_uv )
1415      {
1416          if( !at )
1417          {
1418              if( _u.needed() )
1419                  transpose(temp_u, _u);
1420              if( _vt.needed() )
1421                  temp_v.copyTo(_vt);
1422          }
1423          else
1424          {
1425              if( _u.needed() )
1426                  transpose(temp_v, _u);
1427              if( _vt.needed() )
1428                  temp_u.copyTo(_vt);
1429          }
1430      }
1431  }
1432  
1433  
compute(InputArray a,OutputArray w,OutputArray u,OutputArray vt,int flags)1434  void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
1435  {
1436      _SVDcompute(a, w, u, vt, flags);
1437  }
1438  
compute(InputArray a,OutputArray w,int flags)1439  void SVD::compute( InputArray a, OutputArray w, int flags )
1440  {
1441      _SVDcompute(a, w, noArray(), noArray(), flags);
1442  }
1443  
backSubst(InputArray _w,InputArray _u,InputArray _vt,InputArray _rhs,OutputArray _dst)1444  void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
1445                       InputArray _rhs, OutputArray _dst )
1446  {
1447      Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
1448      int type = w.type(), esz = (int)w.elemSize();
1449      int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
1450      size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
1451      AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
1452      CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
1453      CV_Assert( u.cols >= nm && vt.rows >= nm &&
1454                (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
1455      CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
1456  
1457      _dst.create( n, nb, type );
1458      Mat dst = _dst.getMat();
1459      if( type == CV_32F )
1460          SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
1461                 vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
1462                 dst.ptr<float>(), dst.step, buffer);
1463      else if( type == CV_64F )
1464          SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
1465                 vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
1466                 dst.ptr<double>(), dst.step, buffer);
1467      else
1468          CV_Error( CV_StsUnsupportedFormat, "" );
1469  }
1470  
1471  
operator ()(InputArray a,int flags)1472  SVD& SVD::operator ()(InputArray a, int flags)
1473  {
1474      _SVDcompute(a, w, u, vt, flags);
1475      return *this;
1476  }
1477  
1478  
backSubst(InputArray rhs,OutputArray dst) const1479  void SVD::backSubst( InputArray rhs, OutputArray dst ) const
1480  {
1481      backSubst( w, u, vt, rhs, dst );
1482  }
1483  
1484  }
1485  
1486  
SVDecomp(InputArray src,OutputArray w,OutputArray u,OutputArray vt,int flags)1487  void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
1488  {
1489      SVD::compute(src, w, u, vt, flags);
1490  }
1491  
SVBackSubst(InputArray w,InputArray u,InputArray vt,InputArray rhs,OutputArray dst)1492  void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
1493  {
1494      SVD::backSubst(w, u, vt, rhs, dst);
1495  }
1496  
1497  
1498  CV_IMPL double
cvDet(const CvArr * arr)1499  cvDet( const CvArr* arr )
1500  {
1501      if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
1502      {
1503          CvMat* mat = (CvMat*)arr;
1504          int type = CV_MAT_TYPE(mat->type);
1505          int rows = mat->rows;
1506          uchar* m = mat->data.ptr;
1507          int step = mat->step;
1508          CV_Assert( rows == mat->cols );
1509  
1510          #define Mf(y, x) ((float*)(m + y*step))[x]
1511          #define Md(y, x) ((double*)(m + y*step))[x]
1512  
1513          if( type == CV_32F )
1514          {
1515              if( rows == 2 )
1516                  return det2(Mf);
1517              if( rows == 3 )
1518                  return det3(Mf);
1519          }
1520          else if( type == CV_64F )
1521          {
1522              if( rows == 2 )
1523                  return det2(Md);
1524              if( rows == 3 )
1525                  return det3(Md);
1526          }
1527      }
1528      return cv::determinant(cv::cvarrToMat(arr));
1529  }
1530  
1531  
1532  CV_IMPL double
cvInvert(const CvArr * srcarr,CvArr * dstarr,int method)1533  cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
1534  {
1535      cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1536  
1537      CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
1538      return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1539                        method == CV_SVD ? cv::DECOMP_SVD :
1540                        method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
1541  }
1542  
1543  
1544  CV_IMPL int
cvSolve(const CvArr * Aarr,const CvArr * barr,CvArr * xarr,int method)1545  cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
1546  {
1547      cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);
1548  
1549      CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
1550      bool is_normal = (method & CV_NORMAL) != 0;
1551      method &= ~CV_NORMAL;
1552      return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1553                                  method == CV_SVD ? cv::DECOMP_SVD :
1554                                  method == CV_SVD_SYM ? cv::DECOMP_EIG :
1555          A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
1556  }
1557  
1558  
1559  CV_IMPL void
cvEigenVV(CvArr * srcarr,CvArr * evectsarr,CvArr * evalsarr,double,int,int)1560  cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
1561             int, int )
1562  {
1563      cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
1564      if( evectsarr )
1565      {
1566          cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
1567          eigen(src, evals, evects);
1568          if( evects0.data != evects.data )
1569          {
1570              const uchar* p = evects0.ptr();
1571              evects.convertTo(evects0, evects0.type());
1572              CV_Assert( p == evects0.ptr() );
1573          }
1574      }
1575      else
1576          eigen(src, evals);
1577      if( evals0.data != evals.data )
1578      {
1579          const uchar* p = evals0.ptr();
1580          if( evals0.size() == evals.size() )
1581              evals.convertTo(evals0, evals0.type());
1582          else if( evals0.type() == evals.type() )
1583              cv::transpose(evals, evals0);
1584          else
1585              cv::Mat(evals.t()).convertTo(evals0, evals0.type());
1586          CV_Assert( p == evals0.ptr() );
1587      }
1588  }
1589  
1590  
1591  CV_IMPL void
cvSVD(CvArr * aarr,CvArr * warr,CvArr * uarr,CvArr * varr,int flags)1592  cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
1593  {
1594      cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
1595      int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);
1596  
1597      CV_Assert( w.type() == type &&
1598          (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
1599          w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );
1600  
1601      cv::SVD svd;
1602  
1603      if( w.size() == cv::Size(nm, 1) )
1604          svd.w = cv::Mat(nm, 1, type, w.ptr() );
1605      else if( w.isContinuous() )
1606          svd.w = w;
1607  
1608      if( uarr )
1609      {
1610          u = cv::cvarrToMat(uarr);
1611          CV_Assert( u.type() == type );
1612          svd.u = u;
1613      }
1614  
1615      if( varr )
1616      {
1617          v = cv::cvarrToMat(varr);
1618          CV_Assert( v.type() == type );
1619          svd.vt = v;
1620      }
1621  
1622      svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
1623          ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
1624          ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
1625          svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));
1626  
1627      if( !u.empty() )
1628      {
1629          if( flags & CV_SVD_U_T )
1630              cv::transpose( svd.u, u );
1631          else if( u.data != svd.u.data )
1632          {
1633              CV_Assert( u.size() == svd.u.size() );
1634              svd.u.copyTo(u);
1635          }
1636      }
1637  
1638      if( !v.empty() )
1639      {
1640          if( !(flags & CV_SVD_V_T) )
1641              cv::transpose( svd.vt, v );
1642          else if( v.data != svd.vt.data )
1643          {
1644              CV_Assert( v.size() == svd.vt.size() );
1645              svd.vt.copyTo(v);
1646          }
1647      }
1648  
1649      if( w.data != svd.w.data )
1650      {
1651          if( w.size() == svd.w.size() )
1652              svd.w.copyTo(w);
1653          else
1654          {
1655              w = cv::Scalar(0);
1656              cv::Mat wd = w.diag();
1657              svd.w.copyTo(wd);
1658          }
1659      }
1660  }
1661  
1662  
1663  CV_IMPL void
cvSVBkSb(const CvArr * warr,const CvArr * uarr,const CvArr * varr,const CvArr * rhsarr,CvArr * dstarr,int flags)1664  cvSVBkSb( const CvArr* warr, const CvArr* uarr,
1665            const CvArr* varr, const CvArr* rhsarr,
1666            CvArr* dstarr, int flags )
1667  {
1668      cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
1669          v = cv::cvarrToMat(varr), rhs,
1670          dst = cv::cvarrToMat(dstarr), dst0 = dst;
1671      if( flags & CV_SVD_U_T )
1672      {
1673          cv::Mat tmp;
1674          transpose(u, tmp);
1675          u = tmp;
1676      }
1677      if( !(flags & CV_SVD_V_T) )
1678      {
1679          cv::Mat tmp;
1680          transpose(v, tmp);
1681          v = tmp;
1682      }
1683      if( rhsarr )
1684          rhs = cv::cvarrToMat(rhsarr);
1685  
1686      cv::SVD::backSubst(w, u, v, rhs, dst);
1687      CV_Assert( dst.data == dst0.data );
1688  }
1689