• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 #include "precomp.hpp"
43 #include <float.h>
44 #include <stdio.h>
45 #include "lkpyramid.hpp"
46 #include "opencl_kernels_video.hpp"
47 
48 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
49 
50 namespace
51 {
calcSharrDeriv(const cv::Mat & src,cv::Mat & dst)52 static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
53 {
54     using namespace cv;
55     using cv::detail::deriv_type;
56     int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
57     CV_Assert(depth == CV_8U);
58     dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
59 
60 #ifdef HAVE_TEGRA_OPTIMIZATION
61     if (tegra::useTegra() && tegra::calcSharrDeriv(src, dst))
62         return;
63 #endif
64 
65     int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
66     AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
67     deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16);
68 
69 #if CV_SSE2
70     __m128i z = _mm_setzero_si128(), c3 = _mm_set1_epi16(3), c10 = _mm_set1_epi16(10);
71 #endif
72 
73 #if CV_NEON
74     const uint16x8_t q8 = vdupq_n_u16(3);
75     const uint8x8_t d18 = vdup_n_u8(10);
76 
77     const int16x8_t q8i = vdupq_n_s16(3);
78     const int16x8_t q9 = vdupq_n_s16(10);
79 #endif
80 
81     for( y = 0; y < rows; y++ )
82     {
83         const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
84         const uchar* srow1 = src.ptr<uchar>(y);
85         const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
86         deriv_type* drow = dst.ptr<deriv_type>(y);
87 
88         // do vertical convolution
89         x = 0;
90 #if CV_SSE2
91         for( ; x <= colsn - 8; x += 8 )
92         {
93             __m128i s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow0 + x)), z);
94             __m128i s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow1 + x)), z);
95             __m128i s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow2 + x)), z);
96             __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s0, s2), c3), _mm_mullo_epi16(s1, c10));
97             __m128i t1 = _mm_sub_epi16(s2, s0);
98             _mm_store_si128((__m128i*)(trow0 + x), t0);
99             _mm_store_si128((__m128i*)(trow1 + x), t1);
100         }
101 #endif
102 
103 #if CV_NEON
104         for( ; x <= colsn - 8; x += 8)
105         {
106             uint8x8_t d0 = vld1_u8((const uint8_t*)&srow0[x]);
107             uint8x8_t d1 = vld1_u8((const uint8_t*)&srow1[x]);
108             uint8x8_t d2 = vld1_u8((const uint8_t*)&srow2[x]);
109             uint16x8_t q4 = vaddl_u8(d0, d2);
110             uint16x8_t q11 = vsubl_u8(d2, d0);
111             uint16x8_t q5 = vmulq_u16(q4, q8);
112             uint16x8_t q6 = vmull_u8(d1, d18);
113             uint16x8_t q10 = vaddq_u16(q6, q5);
114             vst1q_u16((uint16_t*)&trow0[x], q10);
115             vst1q_u16((uint16_t*)&trow1[x], q11);
116 
117         }
118 #endif
119 
120         for( ; x < colsn; x++ )
121         {
122             int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10;
123             int t1 = srow2[x] - srow0[x];
124             trow0[x] = (deriv_type)t0;
125             trow1[x] = (deriv_type)t1;
126         }
127 
128         // make border
129         int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn;
130         for( int k = 0; k < cn; k++ )
131         {
132             trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
133             trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
134         }
135 
136         // do horizontal convolution, interleave the results and store them to dst
137         x = 0;
138 #if CV_SSE2
139         for( ; x <= colsn - 8; x += 8 )
140         {
141             __m128i s0 = _mm_loadu_si128((const __m128i*)(trow0 + x - cn));
142             __m128i s1 = _mm_loadu_si128((const __m128i*)(trow0 + x + cn));
143             __m128i s2 = _mm_loadu_si128((const __m128i*)(trow1 + x - cn));
144             __m128i s3 = _mm_load_si128((const __m128i*)(trow1 + x));
145             __m128i s4 = _mm_loadu_si128((const __m128i*)(trow1 + x + cn));
146 
147             __m128i t0 = _mm_sub_epi16(s1, s0);
148             __m128i t1 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s2, s4), c3), _mm_mullo_epi16(s3, c10));
149             __m128i t2 = _mm_unpacklo_epi16(t0, t1);
150             t0 = _mm_unpackhi_epi16(t0, t1);
151             // this can probably be replaced with aligned stores if we aligned dst properly.
152             _mm_storeu_si128((__m128i*)(drow + x*2), t2);
153             _mm_storeu_si128((__m128i*)(drow + x*2 + 8), t0);
154         }
155 #endif
156 
157 #if CV_NEON
158         for( ; x <= colsn - 8; x += 8 )
159         {
160 
161             int16x8_t q0 = vld1q_s16((const int16_t*)&trow0[x+cn]);
162             int16x8_t q1 = vld1q_s16((const int16_t*)&trow0[x-cn]);
163             int16x8_t q2 = vld1q_s16((const int16_t*)&trow1[x+cn]);
164             int16x8_t q3 = vld1q_s16((const int16_t*)&trow1[x-cn]);
165             int16x8_t q5 = vsubq_s16(q0, q1);
166             int16x8_t q6 = vaddq_s16(q2, q3);
167             int16x8_t q4 = vld1q_s16((const int16_t*)&trow1[x]);
168             int16x8_t q7 = vmulq_s16(q6, q8i);
169             int16x8_t q10 = vmulq_s16(q4, q9);
170             int16x8_t q11 = vaddq_s16(q7, q10);
171             int16x4_t d22 = vget_low_s16(q11);
172             int16x4_t d23 = vget_high_s16(q11);
173             int16x4_t d11 = vget_high_s16(q5);
174             int16x4_t d10 = vget_low_s16(q5);
175             int16x4x2_t q5x2, q11x2;
176             q5x2.val[0] = d10; q5x2.val[1] = d22;
177             q11x2.val[0] = d11; q11x2.val[1] = d23;
178             vst2_s16((int16_t*)&drow[x*2], q5x2);
179             vst2_s16((int16_t*)&drow[(x*2)+8], q11x2);
180 
181         }
182 #endif
183         for( ; x < colsn; x++ )
184         {
185             deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]);
186             deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
187             drow[x*2] = t0; drow[x*2+1] = t1;
188         }
189     }
190 }
191 
192 }//namespace
193 
LKTrackerInvoker(const Mat & _prevImg,const Mat & _prevDeriv,const Mat & _nextImg,const Point2f * _prevPts,Point2f * _nextPts,uchar * _status,float * _err,Size _winSize,TermCriteria _criteria,int _level,int _maxLevel,int _flags,float _minEigThreshold)194 cv::detail::LKTrackerInvoker::LKTrackerInvoker(
195                       const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
196                       const Point2f* _prevPts, Point2f* _nextPts,
197                       uchar* _status, float* _err,
198                       Size _winSize, TermCriteria _criteria,
199                       int _level, int _maxLevel, int _flags, float _minEigThreshold )
200 {
201     prevImg = &_prevImg;
202     prevDeriv = &_prevDeriv;
203     nextImg = &_nextImg;
204     prevPts = _prevPts;
205     nextPts = _nextPts;
206     status = _status;
207     err = _err;
208     winSize = _winSize;
209     criteria = _criteria;
210     level = _level;
211     maxLevel = _maxLevel;
212     flags = _flags;
213     minEigThreshold = _minEigThreshold;
214 }
215 
216 #if defined __arm__ && !CV_NEON
217 typedef int64 acctype;
218 typedef int itemtype;
219 #else
220 typedef float acctype;
221 typedef float itemtype;
222 #endif
223 
operator ()(const Range & range) const224 void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
225 {
226     Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
227     const Mat& I = *prevImg;
228     const Mat& J = *nextImg;
229     const Mat& derivI = *prevDeriv;
230 
231     int j, cn = I.channels(), cn2 = cn*2;
232     cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
233     int derivDepth = DataType<deriv_type>::depth;
234 
235     Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
236     Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
237 
238     for( int ptidx = range.start; ptidx < range.end; ptidx++ )
239     {
240         Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
241         Point2f nextPt;
242         if( level == maxLevel )
243         {
244             if( flags & OPTFLOW_USE_INITIAL_FLOW )
245                 nextPt = nextPts[ptidx]*(float)(1./(1 << level));
246             else
247                 nextPt = prevPt;
248         }
249         else
250             nextPt = nextPts[ptidx]*2.f;
251         nextPts[ptidx] = nextPt;
252 
253         Point2i iprevPt, inextPt;
254         prevPt -= halfWin;
255         iprevPt.x = cvFloor(prevPt.x);
256         iprevPt.y = cvFloor(prevPt.y);
257 
258         if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
259             iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
260         {
261             if( level == 0 )
262             {
263                 if( status )
264                     status[ptidx] = false;
265                 if( err )
266                     err[ptidx] = 0;
267             }
268             continue;
269         }
270 
271         float a = prevPt.x - iprevPt.x;
272         float b = prevPt.y - iprevPt.y;
273         const int W_BITS = 14, W_BITS1 = 14;
274         const float FLT_SCALE = 1.f/(1 << 20);
275         int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
276         int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
277         int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
278         int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
279 
280         int dstep = (int)(derivI.step/derivI.elemSize1());
281         int stepI = (int)(I.step/I.elemSize1());
282         int stepJ = (int)(J.step/J.elemSize1());
283         acctype iA11 = 0, iA12 = 0, iA22 = 0;
284         float A11, A12, A22;
285 
286 #if CV_SSE2
287         __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
288         __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
289         __m128i z = _mm_setzero_si128();
290         __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
291         __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
292         __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
293 #endif
294 
295 #if CV_NEON
296 
297         int CV_DECL_ALIGNED(16) nA11[] = {0, 0, 0, 0}, nA12[] = {0, 0, 0, 0}, nA22[] = {0, 0, 0, 0};
298         const int shifter1 = -(W_BITS - 5); //negative so it shifts right
299         const int shifter2 = -(W_BITS);
300 
301         const int16x4_t d26 = vdup_n_s16((int16_t)iw00);
302         const int16x4_t d27 = vdup_n_s16((int16_t)iw01);
303         const int16x4_t d28 = vdup_n_s16((int16_t)iw10);
304         const int16x4_t d29 = vdup_n_s16((int16_t)iw11);
305         const int32x4_t q11 = vdupq_n_s32((int32_t)shifter1);
306         const int32x4_t q12 = vdupq_n_s32((int32_t)shifter2);
307 
308 #endif
309 
310         // extract the patch from the first image, compute covariation matrix of derivatives
311         int x, y;
312         for( y = 0; y < winSize.height; y++ )
313         {
314             const uchar* src = I.ptr() + (y + iprevPt.y)*stepI + iprevPt.x*cn;
315             const deriv_type* dsrc = derivI.ptr<deriv_type>() + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
316 
317             deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
318             deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
319 
320             x = 0;
321 
322 #if CV_SSE2
323             for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
324             {
325                 __m128i v00, v01, v10, v11, t0, t1;
326 
327                 v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
328                 v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
329                 v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z);
330                 v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z);
331 
332                 t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
333                                    _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
334                 t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
335                 _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
336 
337                 v00 = _mm_loadu_si128((const __m128i*)(dsrc));
338                 v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
339                 v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
340                 v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
341 
342                 t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
343                                    _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
344                 t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
345                                    _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
346                 t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
347                 t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
348                 v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
349 
350                 _mm_storeu_si128((__m128i*)dIptr, v00);
351                 t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
352                 t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
353 
354                 __m128 fy = _mm_cvtepi32_ps(t0);
355                 __m128 fx = _mm_cvtepi32_ps(t1);
356 
357                 qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
358                 qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
359                 qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
360             }
361 #endif
362 
363 #if CV_NEON
364             for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
365             {
366 
367                 uint8x8_t d0 = vld1_u8(&src[x]);
368                 uint8x8_t d2 = vld1_u8(&src[x+cn]);
369                 uint16x8_t q0 = vmovl_u8(d0);
370                 uint16x8_t q1 = vmovl_u8(d2);
371 
372                 int32x4_t q5 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26);
373                 int32x4_t q6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27);
374 
375                 uint8x8_t d4 = vld1_u8(&src[x + stepI]);
376                 uint8x8_t d6 = vld1_u8(&src[x + stepI + cn]);
377                 uint16x8_t q2 = vmovl_u8(d4);
378                 uint16x8_t q3 = vmovl_u8(d6);
379 
380                 int32x4_t q7 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28);
381                 int32x4_t q8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29);
382 
383                 q5 = vaddq_s32(q5, q6);
384                 q7 = vaddq_s32(q7, q8);
385                 q5 = vaddq_s32(q5, q7);
386 
387                 int16x4x2_t d0d1 = vld2_s16(dsrc);
388                 int16x4x2_t d2d3 = vld2_s16(&dsrc[cn2]);
389 
390                 q5 = vqrshlq_s32(q5, q11);
391 
392                 int32x4_t q4 = vmull_s16(d0d1.val[0], d26);
393                 q6 = vmull_s16(d0d1.val[1], d26);
394 
395                 int16x4_t nd0 = vmovn_s32(q5);
396 
397                 q7 = vmull_s16(d2d3.val[0], d27);
398                 q8 = vmull_s16(d2d3.val[1], d27);
399 
400                 vst1_s16(&Iptr[x], nd0);
401 
402                 int16x4x2_t d4d5 = vld2_s16(&dsrc[dstep]);
403                 int16x4x2_t d6d7 = vld2_s16(&dsrc[dstep+cn2]);
404 
405                 q4 = vaddq_s32(q4, q7);
406                 q6 = vaddq_s32(q6, q8);
407 
408                 q7 = vmull_s16(d4d5.val[0], d28);
409                 int32x4_t nq0 = vmull_s16(d4d5.val[1], d28);
410                 q8 = vmull_s16(d6d7.val[0], d29);
411                 int32x4_t q15 = vmull_s16(d6d7.val[1], d29);
412 
413                 q7 = vaddq_s32(q7, q8);
414                 nq0 = vaddq_s32(nq0, q15);
415 
416                 q4 = vaddq_s32(q4, q7);
417                 q6 = vaddq_s32(q6, nq0);
418 
419                 int32x4_t nq1 = vld1q_s32(nA12);
420                 int32x4_t nq2 = vld1q_s32(nA22);
421                 nq0 = vld1q_s32(nA11);
422 
423                 q4 = vqrshlq_s32(q4, q12);
424                 q6 = vqrshlq_s32(q6, q12);
425 
426                 q7 = vmulq_s32(q4, q4);
427                 q8 = vmulq_s32(q4, q6);
428                 q15 = vmulq_s32(q6, q6);
429 
430                 nq0 = vaddq_s32(nq0, q7);
431                 nq1 = vaddq_s32(nq1, q8);
432                 nq2 = vaddq_s32(nq2, q15);
433 
434                 vst1q_s32(nA11, nq0);
435                 vst1q_s32(nA12, nq1);
436                 vst1q_s32(nA22, nq2);
437 
438                 int16x4_t d8 = vmovn_s32(q4);
439                 int16x4_t d12 = vmovn_s32(q6);
440 
441                 int16x4x2_t d8d12;
442                 d8d12.val[0] = d8; d8d12.val[1] = d12;
443                 vst2_s16(dIptr, d8d12);
444             }
445 #endif
446 
447             for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
448             {
449                 int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
450                                       src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
451                 int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
452                                        dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
453                 int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
454                                        dsrc[dstep+cn2+1]*iw11, W_BITS1);
455 
456                 Iptr[x] = (short)ival;
457                 dIptr[0] = (short)ixval;
458                 dIptr[1] = (short)iyval;
459 
460                 iA11 += (itemtype)(ixval*ixval);
461                 iA12 += (itemtype)(ixval*iyval);
462                 iA22 += (itemtype)(iyval*iyval);
463             }
464         }
465 
466 #if CV_SSE2
467         float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
468         _mm_store_ps(A11buf, qA11);
469         _mm_store_ps(A12buf, qA12);
470         _mm_store_ps(A22buf, qA22);
471         iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
472         iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
473         iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
474 #endif
475 
476 #if CV_NEON
477         iA11 += (float)(nA11[0] + nA11[1] + nA11[2] + nA11[3]);
478         iA12 += (float)(nA12[0] + nA12[1] + nA12[2] + nA12[3]);
479         iA22 += (float)(nA22[0] + nA22[1] + nA22[2] + nA22[3]);
480 #endif
481 
482         A11 = iA11*FLT_SCALE;
483         A12 = iA12*FLT_SCALE;
484         A22 = iA22*FLT_SCALE;
485 
486         float D = A11*A22 - A12*A12;
487         float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
488                         4.f*A12*A12))/(2*winSize.width*winSize.height);
489 
490         if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
491             err[ptidx] = (float)minEig;
492 
493         if( minEig < minEigThreshold || D < FLT_EPSILON )
494         {
495             if( level == 0 && status )
496                 status[ptidx] = false;
497             continue;
498         }
499 
500         D = 1.f/D;
501 
502         nextPt -= halfWin;
503         Point2f prevDelta;
504 
505         for( j = 0; j < criteria.maxCount; j++ )
506         {
507             inextPt.x = cvFloor(nextPt.x);
508             inextPt.y = cvFloor(nextPt.y);
509 
510             if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
511                inextPt.y < -winSize.height || inextPt.y >= J.rows )
512             {
513                 if( level == 0 && status )
514                     status[ptidx] = false;
515                 break;
516             }
517 
518             a = nextPt.x - inextPt.x;
519             b = nextPt.y - inextPt.y;
520             iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
521             iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
522             iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
523             iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
524             acctype ib1 = 0, ib2 = 0;
525             float b1, b2;
526 #if CV_SSE2
527             qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
528             qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
529             __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
530 #endif
531 
532 #if CV_NEON
533             int CV_DECL_ALIGNED(16) nB1[] = {0,0,0,0}, nB2[] = {0,0,0,0};
534 
535             const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00);
536             const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01);
537             const int16x4_t d28_2 = vdup_n_s16((int16_t)iw10);
538             const int16x4_t d29_2 = vdup_n_s16((int16_t)iw11);
539 
540 #endif
541 
542             for( y = 0; y < winSize.height; y++ )
543             {
544                 const uchar* Jptr = J.ptr() + (y + inextPt.y)*stepJ + inextPt.x*cn;
545                 const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
546                 const deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
547 
548                 x = 0;
549 
550 #if CV_SSE2
551                 for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
552                 {
553                     __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
554                     __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
555                     __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
556                     __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z);
557                     __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z);
558 
559                     __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
560                                                _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
561                     __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
562                                                _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
563                     t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
564                     t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
565                     diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
566                     diff1 = _mm_unpackhi_epi16(diff0, diff0);
567                     diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
568                     v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
569                     v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
570                     v10 = _mm_mullo_epi16(v00, diff0);
571                     v11 = _mm_mulhi_epi16(v00, diff0);
572                     v00 = _mm_unpacklo_epi16(v10, v11);
573                     v10 = _mm_unpackhi_epi16(v10, v11);
574                     qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
575                     qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
576                     v10 = _mm_mullo_epi16(v01, diff1);
577                     v11 = _mm_mulhi_epi16(v01, diff1);
578                     v00 = _mm_unpacklo_epi16(v10, v11);
579                     v10 = _mm_unpackhi_epi16(v10, v11);
580                     qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
581                     qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
582                 }
583 #endif
584 
585 #if CV_NEON
586                 for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
587                 {
588 
589                     uint8x8_t d0 = vld1_u8(&Jptr[x]);
590                     uint8x8_t d2 = vld1_u8(&Jptr[x+cn]);
591                     uint8x8_t d4 = vld1_u8(&Jptr[x+stepJ]);
592                     uint8x8_t d6 = vld1_u8(&Jptr[x+stepJ+cn]);
593 
594                     uint16x8_t q0 = vmovl_u8(d0);
595                     uint16x8_t q1 = vmovl_u8(d2);
596                     uint16x8_t q2 = vmovl_u8(d4);
597                     uint16x8_t q3 = vmovl_u8(d6);
598 
599                     int32x4_t nq4 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26_2);
600                     int32x4_t nq5 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q0)), d26_2);
601 
602                     int32x4_t nq6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27_2);
603                     int32x4_t nq7 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q1)), d27_2);
604 
605                     int32x4_t nq8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28_2);
606                     int32x4_t nq9 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q2)), d28_2);
607 
608                     int32x4_t nq10 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29_2);
609                     int32x4_t nq11 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q3)), d29_2);
610 
611                     nq4 = vaddq_s32(nq4, nq6);
612                     nq5 = vaddq_s32(nq5, nq7);
613                     nq8 = vaddq_s32(nq8, nq10);
614                     nq9 = vaddq_s32(nq9, nq11);
615 
616                     int16x8_t q6 = vld1q_s16(&Iptr[x]);
617 
618                     nq4 = vaddq_s32(nq4, nq8);
619                     nq5 = vaddq_s32(nq5, nq9);
620 
621                     nq8 = vmovl_s16(vget_high_s16(q6));
622                     nq6 = vmovl_s16(vget_low_s16(q6));
623 
624                     nq4 = vqrshlq_s32(nq4, q11);
625                     nq5 = vqrshlq_s32(nq5, q11);
626 
627                     int16x8x2_t q0q1 = vld2q_s16(dIptr);
628                     nq11 = vld1q_s32(nB1);
629                     int32x4_t nq15 = vld1q_s32(nB2);
630 
631                     nq4 = vsubq_s32(nq4, nq6);
632                     nq5 = vsubq_s32(nq5, nq8);
633 
634                     int32x4_t nq2 = vmovl_s16(vget_low_s16(q0q1.val[0]));
635                     int32x4_t nq3 = vmovl_s16(vget_high_s16(q0q1.val[0]));
636 
637                     nq7 = vmovl_s16(vget_low_s16(q0q1.val[1]));
638                     nq8 = vmovl_s16(vget_high_s16(q0q1.val[1]));
639 
640                     nq9 = vmulq_s32(nq4, nq2);
641                     nq10 = vmulq_s32(nq5, nq3);
642 
643                     nq4 = vmulq_s32(nq4, nq7);
644                     nq5 = vmulq_s32(nq5, nq8);
645 
646                     nq9 = vaddq_s32(nq9, nq10);
647                     nq4 = vaddq_s32(nq4, nq5);
648 
649                     nq11 = vaddq_s32(nq11, nq9);
650                     nq15 = vaddq_s32(nq15, nq4);
651 
652                     vst1q_s32(nB1, nq11);
653                     vst1q_s32(nB2, nq15);
654                 }
655 #endif
656 
657                 for( ; x < winSize.width*cn; x++, dIptr += 2 )
658                 {
659                     int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
660                                           Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
661                                           W_BITS1-5) - Iptr[x];
662                     ib1 += (itemtype)(diff*dIptr[0]);
663                     ib2 += (itemtype)(diff*dIptr[1]);
664                 }
665             }
666 
667 #if CV_SSE2
668             float CV_DECL_ALIGNED(16) bbuf[4];
669             _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
670             ib1 += bbuf[0] + bbuf[2];
671             ib2 += bbuf[1] + bbuf[3];
672 #endif
673 
674 #if CV_NEON
675 
676             ib1 += (float)(nB1[0] + nB1[1] + nB1[2] + nB1[3]);
677             ib2 += (float)(nB2[0] + nB2[1] + nB2[2] + nB2[3]);
678 #endif
679 
680             b1 = ib1*FLT_SCALE;
681             b2 = ib2*FLT_SCALE;
682 
683             Point2f delta( (float)((A12*b2 - A22*b1) * D),
684                           (float)((A12*b1 - A11*b2) * D));
685             //delta = -delta;
686 
687             nextPt += delta;
688             nextPts[ptidx] = nextPt + halfWin;
689 
690             if( delta.ddot(delta) <= criteria.epsilon )
691                 break;
692 
693             if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
694                std::abs(delta.y + prevDelta.y) < 0.01 )
695             {
696                 nextPts[ptidx] -= delta*0.5f;
697                 break;
698             }
699             prevDelta = delta;
700         }
701 
702         if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
703         {
704             Point2f nextPoint = nextPts[ptidx] - halfWin;
705             Point inextPoint;
706 
707             inextPoint.x = cvFloor(nextPoint.x);
708             inextPoint.y = cvFloor(nextPoint.y);
709 
710             if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
711                 inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
712             {
713                 if( status )
714                     status[ptidx] = false;
715                 continue;
716             }
717 
718             float aa = nextPoint.x - inextPoint.x;
719             float bb = nextPoint.y - inextPoint.y;
720             iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
721             iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
722             iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
723             iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
724             float errval = 0.f;
725 
726             for( y = 0; y < winSize.height; y++ )
727             {
728                 const uchar* Jptr = J.ptr() + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
729                 const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
730 
731                 for( x = 0; x < winSize.width*cn; x++ )
732                 {
733                     int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
734                                           Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
735                                           W_BITS1-5) - Iptr[x];
736                     errval += std::abs((float)diff);
737                 }
738             }
739             err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
740         }
741     }
742 }
743 
buildOpticalFlowPyramid(InputArray _img,OutputArrayOfArrays pyramid,Size winSize,int maxLevel,bool withDerivatives,int pyrBorder,int derivBorder,bool tryReuseInputImage)744 int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
745                                 int pyrBorder, int derivBorder, bool tryReuseInputImage)
746 {
747     Mat img = _img.getMat();
748     CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
749     int pyrstep = withDerivatives ? 2 : 1;
750 
751     pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
752 
753     int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
754 
755     //level 0
756     bool lvl0IsSet = false;
757     if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
758     {
759         Size wholeSize;
760         Point ofs;
761         img.locateROI(wholeSize, ofs);
762         if (ofs.x >= winSize.width && ofs.y >= winSize.height
763               && ofs.x + img.cols + winSize.width <= wholeSize.width
764               && ofs.y + img.rows + winSize.height <= wholeSize.height)
765         {
766             pyramid.getMatRef(0) = img;
767             lvl0IsSet = true;
768         }
769     }
770 
771     if(!lvl0IsSet)
772     {
773         Mat& temp = pyramid.getMatRef(0);
774 
775         if(!temp.empty())
776             temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
777         if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
778             temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());
779 
780         if(pyrBorder == BORDER_TRANSPARENT)
781             img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
782         else
783             copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
784         temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
785     }
786 
787     Size sz = img.size();
788     Mat prevLevel = pyramid.getMatRef(0);
789     Mat thisLevel = prevLevel;
790 
791     for(int level = 0; level <= maxLevel; ++level)
792     {
793         if (level != 0)
794         {
795             Mat& temp = pyramid.getMatRef(level * pyrstep);
796 
797             if(!temp.empty())
798                 temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
799             if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
800                 temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());
801 
802             thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
803             pyrDown(prevLevel, thisLevel, sz);
804 
805             if(pyrBorder != BORDER_TRANSPARENT)
806                 copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
807             temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
808         }
809 
810         if(withDerivatives)
811         {
812             Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);
813 
814             if(!deriv.empty())
815                 deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
816             if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
817                 deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);
818 
819             Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
820             calcSharrDeriv(thisLevel, derivI);
821 
822             if(derivBorder != BORDER_TRANSPARENT)
823                 copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
824             deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
825         }
826 
827         sz = Size((sz.width+1)/2, (sz.height+1)/2);
828         if( sz.width <= winSize.width || sz.height <= winSize.height )
829         {
830             pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this
831             return level;
832         }
833 
834         prevLevel = thisLevel;
835     }
836 
837     return maxLevel;
838 }
839 
840 namespace cv
841 {
842     class PyrLKOpticalFlow
843     {
844         struct dim3
845         {
846             unsigned int x, y, z;
dim3cv::PyrLKOpticalFlow::dim3847             dim3() : x(0), y(0), z(0) { }
848         };
849     public:
PyrLKOpticalFlow()850         PyrLKOpticalFlow()
851         {
852             winSize = Size(21, 21);
853             maxLevel = 3;
854             iters = 30;
855             derivLambda = 0.5;
856             useInitialFlow = false;
857 
858             waveSize = 0;
859         }
860 
checkParam()861         bool checkParam()
862         {
863             iters = std::min(std::max(iters, 0), 100);
864 
865             derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
866             if (derivLambda < 0)
867                 return false;
868             if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
869                 return false;
870             calcPatchSize();
871             if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
872                 return false;
873             if (!initWaveSize())
874                 return false;
875             return true;
876         }
877 
sparse(const UMat & prevImg,const UMat & nextImg,const UMat & prevPts,UMat & nextPts,UMat & status,UMat & err)878         bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
879         {
880             if (!checkParam())
881                 return false;
882 
883             UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
884             UMat temp2 = nextPts.reshape(1);
885             multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);
886 
887             status.setTo(Scalar::all(1));
888 
889             // build the image pyramids.
890             std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
891             std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);
892 
893             // allocate buffers with aligned pitch to be able to use cl_khr_image2d_from_buffer extention
894             // This is the required pitch alignment in pixels
895             int pitchAlign = (int)ocl::Device::getDefault().imagePitchAlignment();
896             if (pitchAlign>0)
897             {
898                 prevPyr[0] = UMat(prevImg.rows,(prevImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,prevImg.cols);
899                 nextPyr[0] = UMat(nextImg.rows,(nextImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,nextImg.cols);
900                 for (int level = 1; level <= maxLevel; ++level)
901                 {
902                     int cols,rows;
903                     // allocate buffers with aligned pitch to be able to use image on buffer extention
904                     cols = (prevPyr[level - 1].cols+1)/2;
905                     rows = (prevPyr[level - 1].rows+1)/2;
906                     prevPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),prevPyr[level-1].type()).colRange(0,cols);
907                     cols = (nextPyr[level - 1].cols+1)/2;
908                     rows = (nextPyr[level - 1].rows+1)/2;
909                     nextPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),nextPyr[level-1].type()).colRange(0,cols);
910                 }
911             }
912 
913             prevImg.convertTo(prevPyr[0], CV_32F);
914             nextImg.convertTo(nextPyr[0], CV_32F);
915 
916             for (int level = 1; level <= maxLevel; ++level)
917             {
918                 pyrDown(prevPyr[level - 1], prevPyr[level]);
919                 pyrDown(nextPyr[level - 1], nextPyr[level]);
920             }
921 
922             // dI/dx ~ Ix, dI/dy ~ Iy
923             for (int level = maxLevel; level >= 0; level--)
924             {
925                 if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
926                                   nextPts, status, err,
927                                   prevPts.cols, level))
928                     return false;
929             }
930             return true;
931         }
932 
933         Size winSize;
934         int maxLevel;
935         int iters;
936         double derivLambda;
937         bool useInitialFlow;
938 
939     private:
940         int waveSize;
initWaveSize()941         bool initWaveSize()
942         {
943             waveSize = 1;
944             if (isDeviceCPU())
945                 return true;
946 
947             ocl::Kernel kernel;
948             if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
949                 return false;
950             waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
951             return true;
952         }
953         dim3 patch;
calcPatchSize()954         void calcPatchSize()
955         {
956             dim3 block;
957 
958             if (winSize.width > 32 && winSize.width > 2 * winSize.height)
959             {
960                 block.x = 32;
961                 block.y = 8;
962             }
963             else
964             {
965                 block.x = 16;
966                 block.y = 16;
967             }
968 
969             patch.x = (winSize.width  + block.x - 1) / block.x;
970             patch.y = (winSize.height + block.y - 1) / block.y;
971 
972             block.z = patch.z = 1;
973         }
974 
lkSparse_run(UMat & I,UMat & J,const UMat & prevPts,UMat & nextPts,UMat & status,UMat & err,int ptcount,int level)975         bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
976             int ptcount, int level)
977         {
978             size_t localThreads[3]  = { 8, 8};
979             size_t globalThreads[3] = { 8 * ptcount, 8};
980             char calcErr = (0 == level) ? 1 : 0;
981 
982             cv::String build_options;
983             if (isDeviceCPU())
984                 build_options = " -D CPU";
985             else
986                 build_options = cv::format("-D WAVE_SIZE=%d", waveSize);
987 
988             ocl::Kernel kernel;
989             if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
990                 return false;
991 
992             CV_Assert(I.depth() == CV_32F && J.depth() == CV_32F);
993             ocl::Image2D imageI(I, false, ocl::Image2D::canCreateAlias(I));
994             ocl::Image2D imageJ(J, false, ocl::Image2D::canCreateAlias(J));
995 
996             int idxArg = 0;
997             idxArg = kernel.set(idxArg, imageI); //image2d_t I
998             idxArg = kernel.set(idxArg, imageJ); //image2d_t J
999             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
1000             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
1001             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
1002             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
1003             idxArg = kernel.set(idxArg, (int)level); // const int level
1004             idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
1005             idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
1006             idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
1007             idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
1008             idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
1009             idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
1010             idxArg = kernel.set(idxArg, (int)iters); // int c_iters
1011             idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
1012             return kernel.run(2, globalThreads, localThreads, false);
1013         }
1014     private:
isDeviceCPU()1015         inline static bool isDeviceCPU()
1016         {
1017             return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
1018         }
1019     };
1020 
1021 
ocl_calcOpticalFlowPyrLK(InputArray _prevImg,InputArray _nextImg,InputArray _prevPts,InputOutputArray _nextPts,OutputArray _status,OutputArray _err,Size winSize,int maxLevel,TermCriteria criteria,int flags)1022     static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
1023                                   InputArray _prevPts, InputOutputArray _nextPts,
1024                                   OutputArray _status, OutputArray _err,
1025                                   Size winSize, int maxLevel,
1026                                   TermCriteria criteria,
1027                                   int flags/*, double minEigThreshold*/ )
1028     {
1029         if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
1030             return false;
1031         if (!cv::ocl::Device::getDefault().imageSupport())
1032             return false;
1033         if (_nextImg.size() != _prevImg.size())
1034             return false;
1035         int typePrev = _prevImg.type();
1036         int typeNext = _nextImg.type();
1037         if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
1038             return false;
1039         if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
1040             return false;
1041 
1042         if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
1043             return false;
1044         if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width))
1045             return false;
1046         size_t npoints = _prevPts.total();
1047         bool useInitialFlow  = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
1048         if (useInitialFlow)
1049         {
1050             if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
1051                 return false;
1052             if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width))
1053                 return false;
1054             if (_nextPts.total() != npoints)
1055                 return false;
1056         }
1057         else
1058         {
1059             _nextPts.create(_prevPts.size(), _prevPts.type());
1060         }
1061 
1062         PyrLKOpticalFlow opticalFlow;
1063         opticalFlow.winSize     = winSize;
1064         opticalFlow.maxLevel    = maxLevel;
1065         opticalFlow.iters       = criteria.maxCount;
1066         opticalFlow.derivLambda = criteria.epsilon;
1067         opticalFlow.useInitialFlow  = useInitialFlow;
1068 
1069         if (!opticalFlow.checkParam())
1070             return false;
1071 
1072         UMat umatErr;
1073         if (_err.needed())
1074         {
1075             _err.create((int)npoints, 1, CV_32FC1);
1076             umatErr = _err.getUMat();
1077         }
1078         else
1079             umatErr.create((int)npoints, 1, CV_32FC1);
1080 
1081         _status.create((int)npoints, 1, CV_8UC1);
1082         UMat umatNextPts = _nextPts.getUMat();
1083         UMat umatStatus = _status.getUMat();
1084         return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
1085     }
1086 };
1087 
calcOpticalFlowPyrLK(InputArray _prevImg,InputArray _nextImg,InputArray _prevPts,InputOutputArray _nextPts,OutputArray _status,OutputArray _err,Size winSize,int maxLevel,TermCriteria criteria,int flags,double minEigThreshold)1088 void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
1089                            InputArray _prevPts, InputOutputArray _nextPts,
1090                            OutputArray _status, OutputArray _err,
1091                            Size winSize, int maxLevel,
1092                            TermCriteria criteria,
1093                            int flags, double minEigThreshold )
1094 {
1095     bool use_opencl = ocl::useOpenCL() &&
1096                       (_prevImg.isUMat() || _nextImg.isUMat()) &&
1097                       ocl::Image2D::isFormatSupported(CV_32F, 1, false);
1098     if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
1099     {
1100         CV_IMPL_ADD(CV_IMPL_OCL);
1101         return;
1102     }
1103 
1104     Mat prevPtsMat = _prevPts.getMat();
1105     const int derivDepth = DataType<cv::detail::deriv_type>::depth;
1106 
1107     CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
1108 
1109     int level=0, i, npoints;
1110     CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
1111 
1112     if( npoints == 0 )
1113     {
1114         _nextPts.release();
1115         _status.release();
1116         _err.release();
1117         return;
1118     }
1119 
1120     if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
1121         _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
1122 
1123     Mat nextPtsMat = _nextPts.getMat();
1124     CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
1125 
1126     const Point2f* prevPts = prevPtsMat.ptr<Point2f>();
1127     Point2f* nextPts = nextPtsMat.ptr<Point2f>();
1128 
1129     _status.create((int)npoints, 1, CV_8U, -1, true);
1130     Mat statusMat = _status.getMat(), errMat;
1131     CV_Assert( statusMat.isContinuous() );
1132     uchar* status = statusMat.ptr();
1133     float* err = 0;
1134 
1135     for( i = 0; i < npoints; i++ )
1136         status[i] = true;
1137 
1138     if( _err.needed() )
1139     {
1140         _err.create((int)npoints, 1, CV_32F, -1, true);
1141         errMat = _err.getMat();
1142         CV_Assert( errMat.isContinuous() );
1143         err = errMat.ptr<float>();
1144     }
1145 
1146     std::vector<Mat> prevPyr, nextPyr;
1147     int levels1 = -1;
1148     int lvlStep1 = 1;
1149     int levels2 = -1;
1150     int lvlStep2 = 1;
1151 
1152     if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
1153     {
1154         _prevImg.getMatVector(prevPyr);
1155 
1156         levels1 = int(prevPyr.size()) - 1;
1157         CV_Assert(levels1 >= 0);
1158 
1159         if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
1160         {
1161             lvlStep1 = 2;
1162             levels1 /= 2;
1163         }
1164 
1165         // ensure that pyramid has reqired padding
1166         if(levels1 > 0)
1167         {
1168             Size fullSize;
1169             Point ofs;
1170             prevPyr[lvlStep1].locateROI(fullSize, ofs);
1171             CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
1172                 && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
1173                 && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
1174         }
1175 
1176         if(levels1 < maxLevel)
1177             maxLevel = levels1;
1178     }
1179 
1180     if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
1181     {
1182         _nextImg.getMatVector(nextPyr);
1183 
1184         levels2 = int(nextPyr.size()) - 1;
1185         CV_Assert(levels2 >= 0);
1186 
1187         if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
1188         {
1189             lvlStep2 = 2;
1190             levels2 /= 2;
1191         }
1192 
1193         // ensure that pyramid has reqired padding
1194         if(levels2 > 0)
1195         {
1196             Size fullSize;
1197             Point ofs;
1198             nextPyr[lvlStep2].locateROI(fullSize, ofs);
1199             CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
1200                 && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
1201                 && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
1202         }
1203 
1204         if(levels2 < maxLevel)
1205             maxLevel = levels2;
1206     }
1207 
1208     if (levels1 < 0)
1209         maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
1210 
1211     if (levels2 < 0)
1212         maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
1213 
1214     if( (criteria.type & TermCriteria::COUNT) == 0 )
1215         criteria.maxCount = 30;
1216     else
1217         criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
1218     if( (criteria.type & TermCriteria::EPS) == 0 )
1219         criteria.epsilon = 0.01;
1220     else
1221         criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
1222     criteria.epsilon *= criteria.epsilon;
1223 
1224     // dI/dx ~ Ix, dI/dy ~ Iy
1225     Mat derivIBuf;
1226     if(lvlStep1 == 1)
1227         derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));
1228 
1229     for( level = maxLevel; level >= 0; level-- )
1230     {
1231         Mat derivI;
1232         if(lvlStep1 == 1)
1233         {
1234             Size imgSize = prevPyr[level * lvlStep1].size();
1235             Mat _derivI( imgSize.height + winSize.height*2,
1236                 imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
1237             derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
1238             calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
1239             copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
1240         }
1241         else
1242             derivI = prevPyr[level * lvlStep1 + 1];
1243 
1244         CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
1245         CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
1246 
1247 #ifdef HAVE_TEGRA_OPTIMIZATION
1248         typedef tegra::LKTrackerInvoker<cv::detail::LKTrackerInvoker> LKTrackerInvoker;
1249 #else
1250         typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
1251 #endif
1252 
1253         parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
1254                                                           nextPyr[level * lvlStep2], prevPts, nextPts,
1255                                                           status, err,
1256                                                           winSize, criteria, level, maxLevel,
1257                                                           flags, (float)minEigThreshold));
1258     }
1259 }
1260 
1261 namespace cv
1262 {
1263 
1264 static void
getRTMatrix(const Point2f * a,const Point2f * b,int count,Mat & M,bool fullAffine)1265 getRTMatrix( const Point2f* a, const Point2f* b,
1266              int count, Mat& M, bool fullAffine )
1267 {
1268     CV_Assert( M.isContinuous() );
1269 
1270     if( fullAffine )
1271     {
1272         double sa[6][6]={{0.}}, sb[6]={0.};
1273         Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb );
1274         Mat MM = M.reshape(1, 6);
1275 
1276         for( int i = 0; i < count; i++ )
1277         {
1278             sa[0][0] += a[i].x*a[i].x;
1279             sa[0][1] += a[i].y*a[i].x;
1280             sa[0][2] += a[i].x;
1281 
1282             sa[1][1] += a[i].y*a[i].y;
1283             sa[1][2] += a[i].y;
1284 
1285             sa[2][2] += 1;
1286 
1287             sb[0] += a[i].x*b[i].x;
1288             sb[1] += a[i].y*b[i].x;
1289             sb[2] += b[i].x;
1290             sb[3] += a[i].x*b[i].y;
1291             sb[4] += a[i].y*b[i].y;
1292             sb[5] += b[i].y;
1293         }
1294 
1295         sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
1296         sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
1297         sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];
1298 
1299         sa[3][3] = sa[0][0];
1300         sa[4][4] = sa[1][1];
1301         sa[5][5] = sa[2][2];
1302 
1303         solve( A, B, MM, DECOMP_EIG );
1304     }
1305     else
1306     {
1307         double sa[4][4]={{0.}}, sb[4]={0.}, m[4];
1308         Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb );
1309         Mat MM( 4, 1, CV_64F, m );
1310 
1311         for( int i = 0; i < count; i++ )
1312         {
1313             sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
1314             sa[0][2] += a[i].x;
1315             sa[0][3] += a[i].y;
1316 
1317 
1318             sa[2][1] += -a[i].y;
1319             sa[2][2] += 1;
1320 
1321             sa[3][0] += a[i].y;
1322             sa[3][1] += a[i].x;
1323             sa[3][3] += 1;
1324 
1325             sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
1326             sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
1327             sb[2] += b[i].x;
1328             sb[3] += b[i].y;
1329         }
1330 
1331         sa[1][1] = sa[0][0];
1332         sa[2][1] = sa[1][2] = -sa[0][3];
1333         sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
1334         sa[2][2] = sa[3][3] = count;
1335         sa[3][0] = sa[0][3];
1336 
1337         solve( A, B, MM, DECOMP_EIG );
1338 
1339         double* om = M.ptr<double>();
1340         om[0] = om[4] = m[0];
1341         om[1] = -m[1];
1342         om[3] = m[1];
1343         om[2] = m[2];
1344         om[5] = m[3];
1345     }
1346 }
1347 
1348 }
1349 
estimateRigidTransform(InputArray src1,InputArray src2,bool fullAffine)1350 cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine )
1351 {
1352     Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();
1353 
1354     const int COUNT = 15;
1355     const int WIDTH = 160, HEIGHT = 120;
1356     const int RANSAC_MAX_ITERS = 500;
1357     const int RANSAC_SIZE0 = 3;
1358     const double RANSAC_GOOD_RATIO = 0.5;
1359 
1360     std::vector<Point2f> pA, pB;
1361     std::vector<int> good_idx;
1362     std::vector<uchar> status;
1363 
1364     double scale = 1.;
1365     int i, j, k, k1;
1366 
1367     RNG rng((uint64)-1);
1368     int good_count = 0;
1369 
1370     if( A.size() != B.size() )
1371         CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" );
1372 
1373     if( A.type() != B.type() )
1374         CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
1375 
1376     int count = A.checkVector(2);
1377 
1378     if( count > 0 )
1379     {
1380         A.reshape(2, count).convertTo(pA, CV_32F);
1381         B.reshape(2, count).convertTo(pB, CV_32F);
1382     }
1383     else if( A.depth() == CV_8U )
1384     {
1385         int cn = A.channels();
1386         CV_Assert( cn == 1 || cn == 3 || cn == 4 );
1387         Size sz0 = A.size();
1388         Size sz1(WIDTH, HEIGHT);
1389 
1390         scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height ));
1391 
1392         sz1.width = cvRound( sz0.width * scale );
1393         sz1.height = cvRound( sz0.height * scale );
1394 
1395         bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height;
1396 
1397         if( !equalSizes || cn != 1 )
1398         {
1399             Mat sA, sB;
1400 
1401             if( cn != 1 )
1402             {
1403                 Mat gray;
1404                 cvtColor(A, gray, COLOR_BGR2GRAY);
1405                 resize(gray, sA, sz1, 0., 0., INTER_AREA);
1406                 cvtColor(B, gray, COLOR_BGR2GRAY);
1407                 resize(gray, sB, sz1, 0., 0., INTER_AREA);
1408             }
1409             else
1410             {
1411                 resize(A, sA, sz1, 0., 0., INTER_AREA);
1412                 resize(B, sB, sz1, 0., 0., INTER_AREA);
1413             }
1414 
1415             A = sA;
1416             B = sB;
1417         }
1418 
1419         int count_y = COUNT;
1420         int count_x = cvRound((double)COUNT*sz1.width/sz1.height);
1421         count = count_x * count_y;
1422 
1423         pA.resize(count);
1424         pB.resize(count);
1425         status.resize(count);
1426 
1427         for( i = 0, k = 0; i < count_y; i++ )
1428             for( j = 0; j < count_x; j++, k++ )
1429             {
1430                 pA[k].x = (j+0.5f)*sz1.width/count_x;
1431                 pA[k].y = (i+0.5f)*sz1.height/count_y;
1432             }
1433 
1434         // find the corresponding points in B
1435         calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3,
1436                              TermCriteria(TermCriteria::MAX_ITER,40,0.1));
1437 
1438         // repack the remained points
1439         for( i = 0, k = 0; i < count; i++ )
1440             if( status[i] )
1441             {
1442                 if( i > k )
1443                 {
1444                     pA[k] = pA[i];
1445                     pB[k] = pB[i];
1446                 }
1447                 k++;
1448             }
1449         count = k;
1450         pA.resize(count);
1451         pB.resize(count);
1452     }
1453     else
1454         CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" );
1455 
1456     good_idx.resize(count);
1457 
1458     if( count < RANSAC_SIZE0 )
1459         return Mat();
1460 
1461     Rect brect = boundingRect(pB);
1462 
1463     // RANSAC stuff:
1464     // 1. find the consensus
1465     for( k = 0; k < RANSAC_MAX_ITERS; k++ )
1466     {
1467         int idx[RANSAC_SIZE0];
1468         Point2f a[RANSAC_SIZE0];
1469         Point2f b[RANSAC_SIZE0];
1470 
1471         // choose random 3 non-complanar points from A & B
1472         for( i = 0; i < RANSAC_SIZE0; i++ )
1473         {
1474             for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ )
1475             {
1476                 idx[i] = rng.uniform(0, count);
1477 
1478                 for( j = 0; j < i; j++ )
1479                 {
1480                     if( idx[j] == idx[i] )
1481                         break;
1482                     // check that the points are not very close one each other
1483                     if( fabs(pA[idx[i]].x - pA[idx[j]].x) +
1484                         fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON )
1485                         break;
1486                     if( fabs(pB[idx[i]].x - pB[idx[j]].x) +
1487                         fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON )
1488                         break;
1489                 }
1490 
1491                 if( j < i )
1492                     continue;
1493 
1494                 if( i+1 == RANSAC_SIZE0 )
1495                 {
1496                     // additional check for non-complanar vectors
1497                     a[0] = pA[idx[0]];
1498                     a[1] = pA[idx[1]];
1499                     a[2] = pA[idx[2]];
1500 
1501                     b[0] = pB[idx[0]];
1502                     b[1] = pB[idx[1]];
1503                     b[2] = pB[idx[2]];
1504 
1505                     double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
1506                     double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
1507                     double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
1508                     double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
1509                     const double eps = 0.01;
1510 
1511                     if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
1512                         fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) )
1513                         continue;
1514                 }
1515                 break;
1516             }
1517 
1518             if( k1 >= RANSAC_MAX_ITERS )
1519                 break;
1520         }
1521 
1522         if( i < RANSAC_SIZE0 )
1523             continue;
1524 
1525         // estimate the transformation using 3 points
1526         getRTMatrix( a, b, 3, M, fullAffine );
1527 
1528         const double* m = M.ptr<double>();
1529         for( i = 0, good_count = 0; i < count; i++ )
1530         {
1531             if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) +
1532                 std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 )
1533                 good_idx[good_count++] = i;
1534         }
1535 
1536         if( good_count >= count*RANSAC_GOOD_RATIO )
1537             break;
1538     }
1539 
1540     if( k >= RANSAC_MAX_ITERS )
1541         return Mat();
1542 
1543     if( good_count < count )
1544     {
1545         for( i = 0; i < good_count; i++ )
1546         {
1547             j = good_idx[i];
1548             pA[i] = pA[j];
1549             pB[i] = pB[j];
1550         }
1551     }
1552 
1553     getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine );
1554     M.at<double>(0, 2) /= scale;
1555     M.at<double>(1, 2) /= scale;
1556 
1557     return M;
1558 }
1559 
1560 /* End of file. */
1561