• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 //   * Redistribution's of source code must retain the above copyright notice,
22 //     this list of conditions and the following disclaimer.
23 //
24 //   * Redistribution's in binary form must reproduce the above copyright notice,
25 //     this list of conditions and the following disclaimer in the documentation
26 //     and/or other materials provided with the distribution.
27 //
28 //   * The name of the copyright holders may not be used to endorse or promote products
29 //     derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43 
44 /*
45  This is a variation of
46  "Stereo Processing by Semiglobal Matching and Mutual Information"
47  by Heiko Hirschmuller.
48 
49  We match blocks rather than individual pixels, thus the algorithm is called
50  SGBM (Semi-global block matching)
51  */
52 
53 #include "precomp.hpp"
54 #include <limits.h>
55 
56 namespace cv
57 {
58 
59 typedef uchar PixType;
60 typedef short CostType;
61 typedef short DispType;
62 
63 enum { NR = 16, NR2 = NR/2 };
64 
65 
66 struct StereoSGBMParams
67 {
StereoSGBMParamscv::StereoSGBMParams68     StereoSGBMParams()
69     {
70         minDisparity = numDisparities = 0;
71         SADWindowSize = 0;
72         P1 = P2 = 0;
73         disp12MaxDiff = 0;
74         preFilterCap = 0;
75         uniquenessRatio = 0;
76         speckleWindowSize = 0;
77         speckleRange = 0;
78         mode = StereoSGBM::MODE_SGBM;
79     }
80 
StereoSGBMParamscv::StereoSGBMParams81     StereoSGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
82                       int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
83                       int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
84                       int _mode )
85     {
86         minDisparity = _minDisparity;
87         numDisparities = _numDisparities;
88         SADWindowSize = _SADWindowSize;
89         P1 = _P1;
90         P2 = _P2;
91         disp12MaxDiff = _disp12MaxDiff;
92         preFilterCap = _preFilterCap;
93         uniquenessRatio = _uniquenessRatio;
94         speckleWindowSize = _speckleWindowSize;
95         speckleRange = _speckleRange;
96         mode = _mode;
97     }
98 
99     int minDisparity;
100     int numDisparities;
101     int SADWindowSize;
102     int preFilterCap;
103     int uniquenessRatio;
104     int P1;
105     int P2;
106     int speckleWindowSize;
107     int speckleRange;
108     int disp12MaxDiff;
109     int mode;
110 };
111 
112 /*
113  For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
114  and for each disparity minD<=d<maxD the function
115  computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
116  row1[x] and row2[x-d]. The subpixel algorithm from
117  "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
118  is used, hence the suffix BT.
119 
120  the temporary buffer should contain width2*2 elements
121  */
calcPixelCostBT(const Mat & img1,const Mat & img2,int y,int minD,int maxD,CostType * cost,PixType * buffer,const PixType * tab,int tabOfs,int)122 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
123                             int minD, int maxD, CostType* cost,
124                             PixType* buffer, const PixType* tab,
125                             int tabOfs, int )
126 {
127     int x, c, width = img1.cols, cn = img1.channels();
128     int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
129     int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
130     int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
131     const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
132     PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
133 
134     tab += tabOfs;
135 
136     for( c = 0; c < cn*2; c++ )
137     {
138         prow1[width*c] = prow1[width*c + width-1] =
139         prow2[width*c] = prow2[width*c + width-1] = tab[0];
140     }
141 
142     int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
143     int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
144 
145     if( cn == 1 )
146     {
147         for( x = 1; x < width-1; x++ )
148         {
149             prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
150             prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
151 
152             prow1[x+width] = row1[x];
153             prow2[width-1-x+width] = row2[x];
154         }
155     }
156     else
157     {
158         for( x = 1; x < width-1; x++ )
159         {
160             prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
161             prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
162             prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
163 
164             prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
165             prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
166             prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
167 
168             prow1[x+width*3] = row1[x*3];
169             prow1[x+width*4] = row1[x*3+1];
170             prow1[x+width*5] = row1[x*3+2];
171 
172             prow2[width-1-x+width*3] = row2[x*3];
173             prow2[width-1-x+width*4] = row2[x*3+1];
174             prow2[width-1-x+width*5] = row2[x*3+2];
175         }
176     }
177 
178     memset( cost, 0, width1*D*sizeof(cost[0]) );
179 
180     buffer -= minX2;
181     cost -= minX1*D + minD; // simplify the cost indices inside the loop
182 
183 #if CV_SSE2
184     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
185 #endif
186 
187 #if 1
188     for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
189     {
190         int diff_scale = c < cn ? 0 : 2;
191 
192         // precompute
193         //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
194         //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
195         for( x = minX2; x < maxX2; x++ )
196         {
197             int v = prow2[x];
198             int vl = x > 0 ? (v + prow2[x-1])/2 : v;
199             int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
200             int v0 = std::min(vl, vr); v0 = std::min(v0, v);
201             int v1 = std::max(vl, vr); v1 = std::max(v1, v);
202             buffer[x] = (PixType)v0;
203             buffer[x + width2] = (PixType)v1;
204         }
205 
206         for( x = minX1; x < maxX1; x++ )
207         {
208             int u = prow1[x];
209             int ul = x > 0 ? (u + prow1[x-1])/2 : u;
210             int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
211             int u0 = std::min(ul, ur); u0 = std::min(u0, u);
212             int u1 = std::max(ul, ur); u1 = std::max(u1, u);
213 
214         #if CV_SSE2
215             if( useSIMD )
216             {
217                 __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
218                 __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
219                 __m128i ds = _mm_cvtsi32_si128(diff_scale);
220 
221                 for( int d = minD; d < maxD; d += 16 )
222                 {
223                     __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
224                     __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
225                     __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
226                     __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
227                     __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
228                     __m128i diff = _mm_min_epu8(c0, c1);
229 
230                     c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
231                     c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
232 
233                     _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
234                     _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
235                 }
236             }
237             else
238         #endif
239             {
240                 for( int d = minD; d < maxD; d++ )
241                 {
242                     int v = prow2[width-x-1 + d];
243                     int v0 = buffer[width-x-1 + d];
244                     int v1 = buffer[width-x-1 + d + width2];
245                     int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
246                     int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
247 
248                     cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
249                 }
250             }
251         }
252     }
253 #else
254     for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
255     {
256         for( x = minX1; x < maxX1; x++ )
257         {
258             int u = prow1[x];
259         #if CV_SSE2
260             if( useSIMD )
261             {
262                 __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
263 
264                 for( int d = minD; d < maxD; d += 16 )
265                 {
266                     __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
267                     __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
268                     __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
269                     __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
270 
271                     _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
272                     _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
273                 }
274             }
275             else
276         #endif
277             {
278                 for( int d = minD; d < maxD; d++ )
279                 {
280                     int v = prow2[width-1-x + d];
281                     cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
282                 }
283             }
284         }
285     }
286 #endif
287 }
288 
289 
290 /*
291  computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
292  that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
293  minD <= d < maxD.
294  disp2full is the reverse disparity map, that is:
295  disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
296 
297  note that disp1buf will have the same size as the roi and
298  disp2full will have the same size as img1 (or img2).
299  On exit disp2buf is not the final disparity, it is an intermediate result that becomes
300  final after all the tiles are processed.
301 
302  the disparity in disp1buf is written with sub-pixel accuracy
303  (4 fractional bits, see StereoSGBM::DISP_SCALE),
304  using quadratic interpolation, while the disparity in disp2buf
305  is written as is, without interpolation.
306 
307  disp2cost also has the same size as img1 (or img2).
308  It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
309  */
computeDisparitySGBM(const Mat & img1,const Mat & img2,Mat & disp1,const StereoSGBMParams & params,Mat & buffer)310 static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
311                                  Mat& disp1, const StereoSGBMParams& params,
312                                  Mat& buffer )
313 {
314 #if CV_SSE2
315     static const uchar LSBTab[] =
316     {
317         0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
318         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
319         6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
320         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
321         7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
322         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
323         6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
324         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
325     };
326 
327     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
328 #endif
329 
330     const int ALIGN = 16;
331     const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
332     const int DISP_SCALE = (1 << DISP_SHIFT);
333     const CostType MAX_COST = SHRT_MAX;
334 
335     int minD = params.minDisparity, maxD = minD + params.numDisparities;
336     Size SADWindowSize;
337     SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
338     int ftzero = std::max(params.preFilterCap, 15) | 1;
339     int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
340     int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
341     int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
342     int k, width = disp1.cols, height = disp1.rows;
343     int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
344     int D = maxD - minD, width1 = maxX1 - minX1;
345     int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
346     int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
347     bool fullDP = params.mode == StereoSGBM::MODE_HH;
348     int npasses = fullDP ? 2 : 1;
349     const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
350     PixType clipTab[TAB_SIZE];
351 
352     for( k = 0; k < TAB_SIZE; k++ )
353         clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
354 
355     if( minX1 >= maxX1 )
356     {
357         disp1 = Scalar::all(INVALID_DISP_SCALED);
358         return;
359     }
360 
361     CV_Assert( D % 16 == 0 );
362 
363     // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
364     // if you change NR, please, modify the loop as well.
365     int D2 = D+16, NRD2 = NR2*D2;
366 
367     // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
368     // for 8-way dynamic programming we need the current row and
369     // the previous row, i.e. 2 rows in total
370     const int NLR = 2;
371     const int LrBorder = NLR - 1;
372 
373     // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
374     // we keep pixel difference cost (C) and the summary cost over NR directions (S).
375     // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
376     size_t costBufSize = width1*D;
377     size_t CSBufSize = costBufSize*(fullDP ? height : 1);
378     size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
379     int hsumBufNRows = SH2*2 + 2;
380     size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
381     costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
382     CSBufSize*2*sizeof(CostType) + // C, S
383     width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
384     width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
385 
386     if( buffer.empty() || !buffer.isContinuous() ||
387         buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
388         buffer.create(1, (int)totalBufSize, CV_8U);
389 
390     // summary cost over different (nDirs) directions
391     CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
392     CostType* Sbuf = Cbuf + CSBufSize;
393     CostType* hsumBuf = Sbuf + CSBufSize;
394     CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
395 
396     CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
397     DispType* disp2ptr = (DispType*)(disp2cost + width);
398     PixType* tempBuf = (PixType*)(disp2ptr + width);
399 
400     // add P2 to every C(x,y). it saves a few operations in the inner loops
401     for( k = 0; k < width1*D; k++ )
402         Cbuf[k] = (CostType)P2;
403 
404     for( int pass = 1; pass <= npasses; pass++ )
405     {
406         int x1, y1, x2, y2, dx, dy;
407 
408         if( pass == 1 )
409         {
410             y1 = 0; y2 = height; dy = 1;
411             x1 = 0; x2 = width1; dx = 1;
412         }
413         else
414         {
415             y1 = height-1; y2 = -1; dy = -1;
416             x1 = width1-1; x2 = -1; dx = -1;
417         }
418 
419         CostType *Lr[NLR]={0}, *minLr[NLR]={0};
420 
421         for( k = 0; k < NLR; k++ )
422         {
423             // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
424             // and will occasionally use negative indices with the arrays
425             // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
426             // however, then the alignment will be imperfect, i.e. bad for SSE,
427             // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
428             Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
429             memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
430             minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
431             memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
432         }
433 
434         for( int y = y1; y != y2; y += dy )
435         {
436             int x, d;
437             DispType* disp1ptr = disp1.ptr<DispType>(y);
438             CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
439             CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
440 
441             if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
442             {
443                 int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
444 
445                 for( k = dy1; k <= dy2; k++ )
446                 {
447                     CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
448 
449                     if( k < height )
450                     {
451                         calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
452 
453                         memset(hsumAdd, 0, D*sizeof(CostType));
454                         for( x = 0; x <= SW2*D; x += D )
455                         {
456                             int scale = x == 0 ? SW2 + 1 : 1;
457                             for( d = 0; d < D; d++ )
458                                 hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
459                         }
460 
461                         if( y > 0 )
462                         {
463                             const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
464                             const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
465 
466                             for( x = D; x < width1*D; x += D )
467                             {
468                                 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
469                                 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
470 
471                             #if CV_SSE2
472                                 if( useSIMD )
473                                 {
474                                     for( d = 0; d < D; d += 8 )
475                                     {
476                                         __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
477                                         __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
478                                         hv = _mm_adds_epi16(_mm_subs_epi16(hv,
479                                                                            _mm_load_si128((const __m128i*)(pixSub + d))),
480                                                             _mm_load_si128((const __m128i*)(pixAdd + d)));
481                                         Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
482                                                                            _mm_load_si128((const __m128i*)(hsumSub + x + d))),
483                                                             hv);
484                                         _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
485                                         _mm_store_si128((__m128i*)(C + x + d), Cx);
486                                     }
487                                 }
488                                 else
489                             #endif
490                                 {
491                                     for( d = 0; d < D; d++ )
492                                     {
493                                         int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
494                                         C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
495                                     }
496                                 }
497                             }
498                         }
499                         else
500                         {
501                             for( x = D; x < width1*D; x += D )
502                             {
503                                 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
504                                 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
505 
506                                 for( d = 0; d < D; d++ )
507                                     hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
508                             }
509                         }
510                     }
511 
512                     if( y == 0 )
513                     {
514                         int scale = k == 0 ? SH2 + 1 : 1;
515                         for( x = 0; x < width1*D; x++ )
516                             C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
517                     }
518                 }
519 
520                 // also, clear the S buffer
521                 for( k = 0; k < width1*D; k++ )
522                     S[k] = 0;
523             }
524 
525             // clear the left and the right borders
526             memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
527             memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
528             memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
529             memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
530 
531             /*
532              [formula 13 in the paper]
533              compute L_r(p, d) = C(p, d) +
534              min(L_r(p-r, d),
535              L_r(p-r, d-1) + P1,
536              L_r(p-r, d+1) + P1,
537              min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
538              where p = (x,y), r is one of the directions.
539              we process all the directions at once:
540              0: r=(-dx, 0)
541              1: r=(-1, -dy)
542              2: r=(0, -dy)
543              3: r=(1, -dy)
544              4: r=(-2, -dy)
545              5: r=(-1, -dy*2)
546              6: r=(1, -dy*2)
547              7: r=(2, -dy)
548              */
549             for( x = x1; x != x2; x += dx )
550             {
551                 int xm = x*NR2, xd = xm*D2;
552 
553                 int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
554                 int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
555 
556                 CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
557                 CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
558                 CostType* Lr_p2 = Lr[1] + xd + D2*2;
559                 CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
560 
561                 Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
562                 Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
563 
564                 CostType* Lr_p = Lr[0] + xd;
565                 const CostType* Cp = C + x*D;
566                 CostType* Sp = S + x*D;
567 
568             #if CV_SSE2
569                 if( useSIMD )
570                 {
571                     __m128i _P1 = _mm_set1_epi16((short)P1);
572 
573                     __m128i _delta0 = _mm_set1_epi16((short)delta0);
574                     __m128i _delta1 = _mm_set1_epi16((short)delta1);
575                     __m128i _delta2 = _mm_set1_epi16((short)delta2);
576                     __m128i _delta3 = _mm_set1_epi16((short)delta3);
577                     __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
578 
579                     for( d = 0; d < D; d += 8 )
580                     {
581                         __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
582                         __m128i L0, L1, L2, L3;
583 
584                         L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
585                         L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
586                         L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
587                         L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
588 
589                         L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
590                         L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
591 
592                         L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
593                         L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
594 
595                         L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
596                         L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
597 
598                         L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
599                         L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
600 
601                         L0 = _mm_min_epi16(L0, _delta0);
602                         L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
603 
604                         L1 = _mm_min_epi16(L1, _delta1);
605                         L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
606 
607                         L2 = _mm_min_epi16(L2, _delta2);
608                         L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
609 
610                         L3 = _mm_min_epi16(L3, _delta3);
611                         L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
612 
613                         _mm_store_si128( (__m128i*)(Lr_p + d), L0);
614                         _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
615                         _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
616                         _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
617 
618                         __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
619                         __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
620                         t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
621                         _minL0 = _mm_min_epi16(_minL0, t0);
622 
623                         __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
624 
625                         L0 = _mm_adds_epi16(L0, L1);
626                         L2 = _mm_adds_epi16(L2, L3);
627                         Sval = _mm_adds_epi16(Sval, L0);
628                         Sval = _mm_adds_epi16(Sval, L2);
629 
630                         _mm_store_si128((__m128i*)(Sp + d), Sval);
631                     }
632 
633                     _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
634                     _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
635                 }
636                 else
637             #endif
638                 {
639                     int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
640 
641                     for( d = 0; d < D; d++ )
642                     {
643                         int Cpd = Cp[d], L0, L1, L2, L3;
644 
645                         L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
646                         L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
647                         L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
648                         L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
649 
650                         Lr_p[d] = (CostType)L0;
651                         minL0 = std::min(minL0, L0);
652 
653                         Lr_p[d + D2] = (CostType)L1;
654                         minL1 = std::min(minL1, L1);
655 
656                         Lr_p[d + D2*2] = (CostType)L2;
657                         minL2 = std::min(minL2, L2);
658 
659                         Lr_p[d + D2*3] = (CostType)L3;
660                         minL3 = std::min(minL3, L3);
661 
662                         Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
663                     }
664                     minLr[0][xm] = (CostType)minL0;
665                     minLr[0][xm+1] = (CostType)minL1;
666                     minLr[0][xm+2] = (CostType)minL2;
667                     minLr[0][xm+3] = (CostType)minL3;
668                 }
669             }
670 
671             if( pass == npasses )
672             {
673                 for( x = 0; x < width; x++ )
674                 {
675                     disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
676                     disp2cost[x] = MAX_COST;
677                 }
678 
679                 for( x = width1 - 1; x >= 0; x-- )
680                 {
681                     CostType* Sp = S + x*D;
682                     int minS = MAX_COST, bestDisp = -1;
683 
684                     if( npasses == 1 )
685                     {
686                         int xm = x*NR2, xd = xm*D2;
687 
688                         int minL0 = MAX_COST;
689                         int delta0 = minLr[0][xm + NR2] + P2;
690                         CostType* Lr_p0 = Lr[0] + xd + NRD2;
691                         Lr_p0[-1] = Lr_p0[D] = MAX_COST;
692                         CostType* Lr_p = Lr[0] + xd;
693 
694                         const CostType* Cp = C + x*D;
695 
696                     #if CV_SSE2
697                         if( useSIMD )
698                         {
699                             __m128i _P1 = _mm_set1_epi16((short)P1);
700                             __m128i _delta0 = _mm_set1_epi16((short)delta0);
701 
702                             __m128i _minL0 = _mm_set1_epi16((short)minL0);
703                             __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
704                             __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
705 
706                             for( d = 0; d < D; d += 8 )
707                             {
708                                 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
709 
710                                 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
711                                 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
712                                 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
713                                 L0 = _mm_min_epi16(L0, _delta0);
714                                 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
715 
716                                 _mm_store_si128((__m128i*)(Lr_p + d), L0);
717                                 _minL0 = _mm_min_epi16(_minL0, L0);
718                                 L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
719                                 _mm_store_si128((__m128i*)(Sp + d), L0);
720 
721                                 __m128i mask = _mm_cmpgt_epi16(_minS, L0);
722                                 _minS = _mm_min_epi16(_minS, L0);
723                                 _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
724                                 _d8 = _mm_adds_epi16(_d8, _8);
725                             }
726 
727                             short CV_DECL_ALIGNED(16) bestDispBuf[8];
728                             _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
729 
730                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
731                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
732                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
733 
734                             __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
735                             qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
736                             qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
737 
738                             minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
739                             minS = (CostType)_mm_cvtsi128_si32(qS);
740 
741                             qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
742                             qS = _mm_cmpeq_epi16(_minS, qS);
743                             int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
744 
745                             bestDisp = bestDispBuf[LSBTab[idx]];
746                         }
747                         else
748                     #endif
749                         {
750                             for( d = 0; d < D; d++ )
751                             {
752                                 int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
753 
754                                 Lr_p[d] = (CostType)L0;
755                                 minL0 = std::min(minL0, L0);
756 
757                                 int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
758                                 if( Sval < minS )
759                                 {
760                                     minS = Sval;
761                                     bestDisp = d;
762                                 }
763                             }
764                             minLr[0][xm] = (CostType)minL0;
765                         }
766                     }
767                     else
768                     {
769                         for( d = 0; d < D; d++ )
770                         {
771                             int Sval = Sp[d];
772                             if( Sval < minS )
773                             {
774                                 minS = Sval;
775                                 bestDisp = d;
776                             }
777                         }
778                     }
779 
780                     for( d = 0; d < D; d++ )
781                     {
782                         if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
783                             break;
784                     }
785                     if( d < D )
786                         continue;
787                     d = bestDisp;
788                     int _x2 = x + minX1 - d - minD;
789                     if( disp2cost[_x2] > minS )
790                     {
791                         disp2cost[_x2] = (CostType)minS;
792                         disp2ptr[_x2] = (DispType)(d + minD);
793                     }
794 
795                     if( 0 < d && d < D-1 )
796                     {
797                         // do subpixel quadratic interpolation:
798                         //   fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
799                         //   then find minimum of the parabola.
800                         int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
801                         d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
802                     }
803                     else
804                         d *= DISP_SCALE;
805                     disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
806                 }
807 
808                 for( x = minX1; x < maxX1; x++ )
809                 {
810                     // we round the computed disparity both towards -inf and +inf and check
811                     // if either of the corresponding disparities in disp2 is consistent.
812                     // This is to give the computed disparity a chance to look valid if it is.
813                     int d1 = disp1ptr[x];
814                     if( d1 == INVALID_DISP_SCALED )
815                         continue;
816                     int _d = d1 >> DISP_SHIFT;
817                     int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
818                     int _x = x - _d, x_ = x - d_;
819                     if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
820                        0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
821                         disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
822                 }
823             }
824 
825             // now shift the cyclic buffers
826             std::swap( Lr[0], Lr[1] );
827             std::swap( minLr[0], minLr[1] );
828         }
829     }
830 }
831 
832 class StereoSGBMImpl : public StereoSGBM
833 {
834 public:
StereoSGBMImpl()835     StereoSGBMImpl()
836     {
837         params = StereoSGBMParams();
838     }
839 
StereoSGBMImpl(int _minDisparity,int _numDisparities,int _SADWindowSize,int _P1,int _P2,int _disp12MaxDiff,int _preFilterCap,int _uniquenessRatio,int _speckleWindowSize,int _speckleRange,int _mode)840     StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
841                     int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
842                     int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
843                     int _mode )
844     {
845         params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
846                                    _P1, _P2, _disp12MaxDiff, _preFilterCap,
847                                    _uniquenessRatio, _speckleWindowSize, _speckleRange,
848                                    _mode );
849     }
850 
compute(InputArray leftarr,InputArray rightarr,OutputArray disparr)851     void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
852     {
853         Mat left = leftarr.getMat(), right = rightarr.getMat();
854         CV_Assert( left.size() == right.size() && left.type() == right.type() &&
855                    left.depth() == CV_8U );
856 
857         disparr.create( left.size(), CV_16S );
858         Mat disp = disparr.getMat();
859 
860         computeDisparitySGBM( left, right, disp, params, buffer );
861         medianBlur(disp, disp, 3);
862 
863         if( params.speckleWindowSize > 0 )
864             filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
865                            StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
866     }
867 
getMinDisparity() const868     int getMinDisparity() const { return params.minDisparity; }
setMinDisparity(int minDisparity)869     void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
870 
getNumDisparities() const871     int getNumDisparities() const { return params.numDisparities; }
setNumDisparities(int numDisparities)872     void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
873 
getBlockSize() const874     int getBlockSize() const { return params.SADWindowSize; }
setBlockSize(int blockSize)875     void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
876 
getSpeckleWindowSize() const877     int getSpeckleWindowSize() const { return params.speckleWindowSize; }
setSpeckleWindowSize(int speckleWindowSize)878     void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
879 
getSpeckleRange() const880     int getSpeckleRange() const { return params.speckleRange; }
setSpeckleRange(int speckleRange)881     void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
882 
getDisp12MaxDiff() const883     int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
setDisp12MaxDiff(int disp12MaxDiff)884     void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
885 
getPreFilterCap() const886     int getPreFilterCap() const { return params.preFilterCap; }
setPreFilterCap(int preFilterCap)887     void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
888 
getUniquenessRatio() const889     int getUniquenessRatio() const { return params.uniquenessRatio; }
setUniquenessRatio(int uniquenessRatio)890     void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
891 
getP1() const892     int getP1() const { return params.P1; }
setP1(int P1)893     void setP1(int P1) { params.P1 = P1; }
894 
getP2() const895     int getP2() const { return params.P2; }
setP2(int P2)896     void setP2(int P2) { params.P2 = P2; }
897 
getMode() const898     int getMode() const { return params.mode; }
setMode(int mode)899     void setMode(int mode) { params.mode = mode; }
900 
write(FileStorage & fs) const901     void write(FileStorage& fs) const
902     {
903         fs << "name" << name_
904         << "minDisparity" << params.minDisparity
905         << "numDisparities" << params.numDisparities
906         << "blockSize" << params.SADWindowSize
907         << "speckleWindowSize" << params.speckleWindowSize
908         << "speckleRange" << params.speckleRange
909         << "disp12MaxDiff" << params.disp12MaxDiff
910         << "preFilterCap" << params.preFilterCap
911         << "uniquenessRatio" << params.uniquenessRatio
912         << "P1" << params.P1
913         << "P2" << params.P2
914         << "mode" << params.mode;
915     }
916 
read(const FileNode & fn)917     void read(const FileNode& fn)
918     {
919         FileNode n = fn["name"];
920         CV_Assert( n.isString() && String(n) == name_ );
921         params.minDisparity = (int)fn["minDisparity"];
922         params.numDisparities = (int)fn["numDisparities"];
923         params.SADWindowSize = (int)fn["blockSize"];
924         params.speckleWindowSize = (int)fn["speckleWindowSize"];
925         params.speckleRange = (int)fn["speckleRange"];
926         params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
927         params.preFilterCap = (int)fn["preFilterCap"];
928         params.uniquenessRatio = (int)fn["uniquenessRatio"];
929         params.P1 = (int)fn["P1"];
930         params.P2 = (int)fn["P2"];
931         params.mode = (int)fn["mode"];
932     }
933 
934     StereoSGBMParams params;
935     Mat buffer;
936     static const char* name_;
937 };
938 
939 const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM";
940 
941 
create(int minDisparity,int numDisparities,int SADWindowSize,int P1,int P2,int disp12MaxDiff,int preFilterCap,int uniquenessRatio,int speckleWindowSize,int speckleRange,int mode)942 Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize,
943                                  int P1, int P2, int disp12MaxDiff,
944                                  int preFilterCap, int uniquenessRatio,
945                                  int speckleWindowSize, int speckleRange,
946                                  int mode)
947 {
948     return Ptr<StereoSGBM>(
949         new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
950                            P1, P2, disp12MaxDiff,
951                            preFilterCap, uniquenessRatio,
952                            speckleWindowSize, speckleRange,
953                            mode));
954 }
955 
getValidDisparityROI(Rect roi1,Rect roi2,int minDisparity,int numberOfDisparities,int SADWindowSize)956 Rect getValidDisparityROI( Rect roi1, Rect roi2,
957                           int minDisparity,
958                           int numberOfDisparities,
959                           int SADWindowSize )
960 {
961     int SW2 = SADWindowSize/2;
962     int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
963 
964     int xmin = std::max(roi1.x, roi2.x + maxD) + SW2;
965     int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
966     int ymin = std::max(roi1.y, roi2.y) + SW2;
967     int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
968 
969     Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
970 
971     return r.width > 0 && r.height > 0 ? r : Rect();
972 }
973 
974 typedef cv::Point_<short> Point2s;
975 
976 template <typename T>
filterSpecklesImpl(cv::Mat & img,int newVal,int maxSpeckleSize,int maxDiff,cv::Mat & _buf)977 void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf)
978 {
979     using namespace cv;
980 
981     int width = img.cols, height = img.rows, npixels = width*height;
982     size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
983     if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
984         _buf.create(1, (int)bufSize, CV_8U);
985 
986     uchar* buf = _buf.ptr();
987     int i, j, dstep = (int)(img.step/sizeof(T));
988     int* labels = (int*)buf;
989     buf += npixels*sizeof(labels[0]);
990     Point2s* wbuf = (Point2s*)buf;
991     buf += npixels*sizeof(wbuf[0]);
992     uchar* rtype = (uchar*)buf;
993     int curlabel = 0;
994 
995     // clear out label assignments
996     memset(labels, 0, npixels*sizeof(labels[0]));
997 
998     for( i = 0; i < height; i++ )
999     {
1000         T* ds = img.ptr<T>(i);
1001         int* ls = labels + width*i;
1002 
1003         for( j = 0; j < width; j++ )
1004         {
1005             if( ds[j] != newVal )   // not a bad disparity
1006             {
1007                 if( ls[j] )     // has a label, check for bad label
1008                 {
1009                     if( rtype[ls[j]] ) // small region, zero out disparity
1010                         ds[j] = (T)newVal;
1011                 }
1012                 // no label, assign and propagate
1013                 else
1014                 {
1015                     Point2s* ws = wbuf; // initialize wavefront
1016                     Point2s p((short)j, (short)i);  // current pixel
1017                     curlabel++; // next label
1018                     int count = 0;  // current region size
1019                     ls[j] = curlabel;
1020 
1021                     // wavefront propagation
1022                     while( ws >= wbuf ) // wavefront not empty
1023                     {
1024                         count++;
1025                         // put neighbors onto wavefront
1026                         T* dpp = &img.at<T>(p.y, p.x);
1027                         T dp = *dpp;
1028                         int* lpp = labels + width*p.y + p.x;
1029 
1030                         if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
1031                         {
1032                             lpp[+width] = curlabel;
1033                             *ws++ = Point2s(p.x, p.y+1);
1034                         }
1035 
1036                         if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
1037                         {
1038                             lpp[-width] = curlabel;
1039                             *ws++ = Point2s(p.x, p.y-1);
1040                         }
1041 
1042                         if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
1043                         {
1044                             lpp[+1] = curlabel;
1045                             *ws++ = Point2s(p.x+1, p.y);
1046                         }
1047 
1048                         if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
1049                         {
1050                             lpp[-1] = curlabel;
1051                             *ws++ = Point2s(p.x-1, p.y);
1052                         }
1053 
1054                         // pop most recent and propagate
1055                         // NB: could try least recent, maybe better convergence
1056                         p = *--ws;
1057                     }
1058 
1059                     // assign label type
1060                     if( count <= maxSpeckleSize )   // speckle region
1061                     {
1062                         rtype[ls[j]] = 1;   // small region label
1063                         ds[j] = (T)newVal;
1064                     }
1065                     else
1066                         rtype[ls[j]] = 0;   // large region label
1067                 }
1068             }
1069         }
1070     }
1071 }
1072 
1073 }
1074 
filterSpeckles(InputOutputArray _img,double _newval,int maxSpeckleSize,double _maxDiff,InputOutputArray __buf)1075 void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
1076                          double _maxDiff, InputOutputArray __buf )
1077 {
1078     Mat img = _img.getMat();
1079     int type = img.type();
1080     Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
1081     CV_Assert( type == CV_8UC1 || type == CV_16SC1 );
1082 
1083     int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff);
1084 
1085 #if IPP_VERSION_X100 >= 801
1086     CV_IPP_CHECK()
1087     {
1088         Ipp32s bufsize = 0;
1089         IppiSize roisize = { img.cols, img.rows };
1090         IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s;
1091 
1092         if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1))
1093         {
1094             IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize);
1095             Ipp8u * buffer = ippsMalloc_8u(bufsize);
1096 
1097             if ((int)status >= 0)
1098             {
1099                 if (type == CV_8UC1)
1100                     status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize,
1101                                                       (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer);
1102                 else
1103                     status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize,
1104                                                        (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer);
1105             }
1106 
1107             if (status >= 0)
1108             {
1109                 CV_IMPL_ADD(CV_IMPL_IPP);
1110                 return;
1111             }
1112             setIppErrorStatus();
1113         }
1114     }
1115 #endif
1116 
1117     if (type == CV_8UC1)
1118         filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1119     else
1120         filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1121 }
1122 
validateDisparity(InputOutputArray _disp,InputArray _cost,int minDisparity,int numberOfDisparities,int disp12MaxDiff)1123 void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
1124                             int numberOfDisparities, int disp12MaxDiff )
1125 {
1126     Mat disp = _disp.getMat(), cost = _cost.getMat();
1127     int cols = disp.cols, rows = disp.rows;
1128     int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
1129     int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
1130     AutoBuffer<int> _disp2buf(cols*2);
1131     int* disp2buf = _disp2buf;
1132     int* disp2cost = disp2buf + cols;
1133     const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
1134     int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
1135     int costType = cost.type();
1136 
1137     disp12MaxDiff *= DISP_SCALE;
1138 
1139     CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
1140               (costType == CV_16S || costType == CV_32S) &&
1141               disp.size() == cost.size() );
1142 
1143     for( int y = 0; y < rows; y++ )
1144     {
1145         short* dptr = disp.ptr<short>(y);
1146 
1147         for( x = 0; x < cols; x++ )
1148         {
1149             disp2buf[x] = INVALID_DISP_SCALED;
1150             disp2cost[x] = INT_MAX;
1151         }
1152 
1153         if( costType == CV_16S )
1154         {
1155             const short* cptr = cost.ptr<short>(y);
1156 
1157             for( x = minX1; x < maxX1; x++ )
1158             {
1159                 int d = dptr[x], c = cptr[x];
1160                 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1161 
1162                 if( disp2cost[x2] > c )
1163                 {
1164                     disp2cost[x2] = c;
1165                     disp2buf[x2] = d;
1166                 }
1167             }
1168         }
1169         else
1170         {
1171             const int* cptr = cost.ptr<int>(y);
1172 
1173             for( x = minX1; x < maxX1; x++ )
1174             {
1175                 int d = dptr[x], c = cptr[x];
1176                 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1177 
1178                 if( disp2cost[x2] < c )
1179                 {
1180                     disp2cost[x2] = c;
1181                     disp2buf[x2] = d;
1182                 }
1183             }
1184         }
1185 
1186         for( x = minX1; x < maxX1; x++ )
1187         {
1188             // we round the computed disparity both towards -inf and +inf and check
1189             // if either of the corresponding disparities in disp2 is consistent.
1190             // This is to give the computed disparity a chance to look valid if it is.
1191             int d = dptr[x];
1192             if( d == INVALID_DISP_SCALED )
1193                 continue;
1194             int d0 = d >> DISP_SHIFT;
1195             int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
1196             int x0 = x - d0, x1 = x - d1;
1197             if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
1198                 (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
1199                 dptr[x] = (short)INVALID_DISP_SCALED;
1200         }
1201     }
1202 }
1203