1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 /*
45 This is a variation of
46 "Stereo Processing by Semiglobal Matching and Mutual Information"
47 by Heiko Hirschmuller.
48
49 We match blocks rather than individual pixels, thus the algorithm is called
50 SGBM (Semi-global block matching)
51 */
52
53 #include "precomp.hpp"
54 #include <limits.h>
55
56 namespace cv
57 {
58
59 typedef uchar PixType;
60 typedef short CostType;
61 typedef short DispType;
62
63 enum { NR = 16, NR2 = NR/2 };
64
65
66 struct StereoSGBMParams
67 {
StereoSGBMParamscv::StereoSGBMParams68 StereoSGBMParams()
69 {
70 minDisparity = numDisparities = 0;
71 SADWindowSize = 0;
72 P1 = P2 = 0;
73 disp12MaxDiff = 0;
74 preFilterCap = 0;
75 uniquenessRatio = 0;
76 speckleWindowSize = 0;
77 speckleRange = 0;
78 mode = StereoSGBM::MODE_SGBM;
79 }
80
StereoSGBMParamscv::StereoSGBMParams81 StereoSGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
82 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
83 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
84 int _mode )
85 {
86 minDisparity = _minDisparity;
87 numDisparities = _numDisparities;
88 SADWindowSize = _SADWindowSize;
89 P1 = _P1;
90 P2 = _P2;
91 disp12MaxDiff = _disp12MaxDiff;
92 preFilterCap = _preFilterCap;
93 uniquenessRatio = _uniquenessRatio;
94 speckleWindowSize = _speckleWindowSize;
95 speckleRange = _speckleRange;
96 mode = _mode;
97 }
98
99 int minDisparity;
100 int numDisparities;
101 int SADWindowSize;
102 int preFilterCap;
103 int uniquenessRatio;
104 int P1;
105 int P2;
106 int speckleWindowSize;
107 int speckleRange;
108 int disp12MaxDiff;
109 int mode;
110 };
111
112 /*
113 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
114 and for each disparity minD<=d<maxD the function
115 computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
116 row1[x] and row2[x-d]. The subpixel algorithm from
117 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
118 is used, hence the suffix BT.
119
120 the temporary buffer should contain width2*2 elements
121 */
calcPixelCostBT(const Mat & img1,const Mat & img2,int y,int minD,int maxD,CostType * cost,PixType * buffer,const PixType * tab,int tabOfs,int)122 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
123 int minD, int maxD, CostType* cost,
124 PixType* buffer, const PixType* tab,
125 int tabOfs, int )
126 {
127 int x, c, width = img1.cols, cn = img1.channels();
128 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
129 int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
130 int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
131 const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
132 PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
133
134 tab += tabOfs;
135
136 for( c = 0; c < cn*2; c++ )
137 {
138 prow1[width*c] = prow1[width*c + width-1] =
139 prow2[width*c] = prow2[width*c + width-1] = tab[0];
140 }
141
142 int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
143 int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
144
145 if( cn == 1 )
146 {
147 for( x = 1; x < width-1; x++ )
148 {
149 prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
150 prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
151
152 prow1[x+width] = row1[x];
153 prow2[width-1-x+width] = row2[x];
154 }
155 }
156 else
157 {
158 for( x = 1; x < width-1; x++ )
159 {
160 prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
161 prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
162 prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
163
164 prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
165 prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
166 prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
167
168 prow1[x+width*3] = row1[x*3];
169 prow1[x+width*4] = row1[x*3+1];
170 prow1[x+width*5] = row1[x*3+2];
171
172 prow2[width-1-x+width*3] = row2[x*3];
173 prow2[width-1-x+width*4] = row2[x*3+1];
174 prow2[width-1-x+width*5] = row2[x*3+2];
175 }
176 }
177
178 memset( cost, 0, width1*D*sizeof(cost[0]) );
179
180 buffer -= minX2;
181 cost -= minX1*D + minD; // simplify the cost indices inside the loop
182
183 #if CV_SSE2
184 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
185 #endif
186
187 #if 1
188 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
189 {
190 int diff_scale = c < cn ? 0 : 2;
191
192 // precompute
193 // v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
194 // v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
195 for( x = minX2; x < maxX2; x++ )
196 {
197 int v = prow2[x];
198 int vl = x > 0 ? (v + prow2[x-1])/2 : v;
199 int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
200 int v0 = std::min(vl, vr); v0 = std::min(v0, v);
201 int v1 = std::max(vl, vr); v1 = std::max(v1, v);
202 buffer[x] = (PixType)v0;
203 buffer[x + width2] = (PixType)v1;
204 }
205
206 for( x = minX1; x < maxX1; x++ )
207 {
208 int u = prow1[x];
209 int ul = x > 0 ? (u + prow1[x-1])/2 : u;
210 int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
211 int u0 = std::min(ul, ur); u0 = std::min(u0, u);
212 int u1 = std::max(ul, ur); u1 = std::max(u1, u);
213
214 #if CV_SSE2
215 if( useSIMD )
216 {
217 __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
218 __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
219 __m128i ds = _mm_cvtsi32_si128(diff_scale);
220
221 for( int d = minD; d < maxD; d += 16 )
222 {
223 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
224 __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
225 __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
226 __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
227 __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
228 __m128i diff = _mm_min_epu8(c0, c1);
229
230 c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
231 c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
232
233 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
234 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
235 }
236 }
237 else
238 #endif
239 {
240 for( int d = minD; d < maxD; d++ )
241 {
242 int v = prow2[width-x-1 + d];
243 int v0 = buffer[width-x-1 + d];
244 int v1 = buffer[width-x-1 + d + width2];
245 int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
246 int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
247
248 cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
249 }
250 }
251 }
252 }
253 #else
254 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
255 {
256 for( x = minX1; x < maxX1; x++ )
257 {
258 int u = prow1[x];
259 #if CV_SSE2
260 if( useSIMD )
261 {
262 __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
263
264 for( int d = minD; d < maxD; d += 16 )
265 {
266 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
267 __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
268 __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
269 __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
270
271 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
272 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
273 }
274 }
275 else
276 #endif
277 {
278 for( int d = minD; d < maxD; d++ )
279 {
280 int v = prow2[width-1-x + d];
281 cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
282 }
283 }
284 }
285 }
286 #endif
287 }
288
289
290 /*
291 computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
292 that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
293 minD <= d < maxD.
294 disp2full is the reverse disparity map, that is:
295 disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
296
297 note that disp1buf will have the same size as the roi and
298 disp2full will have the same size as img1 (or img2).
299 On exit disp2buf is not the final disparity, it is an intermediate result that becomes
300 final after all the tiles are processed.
301
302 the disparity in disp1buf is written with sub-pixel accuracy
303 (4 fractional bits, see StereoSGBM::DISP_SCALE),
304 using quadratic interpolation, while the disparity in disp2buf
305 is written as is, without interpolation.
306
307 disp2cost also has the same size as img1 (or img2).
308 It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
309 */
computeDisparitySGBM(const Mat & img1,const Mat & img2,Mat & disp1,const StereoSGBMParams & params,Mat & buffer)310 static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
311 Mat& disp1, const StereoSGBMParams& params,
312 Mat& buffer )
313 {
314 #if CV_SSE2
315 static const uchar LSBTab[] =
316 {
317 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
318 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
319 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
320 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
321 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
322 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
323 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
324 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
325 };
326
327 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
328 #endif
329
330 const int ALIGN = 16;
331 const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
332 const int DISP_SCALE = (1 << DISP_SHIFT);
333 const CostType MAX_COST = SHRT_MAX;
334
335 int minD = params.minDisparity, maxD = minD + params.numDisparities;
336 Size SADWindowSize;
337 SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
338 int ftzero = std::max(params.preFilterCap, 15) | 1;
339 int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
340 int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
341 int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
342 int k, width = disp1.cols, height = disp1.rows;
343 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
344 int D = maxD - minD, width1 = maxX1 - minX1;
345 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
346 int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
347 bool fullDP = params.mode == StereoSGBM::MODE_HH;
348 int npasses = fullDP ? 2 : 1;
349 const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
350 PixType clipTab[TAB_SIZE];
351
352 for( k = 0; k < TAB_SIZE; k++ )
353 clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
354
355 if( minX1 >= maxX1 )
356 {
357 disp1 = Scalar::all(INVALID_DISP_SCALED);
358 return;
359 }
360
361 CV_Assert( D % 16 == 0 );
362
363 // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
364 // if you change NR, please, modify the loop as well.
365 int D2 = D+16, NRD2 = NR2*D2;
366
367 // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
368 // for 8-way dynamic programming we need the current row and
369 // the previous row, i.e. 2 rows in total
370 const int NLR = 2;
371 const int LrBorder = NLR - 1;
372
373 // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
374 // we keep pixel difference cost (C) and the summary cost over NR directions (S).
375 // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
376 size_t costBufSize = width1*D;
377 size_t CSBufSize = costBufSize*(fullDP ? height : 1);
378 size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
379 int hsumBufNRows = SH2*2 + 2;
380 size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
381 costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
382 CSBufSize*2*sizeof(CostType) + // C, S
383 width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
384 width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
385
386 if( buffer.empty() || !buffer.isContinuous() ||
387 buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
388 buffer.create(1, (int)totalBufSize, CV_8U);
389
390 // summary cost over different (nDirs) directions
391 CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
392 CostType* Sbuf = Cbuf + CSBufSize;
393 CostType* hsumBuf = Sbuf + CSBufSize;
394 CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
395
396 CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
397 DispType* disp2ptr = (DispType*)(disp2cost + width);
398 PixType* tempBuf = (PixType*)(disp2ptr + width);
399
400 // add P2 to every C(x,y). it saves a few operations in the inner loops
401 for( k = 0; k < width1*D; k++ )
402 Cbuf[k] = (CostType)P2;
403
404 for( int pass = 1; pass <= npasses; pass++ )
405 {
406 int x1, y1, x2, y2, dx, dy;
407
408 if( pass == 1 )
409 {
410 y1 = 0; y2 = height; dy = 1;
411 x1 = 0; x2 = width1; dx = 1;
412 }
413 else
414 {
415 y1 = height-1; y2 = -1; dy = -1;
416 x1 = width1-1; x2 = -1; dx = -1;
417 }
418
419 CostType *Lr[NLR]={0}, *minLr[NLR]={0};
420
421 for( k = 0; k < NLR; k++ )
422 {
423 // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
424 // and will occasionally use negative indices with the arrays
425 // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
426 // however, then the alignment will be imperfect, i.e. bad for SSE,
427 // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
428 Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
429 memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
430 minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
431 memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
432 }
433
434 for( int y = y1; y != y2; y += dy )
435 {
436 int x, d;
437 DispType* disp1ptr = disp1.ptr<DispType>(y);
438 CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
439 CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
440
441 if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
442 {
443 int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
444
445 for( k = dy1; k <= dy2; k++ )
446 {
447 CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
448
449 if( k < height )
450 {
451 calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
452
453 memset(hsumAdd, 0, D*sizeof(CostType));
454 for( x = 0; x <= SW2*D; x += D )
455 {
456 int scale = x == 0 ? SW2 + 1 : 1;
457 for( d = 0; d < D; d++ )
458 hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
459 }
460
461 if( y > 0 )
462 {
463 const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
464 const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
465
466 for( x = D; x < width1*D; x += D )
467 {
468 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
469 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
470
471 #if CV_SSE2
472 if( useSIMD )
473 {
474 for( d = 0; d < D; d += 8 )
475 {
476 __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
477 __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
478 hv = _mm_adds_epi16(_mm_subs_epi16(hv,
479 _mm_load_si128((const __m128i*)(pixSub + d))),
480 _mm_load_si128((const __m128i*)(pixAdd + d)));
481 Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
482 _mm_load_si128((const __m128i*)(hsumSub + x + d))),
483 hv);
484 _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
485 _mm_store_si128((__m128i*)(C + x + d), Cx);
486 }
487 }
488 else
489 #endif
490 {
491 for( d = 0; d < D; d++ )
492 {
493 int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
494 C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
495 }
496 }
497 }
498 }
499 else
500 {
501 for( x = D; x < width1*D; x += D )
502 {
503 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
504 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
505
506 for( d = 0; d < D; d++ )
507 hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
508 }
509 }
510 }
511
512 if( y == 0 )
513 {
514 int scale = k == 0 ? SH2 + 1 : 1;
515 for( x = 0; x < width1*D; x++ )
516 C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
517 }
518 }
519
520 // also, clear the S buffer
521 for( k = 0; k < width1*D; k++ )
522 S[k] = 0;
523 }
524
525 // clear the left and the right borders
526 memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
527 memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
528 memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
529 memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
530
531 /*
532 [formula 13 in the paper]
533 compute L_r(p, d) = C(p, d) +
534 min(L_r(p-r, d),
535 L_r(p-r, d-1) + P1,
536 L_r(p-r, d+1) + P1,
537 min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
538 where p = (x,y), r is one of the directions.
539 we process all the directions at once:
540 0: r=(-dx, 0)
541 1: r=(-1, -dy)
542 2: r=(0, -dy)
543 3: r=(1, -dy)
544 4: r=(-2, -dy)
545 5: r=(-1, -dy*2)
546 6: r=(1, -dy*2)
547 7: r=(2, -dy)
548 */
549 for( x = x1; x != x2; x += dx )
550 {
551 int xm = x*NR2, xd = xm*D2;
552
553 int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
554 int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
555
556 CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
557 CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
558 CostType* Lr_p2 = Lr[1] + xd + D2*2;
559 CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
560
561 Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
562 Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
563
564 CostType* Lr_p = Lr[0] + xd;
565 const CostType* Cp = C + x*D;
566 CostType* Sp = S + x*D;
567
568 #if CV_SSE2
569 if( useSIMD )
570 {
571 __m128i _P1 = _mm_set1_epi16((short)P1);
572
573 __m128i _delta0 = _mm_set1_epi16((short)delta0);
574 __m128i _delta1 = _mm_set1_epi16((short)delta1);
575 __m128i _delta2 = _mm_set1_epi16((short)delta2);
576 __m128i _delta3 = _mm_set1_epi16((short)delta3);
577 __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
578
579 for( d = 0; d < D; d += 8 )
580 {
581 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
582 __m128i L0, L1, L2, L3;
583
584 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
585 L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
586 L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
587 L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
588
589 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
590 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
591
592 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
593 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
594
595 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
596 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
597
598 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
599 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
600
601 L0 = _mm_min_epi16(L0, _delta0);
602 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
603
604 L1 = _mm_min_epi16(L1, _delta1);
605 L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
606
607 L2 = _mm_min_epi16(L2, _delta2);
608 L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
609
610 L3 = _mm_min_epi16(L3, _delta3);
611 L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
612
613 _mm_store_si128( (__m128i*)(Lr_p + d), L0);
614 _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
615 _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
616 _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
617
618 __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
619 __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
620 t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
621 _minL0 = _mm_min_epi16(_minL0, t0);
622
623 __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
624
625 L0 = _mm_adds_epi16(L0, L1);
626 L2 = _mm_adds_epi16(L2, L3);
627 Sval = _mm_adds_epi16(Sval, L0);
628 Sval = _mm_adds_epi16(Sval, L2);
629
630 _mm_store_si128((__m128i*)(Sp + d), Sval);
631 }
632
633 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
634 _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
635 }
636 else
637 #endif
638 {
639 int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
640
641 for( d = 0; d < D; d++ )
642 {
643 int Cpd = Cp[d], L0, L1, L2, L3;
644
645 L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
646 L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
647 L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
648 L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
649
650 Lr_p[d] = (CostType)L0;
651 minL0 = std::min(minL0, L0);
652
653 Lr_p[d + D2] = (CostType)L1;
654 minL1 = std::min(minL1, L1);
655
656 Lr_p[d + D2*2] = (CostType)L2;
657 minL2 = std::min(minL2, L2);
658
659 Lr_p[d + D2*3] = (CostType)L3;
660 minL3 = std::min(minL3, L3);
661
662 Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
663 }
664 minLr[0][xm] = (CostType)minL0;
665 minLr[0][xm+1] = (CostType)minL1;
666 minLr[0][xm+2] = (CostType)minL2;
667 minLr[0][xm+3] = (CostType)minL3;
668 }
669 }
670
671 if( pass == npasses )
672 {
673 for( x = 0; x < width; x++ )
674 {
675 disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
676 disp2cost[x] = MAX_COST;
677 }
678
679 for( x = width1 - 1; x >= 0; x-- )
680 {
681 CostType* Sp = S + x*D;
682 int minS = MAX_COST, bestDisp = -1;
683
684 if( npasses == 1 )
685 {
686 int xm = x*NR2, xd = xm*D2;
687
688 int minL0 = MAX_COST;
689 int delta0 = minLr[0][xm + NR2] + P2;
690 CostType* Lr_p0 = Lr[0] + xd + NRD2;
691 Lr_p0[-1] = Lr_p0[D] = MAX_COST;
692 CostType* Lr_p = Lr[0] + xd;
693
694 const CostType* Cp = C + x*D;
695
696 #if CV_SSE2
697 if( useSIMD )
698 {
699 __m128i _P1 = _mm_set1_epi16((short)P1);
700 __m128i _delta0 = _mm_set1_epi16((short)delta0);
701
702 __m128i _minL0 = _mm_set1_epi16((short)minL0);
703 __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
704 __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
705
706 for( d = 0; d < D; d += 8 )
707 {
708 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
709
710 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
711 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
712 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
713 L0 = _mm_min_epi16(L0, _delta0);
714 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
715
716 _mm_store_si128((__m128i*)(Lr_p + d), L0);
717 _minL0 = _mm_min_epi16(_minL0, L0);
718 L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
719 _mm_store_si128((__m128i*)(Sp + d), L0);
720
721 __m128i mask = _mm_cmpgt_epi16(_minS, L0);
722 _minS = _mm_min_epi16(_minS, L0);
723 _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
724 _d8 = _mm_adds_epi16(_d8, _8);
725 }
726
727 short CV_DECL_ALIGNED(16) bestDispBuf[8];
728 _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
729
730 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
731 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
732 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
733
734 __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
735 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
736 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
737
738 minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
739 minS = (CostType)_mm_cvtsi128_si32(qS);
740
741 qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
742 qS = _mm_cmpeq_epi16(_minS, qS);
743 int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
744
745 bestDisp = bestDispBuf[LSBTab[idx]];
746 }
747 else
748 #endif
749 {
750 for( d = 0; d < D; d++ )
751 {
752 int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
753
754 Lr_p[d] = (CostType)L0;
755 minL0 = std::min(minL0, L0);
756
757 int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
758 if( Sval < minS )
759 {
760 minS = Sval;
761 bestDisp = d;
762 }
763 }
764 minLr[0][xm] = (CostType)minL0;
765 }
766 }
767 else
768 {
769 for( d = 0; d < D; d++ )
770 {
771 int Sval = Sp[d];
772 if( Sval < minS )
773 {
774 minS = Sval;
775 bestDisp = d;
776 }
777 }
778 }
779
780 for( d = 0; d < D; d++ )
781 {
782 if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
783 break;
784 }
785 if( d < D )
786 continue;
787 d = bestDisp;
788 int _x2 = x + minX1 - d - minD;
789 if( disp2cost[_x2] > minS )
790 {
791 disp2cost[_x2] = (CostType)minS;
792 disp2ptr[_x2] = (DispType)(d + minD);
793 }
794
795 if( 0 < d && d < D-1 )
796 {
797 // do subpixel quadratic interpolation:
798 // fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
799 // then find minimum of the parabola.
800 int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
801 d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
802 }
803 else
804 d *= DISP_SCALE;
805 disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
806 }
807
808 for( x = minX1; x < maxX1; x++ )
809 {
810 // we round the computed disparity both towards -inf and +inf and check
811 // if either of the corresponding disparities in disp2 is consistent.
812 // This is to give the computed disparity a chance to look valid if it is.
813 int d1 = disp1ptr[x];
814 if( d1 == INVALID_DISP_SCALED )
815 continue;
816 int _d = d1 >> DISP_SHIFT;
817 int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
818 int _x = x - _d, x_ = x - d_;
819 if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
820 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
821 disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
822 }
823 }
824
825 // now shift the cyclic buffers
826 std::swap( Lr[0], Lr[1] );
827 std::swap( minLr[0], minLr[1] );
828 }
829 }
830 }
831
832 class StereoSGBMImpl : public StereoSGBM
833 {
834 public:
StereoSGBMImpl()835 StereoSGBMImpl()
836 {
837 params = StereoSGBMParams();
838 }
839
StereoSGBMImpl(int _minDisparity,int _numDisparities,int _SADWindowSize,int _P1,int _P2,int _disp12MaxDiff,int _preFilterCap,int _uniquenessRatio,int _speckleWindowSize,int _speckleRange,int _mode)840 StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
841 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
842 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
843 int _mode )
844 {
845 params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
846 _P1, _P2, _disp12MaxDiff, _preFilterCap,
847 _uniquenessRatio, _speckleWindowSize, _speckleRange,
848 _mode );
849 }
850
compute(InputArray leftarr,InputArray rightarr,OutputArray disparr)851 void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
852 {
853 Mat left = leftarr.getMat(), right = rightarr.getMat();
854 CV_Assert( left.size() == right.size() && left.type() == right.type() &&
855 left.depth() == CV_8U );
856
857 disparr.create( left.size(), CV_16S );
858 Mat disp = disparr.getMat();
859
860 computeDisparitySGBM( left, right, disp, params, buffer );
861 medianBlur(disp, disp, 3);
862
863 if( params.speckleWindowSize > 0 )
864 filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
865 StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
866 }
867
getMinDisparity() const868 int getMinDisparity() const { return params.minDisparity; }
setMinDisparity(int minDisparity)869 void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
870
getNumDisparities() const871 int getNumDisparities() const { return params.numDisparities; }
setNumDisparities(int numDisparities)872 void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
873
getBlockSize() const874 int getBlockSize() const { return params.SADWindowSize; }
setBlockSize(int blockSize)875 void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
876
getSpeckleWindowSize() const877 int getSpeckleWindowSize() const { return params.speckleWindowSize; }
setSpeckleWindowSize(int speckleWindowSize)878 void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
879
getSpeckleRange() const880 int getSpeckleRange() const { return params.speckleRange; }
setSpeckleRange(int speckleRange)881 void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
882
getDisp12MaxDiff() const883 int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
setDisp12MaxDiff(int disp12MaxDiff)884 void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
885
getPreFilterCap() const886 int getPreFilterCap() const { return params.preFilterCap; }
setPreFilterCap(int preFilterCap)887 void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
888
getUniquenessRatio() const889 int getUniquenessRatio() const { return params.uniquenessRatio; }
setUniquenessRatio(int uniquenessRatio)890 void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
891
getP1() const892 int getP1() const { return params.P1; }
setP1(int P1)893 void setP1(int P1) { params.P1 = P1; }
894
getP2() const895 int getP2() const { return params.P2; }
setP2(int P2)896 void setP2(int P2) { params.P2 = P2; }
897
getMode() const898 int getMode() const { return params.mode; }
setMode(int mode)899 void setMode(int mode) { params.mode = mode; }
900
write(FileStorage & fs) const901 void write(FileStorage& fs) const
902 {
903 fs << "name" << name_
904 << "minDisparity" << params.minDisparity
905 << "numDisparities" << params.numDisparities
906 << "blockSize" << params.SADWindowSize
907 << "speckleWindowSize" << params.speckleWindowSize
908 << "speckleRange" << params.speckleRange
909 << "disp12MaxDiff" << params.disp12MaxDiff
910 << "preFilterCap" << params.preFilterCap
911 << "uniquenessRatio" << params.uniquenessRatio
912 << "P1" << params.P1
913 << "P2" << params.P2
914 << "mode" << params.mode;
915 }
916
read(const FileNode & fn)917 void read(const FileNode& fn)
918 {
919 FileNode n = fn["name"];
920 CV_Assert( n.isString() && String(n) == name_ );
921 params.minDisparity = (int)fn["minDisparity"];
922 params.numDisparities = (int)fn["numDisparities"];
923 params.SADWindowSize = (int)fn["blockSize"];
924 params.speckleWindowSize = (int)fn["speckleWindowSize"];
925 params.speckleRange = (int)fn["speckleRange"];
926 params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
927 params.preFilterCap = (int)fn["preFilterCap"];
928 params.uniquenessRatio = (int)fn["uniquenessRatio"];
929 params.P1 = (int)fn["P1"];
930 params.P2 = (int)fn["P2"];
931 params.mode = (int)fn["mode"];
932 }
933
934 StereoSGBMParams params;
935 Mat buffer;
936 static const char* name_;
937 };
938
939 const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM";
940
941
create(int minDisparity,int numDisparities,int SADWindowSize,int P1,int P2,int disp12MaxDiff,int preFilterCap,int uniquenessRatio,int speckleWindowSize,int speckleRange,int mode)942 Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize,
943 int P1, int P2, int disp12MaxDiff,
944 int preFilterCap, int uniquenessRatio,
945 int speckleWindowSize, int speckleRange,
946 int mode)
947 {
948 return Ptr<StereoSGBM>(
949 new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
950 P1, P2, disp12MaxDiff,
951 preFilterCap, uniquenessRatio,
952 speckleWindowSize, speckleRange,
953 mode));
954 }
955
getValidDisparityROI(Rect roi1,Rect roi2,int minDisparity,int numberOfDisparities,int SADWindowSize)956 Rect getValidDisparityROI( Rect roi1, Rect roi2,
957 int minDisparity,
958 int numberOfDisparities,
959 int SADWindowSize )
960 {
961 int SW2 = SADWindowSize/2;
962 int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
963
964 int xmin = std::max(roi1.x, roi2.x + maxD) + SW2;
965 int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
966 int ymin = std::max(roi1.y, roi2.y) + SW2;
967 int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
968
969 Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
970
971 return r.width > 0 && r.height > 0 ? r : Rect();
972 }
973
974 typedef cv::Point_<short> Point2s;
975
976 template <typename T>
filterSpecklesImpl(cv::Mat & img,int newVal,int maxSpeckleSize,int maxDiff,cv::Mat & _buf)977 void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf)
978 {
979 using namespace cv;
980
981 int width = img.cols, height = img.rows, npixels = width*height;
982 size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
983 if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
984 _buf.create(1, (int)bufSize, CV_8U);
985
986 uchar* buf = _buf.ptr();
987 int i, j, dstep = (int)(img.step/sizeof(T));
988 int* labels = (int*)buf;
989 buf += npixels*sizeof(labels[0]);
990 Point2s* wbuf = (Point2s*)buf;
991 buf += npixels*sizeof(wbuf[0]);
992 uchar* rtype = (uchar*)buf;
993 int curlabel = 0;
994
995 // clear out label assignments
996 memset(labels, 0, npixels*sizeof(labels[0]));
997
998 for( i = 0; i < height; i++ )
999 {
1000 T* ds = img.ptr<T>(i);
1001 int* ls = labels + width*i;
1002
1003 for( j = 0; j < width; j++ )
1004 {
1005 if( ds[j] != newVal ) // not a bad disparity
1006 {
1007 if( ls[j] ) // has a label, check for bad label
1008 {
1009 if( rtype[ls[j]] ) // small region, zero out disparity
1010 ds[j] = (T)newVal;
1011 }
1012 // no label, assign and propagate
1013 else
1014 {
1015 Point2s* ws = wbuf; // initialize wavefront
1016 Point2s p((short)j, (short)i); // current pixel
1017 curlabel++; // next label
1018 int count = 0; // current region size
1019 ls[j] = curlabel;
1020
1021 // wavefront propagation
1022 while( ws >= wbuf ) // wavefront not empty
1023 {
1024 count++;
1025 // put neighbors onto wavefront
1026 T* dpp = &img.at<T>(p.y, p.x);
1027 T dp = *dpp;
1028 int* lpp = labels + width*p.y + p.x;
1029
1030 if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
1031 {
1032 lpp[+width] = curlabel;
1033 *ws++ = Point2s(p.x, p.y+1);
1034 }
1035
1036 if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
1037 {
1038 lpp[-width] = curlabel;
1039 *ws++ = Point2s(p.x, p.y-1);
1040 }
1041
1042 if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
1043 {
1044 lpp[+1] = curlabel;
1045 *ws++ = Point2s(p.x+1, p.y);
1046 }
1047
1048 if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
1049 {
1050 lpp[-1] = curlabel;
1051 *ws++ = Point2s(p.x-1, p.y);
1052 }
1053
1054 // pop most recent and propagate
1055 // NB: could try least recent, maybe better convergence
1056 p = *--ws;
1057 }
1058
1059 // assign label type
1060 if( count <= maxSpeckleSize ) // speckle region
1061 {
1062 rtype[ls[j]] = 1; // small region label
1063 ds[j] = (T)newVal;
1064 }
1065 else
1066 rtype[ls[j]] = 0; // large region label
1067 }
1068 }
1069 }
1070 }
1071 }
1072
1073 }
1074
filterSpeckles(InputOutputArray _img,double _newval,int maxSpeckleSize,double _maxDiff,InputOutputArray __buf)1075 void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
1076 double _maxDiff, InputOutputArray __buf )
1077 {
1078 Mat img = _img.getMat();
1079 int type = img.type();
1080 Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
1081 CV_Assert( type == CV_8UC1 || type == CV_16SC1 );
1082
1083 int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff);
1084
1085 #if IPP_VERSION_X100 >= 801
1086 CV_IPP_CHECK()
1087 {
1088 Ipp32s bufsize = 0;
1089 IppiSize roisize = { img.cols, img.rows };
1090 IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s;
1091
1092 if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1))
1093 {
1094 IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize);
1095 Ipp8u * buffer = ippsMalloc_8u(bufsize);
1096
1097 if ((int)status >= 0)
1098 {
1099 if (type == CV_8UC1)
1100 status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize,
1101 (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer);
1102 else
1103 status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize,
1104 (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer);
1105 }
1106
1107 if (status >= 0)
1108 {
1109 CV_IMPL_ADD(CV_IMPL_IPP);
1110 return;
1111 }
1112 setIppErrorStatus();
1113 }
1114 }
1115 #endif
1116
1117 if (type == CV_8UC1)
1118 filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1119 else
1120 filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1121 }
1122
validateDisparity(InputOutputArray _disp,InputArray _cost,int minDisparity,int numberOfDisparities,int disp12MaxDiff)1123 void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
1124 int numberOfDisparities, int disp12MaxDiff )
1125 {
1126 Mat disp = _disp.getMat(), cost = _cost.getMat();
1127 int cols = disp.cols, rows = disp.rows;
1128 int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
1129 int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
1130 AutoBuffer<int> _disp2buf(cols*2);
1131 int* disp2buf = _disp2buf;
1132 int* disp2cost = disp2buf + cols;
1133 const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
1134 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
1135 int costType = cost.type();
1136
1137 disp12MaxDiff *= DISP_SCALE;
1138
1139 CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
1140 (costType == CV_16S || costType == CV_32S) &&
1141 disp.size() == cost.size() );
1142
1143 for( int y = 0; y < rows; y++ )
1144 {
1145 short* dptr = disp.ptr<short>(y);
1146
1147 for( x = 0; x < cols; x++ )
1148 {
1149 disp2buf[x] = INVALID_DISP_SCALED;
1150 disp2cost[x] = INT_MAX;
1151 }
1152
1153 if( costType == CV_16S )
1154 {
1155 const short* cptr = cost.ptr<short>(y);
1156
1157 for( x = minX1; x < maxX1; x++ )
1158 {
1159 int d = dptr[x], c = cptr[x];
1160 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1161
1162 if( disp2cost[x2] > c )
1163 {
1164 disp2cost[x2] = c;
1165 disp2buf[x2] = d;
1166 }
1167 }
1168 }
1169 else
1170 {
1171 const int* cptr = cost.ptr<int>(y);
1172
1173 for( x = minX1; x < maxX1; x++ )
1174 {
1175 int d = dptr[x], c = cptr[x];
1176 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1177
1178 if( disp2cost[x2] < c )
1179 {
1180 disp2cost[x2] = c;
1181 disp2buf[x2] = d;
1182 }
1183 }
1184 }
1185
1186 for( x = minX1; x < maxX1; x++ )
1187 {
1188 // we round the computed disparity both towards -inf and +inf and check
1189 // if either of the corresponding disparities in disp2 is consistent.
1190 // This is to give the computed disparity a chance to look valid if it is.
1191 int d = dptr[x];
1192 if( d == INVALID_DISP_SCALED )
1193 continue;
1194 int d0 = d >> DISP_SHIFT;
1195 int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
1196 int x0 = x - d0, x1 = x - d1;
1197 if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
1198 (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
1199 dptr[x] = (short)INVALID_DISP_SCALED;
1200 }
1201 }
1202 }
1203