1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "_cv.h"
43
44 /*
45 * This file includes the code, contributed by Simon Perreault
46 * (the function icvMedianBlur_8u_CnR_O1)
47 *
48 * Constant-time median filtering -- http://nomis80.org/ctmf.html
49 * Copyright (C) 2006 Simon Perreault
50 *
51 * Contact:
52 * Laboratoire de vision et systemes numeriques
53 * Pavillon Adrien-Pouliot
54 * Universite Laval
55 * Sainte-Foy, Quebec, Canada
56 * G1K 7P4
57 *
58 * perreaul@gel.ulaval.ca
59 */
60
61 // uncomment the line below to force SSE2 mode
62 //#define CV_SSE2 1
63
64 /****************************************************************************************\
65 Box Filter
66 \****************************************************************************************/
67
68 static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
69 static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
70 static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
71 int count, void* params );
72 static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
73 int count, void* params );
74 static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
75 int count, void* params );
76 static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
77 int count, void* params );
78
CvBoxFilter()79 CvBoxFilter::CvBoxFilter()
80 {
81 min_depth = CV_32S;
82 sum = 0;
83 sum_count = 0;
84 normalized = false;
85 }
86
87
CvBoxFilter(int _max_width,int _src_type,int _dst_type,bool _normalized,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)88 CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
89 bool _normalized, CvSize _ksize,
90 CvPoint _anchor, int _border_mode,
91 CvScalar _border_value )
92 {
93 min_depth = CV_32S;
94 sum = 0;
95 sum_count = 0;
96 normalized = false;
97 init( _max_width, _src_type, _dst_type, _normalized,
98 _ksize, _anchor, _border_mode, _border_value );
99 }
100
101
~CvBoxFilter()102 CvBoxFilter::~CvBoxFilter()
103 {
104 clear();
105 }
106
107
init(int _max_width,int _src_type,int _dst_type,bool _normalized,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)108 void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
109 bool _normalized, CvSize _ksize,
110 CvPoint _anchor, int _border_mode,
111 CvScalar _border_value )
112 {
113 CV_FUNCNAME( "CvBoxFilter::init" );
114
115 __BEGIN__;
116
117 sum = 0;
118 normalized = _normalized;
119
120 if( (normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type)) ||
121 (!normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type)))
122 CV_ERROR( CV_StsUnmatchedFormats,
123 "In case of normalized box filter input and output must have the same type.\n"
124 "In case of unnormalized box filter the number of input and output channels must be the same" );
125
126 min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
127
128 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
129 _anchor, _border_mode, _border_value );
130
131 scale = normalized ? 1./(ksize.width*ksize.height) : 1;
132
133 if( CV_MAT_DEPTH(src_type) == CV_8U )
134 x_func = (CvRowFilterFunc)icvSumRow_8u32s;
135 else if( CV_MAT_DEPTH(src_type) == CV_32F )
136 x_func = (CvRowFilterFunc)icvSumRow_32f64f;
137 else
138 CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
139
140 if( CV_MAT_DEPTH(dst_type) == CV_8U )
141 {
142 if( !normalized )
143 CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
144 y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
145 }
146 else if( CV_MAT_DEPTH(dst_type) == CV_16S )
147 {
148 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
149 CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
150 y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
151 }
152 else if( CV_MAT_DEPTH(dst_type) == CV_32S )
153 {
154 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
155 CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
156
157 y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
158 }
159 else if( CV_MAT_DEPTH(dst_type) == CV_32F )
160 {
161 if( CV_MAT_DEPTH(src_type) != CV_32F )
162 CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
163 y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
164 }
165 else{
166 CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
167 }
168
169 __END__;
170 }
171
172
start_process(CvSlice x_range,int width)173 void CvBoxFilter::start_process( CvSlice x_range, int width )
174 {
175 CvBaseImageFilter::start_process( x_range, width );
176 int i, psz = CV_ELEM_SIZE(work_type);
177 uchar* s;
178 buf_end -= buf_step;
179 buf_max_count--;
180 assert( buf_max_count >= max_ky*2 + 1 );
181 s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
182 sum_count = 0;
183
184 width *= psz;
185 for( i = 0; i < width; i++ )
186 s[i] = (uchar)0;
187 }
188
189
190 static void
icvSumRow_8u32s(const uchar * src,int * dst,void * params)191 icvSumRow_8u32s( const uchar* src, int* dst, void* params )
192 {
193 const CvBoxFilter* state = (const CvBoxFilter*)params;
194 int ksize = state->get_kernel_size().width;
195 int width = state->get_width();
196 int cn = CV_MAT_CN(state->get_src_type());
197 int i, k;
198
199 width = (width - 1)*cn; ksize *= cn;
200
201 for( k = 0; k < cn; k++, src++, dst++ )
202 {
203 int s = 0;
204 for( i = 0; i < ksize; i += cn )
205 s += src[i];
206 dst[0] = s;
207 for( i = 0; i < width; i += cn )
208 {
209 s += src[i+ksize] - src[i];
210 dst[i+cn] = s;
211 }
212 }
213 }
214
215
216 static void
icvSumRow_32f64f(const float * src,double * dst,void * params)217 icvSumRow_32f64f( const float* src, double* dst, void* params )
218 {
219 const CvBoxFilter* state = (const CvBoxFilter*)params;
220 int ksize = state->get_kernel_size().width;
221 int width = state->get_width();
222 int cn = CV_MAT_CN(state->get_src_type());
223 int i, k;
224
225 width = (width - 1)*cn; ksize *= cn;
226
227 for( k = 0; k < cn; k++, src++, dst++ )
228 {
229 double s = 0;
230 for( i = 0; i < ksize; i += cn )
231 s += src[i];
232 dst[0] = s;
233 for( i = 0; i < width; i += cn )
234 {
235 s += (double)src[i+ksize] - src[i];
236 dst[i+cn] = s;
237 }
238 }
239 }
240
241
242 static void
icvSumCol_32s8u(const int ** src,uchar * dst,int dst_step,int count,void * params)243 icvSumCol_32s8u( const int** src, uchar* dst,
244 int dst_step, int count, void* params )
245 {
246 #define BLUR_SHIFT 24
247 CvBoxFilter* state = (CvBoxFilter*)params;
248 int ksize = state->get_kernel_size().height;
249 int i, width = state->get_width();
250 int cn = CV_MAT_CN(state->get_src_type());
251 double scale = state->get_scale();
252 int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
253 int* sum = (int*)state->get_sum_buf();
254 int* _sum_count = state->get_sum_count_ptr();
255 int sum_count = *_sum_count;
256
257 width *= cn;
258 src += sum_count;
259 count += ksize - 1 - sum_count;
260
261 for( ; count--; src++ )
262 {
263 const int* sp = src[0];
264 if( sum_count+1 < ksize )
265 {
266 for( i = 0; i <= width - 2; i += 2 )
267 {
268 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
269 sum[i] = s0; sum[i+1] = s1;
270 }
271
272 for( ; i < width; i++ )
273 sum[i] += sp[i];
274
275 sum_count++;
276 }
277 else
278 {
279 const int* sm = src[-ksize+1];
280 for( i = 0; i <= width - 2; i += 2 )
281 {
282 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
283 int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
284 s0 -= sm[i]; s1 -= sm[i+1];
285 sum[i] = s0; sum[i+1] = s1;
286 dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
287 }
288
289 for( ; i < width; i++ )
290 {
291 int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
292 sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
293 }
294 dst += dst_step;
295 }
296 }
297
298 *_sum_count = sum_count;
299 #undef BLUR_SHIFT
300 }
301
302
303 static void
icvSumCol_32s16s(const int ** src,short * dst,int dst_step,int count,void * params)304 icvSumCol_32s16s( const int** src, short* dst,
305 int dst_step, int count, void* params )
306 {
307 CvBoxFilter* state = (CvBoxFilter*)params;
308 int ksize = state->get_kernel_size().height;
309 int ktotal = ksize*state->get_kernel_size().width;
310 int i, width = state->get_width();
311 int cn = CV_MAT_CN(state->get_src_type());
312 int* sum = (int*)state->get_sum_buf();
313 int* _sum_count = state->get_sum_count_ptr();
314 int sum_count = *_sum_count;
315
316 dst_step /= sizeof(dst[0]);
317 width *= cn;
318 src += sum_count;
319 count += ksize - 1 - sum_count;
320
321 for( ; count--; src++ )
322 {
323 const int* sp = src[0];
324 if( sum_count+1 < ksize )
325 {
326 for( i = 0; i <= width - 2; i += 2 )
327 {
328 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
329 sum[i] = s0; sum[i+1] = s1;
330 }
331
332 for( ; i < width; i++ )
333 sum[i] += sp[i];
334
335 sum_count++;
336 }
337 else if( ktotal < 128 )
338 {
339 const int* sm = src[-ksize+1];
340 for( i = 0; i <= width - 2; i += 2 )
341 {
342 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
343 dst[i] = (short)s0; dst[i+1] = (short)s1;
344 s0 -= sm[i]; s1 -= sm[i+1];
345 sum[i] = s0; sum[i+1] = s1;
346 }
347
348 for( ; i < width; i++ )
349 {
350 int s0 = sum[i] + sp[i];
351 dst[i] = (short)s0;
352 sum[i] = s0 - sm[i];
353 }
354 dst += dst_step;
355 }
356 else
357 {
358 const int* sm = src[-ksize+1];
359 for( i = 0; i <= width - 2; i += 2 )
360 {
361 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
362 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
363 s0 -= sm[i]; s1 -= sm[i+1];
364 sum[i] = s0; sum[i+1] = s1;
365 }
366
367 for( ; i < width; i++ )
368 {
369 int s0 = sum[i] + sp[i];
370 dst[i] = CV_CAST_16S(s0);
371 sum[i] = s0 - sm[i];
372 }
373 dst += dst_step;
374 }
375 }
376
377 *_sum_count = sum_count;
378 }
379
380 static void
icvSumCol_32s32s(const int ** src,int * dst,int dst_step,int count,void * params)381 icvSumCol_32s32s( const int** src, int * dst,
382 int dst_step, int count, void* params )
383 {
384 CvBoxFilter* state = (CvBoxFilter*)params;
385 int ksize = state->get_kernel_size().height;
386 int i, width = state->get_width();
387 int cn = CV_MAT_CN(state->get_src_type());
388 int* sum = (int*)state->get_sum_buf();
389 int* _sum_count = state->get_sum_count_ptr();
390 int sum_count = *_sum_count;
391
392 dst_step /= sizeof(dst[0]);
393 width *= cn;
394 src += sum_count;
395 count += ksize - 1 - sum_count;
396
397 for( ; count--; src++ )
398 {
399 const int* sp = src[0];
400 if( sum_count+1 < ksize )
401 {
402 for( i = 0; i <= width - 2; i += 2 )
403 {
404 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
405 sum[i] = s0; sum[i+1] = s1;
406 }
407
408 for( ; i < width; i++ )
409 sum[i] += sp[i];
410
411 sum_count++;
412 }
413 else
414 {
415 const int* sm = src[-ksize+1];
416 for( i = 0; i <= width - 2; i += 2 )
417 {
418 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
419 dst[i] = s0; dst[i+1] = s1;
420 s0 -= sm[i]; s1 -= sm[i+1];
421 sum[i] = s0; sum[i+1] = s1;
422 }
423
424 for( ; i < width; i++ )
425 {
426 int s0 = sum[i] + sp[i];
427 dst[i] = s0;
428 sum[i] = s0 - sm[i];
429 }
430 dst += dst_step;
431 }
432 }
433
434 *_sum_count = sum_count;
435 }
436
437
438 static void
icvSumCol_64f32f(const double ** src,float * dst,int dst_step,int count,void * params)439 icvSumCol_64f32f( const double** src, float* dst,
440 int dst_step, int count, void* params )
441 {
442 CvBoxFilter* state = (CvBoxFilter*)params;
443 int ksize = state->get_kernel_size().height;
444 int i, width = state->get_width();
445 int cn = CV_MAT_CN(state->get_src_type());
446 double scale = state->get_scale();
447 bool normalized = state->is_normalized();
448 double* sum = (double*)state->get_sum_buf();
449 int* _sum_count = state->get_sum_count_ptr();
450 int sum_count = *_sum_count;
451
452 dst_step /= sizeof(dst[0]);
453 width *= cn;
454 src += sum_count;
455 count += ksize - 1 - sum_count;
456
457 for( ; count--; src++ )
458 {
459 const double* sp = src[0];
460 if( sum_count+1 < ksize )
461 {
462 for( i = 0; i <= width - 2; i += 2 )
463 {
464 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
465 sum[i] = s0; sum[i+1] = s1;
466 }
467
468 for( ; i < width; i++ )
469 sum[i] += sp[i];
470
471 sum_count++;
472 }
473 else
474 {
475 const double* sm = src[-ksize+1];
476 if( normalized )
477 for( i = 0; i <= width - 2; i += 2 )
478 {
479 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
480 double t0 = s0*scale, t1 = s1*scale;
481 s0 -= sm[i]; s1 -= sm[i+1];
482 dst[i] = (float)t0; dst[i+1] = (float)t1;
483 sum[i] = s0; sum[i+1] = s1;
484 }
485 else
486 for( i = 0; i <= width - 2; i += 2 )
487 {
488 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
489 dst[i] = (float)s0; dst[i+1] = (float)s1;
490 s0 -= sm[i]; s1 -= sm[i+1];
491 sum[i] = s0; sum[i+1] = s1;
492 }
493
494 for( ; i < width; i++ )
495 {
496 double s0 = sum[i] + sp[i], t0 = s0*scale;
497 sum[i] = s0 - sm[i]; dst[i] = (float)t0;
498 }
499 dst += dst_step;
500 }
501 }
502
503 *_sum_count = sum_count;
504 }
505
506
507 /****************************************************************************************\
508 Median Filter
509 \****************************************************************************************/
510
511 #define CV_MINMAX_8U(a,b) \
512 (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
513
514 #if CV_SSE2 && !defined __SSE2__
515 #define __SSE2__ 1
516 #include "emmintrin.h"
517 #endif
518
519 #if defined(__VEC__) || defined(__ALTIVEC__)
520 #include <altivec.h>
521 #undef bool
522 #endif
523
524 #if defined(__GNUC__)
525 #define align(x) __attribute__ ((aligned (x)))
526 #elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
527 #define align(x) __declspec(align(x))
528 #else
529 #define align(x)
530 #endif
531
532 #if _MSC_VER >= 1200
533 #pragma warning( disable: 4244 )
534 #endif
535
536 /**
537 * This structure represents a two-tier histogram. The first tier (known as the
538 * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
539 * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
540 * coarse bucket designated by the 4 MSBs of the fine bucket value.
541 *
542 * The structure is aligned on 16 bits, which is a prerequisite for SIMD
543 * instructions. Each bucket is 16 bit wide, which means that extra care must be
544 * taken to prevent overflow.
545 */
546 typedef struct align(16)
547 {
548 ushort coarse[16];
549 ushort fine[16][16];
550 } Histogram;
551
552 /**
553 * HOP is short for Histogram OPeration. This macro makes an operation \a op on
554 * histogram \a h for pixel value \a x. It takes care of handling both levels.
555 */
556 #define HOP(h,x,op) \
557 h.coarse[x>>4] op; \
558 *((ushort*) h.fine + x) op;
559
560 #define COP(c,j,x,op) \
561 h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
562 h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
563
564 #if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
565 #define MEDIAN_HAVE_SIMD 1
566 #else
567 #define MEDIAN_HAVE_SIMD 0
568 #endif
569
570 /**
571 * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
572 * SSE2, MMX or Altivec, if available.
573 */
574 #if defined(__SSE2__)
histogram_add(const ushort x[16],ushort y[16])575 static inline void histogram_add( const ushort x[16], ushort y[16] )
576 {
577 _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
578 _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
579 _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
580 _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
581 }
582 #elif defined(__MMX__)
histogram_add(const ushort x[16],ushort y[16])583 static inline void histogram_add( const ushort x[16], ushort y[16] )
584 {
585 *(__m64*) &y[0] = _mm_add_pi16( *(__m64*) &y[0], *(__m64*) &x[0] );
586 *(__m64*) &y[4] = _mm_add_pi16( *(__m64*) &y[4], *(__m64*) &x[4] );
587 *(__m64*) &y[8] = _mm_add_pi16( *(__m64*) &y[8], *(__m64*) &x[8] );
588 *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
589 }
590 #elif defined(__ALTIVEC__)
histogram_add(const ushort x[16],ushort y[16])591 static inline void histogram_add( const ushort x[16], ushort y[16] )
592 {
593 *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
594 *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
595 }
596 #else
histogram_add(const ushort x[16],ushort y[16])597 static inline void histogram_add( const ushort x[16], ushort y[16] )
598 {
599 int i;
600 for( i = 0; i < 16; ++i )
601 y[i] = (ushort)(y[i] + x[i]);
602 }
603 #endif
604
605 /**
606 * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
607 * of SSE2, MMX or Altivec, if available.
608 */
609 #if defined(__SSE2__)
histogram_sub(const ushort x[16],ushort y[16])610 static inline void histogram_sub( const ushort x[16], ushort y[16] )
611 {
612 _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
613 _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
614 _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
615 _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
616 }
617 #elif defined(__MMX__)
histogram_sub(const ushort x[16],ushort y[16])618 static inline void histogram_sub( const ushort x[16], ushort y[16] )
619 {
620 *(__m64*) &y[0] = _mm_sub_pi16( *(__m64*) &y[0], *(__m64*) &x[0] );
621 *(__m64*) &y[4] = _mm_sub_pi16( *(__m64*) &y[4], *(__m64*) &x[4] );
622 *(__m64*) &y[8] = _mm_sub_pi16( *(__m64*) &y[8], *(__m64*) &x[8] );
623 *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
624 }
625 #elif defined(__ALTIVEC__)
histogram_sub(const ushort x[16],ushort y[16])626 static inline void histogram_sub( const ushort x[16], ushort y[16] )
627 {
628 *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
629 *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
630 }
631 #else
histogram_sub(const ushort x[16],ushort y[16])632 static inline void histogram_sub( const ushort x[16], ushort y[16] )
633 {
634 int i;
635 for( i = 0; i < 16; ++i )
636 y[i] = (ushort)(y[i] - x[i]);
637 }
638 #endif
639
histogram_muladd(int a,const ushort x[16],ushort y[16])640 static inline void histogram_muladd( int a, const ushort x[16],
641 ushort y[16] )
642 {
643 int i;
644 for ( i = 0; i < 16; ++i )
645 y[i] = (ushort)(y[i] + a * x[i]);
646 }
647
648 static CvStatus CV_STDCALL
icvMedianBlur_8u_CnR_O1(uchar * src,int src_step,uchar * dst,int dst_step,CvSize size,int kernel_size,int cn,int pad_left,int pad_right)649 icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
650 CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
651 {
652 int r = (kernel_size-1)/2;
653 const int m = size.height, n = size.width;
654 int i, j, k, c;
655 const unsigned char *p, *q;
656 Histogram H[4];
657 ushort *h_coarse, *h_fine, luc[4][16];
658
659 if( size.height < r || size.width < r )
660 return CV_BADSIZE_ERR;
661
662 assert( src );
663 assert( dst );
664 assert( r >= 0 );
665 assert( size.width >= 2*r+1 );
666 assert( size.height >= 2*r+1 );
667 assert( src_step != 0 );
668 assert( dst_step != 0 );
669
670 h_coarse = (ushort*) cvAlloc( 1 * 16 * n * cn * sizeof(ushort) );
671 h_fine = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
672 memset( h_coarse, 0, 1 * 16 * n * cn * sizeof(ushort) );
673 memset( h_fine, 0, 16 * 16 * n * cn * sizeof(ushort) );
674
675 /* First row initialization */
676 for ( j = 0; j < n; ++j ) {
677 for ( c = 0; c < cn; ++c ) {
678 COP( c, j, src[cn*j+c], += r+1 );
679 }
680 }
681 for ( i = 0; i < r; ++i ) {
682 for ( j = 0; j < n; ++j ) {
683 for ( c = 0; c < cn; ++c ) {
684 COP( c, j, src[src_step*i+cn*j+c], ++ );
685 }
686 }
687 }
688
689 for ( i = 0; i < m; ++i ) {
690
691 /* Update column histograms for entire row. */
692 p = src + src_step * MAX( 0, i-r-1 );
693 q = p + cn * n;
694 for ( j = 0; p != q; ++j ) {
695 for ( c = 0; c < cn; ++c, ++p ) {
696 COP( c, j, *p, -- );
697 }
698 }
699
700 p = src + src_step * MIN( m-1, i+r );
701 q = p + cn * n;
702 for ( j = 0; p != q; ++j ) {
703 for ( c = 0; c < cn; ++c, ++p ) {
704 COP( c, j, *p, ++ );
705 }
706 }
707
708 /* First column initialization */
709 memset( H, 0, cn*sizeof(H[0]) );
710 memset( luc, 0, cn*sizeof(luc[0]) );
711 if ( pad_left ) {
712 for ( c = 0; c < cn; ++c ) {
713 histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
714 }
715 }
716 for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
717 for ( c = 0; c < cn; ++c ) {
718 histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
719 }
720 }
721 for ( c = 0; c < cn; ++c ) {
722 for ( k = 0; k < 16; ++k ) {
723 histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
724 }
725 }
726
727 for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
728 for ( c = 0; c < cn; ++c ) {
729 int t = 2*r*r + 2*r, b, sum = 0;
730 ushort* segment;
731
732 histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
733
734 /* Find median at coarse level */
735 for ( k = 0; k < 16 ; ++k ) {
736 sum += H[c].coarse[k];
737 if ( sum > t ) {
738 sum -= H[c].coarse[k];
739 break;
740 }
741 }
742 assert( k < 16 );
743
744 /* Update corresponding histogram segment */
745 if ( luc[c][k] <= j-r ) {
746 memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
747 for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
748 histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
749 }
750 if ( luc[c][k] < j+r+1 ) {
751 histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
752 luc[c][k] = (ushort)(j+r+1);
753 }
754 }
755 else {
756 for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
757 histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
758 histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
759 }
760 }
761
762 histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
763
764 /* Find median in segment */
765 segment = H[c].fine[k];
766 for ( b = 0; b < 16 ; ++b ) {
767 sum += segment[b];
768 if ( sum > t ) {
769 dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
770 break;
771 }
772 }
773 assert( b < 16 );
774 }
775 }
776 }
777
778 #if defined(__MMX__)
779 _mm_empty();
780 #endif
781
782 cvFree(&h_coarse);
783 cvFree(&h_fine);
784
785 #undef HOP
786 #undef COP
787 return CV_OK;
788 }
789
790
791 #if _MSC_VER >= 1200
792 #pragma warning( default: 4244 )
793 #endif
794
795
796 static CvStatus CV_STDCALL
icvMedianBlur_8u_CnR_Om(uchar * src,int src_step,uchar * dst,int dst_step,CvSize size,int m,int cn)797 icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
798 CvSize size, int m, int cn )
799 {
800 #define N 16
801 int zone0[4][N];
802 int zone1[4][N*N];
803 int x, y;
804 int n2 = m*m/2;
805 int nx = (m + 1)/2 - 1;
806 uchar* src_max = src + size.height*src_step;
807 uchar* src_right = src + size.width*cn;
808
809 #define UPDATE_ACC01( pix, cn, op ) \
810 { \
811 int p = (pix); \
812 zone1[cn][p] op; \
813 zone0[cn][p >> 4] op; \
814 }
815
816 if( size.height < nx || size.width < nx )
817 return CV_BADSIZE_ERR;
818
819 if( m == 3 )
820 {
821 size.width *= cn;
822
823 for( y = 0; y < size.height; y++, dst += dst_step )
824 {
825 const uchar* src0 = src + src_step*(y-1);
826 const uchar* src1 = src0 + src_step;
827 const uchar* src2 = src1 + src_step;
828 if( y == 0 )
829 src0 = src1;
830 else if( y == size.height - 1 )
831 src2 = src1;
832
833 for( x = 0; x < 2*cn; x++ )
834 {
835 int x0 = x < cn ? x : size.width - 3*cn + x;
836 int x2 = x < cn ? x + cn : size.width - 2*cn + x;
837 int x1 = x < cn ? x0 : x2, t;
838
839 int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
840 int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
841 int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
842
843 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
844 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
845 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
846 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
847 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
848 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
849 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
850 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
851 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
852 CV_MINMAX_8U(p4, p2);
853 dst[x1] = (uchar)p4;
854 }
855
856 for( x = cn; x < size.width - cn; x++ )
857 {
858 int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
859 int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
860 int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
861 int t;
862
863 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
864 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
865 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
866 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
867 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
868 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
869 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
870 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
871 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
872 CV_MINMAX_8U(p4, p2);
873
874 dst[x] = (uchar)p4;
875 }
876 }
877
878 return CV_OK;
879 }
880
881 for( x = 0; x < size.width; x++, dst += cn )
882 {
883 uchar* dst_cur = dst;
884 uchar* src_top = src;
885 uchar* src_bottom = src;
886 int k, c;
887 int x0 = -1;
888 int src_step1 = src_step, dst_step1 = dst_step;
889
890 if( x % 2 != 0 )
891 {
892 src_bottom = src_top += src_step*(size.height-1);
893 dst_cur += dst_step*(size.height-1);
894 src_step1 = -src_step1;
895 dst_step1 = -dst_step1;
896 }
897
898 if( x <= m/2 )
899 nx++;
900
901 if( nx < m )
902 x0 = x < m/2 ? 0 : (nx-1)*cn;
903
904 // init accumulator
905 memset( zone0, 0, sizeof(zone0[0])*cn );
906 memset( zone1, 0, sizeof(zone1[0])*cn );
907
908 for( y = 0; y <= m/2; y++ )
909 {
910 for( c = 0; c < cn; c++ )
911 {
912 if( y > 0 )
913 {
914 if( x0 >= 0 )
915 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
916 for( k = 0; k < nx*cn; k += cn )
917 UPDATE_ACC01( src_bottom[k+c], c, ++ );
918 }
919 else
920 {
921 if( x0 >= 0 )
922 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
923 for( k = 0; k < nx*cn; k += cn )
924 UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
925 }
926 }
927
928 if( (src_step1 > 0 && y < size.height-1) ||
929 (src_step1 < 0 && size.height-y-1 > 0) )
930 src_bottom += src_step1;
931 }
932
933 for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
934 {
935 // find median
936 for( c = 0; c < cn; c++ )
937 {
938 int s = 0;
939 for( k = 0; ; k++ )
940 {
941 int t = s + zone0[c][k];
942 if( t > n2 ) break;
943 s = t;
944 }
945
946 for( k *= N; ;k++ )
947 {
948 s += zone1[c][k];
949 if( s > n2 ) break;
950 }
951
952 dst_cur[c] = (uchar)k;
953 }
954
955 if( y+1 == size.height )
956 break;
957
958 if( cn == 1 )
959 {
960 for( k = 0; k < nx; k++ )
961 {
962 int p = src_top[k];
963 int q = src_bottom[k];
964 zone1[0][p]--;
965 zone0[0][p>>4]--;
966 zone1[0][q]++;
967 zone0[0][q>>4]++;
968 }
969 }
970 else if( cn == 3 )
971 {
972 for( k = 0; k < nx*3; k += 3 )
973 {
974 UPDATE_ACC01( src_top[k], 0, -- );
975 UPDATE_ACC01( src_top[k+1], 1, -- );
976 UPDATE_ACC01( src_top[k+2], 2, -- );
977
978 UPDATE_ACC01( src_bottom[k], 0, ++ );
979 UPDATE_ACC01( src_bottom[k+1], 1, ++ );
980 UPDATE_ACC01( src_bottom[k+2], 2, ++ );
981 }
982 }
983 else
984 {
985 assert( cn == 4 );
986 for( k = 0; k < nx*4; k += 4 )
987 {
988 UPDATE_ACC01( src_top[k], 0, -- );
989 UPDATE_ACC01( src_top[k+1], 1, -- );
990 UPDATE_ACC01( src_top[k+2], 2, -- );
991 UPDATE_ACC01( src_top[k+3], 3, -- );
992
993 UPDATE_ACC01( src_bottom[k], 0, ++ );
994 UPDATE_ACC01( src_bottom[k+1], 1, ++ );
995 UPDATE_ACC01( src_bottom[k+2], 2, ++ );
996 UPDATE_ACC01( src_bottom[k+3], 3, ++ );
997 }
998 }
999
1000 if( x0 >= 0 )
1001 {
1002 for( c = 0; c < cn; c++ )
1003 {
1004 UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1006 }
1007 }
1008
1009 if( (src_step1 > 0 && src_bottom + src_step1 < src_max) ||
1010 (src_step1 < 0 && src_bottom + src_step1 >= src) )
1011 src_bottom += src_step1;
1012
1013 if( y >= m/2 )
1014 src_top += src_step1;
1015 }
1016
1017 if( x >= m/2 )
1018 src += cn;
1019 if( src + nx*cn > src_right ) nx--;
1020 }
1021 #undef N
1022 #undef UPDATE_ACC
1023 return CV_OK;
1024 }
1025
1026
1027 /****************************************************************************************\
1028 Bilateral Filtering
1029 \****************************************************************************************/
1030
1031 static void
icvBilateralFiltering_8u(const CvMat * src,CvMat * dst,int d,double sigma_color,double sigma_space)1032 icvBilateralFiltering_8u( const CvMat* src, CvMat* dst, int d,
1033 double sigma_color, double sigma_space )
1034 {
1035 CvMat* temp = 0;
1036 float* color_weight = 0;
1037 float* space_weight = 0;
1038 int* space_ofs = 0;
1039
1040 CV_FUNCNAME( "icvBilateralFiltering_8u" );
1041
1042 __BEGIN__;
1043
1044 double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045 double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046 int cn = CV_MAT_CN(src->type);
1047 int i, j, k, maxk, radius;
1048 CvSize size = cvGetMatSize(src);
1049
1050 if( (CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051 CV_MAT_TYPE(src->type) != CV_8UC3) ||
1052 !CV_ARE_TYPES_EQ(src, dst) )
1053 CV_ERROR( CV_StsUnsupportedFormat,
1054 "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1055
1056 if( sigma_color <= 0 )
1057 sigma_color = 1;
1058 if( sigma_space <= 0 )
1059 sigma_space = 1;
1060
1061 if( d == 0 )
1062 radius = cvRound(sigma_space*1.5);
1063 else
1064 radius = d/2;
1065 radius = MAX(radius, 1);
1066 d = radius*2 + 1;
1067
1068 CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1069 src->cols + radius*2, src->type ));
1070 CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1071 CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1072 CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1073 CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1074
1075 // initialize color-related bilateral filter coefficients
1076 for( i = 0; i < 256*cn; i++ )
1077 color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1078
1079 // initialize space-related bilateral filter coefficients
1080 for( i = -radius, maxk = 0; i <= radius; i++ )
1081 for( j = -radius; j <= radius; j++ )
1082 {
1083 double r = sqrt((double)i*i + (double)j*j);
1084 if( r > radius )
1085 continue;
1086 space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1087 space_ofs[maxk++] = i*temp->step + j*cn;
1088 }
1089
1090 for( i = 0; i < size.height; i++ )
1091 {
1092 const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1093 uchar* dptr = dst->data.ptr + i*dst->step;
1094
1095 if( cn == 1 )
1096 {
1097 for( j = 0; j < size.width; j++ )
1098 {
1099 float sum = 0, wsum = 0;
1100 int val0 = sptr[j];
1101 for( k = 0; k < maxk; k++ )
1102 {
1103 int val = sptr[j + space_ofs[k]];
1104 float w = space_weight[k]*color_weight[abs(val - val0)];
1105 sum += val*w;
1106 wsum += w;
1107 }
1108 // overflow is not possible here => there is no need to use CV_CAST_8U
1109 dptr[j] = (uchar)cvRound(sum/wsum);
1110 }
1111 }
1112 else
1113 {
1114 assert( cn == 3 );
1115 for( j = 0; j < size.width*3; j += 3 )
1116 {
1117 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1118 int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1119 for( k = 0; k < maxk; k++ )
1120 {
1121 const uchar* sptr_k = sptr + j + space_ofs[k];
1122 int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1123 float w = space_weight[k]*color_weight[abs(b - b0) +
1124 abs(g - g0) + abs(r - r0)];
1125 sum_b += b*w; sum_g += g*w; sum_r += r*w;
1126 wsum += w;
1127 }
1128 wsum = 1.f/wsum;
1129 b0 = cvRound(sum_b*wsum);
1130 g0 = cvRound(sum_g*wsum);
1131 r0 = cvRound(sum_r*wsum);
1132 dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1133 }
1134 }
1135 }
1136
1137 __END__;
1138
1139 cvReleaseMat( &temp );
1140 cvFree( &color_weight );
1141 cvFree( &space_weight );
1142 cvFree( &space_ofs );
1143 }
1144
1145
icvBilateralFiltering_32f(const CvMat * src,CvMat * dst,int d,double sigma_color,double sigma_space)1146 static void icvBilateralFiltering_32f( const CvMat* src, CvMat* dst, int d,
1147 double sigma_color, double sigma_space )
1148 {
1149 CvMat* temp = 0;
1150 float* space_weight = 0;
1151 int* space_ofs = 0;
1152 float *expLUT = 0;
1153
1154 CV_FUNCNAME( "icvBilateralFiltering_32f" );
1155
1156 __BEGIN__;
1157
1158 double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1159 double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1160 int cn = CV_MAT_CN(src->type);
1161 int i, j, k, maxk, radius;
1162 double minValSrc=-1, maxValSrc=1;
1163 const int kExpNumBinsPerChannel = 1 << 12;
1164 int kExpNumBins = 0;
1165 float lastExpVal = 1.f;
1166 int temp_step;
1167 float len, scale_index;
1168 CvMat src_reshaped;
1169
1170 CvSize size = cvGetMatSize(src);
1171
1172 if( (CV_MAT_TYPE(src->type) != CV_32FC1 &&
1173 CV_MAT_TYPE(src->type) != CV_32FC3) ||
1174 !CV_ARE_TYPES_EQ(src, dst) )
1175 CV_ERROR( CV_StsUnsupportedFormat,
1176 "Both source and destination must be 32-bit float, single-channel or 3-channel images" );
1177
1178 if( sigma_color <= 0 )
1179 sigma_color = 1;
1180 if( sigma_space <= 0 )
1181 sigma_space = 1;
1182
1183 if( d == 0 )
1184 radius = cvRound(sigma_space*1.5);
1185 else
1186 radius = d/2;
1187 radius = MAX(radius, 1);
1188 d = radius*2 + 1;
1189 // compute the min/max range for the input image (even if multichannel)
1190
1191 CV_CALL( cvReshape( src, &src_reshaped, 1 ) );
1192 CV_CALL( cvMinMaxLoc(&src_reshaped, &minValSrc, &maxValSrc) );
1193
1194 // temporary copy of the image with borders for easy processing
1195 CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1196 src->cols + radius*2, src->type ));
1197 temp_step = temp->step/sizeof(float);
1198 CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1199 // allocate lookup tables
1200 CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1201 CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1202
1203 // assign a length which is slightly more than needed
1204 len = (float)(maxValSrc - minValSrc) * cn;
1205 kExpNumBins = kExpNumBinsPerChannel * cn;
1206 CV_CALL( expLUT = (float*)cvAlloc((kExpNumBins+2) * sizeof(expLUT[0])));
1207 scale_index = kExpNumBins/len;
1208
1209 // initialize the exp LUT
1210 for( i = 0; i < kExpNumBins+2; i++ )
1211 {
1212 if( lastExpVal > 0.f )
1213 {
1214 double val = i / scale_index;
1215 expLUT[i] = (float)exp(val * val * gauss_color_coeff);
1216 lastExpVal = expLUT[i];
1217 }
1218 else
1219 expLUT[i] = 0.f;
1220 }
1221
1222 // initialize space-related bilateral filter coefficients
1223 for( i = -radius, maxk = 0; i <= radius; i++ )
1224 for( j = -radius; j <= radius; j++ )
1225 {
1226 double r = sqrt((double)i*i + (double)j*j);
1227 if( r > radius )
1228 continue;
1229 space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1230 space_ofs[maxk++] = i*temp_step + j*cn;
1231 }
1232
1233 for( i = 0; i < size.height; i++ )
1234 {
1235 const float* sptr = temp->data.fl + (i+radius)*temp_step + radius*cn;
1236 float* dptr = (float*)(dst->data.ptr + i*dst->step);
1237
1238 if( cn == 1 )
1239 {
1240 for( j = 0; j < size.width; j++ )
1241 {
1242 float sum = 0, wsum = 0;
1243 float val0 = sptr[j];
1244 for( k = 0; k < maxk; k++ )
1245 {
1246 float val = sptr[j + space_ofs[k]];
1247 float alpha = (float)(fabs(val - val0)*scale_index);
1248 int idx = cvFloor(alpha);
1249 alpha -= idx;
1250 float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1251 sum += val*w;
1252 wsum += w;
1253 }
1254 dptr[j] = (float)(sum/wsum);
1255 }
1256 }
1257 else
1258 {
1259 assert( cn == 3 );
1260 for( j = 0; j < size.width*3; j += 3 )
1261 {
1262 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1263 float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1264 for( k = 0; k < maxk; k++ )
1265 {
1266 const float* sptr_k = sptr + j + space_ofs[k];
1267 float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1268 float alpha = (float)((fabs(b - b0) + fabs(g - g0) + fabs(r - r0))*scale_index);
1269 int idx = cvFloor(alpha);
1270 alpha -= idx;
1271 float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1272 sum_b += b*w; sum_g += g*w; sum_r += r*w;
1273 wsum += w;
1274 }
1275 wsum = 1.f/wsum;
1276 b0 = sum_b*wsum;
1277 g0 = sum_g*wsum;
1278 r0 = sum_r*wsum;
1279 dptr[j] = b0; dptr[j+1] = g0; dptr[j+2] = r0;
1280 }
1281 }
1282 }
1283
1284 __END__;
1285
1286 cvReleaseMat( &temp );
1287 cvFree( &space_weight );
1288 cvFree( &space_ofs );
1289 cvFree( &expLUT );
1290 }
1291
1292 //////////////////////////////// IPP smoothing functions /////////////////////////////////
1293
1294 icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1295 icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1296 icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1297
1298 icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1299 icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1300 icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1301 icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1302 icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1303 icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1304
1305 typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1306 ( const void* src, int srcstep, void* dst, int dststep,
1307 CvSize size, CvSize ksize, CvPoint anchor );
1308
1309 //////////////////////////////////////////////////////////////////////////////////////////
1310
1311 CV_IMPL void
cvSmooth(const void * srcarr,void * dstarr,int smooth_type,int param1,int param2,double param3,double param4)1312 cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1313 int param1, int param2, double param3, double param4 )
1314 {
1315 CvBoxFilter box_filter;
1316 CvSepFilter gaussian_filter;
1317
1318 CvMat* temp = 0;
1319
1320 CV_FUNCNAME( "cvSmooth" );
1321
1322 __BEGIN__;
1323
1324 int coi1 = 0, coi2 = 0;
1325 CvMat srcstub, *src = (CvMat*)srcarr;
1326 CvMat dststub, *dst = (CvMat*)dstarr;
1327 CvSize size;
1328 int src_type, dst_type, depth, cn;
1329 double sigma1 = 0, sigma2 = 0;
1330 bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1331
1332 CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1333 CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1334
1335 if( coi1 != 0 || coi2 != 0 )
1336 CV_ERROR( CV_BadCOI, "" );
1337
1338 src_type = CV_MAT_TYPE( src->type );
1339 dst_type = CV_MAT_TYPE( dst->type );
1340 depth = CV_MAT_DEPTH(src_type);
1341 cn = CV_MAT_CN(src_type);
1342 size = cvGetMatSize(src);
1343
1344 if( !CV_ARE_SIZES_EQ( src, dst ))
1345 CV_ERROR( CV_StsUnmatchedSizes, "" );
1346
1347 if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1348 CV_ERROR( CV_StsUnmatchedFormats,
1349 "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1350
1351 if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1352 smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1353 {
1354 // automatic detection of kernel size from sigma
1355 if( smooth_type == CV_GAUSSIAN )
1356 {
1357 sigma1 = param3;
1358 sigma2 = param4 ? param4 : param3;
1359
1360 if( param1 == 0 && sigma1 > 0 )
1361 param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1362 if( param2 == 0 && sigma2 > 0 )
1363 param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1364 }
1365
1366 if( param2 == 0 )
1367 param2 = size.height == 1 ? 1 : param1;
1368 if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1369 CV_ERROR( CV_StsOutOfRange,
1370 "Both mask width and height must be >=1 and odd" );
1371
1372 if( param1 == 1 && param2 == 1 )
1373 {
1374 cvConvert( src, dst );
1375 EXIT;
1376 }
1377 }
1378
1379 if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1380 size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1381 {
1382 CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1383
1384 if( smooth_type == CV_BLUR )
1385 {
1386 ipp_median_box_func =
1387 src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1388 src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1389 src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1390 src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1391 src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1392 src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1393 }
1394 else if( smooth_type == CV_MEDIAN )
1395 {
1396 ipp_median_box_func =
1397 src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1398 src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1399 src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1400 }
1401
1402 if( ipp_median_box_func )
1403 {
1404 CvSize el_size = { param1, param2 };
1405 CvPoint el_anchor = { param1/2, param2/2 };
1406 int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1407 // overhead of the current IPP code etc.
1408 const uchar* shifted_ptr;
1409 int y, dy = 0;
1410 int temp_step, dst_step = dst->step;
1411
1412 CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1413
1414 shifted_ptr = temp->data.ptr +
1415 el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1416 temp_step = temp->step ? temp->step : CV_STUB_STEP;
1417
1418 for( y = 0; y < src->rows; y += dy )
1419 {
1420 dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1421 IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1422 dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1423 el_size, el_anchor ));
1424 }
1425 EXIT;
1426 }
1427 }
1428
1429 if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1430 {
1431 CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1432 smooth_type == CV_BLUR, cvSize(param1, param2) ));
1433 CV_CALL( box_filter.process( src, dst ));
1434 }
1435 else if( smooth_type == CV_MEDIAN )
1436 {
1437 int img_size_mp = size.width*size.height;
1438 img_size_mp = (img_size_mp + (1<<19)) >> 20;
1439
1440 if( depth != CV_8U || (cn != 1 && cn != 3 && cn != 4) )
1441 CV_ERROR( CV_StsUnsupportedFormat,
1442 "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1443
1444 if( size.width < param1*2 || size.height < param1*2 ||
1445 param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1446 {
1447 // Special case optimized for 3x3
1448 IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1449 dst->data.ptr, dst->step, size, param1, cn ));
1450 }
1451 else
1452 {
1453 const int r = (param1 - 1) / 2;
1454 const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn ); // assume a 256 kB cache size
1455 const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1456 (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1457 const int STRIPE_SIZE = (int) cvCeil(
1458 (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1459
1460 for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1461 {
1462 int stripe = STRIPE_SIZE;
1463 // Make sure that the filter kernel fits into one stripe.
1464 if( i + STRIPE_SIZE - 2*r >= size.width ||
1465 size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1466 stripe = size.width - i;
1467
1468 IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1469 dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1470 param1, cn, i == 0, stripe == size.width - i ));
1471
1472 if( stripe == size.width - i )
1473 break;
1474 }
1475 }
1476 }
1477 else if( smooth_type == CV_GAUSSIAN )
1478 {
1479 CvSize ksize = { param1, param2 };
1480 float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1481 float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1482 CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1483 CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1484
1485 CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1486 if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1487 CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1488 else
1489 KY.data.fl = kx;
1490
1491 if( have_ipp && size.width >= param1*3 &&
1492 size.height >= param2 && param1 > 1 && param2 > 1 )
1493 {
1494 int done;
1495 CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1496 cvPoint(ksize.width/2,ksize.height/2)));
1497 if( done )
1498 EXIT;
1499 }
1500
1501 CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1502 CV_CALL( gaussian_filter.process( src, dst ));
1503 }
1504 else if( smooth_type == CV_BILATERAL )
1505 {
1506 if( param2 != 0 && (param2 != param1 || param1 % 2 == 0) )
1507 CV_ERROR( CV_StsBadSize, "Bilateral filter only supports square windows of odd size" );
1508
1509 switch( src_type )
1510 {
1511 case CV_32FC1:
1512 case CV_32FC3:
1513 CV_CALL( icvBilateralFiltering_32f( src, dst, param1, param3, param4 ));
1514 break;
1515 case CV_8UC1:
1516 case CV_8UC3:
1517 CV_CALL( icvBilateralFiltering_8u( src, dst, param1, param3, param4 ));
1518 break;
1519 default:
1520 CV_ERROR( CV_StsUnsupportedFormat,
1521 "Unknown/unsupported format: bilateral filter only supports 8uC1, 8uC3, 32fC1 and 32fC3 formats" );
1522 }
1523 }
1524
1525 __END__;
1526
1527 cvReleaseMat( &temp );
1528 }
1529
1530 /* End of file. */
1531