1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "_cv.h"
43
44
45 /****************************************************************************************\
46 Base Image Filter
47 \****************************************************************************************/
48
default_x_filter_func(const uchar *,uchar *,void *)49 static void default_x_filter_func( const uchar*, uchar*, void* )
50 {
51 }
52
default_y_filter_func(uchar **,uchar *,int,int,void *)53 static void default_y_filter_func( uchar**, uchar*, int, int, void* )
54 {
55 }
56
CvBaseImageFilter()57 CvBaseImageFilter::CvBaseImageFilter()
58 {
59 min_depth = CV_8U;
60 buffer = 0;
61 rows = 0;
62 max_width = 0;
63 x_func = default_x_filter_func;
64 y_func = default_y_filter_func;
65 }
66
67
CvBaseImageFilter(int _max_width,int _src_type,int _dst_type,bool _is_separable,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)68 CvBaseImageFilter::CvBaseImageFilter( int _max_width, int _src_type, int _dst_type,
69 bool _is_separable, CvSize _ksize, CvPoint _anchor,
70 int _border_mode, CvScalar _border_value )
71 {
72 min_depth = CV_8U;
73 buffer = 0;
74 rows = 0;
75 max_width = 0;
76 x_func = default_x_filter_func;
77 y_func = default_y_filter_func;
78
79 init( _max_width, _src_type, _dst_type, _is_separable,
80 _ksize, _anchor, _border_mode, _border_value );
81 }
82
83
clear()84 void CvBaseImageFilter::clear()
85 {
86 cvFree( &buffer );
87 rows = 0;
88 }
89
90
~CvBaseImageFilter()91 CvBaseImageFilter::~CvBaseImageFilter()
92 {
93 clear();
94 }
95
96
get_work_params()97 void CvBaseImageFilter::get_work_params()
98 {
99 int min_rows = max_ky*2 + 3, rows = MAX(min_rows,10), row_sz;
100 int width = max_width, trow_sz = 0;
101
102 if( is_separable )
103 {
104 int max_depth = MAX(CV_MAT_DEPTH(src_type), CV_MAT_DEPTH(dst_type));
105 int max_cn = MAX(CV_MAT_CN(src_type), CV_MAT_CN(dst_type));
106 max_depth = MAX( max_depth, min_depth );
107 work_type = CV_MAKETYPE( max_depth, max_cn );
108 trow_sz = cvAlign( (max_width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN );
109 }
110 else
111 {
112 work_type = src_type;
113 width += ksize.width - 1;
114 }
115 row_sz = cvAlign( width*CV_ELEM_SIZE(work_type), ALIGN );
116 buf_size = rows*row_sz;
117 buf_size = MIN( buf_size, 1 << 16 );
118 buf_size = MAX( buf_size, min_rows*row_sz );
119 max_rows = (buf_size/row_sz)*3 + max_ky*2 + 8;
120 buf_size += trow_sz;
121 }
122
123
init(int _max_width,int _src_type,int _dst_type,bool _is_separable,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)124 void CvBaseImageFilter::init( int _max_width, int _src_type, int _dst_type,
125 bool _is_separable, CvSize _ksize, CvPoint _anchor,
126 int _border_mode, CvScalar _border_value )
127 {
128 CV_FUNCNAME( "CvBaseImageFilter::init" );
129
130 __BEGIN__;
131
132 int total_buf_sz, src_pix_sz, row_tab_sz, bsz;
133 uchar* ptr;
134
135 if( !(buffer && _max_width <= max_width && _src_type == src_type &&
136 _dst_type == dst_type && _is_separable == is_separable &&
137 _ksize.width == ksize.width && _ksize.height == ksize.height &&
138 _anchor.x == anchor.x && _anchor.y == anchor.y) )
139 clear();
140
141 is_separable = _is_separable != 0;
142 max_width = _max_width; //MAX(_max_width,_ksize.width);
143 src_type = CV_MAT_TYPE(_src_type);
144 dst_type = CV_MAT_TYPE(_dst_type);
145 ksize = _ksize;
146 anchor = _anchor;
147
148 if( anchor.x == -1 )
149 anchor.x = ksize.width / 2;
150 if( anchor.y == -1 )
151 anchor.y = ksize.height / 2;
152
153 max_ky = MAX( anchor.y, ksize.height - anchor.y - 1 );
154 border_mode = _border_mode;
155 border_value = _border_value;
156
157 if( ksize.width <= 0 || ksize.height <= 0 ||
158 (unsigned)anchor.x >= (unsigned)ksize.width ||
159 (unsigned)anchor.y >= (unsigned)ksize.height )
160 CV_ERROR( CV_StsOutOfRange, "invalid kernel size and/or anchor position" );
161
162 if( border_mode != IPL_BORDER_CONSTANT && border_mode != IPL_BORDER_REPLICATE &&
163 border_mode != IPL_BORDER_REFLECT && border_mode != IPL_BORDER_REFLECT_101 )
164 CV_ERROR( CV_StsBadArg, "Invalid/unsupported border mode" );
165
166 get_work_params();
167
168 prev_width = 0;
169 prev_x_range = cvSlice(0,0);
170
171 buf_size = cvAlign( buf_size, ALIGN );
172
173 src_pix_sz = CV_ELEM_SIZE(src_type);
174 border_tab_sz1 = anchor.x*src_pix_sz;
175 border_tab_sz = (ksize.width-1)*src_pix_sz;
176 bsz = cvAlign( border_tab_sz*sizeof(int), ALIGN );
177
178 assert( max_rows > max_ky*2 );
179 row_tab_sz = cvAlign( max_rows*sizeof(uchar*), ALIGN );
180 total_buf_sz = buf_size + row_tab_sz + bsz;
181
182 CV_CALL( ptr = buffer = (uchar*)cvAlloc( total_buf_sz ));
183
184 rows = (uchar**)ptr;
185 ptr += row_tab_sz;
186 border_tab = (int*)ptr;
187 ptr += bsz;
188
189 buf_start = ptr;
190 const_row = 0;
191
192 if( border_mode == IPL_BORDER_CONSTANT )
193 cvScalarToRawData( &border_value, border_tab, src_type, 0 );
194
195 __END__;
196 }
197
198
start_process(CvSlice x_range,int width)199 void CvBaseImageFilter::start_process( CvSlice x_range, int width )
200 {
201 int mode = border_mode;
202 int pix_sz = CV_ELEM_SIZE(src_type), work_pix_sz = CV_ELEM_SIZE(work_type);
203 int bsz = buf_size, bw = x_range.end_index - x_range.start_index, bw1 = bw + ksize.width - 1;
204 int tr_step = cvAlign(bw1*pix_sz, ALIGN );
205 int i, j, k, ofs;
206
207 if( x_range.start_index == prev_x_range.start_index &&
208 x_range.end_index == prev_x_range.end_index &&
209 width == prev_width )
210 return;
211
212 prev_x_range = x_range;
213 prev_width = width;
214
215 if( !is_separable )
216 bw = bw1;
217 else
218 bsz -= tr_step;
219
220 buf_step = cvAlign(bw*work_pix_sz, ALIGN);
221
222 if( mode == IPL_BORDER_CONSTANT )
223 bsz -= buf_step;
224 buf_max_count = bsz/buf_step;
225 buf_max_count = MIN( buf_max_count, max_rows - max_ky*2 );
226 buf_end = buf_start + buf_max_count*buf_step;
227
228 if( mode == IPL_BORDER_CONSTANT )
229 {
230 int i, tab_len = ksize.width*pix_sz;
231 uchar* bt = (uchar*)border_tab;
232 uchar* trow = buf_end;
233 const_row = buf_end + (is_separable ? 1 : 0)*tr_step;
234
235 for( i = pix_sz; i < tab_len; i++ )
236 bt[i] = bt[i - pix_sz];
237 for( i = 0; i < pix_sz; i++ )
238 trow[i] = bt[i];
239 for( i = pix_sz; i < tr_step; i++ )
240 trow[i] = trow[i - pix_sz];
241 if( is_separable )
242 x_func( trow, const_row, this );
243 return;
244 }
245
246 if( x_range.end_index - x_range.start_index <= 1 )
247 mode = IPL_BORDER_REPLICATE;
248
249 width = (width - 1)*pix_sz;
250 ofs = (anchor.x-x_range.start_index)*pix_sz;
251
252 for( k = 0; k < 2; k++ )
253 {
254 int idx, delta;
255 int i1, i2, di;
256
257 if( k == 0 )
258 {
259 idx = (x_range.start_index - 1)*pix_sz;
260 delta = di = -pix_sz;
261 i1 = border_tab_sz1 - pix_sz;
262 i2 = -pix_sz;
263 }
264 else
265 {
266 idx = x_range.end_index*pix_sz;
267 delta = di = pix_sz;
268 i1 = border_tab_sz1;
269 i2 = border_tab_sz;
270 }
271
272 if( (unsigned)idx > (unsigned)width )
273 {
274 int shift = mode == IPL_BORDER_REFLECT_101 ? pix_sz : 0;
275 idx = k == 0 ? shift : width - shift;
276 delta = -delta;
277 }
278
279 for( i = i1; i != i2; i += di )
280 {
281 for( j = 0; j < pix_sz; j++ )
282 border_tab[i + j] = idx + ofs + j;
283 if( mode != IPL_BORDER_REPLICATE )
284 {
285 if( (delta > 0 && idx == width) ||
286 (delta < 0 && idx == 0) )
287 {
288 if( mode == IPL_BORDER_REFLECT_101 )
289 idx -= delta*2;
290 delta = -delta;
291 }
292 else
293 idx += delta;
294 }
295 }
296 }
297 }
298
299
make_y_border(int row_count,int top_rows,int bottom_rows)300 void CvBaseImageFilter::make_y_border( int row_count, int top_rows, int bottom_rows )
301 {
302 int i;
303
304 if( border_mode == IPL_BORDER_CONSTANT ||
305 border_mode == IPL_BORDER_REPLICATE )
306 {
307 uchar* row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[max_ky];
308
309 for( i = 0; i < top_rows && rows[i] == 0; i++ )
310 rows[i] = row1;
311
312 row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[row_count-1];
313 for( i = 0; i < bottom_rows; i++ )
314 rows[i + row_count] = row1;
315 }
316 else
317 {
318 int j, dj = 1, shift = border_mode == IPL_BORDER_REFLECT_101;
319
320 for( i = top_rows-1, j = top_rows+shift; i >= 0; i-- )
321 {
322 if( rows[i] == 0 )
323 rows[i] = rows[j];
324 j += dj;
325 if( dj > 0 && j >= row_count )
326 {
327 if( !bottom_rows )
328 break;
329 j -= 1 + shift;
330 dj = -dj;
331 }
332 }
333
334 for( i = 0, j = row_count-1-shift; i < bottom_rows; i++, j-- )
335 rows[i + row_count] = rows[j];
336 }
337 }
338
339
fill_cyclic_buffer(const uchar * src,int src_step,int y0,int y1,int y2)340 int CvBaseImageFilter::fill_cyclic_buffer( const uchar* src, int src_step,
341 int y0, int y1, int y2 )
342 {
343 int i, y = y0, bsz1 = border_tab_sz1, bsz = border_tab_sz;
344 int pix_size = CV_ELEM_SIZE(src_type);
345 int width = prev_x_range.end_index - prev_x_range.start_index, width_n = width*pix_size;
346 bool can_use_src_as_trow = false; //is_separable && width >= ksize.width;
347
348 // fill the cyclic buffer
349 for( ; buf_count < buf_max_count && y < y2; buf_count++, y++, src += src_step )
350 {
351 uchar* trow = is_separable ? buf_end : buf_tail;
352 uchar* bptr = can_use_src_as_trow && y1 < y && y+1 < y2 ? (uchar*)(src - bsz1) : trow;
353
354 if( bptr != trow )
355 {
356 for( i = 0; i < bsz1; i++ )
357 trow[i] = bptr[i];
358 for( ; i < bsz; i++ )
359 trow[i] = bptr[i + width_n];
360 }
361 else if( !(((size_t)(bptr + bsz1)|(size_t)src|width_n) & (sizeof(int)-1)) )
362 for( i = 0; i < width_n; i += sizeof(int) )
363 *(int*)(bptr + i + bsz1) = *(int*)(src + i);
364 else
365 for( i = 0; i < width_n; i++ )
366 bptr[i + bsz1] = src[i];
367
368 if( border_mode != IPL_BORDER_CONSTANT )
369 {
370 for( i = 0; i < bsz1; i++ )
371 {
372 int j = border_tab[i];
373 bptr[i] = bptr[j];
374 }
375 for( ; i < bsz; i++ )
376 {
377 int j = border_tab[i];
378 bptr[i + width_n] = bptr[j];
379 }
380 }
381 else
382 {
383 const uchar *bt = (uchar*)border_tab;
384 for( i = 0; i < bsz1; i++ )
385 bptr[i] = bt[i];
386
387 for( ; i < bsz; i++ )
388 bptr[i + width_n] = bt[i];
389 }
390
391 if( is_separable )
392 {
393 x_func( bptr, buf_tail, this );
394 if( bptr != trow )
395 {
396 for( i = 0; i < bsz1; i++ )
397 bptr[i] = trow[i];
398 for( ; i < bsz; i++ )
399 bptr[i + width_n] = trow[i];
400 }
401 }
402
403 buf_tail += buf_step;
404 if( buf_tail >= buf_end )
405 buf_tail = buf_start;
406 }
407
408 return y - y0;
409 }
410
process(const CvMat * src,CvMat * dst,CvRect src_roi,CvPoint dst_origin,int flags)411 int CvBaseImageFilter::process( const CvMat* src, CvMat* dst,
412 CvRect src_roi, CvPoint dst_origin, int flags )
413 {
414 int rows_processed = 0;
415
416 /*
417 check_parameters
418 initialize_horizontal_border_reloc_tab_if_not_initialized_yet
419
420 for_each_source_row: src starts from src_roi.y, buf starts with the first available row
421 1) if separable,
422 1a.1) copy source row to temporary buffer, form a border using border reloc tab.
423 1a.2) apply row-wise filter (symmetric, asymmetric or generic)
424 else
425 1b.1) copy source row to the buffer, form a border
426 2) if the buffer is full, or it is the last source row:
427 2.1) if stage != middle, form the pointers to other "virtual" rows.
428 if separable
429 2a.2) apply column-wise filter, store the results.
430 else
431 2b.2) form a sparse (offset,weight) tab
432 2b.3) apply generic non-separable filter, store the results
433 3) update row pointers etc.
434 */
435
436 CV_FUNCNAME( "CvBaseImageFilter::process" );
437
438 __BEGIN__;
439
440 int i, width, _src_y1, _src_y2;
441 int src_x, src_y, src_y1, src_y2, dst_y;
442 int pix_size = CV_ELEM_SIZE(src_type);
443 uchar *sptr = 0, *dptr;
444 int phase = flags & (CV_START|CV_END|CV_MIDDLE);
445 bool isolated_roi = (flags & CV_ISOLATED_ROI) != 0;
446
447 if( !CV_IS_MAT(src) )
448 CV_ERROR( CV_StsBadArg, "" );
449
450 if( CV_MAT_TYPE(src->type) != src_type )
451 CV_ERROR( CV_StsUnmatchedFormats, "" );
452
453 width = src->cols;
454
455 if( src_roi.width == -1 && src_roi.x == 0 )
456 src_roi.width = width;
457
458 if( src_roi.height == -1 && src_roi.y == 0 )
459 {
460 src_roi.y = 0;
461 src_roi.height = src->rows;
462 }
463
464 if( src_roi.width > max_width ||
465 src_roi.x < 0 || src_roi.width < 0 ||
466 src_roi.y < 0 || src_roi.height < 0 ||
467 src_roi.x + src_roi.width > width ||
468 src_roi.y + src_roi.height > src->rows )
469 CV_ERROR( CV_StsOutOfRange, "Too large source image or its ROI" );
470
471 src_x = src_roi.x;
472 _src_y1 = 0;
473 _src_y2 = src->rows;
474
475 if( isolated_roi )
476 {
477 src_roi.x = 0;
478 width = src_roi.width;
479 _src_y1 = src_roi.y;
480 _src_y2 = src_roi.y + src_roi.height;
481 }
482
483 if( !CV_IS_MAT(dst) )
484 CV_ERROR( CV_StsBadArg, "" );
485
486 if( CV_MAT_TYPE(dst->type) != dst_type )
487 CV_ERROR( CV_StsUnmatchedFormats, "" );
488
489 if( dst_origin.x < 0 || dst_origin.y < 0 )
490 CV_ERROR( CV_StsOutOfRange, "Incorrect destination ROI origin" );
491
492 if( phase == CV_WHOLE )
493 phase = CV_START | CV_END;
494 phase &= CV_START | CV_END | CV_MIDDLE;
495
496 // initialize horizontal border relocation tab if it is not initialized yet
497 if( phase & CV_START )
498 start_process( cvSlice(src_roi.x, src_roi.x + src_roi.width), width );
499 else if( prev_width != width || prev_x_range.start_index != src_roi.x ||
500 prev_x_range.end_index != src_roi.x + src_roi.width )
501 CV_ERROR( CV_StsBadArg,
502 "In a middle or at the end the horizontal placement of the stripe can not be changed" );
503
504 dst_y = dst_origin.y;
505 src_y1 = src_roi.y;
506 src_y2 = src_roi.y + src_roi.height;
507
508 if( phase & CV_START )
509 {
510 for( i = 0; i <= max_ky*2; i++ )
511 rows[i] = 0;
512
513 src_y1 -= max_ky;
514 top_rows = bottom_rows = 0;
515
516 if( src_y1 < _src_y1 )
517 {
518 top_rows = _src_y1 - src_y1;
519 src_y1 = _src_y1;
520 }
521
522 buf_head = buf_tail = buf_start;
523 buf_count = 0;
524 }
525
526 if( phase & CV_END )
527 {
528 src_y2 += max_ky;
529
530 if( src_y2 > _src_y2 )
531 {
532 bottom_rows = src_y2 - _src_y2;
533 src_y2 = _src_y2;
534 }
535 }
536
537 dptr = dst->data.ptr + dst_origin.y*dst->step + dst_origin.x*CV_ELEM_SIZE(dst_type);
538 sptr = src->data.ptr + src_y1*src->step + src_x*pix_size;
539
540 for( src_y = src_y1; src_y < src_y2; )
541 {
542 uchar* bptr;
543 int row_count, delta;
544
545 delta = fill_cyclic_buffer( sptr, src->step, src_y, src_y1, src_y2 );
546
547 src_y += delta;
548 sptr += src->step*delta;
549
550 // initialize the cyclic buffer row pointers
551 bptr = buf_head;
552 for( i = 0; i < buf_count; i++ )
553 {
554 rows[i+top_rows] = bptr;
555 bptr += buf_step;
556 if( bptr >= buf_end )
557 bptr = buf_start;
558 }
559
560 row_count = top_rows + buf_count;
561
562 if( !rows[0] || ((phase & CV_END) && src_y == src_y2) )
563 {
564 int br = (phase & CV_END) && src_y == src_y2 ? bottom_rows : 0;
565 make_y_border( row_count, top_rows, br );
566 row_count += br;
567 }
568
569 if( rows[0] && row_count > max_ky*2 )
570 {
571 int count = row_count - max_ky*2;
572 if( dst_y + count > dst->rows )
573 CV_ERROR( CV_StsOutOfRange, "The destination image can not fit the result" );
574
575 assert( count >= 0 );
576 y_func( rows + max_ky - anchor.y, dptr, dst->step, count, this );
577 row_count -= count;
578 dst_y += count;
579 dptr += dst->step*count;
580 for( bptr = row_count > 0 ?rows[count] : 0; buf_head != bptr && buf_count > 0; buf_count-- )
581 {
582 buf_head += buf_step;
583 if( buf_head >= buf_end )
584 buf_head = buf_start;
585 }
586 rows_processed += count;
587 top_rows = MAX(top_rows - count, 0);
588 }
589 }
590
591 __END__;
592
593 return rows_processed;
594 }
595
596
597 /****************************************************************************************\
598 Separable Linear Filter
599 \****************************************************************************************/
600
601 static void icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params );
602 static void icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step,
603 int count, void* params );
604 static void icvFilterColSymm_32s16s( const int** src, short* dst, int dst_step,
605 int count, void* params );
606 static void icvFilterRowSymm_8u32f( const uchar* src, float* dst, void* params );
607 static void icvFilterRow_8u32f( const uchar* src, float* dst, void* params );
608 static void icvFilterRowSymm_16s32f( const short* src, float* dst, void* params );
609 static void icvFilterRow_16s32f( const short* src, float* dst, void* params );
610 static void icvFilterRowSymm_16u32f( const ushort* src, float* dst, void* params );
611 static void icvFilterRow_16u32f( const ushort* src, float* dst, void* params );
612 static void icvFilterRowSymm_32f( const float* src, float* dst, void* params );
613 static void icvFilterRow_32f( const float* src, float* dst, void* params );
614
615 static void icvFilterColSymm_32f8u( const float** src, uchar* dst, int dst_step,
616 int count, void* params );
617 static void icvFilterCol_32f8u( const float** src, uchar* dst, int dst_step,
618 int count, void* params );
619 static void icvFilterColSymm_32f16s( const float** src, short* dst, int dst_step,
620 int count, void* params );
621 static void icvFilterCol_32f16s( const float** src, short* dst, int dst_step,
622 int count, void* params );
623 static void icvFilterColSymm_32f16u( const float** src, ushort* dst, int dst_step,
624 int count, void* params );
625 static void icvFilterCol_32f16u( const float** src, ushort* dst, int dst_step,
626 int count, void* params );
627 static void icvFilterColSymm_32f( const float** src, float* dst, int dst_step,
628 int count, void* params );
629 static void icvFilterCol_32f( const float** src, float* dst, int dst_step,
630 int count, void* params );
631
CvSepFilter()632 CvSepFilter::CvSepFilter()
633 {
634 min_depth = CV_32F;
635 kx = ky = 0;
636 kx_flags = ky_flags = 0;
637 }
638
639
CvSepFilter(int _max_width,int _src_type,int _dst_type,const CvMat * _kx,const CvMat * _ky,CvPoint _anchor,int _border_mode,CvScalar _border_value)640 CvSepFilter::CvSepFilter( int _max_width, int _src_type, int _dst_type,
641 const CvMat* _kx, const CvMat* _ky,
642 CvPoint _anchor, int _border_mode,
643 CvScalar _border_value )
644 {
645 min_depth = CV_32F;
646 kx = ky = 0;
647 init( _max_width, _src_type, _dst_type, _kx, _ky, _anchor, _border_mode, _border_value );
648 }
649
650
clear()651 void CvSepFilter::clear()
652 {
653 cvReleaseMat( &kx );
654 cvReleaseMat( &ky );
655 CvBaseImageFilter::clear();
656 }
657
658
~CvSepFilter()659 CvSepFilter::~CvSepFilter()
660 {
661 clear();
662 }
663
664
665 #undef FILTER_BITS
666 #define FILTER_BITS 8
667
init(int _max_width,int _src_type,int _dst_type,const CvMat * _kx,const CvMat * _ky,CvPoint _anchor,int _border_mode,CvScalar _border_value)668 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type,
669 const CvMat* _kx, const CvMat* _ky,
670 CvPoint _anchor, int _border_mode,
671 CvScalar _border_value )
672 {
673 CV_FUNCNAME( "CvSepFilter::init" );
674
675 __BEGIN__;
676
677 CvSize _ksize;
678 int filter_type;
679 int i, xsz, ysz;
680 int convert_filters = 0;
681 double xsum = 0, ysum = 0;
682 const float eps = FLT_EPSILON*100.f;
683
684 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) ||
685 (_kx->cols != 1 && _kx->rows != 1) ||
686 (_ky->cols != 1 && _ky->rows != 1) ||
687 CV_MAT_CN(_kx->type) != 1 || CV_MAT_CN(_ky->type) != 1 ||
688 !CV_ARE_TYPES_EQ(_kx,_ky) )
689 CV_ERROR( CV_StsBadArg,
690 "Both kernels must be valid 1d single-channel vectors of the same types" );
691
692 if( CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type) )
693 CV_ERROR( CV_StsUnmatchedFormats, "Input and output must have the same number of channels" );
694
695 filter_type = MAX( CV_32F, CV_MAT_DEPTH(_kx->type) );
696
697 _ksize.width = _kx->rows + _kx->cols - 1;
698 _ksize.height = _ky->rows + _ky->cols - 1;
699
700 CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
701 _anchor, _border_mode, _border_value ));
702
703 if( !(kx && CV_ARE_SIZES_EQ(kx,_kx)) )
704 {
705 cvReleaseMat( &kx );
706 CV_CALL( kx = cvCreateMat( _kx->rows, _kx->cols, filter_type ));
707 }
708
709 if( !(ky && CV_ARE_SIZES_EQ(ky,_ky)) )
710 {
711 cvReleaseMat( &ky );
712 CV_CALL( ky = cvCreateMat( _ky->rows, _ky->cols, filter_type ));
713 }
714
715 CV_CALL( cvConvert( _kx, kx ));
716 CV_CALL( cvConvert( _ky, ky ));
717
718 xsz = kx->rows + kx->cols - 1;
719 ysz = ky->rows + ky->cols - 1;
720 kx_flags = ky_flags = ASYMMETRICAL + SYMMETRICAL + POSITIVE + SUM_TO_1 + INTEGER;
721
722 if( !(xsz & 1) )
723 kx_flags &= ~(ASYMMETRICAL + SYMMETRICAL);
724 if( !(ysz & 1) )
725 ky_flags &= ~(ASYMMETRICAL + SYMMETRICAL);
726
727 for( i = 0; i < xsz; i++ )
728 {
729 float v = kx->data.fl[i];
730 xsum += v;
731 if( v < 0 )
732 kx_flags &= ~POSITIVE;
733 if( fabs(v - cvRound(v)) > eps )
734 kx_flags &= ~INTEGER;
735 if( fabs(v - kx->data.fl[xsz - i - 1]) > eps )
736 kx_flags &= ~SYMMETRICAL;
737 if( fabs(v + kx->data.fl[xsz - i - 1]) > eps )
738 kx_flags &= ~ASYMMETRICAL;
739 }
740
741 if( fabs(xsum - 1.) > eps )
742 kx_flags &= ~SUM_TO_1;
743
744 for( i = 0; i < ysz; i++ )
745 {
746 float v = ky->data.fl[i];
747 ysum += v;
748 if( v < 0 )
749 ky_flags &= ~POSITIVE;
750 if( fabs(v - cvRound(v)) > eps )
751 ky_flags &= ~INTEGER;
752 if( fabs(v - ky->data.fl[ysz - i - 1]) > eps )
753 ky_flags &= ~SYMMETRICAL;
754 if( fabs(v + ky->data.fl[ysz - i - 1]) > eps )
755 ky_flags &= ~ASYMMETRICAL;
756 }
757
758 if( fabs(ysum - 1.) > eps )
759 ky_flags &= ~SUM_TO_1;
760
761 x_func = 0;
762 y_func = 0;
763
764 if( CV_MAT_DEPTH(src_type) == CV_8U )
765 {
766 if( CV_MAT_DEPTH(dst_type) == CV_8U &&
767 ((kx_flags&ky_flags) & (SYMMETRICAL + POSITIVE + SUM_TO_1)) == SYMMETRICAL + POSITIVE + SUM_TO_1 )
768 {
769 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s;
770 y_func = (CvColumnFilterFunc)icvFilterColSymm_32s8u;
771 kx_flags &= ~INTEGER;
772 ky_flags &= ~INTEGER;
773 convert_filters = 1;
774 }
775 else if( CV_MAT_DEPTH(dst_type) == CV_16S &&
776 (kx_flags & (SYMMETRICAL + ASYMMETRICAL)) && (kx_flags & INTEGER) &&
777 (ky_flags & (SYMMETRICAL + ASYMMETRICAL)) && (ky_flags & INTEGER) )
778 {
779 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s;
780 y_func = (CvColumnFilterFunc)icvFilterColSymm_32s16s;
781 convert_filters = 1;
782 }
783 else
784 {
785 if( CV_MAT_DEPTH(dst_type) > CV_32F )
786 CV_ERROR( CV_StsUnsupportedFormat, "8u->64f separable filtering is not supported" );
787
788 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
789 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32f;
790 else
791 x_func = (CvRowFilterFunc)icvFilterRow_8u32f;
792 }
793 }
794 else if( CV_MAT_DEPTH(src_type) == CV_16U )
795 {
796 if( CV_MAT_DEPTH(dst_type) > CV_32F )
797 CV_ERROR( CV_StsUnsupportedFormat, "16u->64f separable filtering is not supported" );
798
799 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
800 x_func = (CvRowFilterFunc)icvFilterRowSymm_16u32f;
801 else
802 x_func = (CvRowFilterFunc)icvFilterRow_16u32f;
803 }
804 else if( CV_MAT_DEPTH(src_type) == CV_16S )
805 {
806 if( CV_MAT_DEPTH(dst_type) > CV_32F )
807 CV_ERROR( CV_StsUnsupportedFormat, "16s->64f separable filtering is not supported" );
808
809 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
810 x_func = (CvRowFilterFunc)icvFilterRowSymm_16s32f;
811 else
812 x_func = (CvRowFilterFunc)icvFilterRow_16s32f;
813 }
814 else if( CV_MAT_DEPTH(src_type) == CV_32F )
815 {
816 if( CV_MAT_DEPTH(dst_type) != CV_32F )
817 CV_ERROR( CV_StsUnsupportedFormat, "When the input has 32f data type, the output must also have 32f type" );
818
819 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
820 x_func = (CvRowFilterFunc)icvFilterRowSymm_32f;
821 else
822 x_func = (CvRowFilterFunc)icvFilterRow_32f;
823 }
824 else
825 CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" );
826
827 if( !y_func )
828 {
829 if( CV_MAT_DEPTH(dst_type) == CV_8U )
830 {
831 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
832 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f8u;
833 else
834 y_func = (CvColumnFilterFunc)icvFilterCol_32f8u;
835 }
836 else if( CV_MAT_DEPTH(dst_type) == CV_16U )
837 {
838 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
839 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16u;
840 else
841 y_func = (CvColumnFilterFunc)icvFilterCol_32f16u;
842 }
843 else if( CV_MAT_DEPTH(dst_type) == CV_16S )
844 {
845 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
846 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16s;
847 else
848 y_func = (CvColumnFilterFunc)icvFilterCol_32f16s;
849 }
850 else if( CV_MAT_DEPTH(dst_type) == CV_32F )
851 {
852 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
853 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f;
854 else
855 y_func = (CvColumnFilterFunc)icvFilterCol_32f;
856 }
857 else
858 CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" );
859 }
860
861 if( convert_filters )
862 {
863 int scale = kx_flags & ky_flags & INTEGER ? 1 : (1 << FILTER_BITS);
864 int sum;
865
866 for( i = sum = 0; i < xsz; i++ )
867 {
868 int t = cvRound(kx->data.fl[i]*scale);
869 kx->data.i[i] = t;
870 sum += t;
871 }
872 if( scale > 1 )
873 kx->data.i[xsz/2] += scale - sum;
874
875 for( i = sum = 0; i < ysz; i++ )
876 {
877 int t = cvRound(ky->data.fl[i]*scale);
878 ky->data.i[i] = t;
879 sum += t;
880 }
881 if( scale > 1 )
882 ky->data.i[ysz/2] += scale - sum;
883 kx->type = (kx->type & ~CV_MAT_DEPTH_MASK) | CV_32S;
884 ky->type = (ky->type & ~CV_MAT_DEPTH_MASK) | CV_32S;
885 }
886
887 __END__;
888 }
889
890
init(int _max_width,int _src_type,int _dst_type,bool _is_separable,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)891 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type,
892 bool _is_separable, CvSize _ksize,
893 CvPoint _anchor, int _border_mode,
894 CvScalar _border_value )
895 {
896 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable,
897 _ksize, _anchor, _border_mode, _border_value );
898 }
899
900
901 static void
icvFilterRowSymm_8u32s(const uchar * src,int * dst,void * params)902 icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params )
903 {
904 const CvSepFilter* state = (const CvSepFilter*)params;
905 const CvMat* _kx = state->get_x_kernel();
906 const int* kx = _kx->data.i;
907 int ksize = _kx->cols + _kx->rows - 1;
908 int i = 0, j, k, width = state->get_width();
909 int cn = CV_MAT_CN(state->get_src_type());
910 int ksize2 = ksize/2, ksize2n = ksize2*cn;
911 int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL;
912 const uchar* s = src + ksize2n;
913
914 kx += ksize2;
915 width *= cn;
916
917 if( is_symm )
918 {
919 if( ksize == 1 && kx[0] == 1 )
920 {
921 for( i = 0; i <= width - 2; i += 2 )
922 {
923 int s0 = s[i], s1 = s[i+1];
924 dst[i] = s0; dst[i+1] = s1;
925 }
926 s += i;
927 }
928 else if( ksize == 3 )
929 {
930 if( kx[0] == 2 && kx[1] == 1 )
931 for( ; i <= width - 2; i += 2, s += 2 )
932 {
933 int s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn];
934 dst[i] = s0; dst[i+1] = s1;
935 }
936 else if( kx[0] == 10 && kx[1] == 3 )
937 for( ; i <= width - 2; i += 2, s += 2 )
938 {
939 int s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3;
940 dst[i] = s0; dst[i+1] = s1;
941 }
942 else if( kx[0] == 2*64 && kx[1] == 1*64 )
943 for( ; i <= width - 2; i += 2, s += 2 )
944 {
945 int s0 = (s[0]*2 + s[-cn] + s[cn]) << 6;
946 int s1 = (s[1]*2 + s[1-cn] + s[1+cn]) << 6;
947 dst[i] = s0; dst[i+1] = s1;
948 }
949 else
950 {
951 int k0 = kx[0], k1 = kx[1];
952 for( ; i <= width - 2; i += 2, s += 2 )
953 {
954 int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1, s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1;
955 dst[i] = s0; dst[i+1] = s1;
956 }
957 }
958 }
959 else if( ksize == 5 )
960 {
961 int k0 = kx[0], k1 = kx[1], k2 = kx[2];
962 if( k0 == 6*16 && k1 == 4*16 && k2 == 1*16 )
963 for( ; i <= width - 2; i += 2, s += 2 )
964 {
965 int s0 = (s[0]*6 + (s[-cn] + s[cn])*4 + (s[-cn*2] + s[cn*2])*1) << 4;
966 int s1 = (s[1]*6 + (s[1-cn] + s[1+cn])*4 + (s[1-cn*2] + s[1+cn*2])*1) << 4;
967 dst[i] = s0; dst[i+1] = s1;
968 }
969 else
970 for( ; i <= width - 2; i += 2, s += 2 )
971 {
972 int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1 + (s[-cn*2] + s[cn*2])*k2;
973 int s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1 + (s[1-cn*2] + s[1+cn*2])*k2;
974 dst[i] = s0; dst[i+1] = s1;
975 }
976 }
977 else
978 for( ; i <= width - 4; i += 4, s += 4 )
979 {
980 int f = kx[0];
981 int s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3];
982 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
983 {
984 f = kx[k];
985 s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]);
986 s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]);
987 }
988
989 dst[i] = s0; dst[i+1] = s1;
990 dst[i+2] = s2; dst[i+3] = s3;
991 }
992
993 for( ; i < width; i++, s++ )
994 {
995 int s0 = kx[0]*s[0];
996 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
997 s0 += kx[k]*(s[j] + s[-j]);
998 dst[i] = s0;
999 }
1000 }
1001 else
1002 {
1003 if( ksize == 3 && kx[0] == 0 && kx[1] == 1 )
1004 for( ; i <= width - 2; i += 2, s += 2 )
1005 {
1006 int s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn];
1007 dst[i] = s0; dst[i+1] = s1;
1008 }
1009 else
1010 for( ; i <= width - 4; i += 4, s += 4 )
1011 {
1012 int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
1013 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1014 {
1015 int f = kx[k];
1016 s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]);
1017 s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]);
1018 }
1019
1020 dst[i] = s0; dst[i+1] = s1;
1021 dst[i+2] = s2; dst[i+3] = s3;
1022 }
1023
1024 for( ; i < width; i++, s++ )
1025 {
1026 int s0 = kx[0]*s[0];
1027 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1028 s0 += kx[k]*(s[j] - s[-j]);
1029 dst[i] = s0;
1030 }
1031 }
1032 }
1033
1034
1035 #define ICV_FILTER_ROW( flavor, srctype, dsttype, load_macro ) \
1036 static void \
1037 icvFilterRow_##flavor(const srctype* src, dsttype* dst, void*params)\
1038 { \
1039 const CvSepFilter* state = (const CvSepFilter*)params; \
1040 const CvMat* _kx = state->get_x_kernel(); \
1041 const dsttype* kx = (const dsttype*)(_kx->data.ptr); \
1042 int ksize = _kx->cols + _kx->rows - 1; \
1043 int i = 0, k, width = state->get_width(); \
1044 int cn = CV_MAT_CN(state->get_src_type()); \
1045 const srctype* s; \
1046 \
1047 width *= cn; \
1048 \
1049 for( ; i <= width - 4; i += 4 ) \
1050 { \
1051 double f = kx[0]; \
1052 double s0=f*load_macro(src[i]), s1=f*load_macro(src[i+1]), \
1053 s2=f*load_macro(src[i+2]), s3=f*load_macro(src[i+3]);\
1054 for( k = 1, s = src + i + cn; k < ksize; k++, s += cn ) \
1055 { \
1056 f = kx[k]; \
1057 s0 += f*load_macro(s[0]); \
1058 s1 += f*load_macro(s[1]); \
1059 s2 += f*load_macro(s[2]); \
1060 s3 += f*load_macro(s[3]); \
1061 } \
1062 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \
1063 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \
1064 } \
1065 \
1066 for( ; i < width; i++ ) \
1067 { \
1068 double s0 = (double)kx[0]*load_macro(src[i]); \
1069 for( k = 1, s = src + i + cn; k < ksize; k++, s += cn ) \
1070 s0 += (double)kx[k]*load_macro(s[0]); \
1071 dst[i] = (dsttype)s0; \
1072 } \
1073 }
1074
1075
1076 ICV_FILTER_ROW( 8u32f, uchar, float, CV_8TO32F )
1077 ICV_FILTER_ROW( 16s32f, short, float, CV_NOP )
1078 ICV_FILTER_ROW( 16u32f, ushort, float, CV_NOP )
1079 ICV_FILTER_ROW( 32f, float, float, CV_NOP )
1080
1081
1082 #define ICV_FILTER_ROW_SYMM( flavor, srctype, dsttype, load_macro ) \
1083 static void \
1084 icvFilterRowSymm_##flavor( const srctype* src, \
1085 dsttype* dst, void* params ) \
1086 { \
1087 const CvSepFilter* state = (const CvSepFilter*)params; \
1088 const CvMat* _kx = state->get_x_kernel(); \
1089 const dsttype* kx = (const dsttype*)(_kx->data.ptr); \
1090 int ksize = _kx->cols + _kx->rows - 1; \
1091 int i = 0, j, k, width = state->get_width(); \
1092 int cn = CV_MAT_CN(state->get_src_type()); \
1093 int is_symm=state->get_x_kernel_flags()&CvSepFilter::SYMMETRICAL;\
1094 int ksize2 = ksize/2, ksize2n = ksize2*cn; \
1095 const srctype* s = src + ksize2n; \
1096 \
1097 kx += ksize2; \
1098 width *= cn; \
1099 \
1100 if( is_symm ) \
1101 { \
1102 for( ; i <= width - 4; i += 4, s += 4 ) \
1103 { \
1104 double f = kx[0]; \
1105 double s0=f*load_macro(s[0]), s1=f*load_macro(s[1]), \
1106 s2=f*load_macro(s[2]), s3=f*load_macro(s[3]); \
1107 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \
1108 { \
1109 f = kx[k]; \
1110 s0 += f*load_macro(s[j] + s[-j]); \
1111 s1 += f*load_macro(s[j+1] + s[-j+1]); \
1112 s2 += f*load_macro(s[j+2] + s[-j+2]); \
1113 s3 += f*load_macro(s[j+3] + s[-j+3]); \
1114 } \
1115 \
1116 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \
1117 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \
1118 } \
1119 \
1120 for( ; i < width; i++, s++ ) \
1121 { \
1122 double s0 = (double)kx[0]*load_macro(s[0]); \
1123 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \
1124 s0 += (double)kx[k]*load_macro(s[j] + s[-j]); \
1125 dst[i] = (dsttype)s0; \
1126 } \
1127 } \
1128 else \
1129 { \
1130 for( ; i <= width - 4; i += 4, s += 4 ) \
1131 { \
1132 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \
1133 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \
1134 { \
1135 double f = kx[k]; \
1136 s0 += f*load_macro(s[j] - s[-j]); \
1137 s1 += f*load_macro(s[j+1] - s[-j+1]); \
1138 s2 += f*load_macro(s[j+2] - s[-j+2]); \
1139 s3 += f*load_macro(s[j+3] - s[-j+3]); \
1140 } \
1141 \
1142 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \
1143 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \
1144 } \
1145 \
1146 for( ; i < width; i++, s++ ) \
1147 { \
1148 double s0 = 0; \
1149 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \
1150 s0 += (double)kx[k]*load_macro(s[j] - s[-j]); \
1151 dst[i] = (dsttype)s0; \
1152 } \
1153 } \
1154 }
1155
1156
1157 ICV_FILTER_ROW_SYMM( 8u32f, uchar, float, CV_8TO32F )
1158 ICV_FILTER_ROW_SYMM( 16s32f, short, float, CV_NOP )
1159 ICV_FILTER_ROW_SYMM( 16u32f, ushort, float, CV_NOP )
1160
1161 static void
icvFilterRowSymm_32f(const float * src,float * dst,void * params)1162 icvFilterRowSymm_32f( const float* src, float* dst, void* params )
1163 {
1164 const CvSepFilter* state = (const CvSepFilter*)params;
1165 const CvMat* _kx = state->get_x_kernel();
1166 const float* kx = _kx->data.fl;
1167 int ksize = _kx->cols + _kx->rows - 1;
1168 int i = 0, j, k, width = state->get_width();
1169 int cn = CV_MAT_CN(state->get_src_type());
1170 int ksize2 = ksize/2, ksize2n = ksize2*cn;
1171 int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL;
1172 const float* s = src + ksize2n;
1173
1174 kx += ksize2;
1175 width *= cn;
1176
1177 if( is_symm )
1178 {
1179 if( ksize == 3 && fabs(kx[0]-2.) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON )
1180 for( ; i <= width - 2; i += 2, s += 2 )
1181 {
1182 float s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn];
1183 dst[i] = s0; dst[i+1] = s1;
1184 }
1185 else if( ksize == 3 && fabs(kx[0]-10.) <= FLT_EPSILON && fabs(kx[1]-3.) <= FLT_EPSILON )
1186 for( ; i <= width - 2; i += 2, s += 2 )
1187 {
1188 float s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3;
1189 dst[i] = s0; dst[i+1] = s1;
1190 }
1191 else
1192 for( ; i <= width - 4; i += 4, s += 4 )
1193 {
1194 double f = kx[0];
1195 double s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3];
1196 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1197 {
1198 f = kx[k];
1199 s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]);
1200 s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]);
1201 }
1202
1203 dst[i] = (float)s0; dst[i+1] = (float)s1;
1204 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
1205 }
1206
1207 for( ; i < width; i++, s++ )
1208 {
1209 double s0 = (double)kx[0]*s[0];
1210 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1211 s0 += (double)kx[k]*(s[j] + s[-j]);
1212 dst[i] = (float)s0;
1213 }
1214 }
1215 else
1216 {
1217 if( ksize == 3 && fabs(kx[0]) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON )
1218 for( ; i <= width - 2; i += 2, s += 2 )
1219 {
1220 float s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn];
1221 dst[i] = s0; dst[i+1] = s1;
1222 }
1223 else
1224 for( ; i <= width - 4; i += 4, s += 4 )
1225 {
1226 double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
1227 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1228 {
1229 double f = kx[k];
1230 s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]);
1231 s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]);
1232 }
1233
1234 dst[i] = (float)s0; dst[i+1] = (float)s1;
1235 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
1236 }
1237
1238 for( ; i < width; i++, s++ )
1239 {
1240 double s0 = (double)kx[0]*s[0];
1241 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
1242 s0 += (double)kx[k]*(s[j] - s[-j]);
1243 dst[i] = (float)s0;
1244 }
1245 }
1246 }
1247
1248
1249 static void
icvFilterColSymm_32s8u(const int ** src,uchar * dst,int dst_step,int count,void * params)1250 icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step, int count, void* params )
1251 {
1252 const CvSepFilter* state = (const CvSepFilter*)params;
1253 const CvMat* _ky = state->get_y_kernel();
1254 const int* ky = _ky->data.i;
1255 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
1256 int i, k, width = state->get_width();
1257 int cn = CV_MAT_CN(state->get_src_type());
1258
1259 width *= cn;
1260 src += ksize2;
1261 ky += ksize2;
1262
1263 for( ; count--; dst += dst_step, src++ )
1264 {
1265 if( ksize == 3 )
1266 {
1267 const int* sptr0 = src[-1], *sptr1 = src[0], *sptr2 = src[1];
1268 int k0 = ky[0], k1 = ky[1];
1269 for( i = 0; i <= width - 2; i += 2 )
1270 {
1271 int s0 = sptr1[i]*k0 + (sptr0[i] + sptr2[i])*k1;
1272 int s1 = sptr1[i+1]*k0 + (sptr0[i+1] + sptr2[i+1])*k1;
1273 s0 = CV_DESCALE(s0, FILTER_BITS*2);
1274 s1 = CV_DESCALE(s1, FILTER_BITS*2);
1275 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
1276 }
1277 }
1278 else if( ksize == 5 )
1279 {
1280 const int* sptr0 = src[-2], *sptr1 = src[-1],
1281 *sptr2 = src[0], *sptr3 = src[1], *sptr4 = src[2];
1282 int k0 = ky[0], k1 = ky[1], k2 = ky[2];
1283 for( i = 0; i <= width - 2; i += 2 )
1284 {
1285 int s0 = sptr2[i]*k0 + (sptr1[i] + sptr3[i])*k1 + (sptr0[i] + sptr4[i])*k2;
1286 int s1 = sptr2[i+1]*k0 + (sptr1[i+1] + sptr3[i+1])*k1 + (sptr0[i+1] + sptr4[i+1])*k2;
1287 s0 = CV_DESCALE(s0, FILTER_BITS*2);
1288 s1 = CV_DESCALE(s1, FILTER_BITS*2);
1289 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
1290 }
1291 }
1292 else
1293 for( i = 0; i <= width - 4; i += 4 )
1294 {
1295 int f = ky[0];
1296 const int* sptr = src[0] + i, *sptr2;
1297 int s0 = f*sptr[0], s1 = f*sptr[1], s2 = f*sptr[2], s3 = f*sptr[3];
1298 for( k = 1; k <= ksize2; k++ )
1299 {
1300 sptr = src[k] + i;
1301 sptr2 = src[-k] + i;
1302 f = ky[k];
1303 s0 += f*(sptr[0] + sptr2[0]);
1304 s1 += f*(sptr[1] + sptr2[1]);
1305 s2 += f*(sptr[2] + sptr2[2]);
1306 s3 += f*(sptr[3] + sptr2[3]);
1307 }
1308
1309 s0 = CV_DESCALE(s0, FILTER_BITS*2);
1310 s1 = CV_DESCALE(s1, FILTER_BITS*2);
1311 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
1312 s2 = CV_DESCALE(s2, FILTER_BITS*2);
1313 s3 = CV_DESCALE(s3, FILTER_BITS*2);
1314 dst[i+2] = (uchar)s2; dst[i+3] = (uchar)s3;
1315 }
1316
1317 for( ; i < width; i++ )
1318 {
1319 int s0 = ky[0]*src[0][i];
1320 for( k = 1; k <= ksize2; k++ )
1321 s0 += ky[k]*(src[k][i] + src[-k][i]);
1322
1323 s0 = CV_DESCALE(s0, FILTER_BITS*2);
1324 dst[i] = (uchar)s0;
1325 }
1326 }
1327 }
1328
1329
1330 static void
icvFilterColSymm_32s16s(const int ** src,short * dst,int dst_step,int count,void * params)1331 icvFilterColSymm_32s16s( const int** src, short* dst,
1332 int dst_step, int count, void* params )
1333 {
1334 const CvSepFilter* state = (const CvSepFilter*)params;
1335 const CvMat* _ky = state->get_y_kernel();
1336 const int* ky = (const int*)_ky->data.ptr;
1337 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
1338 int i = 0, k, width = state->get_width();
1339 int cn = CV_MAT_CN(state->get_src_type());
1340 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;
1341 int is_1_2_1 = is_symm && ksize == 3 && ky[1] == 2 && ky[2] == 1;
1342 int is_3_10_3 = is_symm && ksize == 3 && ky[1] == 10 && ky[2] == 3;
1343 int is_m1_0_1 = !is_symm && ksize == 3 && ky[1] == 0 &&
1344 ky[2]*ky[2] == 1 ? (ky[2] > 0 ? 1 : -1) : 0;
1345
1346 width *= cn;
1347 src += ksize2;
1348 ky += ksize2;
1349 dst_step /= sizeof(dst[0]);
1350
1351 if( is_symm )
1352 {
1353 for( ; count--; dst += dst_step, src++ )
1354 {
1355 if( is_1_2_1 )
1356 {
1357 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1];
1358
1359 for( i = 0; i <= width - 2; i += 2 )
1360 {
1361 int s0 = src0[i] + src1[i]*2 + src2[i],
1362 s1 = src0[i+1] + src1[i+1]*2 + src2[i+1];
1363
1364 dst[i] = (short)s0; dst[i+1] = (short)s1;
1365 }
1366 }
1367 else if( is_3_10_3 )
1368 {
1369 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1];
1370
1371 for( i = 0; i <= width - 2; i += 2 )
1372 {
1373 int s0 = src1[i]*10 + (src0[i] + src2[i])*3,
1374 s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3;
1375
1376 dst[i] = (short)s0; dst[i+1] = (short)s1;
1377 }
1378 }
1379 else
1380 for( i = 0; i <= width - 4; i += 4 )
1381 {
1382 int f = ky[0];
1383 const int* sptr = src[0] + i, *sptr2;
1384 int s0 = f*sptr[0], s1 = f*sptr[1],
1385 s2 = f*sptr[2], s3 = f*sptr[3];
1386 for( k = 1; k <= ksize2; k++ )
1387 {
1388 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
1389 s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]);
1390 s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]);
1391 }
1392
1393 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
1394 dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3);
1395 }
1396
1397 for( ; i < width; i++ )
1398 {
1399 int s0 = ky[0]*src[0][i];
1400 for( k = 1; k <= ksize2; k++ )
1401 s0 += ky[k]*(src[k][i] + src[-k][i]);
1402 dst[i] = CV_CAST_16S(s0);
1403 }
1404 }
1405 }
1406 else
1407 {
1408 for( ; count--; dst += dst_step, src++ )
1409 {
1410 if( is_m1_0_1 )
1411 {
1412 const int *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1];
1413
1414 for( i = 0; i <= width - 2; i += 2 )
1415 {
1416 int s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1];
1417 dst[i] = (short)s0; dst[i+1] = (short)s1;
1418 }
1419 }
1420 else
1421 for( i = 0; i <= width - 4; i += 4 )
1422 {
1423 int f = ky[0];
1424 const int* sptr = src[0] + i, *sptr2;
1425 int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
1426 for( k = 1; k <= ksize2; k++ )
1427 {
1428 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
1429 s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]);
1430 s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]);
1431 }
1432
1433 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
1434 dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3);
1435 }
1436
1437 for( ; i < width; i++ )
1438 {
1439 int s0 = ky[0]*src[0][i];
1440 for( k = 1; k <= ksize2; k++ )
1441 s0 += ky[k]*(src[k][i] - src[-k][i]);
1442 dst[i] = CV_CAST_16S(s0);
1443 }
1444 }
1445 }
1446 }
1447
1448
1449 #define ICV_FILTER_COL( flavor, srctype, dsttype, worktype, \
1450 cast_macro1, cast_macro2 ) \
1451 static void \
1452 icvFilterCol_##flavor( const srctype** src, dsttype* dst, \
1453 int dst_step, int count, void* params ) \
1454 { \
1455 const CvSepFilter* state = (const CvSepFilter*)params; \
1456 const CvMat* _ky = state->get_y_kernel(); \
1457 const srctype* ky = (const srctype*)_ky->data.ptr; \
1458 int ksize = _ky->cols + _ky->rows - 1; \
1459 int i, k, width = state->get_width(); \
1460 int cn = CV_MAT_CN(state->get_src_type()); \
1461 \
1462 width *= cn; \
1463 dst_step /= sizeof(dst[0]); \
1464 \
1465 for( ; count--; dst += dst_step, src++ ) \
1466 { \
1467 for( i = 0; i <= width - 4; i += 4 ) \
1468 { \
1469 double f = ky[0]; \
1470 const srctype* sptr = src[0] + i; \
1471 double s0 = f*sptr[0], s1 = f*sptr[1], \
1472 s2 = f*sptr[2], s3 = f*sptr[3]; \
1473 worktype t0, t1; \
1474 for( k = 1; k < ksize; k++ ) \
1475 { \
1476 sptr = src[k] + i; f = ky[k]; \
1477 s0 += f*sptr[0]; s1 += f*sptr[1]; \
1478 s2 += f*sptr[2]; s3 += f*sptr[3]; \
1479 } \
1480 \
1481 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \
1482 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \
1483 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \
1484 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
1485 } \
1486 \
1487 for( ; i < width; i++ ) \
1488 { \
1489 double s0 = (double)ky[0]*src[0][i]; \
1490 worktype t0; \
1491 for( k = 1; k < ksize; k++ ) \
1492 s0 += (double)ky[k]*src[k][i]; \
1493 t0 = cast_macro1(s0); \
1494 dst[i] = cast_macro2(t0); \
1495 } \
1496 } \
1497 }
1498
1499
1500 ICV_FILTER_COL( 32f8u, float, uchar, int, cvRound, CV_CAST_8U )
1501 ICV_FILTER_COL( 32f16s, float, short, int, cvRound, CV_CAST_16S )
1502 ICV_FILTER_COL( 32f16u, float, ushort, int, cvRound, CV_CAST_16U )
1503
1504 #define ICV_FILTER_COL_SYMM( flavor, srctype, dsttype, worktype, \
1505 cast_macro1, cast_macro2 ) \
1506 static void \
1507 icvFilterColSymm_##flavor( const srctype** src, dsttype* dst, \
1508 int dst_step, int count, void* params ) \
1509 { \
1510 const CvSepFilter* state = (const CvSepFilter*)params; \
1511 const CvMat* _ky = state->get_y_kernel(); \
1512 const srctype* ky = (const srctype*)_ky->data.ptr; \
1513 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2; \
1514 int i, k, width = state->get_width(); \
1515 int cn = CV_MAT_CN(state->get_src_type()); \
1516 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;\
1517 \
1518 width *= cn; \
1519 src += ksize2; \
1520 ky += ksize2; \
1521 dst_step /= sizeof(dst[0]); \
1522 \
1523 if( is_symm ) \
1524 { \
1525 for( ; count--; dst += dst_step, src++ ) \
1526 { \
1527 for( i = 0; i <= width - 4; i += 4 ) \
1528 { \
1529 double f = ky[0]; \
1530 const srctype* sptr = src[0] + i, *sptr2; \
1531 double s0 = f*sptr[0], s1 = f*sptr[1], \
1532 s2 = f*sptr[2], s3 = f*sptr[3]; \
1533 worktype t0, t1; \
1534 for( k = 1; k <= ksize2; k++ ) \
1535 { \
1536 sptr = src[k] + i; \
1537 sptr2 = src[-k] + i; \
1538 f = ky[k]; \
1539 s0 += f*(sptr[0] + sptr2[0]); \
1540 s1 += f*(sptr[1] + sptr2[1]); \
1541 s2 += f*(sptr[2] + sptr2[2]); \
1542 s3 += f*(sptr[3] + sptr2[3]); \
1543 } \
1544 \
1545 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \
1546 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \
1547 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \
1548 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
1549 } \
1550 \
1551 for( ; i < width; i++ ) \
1552 { \
1553 double s0 = (double)ky[0]*src[0][i]; \
1554 worktype t0; \
1555 for( k = 1; k <= ksize2; k++ ) \
1556 s0 += (double)ky[k]*(src[k][i] + src[-k][i]); \
1557 t0 = cast_macro1(s0); \
1558 dst[i] = cast_macro2(t0); \
1559 } \
1560 } \
1561 } \
1562 else \
1563 { \
1564 for( ; count--; dst += dst_step, src++ ) \
1565 { \
1566 for( i = 0; i <= width - 4; i += 4 ) \
1567 { \
1568 double f = ky[0]; \
1569 const srctype* sptr = src[0] + i, *sptr2; \
1570 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \
1571 worktype t0, t1; \
1572 for( k = 1; k <= ksize2; k++ ) \
1573 { \
1574 sptr = src[k] + i; \
1575 sptr2 = src[-k] + i; \
1576 f = ky[k]; \
1577 s0 += f*(sptr[0] - sptr2[0]); \
1578 s1 += f*(sptr[1] - sptr2[1]); \
1579 s2 += f*(sptr[2] - sptr2[2]); \
1580 s3 += f*(sptr[3] - sptr2[3]); \
1581 } \
1582 \
1583 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \
1584 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \
1585 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \
1586 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
1587 } \
1588 \
1589 for( ; i < width; i++ ) \
1590 { \
1591 double s0 = (double)ky[0]*src[0][i]; \
1592 worktype t0; \
1593 for( k = 1; k <= ksize2; k++ ) \
1594 s0 += (double)ky[k]*(src[k][i] - src[-k][i]); \
1595 t0 = cast_macro1(s0); \
1596 dst[i] = cast_macro2(t0); \
1597 } \
1598 } \
1599 } \
1600 }
1601
1602
1603 ICV_FILTER_COL_SYMM( 32f8u, float, uchar, int, cvRound, CV_CAST_8U )
1604 ICV_FILTER_COL_SYMM( 32f16s, float, short, int, cvRound, CV_CAST_16S )
1605 ICV_FILTER_COL_SYMM( 32f16u, float, ushort, int, cvRound, CV_CAST_16U )
1606
1607
1608 static void
icvFilterCol_32f(const float ** src,float * dst,int dst_step,int count,void * params)1609 icvFilterCol_32f( const float** src, float* dst,
1610 int dst_step, int count, void* params )
1611 {
1612 const CvSepFilter* state = (const CvSepFilter*)params;
1613 const CvMat* _ky = state->get_y_kernel();
1614 const float* ky = (const float*)_ky->data.ptr;
1615 int ksize = _ky->cols + _ky->rows - 1;
1616 int i, k, width = state->get_width();
1617 int cn = CV_MAT_CN(state->get_src_type());
1618
1619 width *= cn;
1620 dst_step /= sizeof(dst[0]);
1621
1622 for( ; count--; dst += dst_step, src++ )
1623 {
1624 for( i = 0; i <= width - 4; i += 4 )
1625 {
1626 double f = ky[0];
1627 const float* sptr = src[0] + i;
1628 double s0 = f*sptr[0], s1 = f*sptr[1],
1629 s2 = f*sptr[2], s3 = f*sptr[3];
1630 for( k = 1; k < ksize; k++ )
1631 {
1632 sptr = src[k] + i; f = ky[k];
1633 s0 += f*sptr[0]; s1 += f*sptr[1];
1634 s2 += f*sptr[2]; s3 += f*sptr[3];
1635 }
1636
1637 dst[i] = (float)s0; dst[i+1] = (float)s1;
1638 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
1639 }
1640
1641 for( ; i < width; i++ )
1642 {
1643 double s0 = (double)ky[0]*src[0][i];
1644 for( k = 1; k < ksize; k++ )
1645 s0 += (double)ky[k]*src[k][i];
1646 dst[i] = (float)s0;
1647 }
1648 }
1649 }
1650
1651
1652 static void
icvFilterColSymm_32f(const float ** src,float * dst,int dst_step,int count,void * params)1653 icvFilterColSymm_32f( const float** src, float* dst,
1654 int dst_step, int count, void* params )
1655 {
1656 const CvSepFilter* state = (const CvSepFilter*)params;
1657 const CvMat* _ky = state->get_y_kernel();
1658 const float* ky = (const float*)_ky->data.ptr;
1659 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
1660 int i = 0, k, width = state->get_width();
1661 int cn = CV_MAT_CN(state->get_src_type());
1662 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;
1663 int is_1_2_1 = is_symm && ksize == 3 &&
1664 fabs(ky[1] - 2.) <= FLT_EPSILON && fabs(ky[2] - 1.) <= FLT_EPSILON;
1665 int is_3_10_3 = is_symm && ksize == 3 &&
1666 fabs(ky[1] - 10.) <= FLT_EPSILON && fabs(ky[2] - 3.) <= FLT_EPSILON;
1667 int is_m1_0_1 = !is_symm && ksize == 3 &&
1668 fabs(ky[1]) <= FLT_EPSILON && fabs(ky[2]*ky[2] - 1.) <= FLT_EPSILON ?
1669 (ky[2] > 0 ? 1 : -1) : 0;
1670
1671 width *= cn;
1672 src += ksize2;
1673 ky += ksize2;
1674 dst_step /= sizeof(dst[0]);
1675
1676 if( is_symm )
1677 {
1678 for( ; count--; dst += dst_step, src++ )
1679 {
1680 if( is_1_2_1 )
1681 {
1682 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1];
1683
1684 for( i = 0; i <= width - 4; i += 4 )
1685 {
1686 float s0 = src0[i] + src1[i]*2 + src2[i],
1687 s1 = src0[i+1] + src1[i+1]*2 + src2[i+1],
1688 s2 = src0[i+2] + src1[i+2]*2 + src2[i+2],
1689 s3 = src0[i+3] + src1[i+3]*2 + src2[i+3];
1690
1691 dst[i] = s0; dst[i+1] = s1;
1692 dst[i+2] = s2; dst[i+3] = s3;
1693 }
1694 }
1695 else if( is_3_10_3 )
1696 {
1697 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1];
1698
1699 for( i = 0; i <= width - 4; i += 4 )
1700 {
1701 float s0 = src1[i]*10 + (src0[i] + src2[i])*3,
1702 s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3,
1703 s2 = src1[i+2]*10 + (src0[i+2] + src2[i+2])*3,
1704 s3 = src1[i+3]*10 + (src0[i+3] + src2[i+3])*3;
1705
1706 dst[i] = s0; dst[i+1] = s1;
1707 dst[i+2] = s2; dst[i+3] = s3;
1708 }
1709 }
1710 else
1711 for( i = 0; i <= width - 4; i += 4 )
1712 {
1713 double f = ky[0];
1714 const float* sptr = src[0] + i, *sptr2;
1715 double s0 = f*sptr[0], s1 = f*sptr[1],
1716 s2 = f*sptr[2], s3 = f*sptr[3];
1717 for( k = 1; k <= ksize2; k++ )
1718 {
1719 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
1720 s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]);
1721 s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]);
1722 }
1723
1724 dst[i] = (float)s0; dst[i+1] = (float)s1;
1725 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
1726 }
1727
1728 for( ; i < width; i++ )
1729 {
1730 double s0 = (double)ky[0]*src[0][i];
1731 for( k = 1; k <= ksize2; k++ )
1732 s0 += (double)ky[k]*(src[k][i] + src[-k][i]);
1733 dst[i] = (float)s0;
1734 }
1735 }
1736 }
1737 else
1738 {
1739 for( ; count--; dst += dst_step, src++ )
1740 {
1741 if( is_m1_0_1 )
1742 {
1743 const float *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1];
1744
1745 for( i = 0; i <= width - 4; i += 4 )
1746 {
1747 float s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1],
1748 s2 = src2[i+2] - src0[i+2], s3 = src2[i+3] - src0[i+3];
1749 dst[i] = s0; dst[i+1] = s1;
1750 dst[i+2] = s2; dst[i+3] = s3;
1751 }
1752 }
1753 else
1754 for( i = 0; i <= width - 4; i += 4 )
1755 {
1756 double f = ky[0];
1757 const float* sptr = src[0] + i, *sptr2;
1758 double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
1759 for( k = 1; k <= ksize2; k++ )
1760 {
1761 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
1762 s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]);
1763 s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]);
1764 }
1765
1766 dst[i] = (float)s0; dst[i+1] = (float)s1;
1767 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
1768 }
1769
1770 for( ; i < width; i++ )
1771 {
1772 double s0 = (double)ky[0]*src[0][i];
1773 for( k = 1; k <= ksize2; k++ )
1774 s0 += (double)ky[k]*(src[k][i] - src[-k][i]);
1775 dst[i] = (float)s0;
1776 }
1777 }
1778 }
1779 }
1780
1781
1782 #define SMALL_GAUSSIAN_SIZE 7
1783
init_gaussian_kernel(CvMat * kernel,double sigma)1784 void CvSepFilter::init_gaussian_kernel( CvMat* kernel, double sigma )
1785 {
1786 static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE/2+1] =
1787 {
1788 {1.f},
1789 {0.5f, 0.25f},
1790 {0.375f, 0.25f, 0.0625f},
1791 {0.28125f, 0.21875f, 0.109375f, 0.03125f}
1792 };
1793
1794 CV_FUNCNAME( "CvSepFilter::init_gaussian_kernel" );
1795
1796 __BEGIN__;
1797
1798 int type, i, n, step;
1799 const float* fixed_kernel = 0;
1800 double sigmaX, scale2X, sum;
1801 float* cf;
1802 double* cd;
1803
1804 if( !CV_IS_MAT(kernel) )
1805 CV_ERROR( CV_StsBadArg, "kernel is not a valid matrix" );
1806
1807 type = CV_MAT_TYPE(kernel->type);
1808
1809 if( (kernel->cols != 1 && kernel->rows != 1) ||
1810 (kernel->cols + kernel->rows - 1) % 2 == 0 ||
1811 (type != CV_32FC1 && type != CV_64FC1) )
1812 CV_ERROR( CV_StsBadSize, "kernel should be 1D floating-point vector of odd (2*k+1) size" );
1813
1814 n = kernel->cols + kernel->rows - 1;
1815
1816 if( n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 )
1817 fixed_kernel = small_gaussian_tab[n>>1];
1818
1819 sigmaX = sigma > 0 ? sigma : (n/2 - 1)*0.3 + 0.8;
1820 scale2X = -0.5/(sigmaX*sigmaX);
1821 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
1822 cf = kernel->data.fl;
1823 cd = kernel->data.db;
1824
1825 sum = fixed_kernel ? -fixed_kernel[0] : -1.;
1826
1827 for( i = 0; i <= n/2; i++ )
1828 {
1829 double t = fixed_kernel ? (double)fixed_kernel[i] : exp(scale2X*i*i);
1830 if( type == CV_32FC1 )
1831 {
1832 cf[(n/2+i)*step] = (float)t;
1833 sum += cf[(n/2+i)*step]*2;
1834 }
1835 else
1836 {
1837 cd[(n/2+i)*step] = t;
1838 sum += cd[(n/2+i)*step]*2;
1839 }
1840 }
1841
1842 sum = 1./sum;
1843 for( i = 0; i <= n/2; i++ )
1844 {
1845 if( type == CV_32FC1 )
1846 cf[(n/2+i)*step] = cf[(n/2-i)*step] = (float)(cf[(n/2+i)*step]*sum);
1847 else
1848 cd[(n/2+i)*step] = cd[(n/2-i)*step] = cd[(n/2+i)*step]*sum;
1849 }
1850
1851 __END__;
1852 }
1853
1854
init_sobel_kernel(CvMat * _kx,CvMat * _ky,int dx,int dy,int flags)1855 void CvSepFilter::init_sobel_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags )
1856 {
1857 CV_FUNCNAME( "CvSepFilter::init_sobel_kernel" );
1858
1859 __BEGIN__;
1860
1861 int i, j, k, msz;
1862 int* kerI;
1863 bool normalize = (flags & NORMALIZE_KERNEL) != 0;
1864 bool flip = (flags & FLIP_KERNEL) != 0;
1865
1866 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) )
1867 CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" );
1868
1869 msz = MAX( _kx->cols + _kx->rows, _ky->cols + _ky->rows );
1870 if( msz > 32 )
1871 CV_ERROR( CV_StsOutOfRange, "Too large kernel size" );
1872
1873 kerI = (int*)cvStackAlloc( msz*sizeof(kerI[0]) );
1874
1875 if( dx < 0 || dy < 0 || dx+dy <= 0 )
1876 CV_ERROR( CV_StsOutOfRange,
1877 "Both derivative orders (dx and dy) must be non-negative "
1878 "and at least one of them must be positive." );
1879
1880 for( k = 0; k < 2; k++ )
1881 {
1882 CvMat* kernel = k == 0 ? _kx : _ky;
1883 int order = k == 0 ? dx : dy;
1884 int n = kernel->cols + kernel->rows - 1, step;
1885 int type = CV_MAT_TYPE(kernel->type);
1886 double scale = !normalize ? 1. : 1./(1 << (n-order-1));
1887 int iscale = 1;
1888
1889 if( (kernel->cols != 1 && kernel->rows != 1) ||
1890 (kernel->cols + kernel->rows - 1) % 2 == 0 ||
1891 (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) )
1892 CV_ERROR( CV_StsOutOfRange,
1893 "Both kernels must be 1D floating-point or integer vectors of odd (2*k+1) size." );
1894
1895 if( normalize && n > 1 && type == CV_32SC1 )
1896 CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" );
1897
1898 if( n <= order )
1899 CV_ERROR( CV_StsOutOfRange,
1900 "Derivative order must be smaller than the corresponding kernel size" );
1901
1902 if( n == 1 )
1903 kerI[0] = 1;
1904 else if( n == 3 )
1905 {
1906 if( order == 0 )
1907 kerI[0] = 1, kerI[1] = 2, kerI[2] = 1;
1908 else if( order == 1 )
1909 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
1910 else
1911 kerI[0] = 1, kerI[1] = -2, kerI[2] = 1;
1912 }
1913 else
1914 {
1915 int oldval, newval;
1916 kerI[0] = 1;
1917 for( i = 0; i < n; i++ )
1918 kerI[i+1] = 0;
1919
1920 for( i = 0; i < n - order - 1; i++ )
1921 {
1922 oldval = kerI[0];
1923 for( j = 1; j <= n; j++ )
1924 {
1925 newval = kerI[j]+kerI[j-1];
1926 kerI[j-1] = oldval;
1927 oldval = newval;
1928 }
1929 }
1930
1931 for( i = 0; i < order; i++ )
1932 {
1933 oldval = -kerI[0];
1934 for( j = 1; j <= n; j++ )
1935 {
1936 newval = kerI[j-1] - kerI[j];
1937 kerI[j-1] = oldval;
1938 oldval = newval;
1939 }
1940 }
1941 }
1942
1943 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
1944 if( flip && (order & 1) && k )
1945 iscale = -iscale, scale = -scale;
1946
1947 for( i = 0; i < n; i++ )
1948 {
1949 if( type == CV_32SC1 )
1950 kernel->data.i[i*step] = kerI[i]*iscale;
1951 else if( type == CV_32FC1 )
1952 kernel->data.fl[i*step] = (float)(kerI[i]*scale);
1953 else
1954 kernel->data.db[i*step] = kerI[i]*scale;
1955 }
1956 }
1957
1958 __END__;
1959 }
1960
1961
init_scharr_kernel(CvMat * _kx,CvMat * _ky,int dx,int dy,int flags)1962 void CvSepFilter::init_scharr_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags )
1963 {
1964 CV_FUNCNAME( "CvSepFilter::init_scharr_kernel" );
1965
1966 __BEGIN__;
1967
1968 int i, k;
1969 int kerI[3];
1970 bool normalize = (flags & NORMALIZE_KERNEL) != 0;
1971 bool flip = (flags & FLIP_KERNEL) != 0;
1972
1973 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) )
1974 CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" );
1975
1976 if( ((dx|dy)&~1) || dx+dy != 1 )
1977 CV_ERROR( CV_StsOutOfRange,
1978 "Scharr kernel can only be used for 1st order derivatives" );
1979
1980 for( k = 0; k < 2; k++ )
1981 {
1982 CvMat* kernel = k == 0 ? _kx : _ky;
1983 int order = k == 0 ? dx : dy;
1984 int n = kernel->cols + kernel->rows - 1, step;
1985 int type = CV_MAT_TYPE(kernel->type);
1986 double scale = !normalize ? 1. : order == 0 ? 1./16 : 1./2;
1987 int iscale = 1;
1988
1989 if( (kernel->cols != 1 && kernel->rows != 1) ||
1990 kernel->cols + kernel->rows - 1 != 3 ||
1991 (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) )
1992 CV_ERROR( CV_StsOutOfRange,
1993 "Both kernels must be 1D floating-point or integer vectors containing 3 elements each." );
1994
1995 if( normalize && type == CV_32SC1 )
1996 CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" );
1997
1998 if( order == 0 )
1999 kerI[0] = 3, kerI[1] = 10, kerI[2] = 3;
2000 else
2001 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
2002
2003 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
2004 if( flip && (order & 1) && k )
2005 iscale = -iscale, scale = -scale;
2006
2007 for( i = 0; i < n; i++ )
2008 {
2009 if( type == CV_32SC1 )
2010 kernel->data.i[i*step] = kerI[i]*iscale;
2011 else if( type == CV_32FC1 )
2012 kernel->data.fl[i*step] = (float)(kerI[i]*scale);
2013 else
2014 kernel->data.db[i*step] = kerI[i]*scale;
2015 }
2016 }
2017
2018 __END__;
2019 }
2020
2021
init_deriv(int _max_width,int _src_type,int _dst_type,int dx,int dy,int aperture_size,int flags)2022 void CvSepFilter::init_deriv( int _max_width, int _src_type, int _dst_type,
2023 int dx, int dy, int aperture_size, int flags )
2024 {
2025 CV_FUNCNAME( "CvSepFilter::init_deriv" );
2026
2027 __BEGIN__;
2028
2029 int kx_size = aperture_size == CV_SCHARR ? 3 : aperture_size, ky_size = kx_size;
2030 float kx_data[CV_MAX_SOBEL_KSIZE], ky_data[CV_MAX_SOBEL_KSIZE];
2031 CvMat _kx, _ky;
2032
2033 if( kx_size <= 0 || ky_size > CV_MAX_SOBEL_KSIZE )
2034 CV_ERROR( CV_StsOutOfRange, "Incorrect aperture_size" );
2035
2036 if( kx_size == 1 && dx )
2037 kx_size = 3;
2038 if( ky_size == 1 && dy )
2039 ky_size = 3;
2040
2041 _kx = cvMat( 1, kx_size, CV_32FC1, kx_data );
2042 _ky = cvMat( 1, ky_size, CV_32FC1, ky_data );
2043
2044 if( aperture_size == CV_SCHARR )
2045 {
2046 CV_CALL( init_scharr_kernel( &_kx, &_ky, dx, dy, flags ));
2047 }
2048 else
2049 {
2050 CV_CALL( init_sobel_kernel( &_kx, &_ky, dx, dy, flags ));
2051 }
2052
2053 CV_CALL( init( _max_width, _src_type, _dst_type, &_kx, &_ky ));
2054
2055 __END__;
2056 }
2057
2058
init_gaussian(int _max_width,int _src_type,int _dst_type,int gaussian_size,double sigma)2059 void CvSepFilter::init_gaussian( int _max_width, int _src_type, int _dst_type,
2060 int gaussian_size, double sigma )
2061 {
2062 float* kdata = 0;
2063
2064 CV_FUNCNAME( "CvSepFilter::init_gaussian" );
2065
2066 __BEGIN__;
2067
2068 CvMat _kernel;
2069
2070 if( gaussian_size <= 0 || gaussian_size > 1024 )
2071 CV_ERROR( CV_StsBadSize, "Incorrect size of gaussian kernel" );
2072
2073 kdata = (float*)cvStackAlloc(gaussian_size*sizeof(kdata[0]));
2074 _kernel = cvMat( 1, gaussian_size, CV_32F, kdata );
2075
2076 CV_CALL( init_gaussian_kernel( &_kernel, sigma ));
2077 CV_CALL( init( _max_width, _src_type, _dst_type, &_kernel, &_kernel ));
2078
2079 __END__;
2080 }
2081
2082
2083 /****************************************************************************************\
2084 Non-separable Linear Filter
2085 \****************************************************************************************/
2086
2087 static void icvLinearFilter_8u( const uchar** src, uchar* dst, int dst_step,
2088 int count, void* params );
2089 static void icvLinearFilter_16s( const short** src, short* dst, int dst_step,
2090 int count, void* params );
2091 static void icvLinearFilter_16u( const ushort** src, ushort* dst, int dst_step,
2092 int count, void* params );
2093 static void icvLinearFilter_32f( const float** src, float* dst, int dst_step,
2094 int count, void* params );
2095
CvLinearFilter()2096 CvLinearFilter::CvLinearFilter()
2097 {
2098 kernel = 0;
2099 k_sparse = 0;
2100 }
2101
CvLinearFilter(int _max_width,int _src_type,int _dst_type,const CvMat * _kernel,CvPoint _anchor,int _border_mode,CvScalar _border_value)2102 CvLinearFilter::CvLinearFilter( int _max_width, int _src_type, int _dst_type,
2103 const CvMat* _kernel, CvPoint _anchor,
2104 int _border_mode, CvScalar _border_value )
2105 {
2106 kernel = 0;
2107 k_sparse = 0;
2108 init( _max_width, _src_type, _dst_type, _kernel,
2109 _anchor, _border_mode, _border_value );
2110 }
2111
2112
clear()2113 void CvLinearFilter::clear()
2114 {
2115 cvReleaseMat( &kernel );
2116 cvFree( &k_sparse );
2117 CvBaseImageFilter::clear();
2118 }
2119
2120
~CvLinearFilter()2121 CvLinearFilter::~CvLinearFilter()
2122 {
2123 clear();
2124 }
2125
2126
init(int _max_width,int _src_type,int _dst_type,const CvMat * _kernel,CvPoint _anchor,int _border_mode,CvScalar _border_value)2127 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type,
2128 const CvMat* _kernel, CvPoint _anchor,
2129 int _border_mode, CvScalar _border_value )
2130 {
2131 CV_FUNCNAME( "CvLinearFilter::init" );
2132
2133 __BEGIN__;
2134
2135 int depth = CV_MAT_DEPTH(_src_type);
2136 int cn = CV_MAT_CN(_src_type);
2137 CvPoint* nz_loc;
2138 float* coeffs;
2139 int i, j, k = 0;
2140
2141 if( !CV_IS_MAT(_kernel) )
2142 CV_ERROR( CV_StsBadArg, "kernel is not valid matrix" );
2143
2144 _src_type = CV_MAT_TYPE(_src_type);
2145 _dst_type = CV_MAT_TYPE(_dst_type);
2146
2147 if( _src_type != _dst_type )
2148 CV_ERROR( CV_StsUnmatchedFormats,
2149 "The source and destination image types must be the same" );
2150
2151 CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type,
2152 false, cvGetMatSize(_kernel), _anchor, _border_mode, _border_value ));
2153
2154 if( !(kernel && k_sparse && ksize.width == kernel->cols && ksize.height == kernel->rows ))
2155 {
2156 cvReleaseMat( &kernel );
2157 cvFree( &k_sparse );
2158 CV_CALL( kernel = cvCreateMat( ksize.height, ksize.width, CV_32FC1 ));
2159 CV_CALL( k_sparse = (uchar*)cvAlloc(
2160 ksize.width*ksize.height*(2*sizeof(int) + sizeof(uchar*) + sizeof(float))));
2161 }
2162
2163 CV_CALL( cvConvert( _kernel, kernel ));
2164
2165 nz_loc = (CvPoint*)k_sparse;
2166 for( i = 0; i < ksize.height; i++ )
2167 {
2168 for( j = 0; j < ksize.width; j++ )
2169 if( fabs(((float*)(kernel->data.ptr + i*kernel->step))[j])>FLT_EPSILON )
2170 nz_loc[k++] = cvPoint(j,i);
2171 }
2172 if( k == 0 )
2173 nz_loc[k++] = anchor;
2174 k_sparse_count = k;
2175 coeffs = (float*)((uchar**)(nz_loc + k_sparse_count) + k_sparse_count);
2176
2177 for( k = 0; k < k_sparse_count; k++ )
2178 {
2179 coeffs[k] = CV_MAT_ELEM( *kernel, float, nz_loc[k].y, nz_loc[k].x );
2180 nz_loc[k].x *= cn;
2181 }
2182
2183 x_func = 0;
2184 if( depth == CV_8U )
2185 y_func = (CvColumnFilterFunc)icvLinearFilter_8u;
2186 else if( depth == CV_16S )
2187 y_func = (CvColumnFilterFunc)icvLinearFilter_16s;
2188 else if( depth == CV_16U )
2189 y_func = (CvColumnFilterFunc)icvLinearFilter_16u;
2190 else if( depth == CV_32F )
2191 y_func = (CvColumnFilterFunc)icvLinearFilter_32f;
2192 else
2193 CV_ERROR( CV_StsUnsupportedFormat, "Unsupported image type" );
2194
2195 __END__;
2196 }
2197
2198
init(int _max_width,int _src_type,int _dst_type,bool _is_separable,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)2199 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type,
2200 bool _is_separable, CvSize _ksize,
2201 CvPoint _anchor, int _border_mode,
2202 CvScalar _border_value )
2203 {
2204 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable,
2205 _ksize, _anchor, _border_mode, _border_value );
2206 }
2207
2208
2209 #define ICV_FILTER( flavor, arrtype, worktype, load_macro, \
2210 cast_macro1, cast_macro2 ) \
2211 static void \
2212 icvLinearFilter_##flavor( const arrtype** src, arrtype* dst, \
2213 int dst_step, int count, void* params ) \
2214 { \
2215 CvLinearFilter* state = (CvLinearFilter*)params; \
2216 int width = state->get_width(); \
2217 int cn = CV_MAT_CN(state->get_src_type()); \
2218 int i, k; \
2219 CvPoint* k_sparse = (CvPoint*)state->get_kernel_sparse_buf(); \
2220 int k_count = state->get_kernel_sparse_count(); \
2221 const arrtype** k_ptr = (const arrtype**)(k_sparse + k_count); \
2222 const arrtype** k_end = k_ptr + k_count; \
2223 const float* k_coeffs = (const float*)(k_ptr + k_count); \
2224 \
2225 width *= cn; \
2226 dst_step /= sizeof(dst[0]); \
2227 \
2228 for( ; count > 0; count--, dst += dst_step, src++ ) \
2229 { \
2230 for( k = 0; k < k_count; k++ ) \
2231 k_ptr[k] = src[k_sparse[k].y] + k_sparse[k].x; \
2232 \
2233 for( i = 0; i <= width - 4; i += 4 ) \
2234 { \
2235 const arrtype** kp = k_ptr; \
2236 const float* kc = k_coeffs; \
2237 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \
2238 worktype t0, t1; \
2239 \
2240 while( kp != k_end ) \
2241 { \
2242 const arrtype* sptr = (*kp++) + i; \
2243 float f = *kc++; \
2244 s0 += f*load_macro(sptr[0]); \
2245 s1 += f*load_macro(sptr[1]); \
2246 s2 += f*load_macro(sptr[2]); \
2247 s3 += f*load_macro(sptr[3]); \
2248 } \
2249 \
2250 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \
2251 dst[i] = cast_macro2(t0); \
2252 dst[i+1] = cast_macro2(t1); \
2253 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \
2254 dst[i+2] = cast_macro2(t0); \
2255 dst[i+3] = cast_macro2(t1); \
2256 } \
2257 \
2258 for( ; i < width; i++ ) \
2259 { \
2260 const arrtype** kp = k_ptr; \
2261 const float* kc = k_coeffs; \
2262 double s0 = 0; \
2263 worktype t0; \
2264 \
2265 while( kp != k_end ) \
2266 { \
2267 const arrtype* sptr = *kp++; \
2268 float f = *kc++; \
2269 s0 += f*load_macro(sptr[i]); \
2270 } \
2271 \
2272 t0 = cast_macro1(s0); \
2273 dst[i] = cast_macro2(t0); \
2274 } \
2275 } \
2276 }
2277
2278
2279 ICV_FILTER( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U )
2280 ICV_FILTER( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
2281 ICV_FILTER( 16s, short, int, CV_NOP, cvRound, CV_CAST_16S )
2282 ICV_FILTER( 32f, float, float, CV_NOP, CV_CAST_32F, CV_NOP )
2283
2284
2285 /////////////////////// common functions for working with IPP filters ////////////////////
2286
icvIPPFilterInit(const CvMat * src,int stripe_size,CvSize ksize)2287 CvMat* icvIPPFilterInit( const CvMat* src, int stripe_size, CvSize ksize )
2288 {
2289 CvSize temp_size;
2290 int pix_size = CV_ELEM_SIZE(src->type);
2291 temp_size.width = cvAlign(src->cols + ksize.width - 1,8/CV_ELEM_SIZE(src->type & CV_MAT_DEPTH_MASK));
2292 //temp_size.width = src->cols + ksize.width - 1;
2293 temp_size.height = (stripe_size*2 + temp_size.width*pix_size) / (temp_size.width*pix_size*2);
2294 temp_size.height = MAX( temp_size.height, ksize.height );
2295 temp_size.height = MIN( temp_size.height, src->rows + ksize.height - 1 );
2296
2297 return cvCreateMat( temp_size.height, temp_size.width, src->type );
2298 }
2299
2300
icvIPPFilterNextStripe(const CvMat * src,CvMat * temp,int y,CvSize ksize,CvPoint anchor)2301 int icvIPPFilterNextStripe( const CvMat* src, CvMat* temp, int y,
2302 CvSize ksize, CvPoint anchor )
2303 {
2304 int pix_size = CV_ELEM_SIZE(src->type);
2305 int src_step = src->step ? src->step : CV_STUB_STEP;
2306 int temp_step = temp->step ? temp->step : CV_STUB_STEP;
2307 int i, dy, src_y1 = 0, src_y2;
2308 int temp_rows;
2309 uchar* temp_ptr = temp->data.ptr;
2310 CvSize stripe_size, temp_size;
2311
2312 dy = MIN( temp->rows - ksize.height + 1, src->rows - y );
2313 if( y > 0 )
2314 {
2315 int temp_ready = ksize.height - 1;
2316
2317 for( i = 0; i < temp_ready; i++ )
2318 memcpy( temp_ptr + temp_step*i, temp_ptr +
2319 temp_step*(temp->rows - temp_ready + i), temp_step );
2320
2321 temp_ptr += temp_ready*temp_step;
2322 temp_rows = dy;
2323 src_y1 = y + temp_ready - anchor.y;
2324 src_y2 = src_y1 + dy;
2325 if( src_y1 >= src->rows )
2326 {
2327 src_y1 = src->rows - 1;
2328 src_y2 = src->rows;
2329 }
2330 }
2331 else
2332 {
2333 temp_rows = dy + ksize.height - 1;
2334 src_y2 = temp_rows - anchor.y;
2335 }
2336
2337 src_y2 = MIN( src_y2, src->rows );
2338
2339 stripe_size = cvSize(src->cols, src_y2 - src_y1);
2340 temp_size = cvSize(temp->cols, temp_rows);
2341 icvCopyReplicateBorder_8u( src->data.ptr + src_y1*src_step, src_step,
2342 stripe_size, temp_ptr, temp_step, temp_size,
2343 (y == 0 ? anchor.y : 0), anchor.x, pix_size );
2344 return dy;
2345 }
2346
2347
2348 /////////////////////////////// IPP separable filter functions ///////////////////////////
2349
2350 icvFilterRow_8u_C1R_t icvFilterRow_8u_C1R_p = 0;
2351 icvFilterRow_8u_C3R_t icvFilterRow_8u_C3R_p = 0;
2352 icvFilterRow_8u_C4R_t icvFilterRow_8u_C4R_p = 0;
2353 icvFilterRow_16s_C1R_t icvFilterRow_16s_C1R_p = 0;
2354 icvFilterRow_16s_C3R_t icvFilterRow_16s_C3R_p = 0;
2355 icvFilterRow_16s_C4R_t icvFilterRow_16s_C4R_p = 0;
2356 icvFilterRow_32f_C1R_t icvFilterRow_32f_C1R_p = 0;
2357 icvFilterRow_32f_C3R_t icvFilterRow_32f_C3R_p = 0;
2358 icvFilterRow_32f_C4R_t icvFilterRow_32f_C4R_p = 0;
2359
2360 icvFilterColumn_8u_C1R_t icvFilterColumn_8u_C1R_p = 0;
2361 icvFilterColumn_8u_C3R_t icvFilterColumn_8u_C3R_p = 0;
2362 icvFilterColumn_8u_C4R_t icvFilterColumn_8u_C4R_p = 0;
2363 icvFilterColumn_16s_C1R_t icvFilterColumn_16s_C1R_p = 0;
2364 icvFilterColumn_16s_C3R_t icvFilterColumn_16s_C3R_p = 0;
2365 icvFilterColumn_16s_C4R_t icvFilterColumn_16s_C4R_p = 0;
2366 icvFilterColumn_32f_C1R_t icvFilterColumn_32f_C1R_p = 0;
2367 icvFilterColumn_32f_C3R_t icvFilterColumn_32f_C3R_p = 0;
2368 icvFilterColumn_32f_C4R_t icvFilterColumn_32f_C4R_p = 0;
2369
2370 //////////////////////////////////////////////////////////////////////////////////////////
2371
2372 typedef CvStatus (CV_STDCALL * CvIPPSepFilterFunc)
2373 ( const void* src, int srcstep, void* dst, int dststep,
2374 CvSize size, const float* kernel, int ksize, int anchor );
2375
icvIPPSepFilter(const CvMat * src,CvMat * dst,const CvMat * kernelX,const CvMat * kernelY,CvPoint anchor)2376 int icvIPPSepFilter( const CvMat* src, CvMat* dst, const CvMat* kernelX,
2377 const CvMat* kernelY, CvPoint anchor )
2378 {
2379 int result = 0;
2380
2381 CvMat* top_bottom = 0;
2382 CvMat* vout_hin = 0;
2383 CvMat* dst_buf = 0;
2384
2385 CV_FUNCNAME( "icvIPPSepFilter" );
2386
2387 __BEGIN__;
2388
2389 CvSize ksize;
2390 CvPoint el_anchor;
2391 CvSize size;
2392 int type, depth, pix_size;
2393 int i, x, y, dy = 0, prev_dy = 0, max_dy;
2394 CvMat vout;
2395 CvIPPSepFilterFunc x_func = 0, y_func = 0;
2396 int src_step, top_bottom_step;
2397 float *kx, *ky;
2398 int align, stripe_size;
2399
2400 if( !icvFilterRow_8u_C1R_p )
2401 EXIT;
2402
2403 if( !CV_ARE_TYPES_EQ( src, dst ) || !CV_ARE_SIZES_EQ( src, dst ) ||
2404 !CV_IS_MAT_CONT(kernelX->type & kernelY->type) ||
2405 CV_MAT_TYPE(kernelX->type) != CV_32FC1 ||
2406 CV_MAT_TYPE(kernelY->type) != CV_32FC1 ||
2407 (kernelX->cols != 1 && kernelX->rows != 1) ||
2408 (kernelY->cols != 1 && kernelY->rows != 1) ||
2409 (unsigned)anchor.x >= (unsigned)(kernelX->cols + kernelX->rows - 1) ||
2410 (unsigned)anchor.y >= (unsigned)(kernelY->cols + kernelY->rows - 1) )
2411 CV_ERROR( CV_StsError, "Internal Error: incorrect parameters" );
2412
2413 ksize.width = kernelX->cols + kernelX->rows - 1;
2414 ksize.height = kernelY->cols + kernelY->rows - 1;
2415
2416 /*if( ksize.width <= 5 && ksize.height <= 5 )
2417 {
2418 float* ker = (float*)cvStackAlloc( ksize.width*ksize.height*sizeof(ker[0]));
2419 CvMat kernel = cvMat( ksize.height, ksize.width, CV_32F, ker );
2420 for( y = 0, i = 0; y < ksize.height; y++ )
2421 for( x = 0; x < ksize.width; x++, i++ )
2422 ker[i] = kernelY->data.fl[y]*kernelX->data.fl[x];
2423
2424 CV_CALL( cvFilter2D( src, dst, &kernel, anchor ));
2425 EXIT;
2426 }*/
2427
2428 type = CV_MAT_TYPE(src->type);
2429 depth = CV_MAT_DEPTH(type);
2430 pix_size = CV_ELEM_SIZE(type);
2431
2432 if( type == CV_8UC1 )
2433 x_func = icvFilterRow_8u_C1R_p, y_func = icvFilterColumn_8u_C1R_p;
2434 else if( type == CV_8UC3 )
2435 x_func = icvFilterRow_8u_C3R_p, y_func = icvFilterColumn_8u_C3R_p;
2436 else if( type == CV_8UC4 )
2437 x_func = icvFilterRow_8u_C4R_p, y_func = icvFilterColumn_8u_C4R_p;
2438 else if( type == CV_16SC1 )
2439 x_func = icvFilterRow_16s_C1R_p, y_func = icvFilterColumn_16s_C1R_p;
2440 else if( type == CV_16SC3 )
2441 x_func = icvFilterRow_16s_C3R_p, y_func = icvFilterColumn_16s_C3R_p;
2442 else if( type == CV_16SC4 )
2443 x_func = icvFilterRow_16s_C4R_p, y_func = icvFilterColumn_16s_C4R_p;
2444 else if( type == CV_32FC1 )
2445 x_func = icvFilterRow_32f_C1R_p, y_func = icvFilterColumn_32f_C1R_p;
2446 else if( type == CV_32FC3 )
2447 x_func = icvFilterRow_32f_C3R_p, y_func = icvFilterColumn_32f_C3R_p;
2448 else if( type == CV_32FC4 )
2449 x_func = icvFilterRow_32f_C4R_p, y_func = icvFilterColumn_32f_C4R_p;
2450 else
2451 EXIT;
2452
2453 size = cvGetMatSize(src);
2454 stripe_size = src->data.ptr == dst->data.ptr ? 1 << 15 : 1 << 16;
2455 max_dy = MAX( ksize.height - 1, stripe_size/(size.width + ksize.width - 1));
2456 max_dy = MIN( max_dy, size.height + ksize.height - 1 );
2457
2458 align = 8/CV_ELEM_SIZE(depth);
2459
2460 CV_CALL( top_bottom = cvCreateMat( ksize.height*2, cvAlign(size.width,align), type ));
2461
2462 CV_CALL( vout_hin = cvCreateMat( max_dy + ksize.height,
2463 cvAlign(size.width + ksize.width - 1, align), type ));
2464
2465 if( src->data.ptr == dst->data.ptr && size.height )
2466 CV_CALL( dst_buf = cvCreateMat( max_dy + ksize.height,
2467 cvAlign(size.width, align), type ));
2468
2469 kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
2470 ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
2471
2472 // mirror the kernels
2473 for( i = 0; i < ksize.width; i++ )
2474 kx[i] = kernelX->data.fl[ksize.width - i - 1];
2475
2476 for( i = 0; i < ksize.height; i++ )
2477 ky[i] = kernelY->data.fl[ksize.height - i - 1];
2478
2479 el_anchor = cvPoint( ksize.width - anchor.x - 1, ksize.height - anchor.y - 1 );
2480
2481 cvGetCols( vout_hin, &vout, anchor.x, anchor.x + size.width );
2482
2483 src_step = src->step ? src->step : CV_STUB_STEP;
2484 top_bottom_step = top_bottom->step ? top_bottom->step : CV_STUB_STEP;
2485 vout.step = vout.step ? vout.step : CV_STUB_STEP;
2486
2487 for( y = 0; y < size.height; y += dy )
2488 {
2489 const CvMat *vin = src, *hout = dst;
2490 int src_y = y, dst_y = y;
2491 dy = MIN( max_dy, size.height - (ksize.height - anchor.y - 1) - y );
2492
2493 if( y < anchor.y || dy < anchor.y )
2494 {
2495 int ay = anchor.y;
2496 CvSize src_stripe_size = size;
2497
2498 if( y < anchor.y )
2499 {
2500 src_y = 0;
2501 dy = MIN( anchor.y, size.height );
2502 src_stripe_size.height = MIN( dy + ksize.height - anchor.y - 1, size.height );
2503 }
2504 else
2505 {
2506 src_y = MAX( y - anchor.y, 0 );
2507 dy = size.height - y;
2508 src_stripe_size.height = MIN( dy + anchor.y, size.height );
2509 ay = MAX( anchor.y - y, 0 );
2510 }
2511
2512 icvCopyReplicateBorder_8u( src->data.ptr + src_y*src_step, src_step,
2513 src_stripe_size, top_bottom->data.ptr, top_bottom_step,
2514 cvSize(size.width, dy + ksize.height - 1), ay, 0, pix_size );
2515 vin = top_bottom;
2516 src_y = anchor.y;
2517 }
2518
2519 // do vertical convolution
2520 IPPI_CALL( y_func( vin->data.ptr + src_y*vin->step, vin->step ? vin->step : CV_STUB_STEP,
2521 vout.data.ptr, vout.step, cvSize(size.width, dy),
2522 ky, ksize.height, el_anchor.y ));
2523
2524 // now it's time to copy the previously processed stripe to the input/output image
2525 if( src->data.ptr == dst->data.ptr )
2526 {
2527 for( i = 0; i < prev_dy; i++ )
2528 memcpy( dst->data.ptr + (y - prev_dy + i)*dst->step,
2529 dst_buf->data.ptr + i*dst_buf->step, size.width*pix_size );
2530 if( y + dy < size.height )
2531 {
2532 hout = dst_buf;
2533 dst_y = 0;
2534 }
2535 }
2536
2537 // create a border for every line by replicating the left-most/right-most elements
2538 for( i = 0; i < dy; i++ )
2539 {
2540 uchar* ptr = vout.data.ptr + i*vout.step;
2541 for( x = -1; x >= -anchor.x*pix_size; x-- )
2542 ptr[x] = ptr[x + pix_size];
2543 for( x = size.width*pix_size; x < (size.width+ksize.width-anchor.x-1)*pix_size; x++ )
2544 ptr[x] = ptr[x - pix_size];
2545 }
2546
2547 // do horizontal convolution
2548 IPPI_CALL( x_func( vout.data.ptr, vout.step, hout->data.ptr + dst_y*hout->step,
2549 hout->step ? hout->step : CV_STUB_STEP,
2550 cvSize(size.width, dy), kx, ksize.width, el_anchor.x ));
2551 prev_dy = dy;
2552 }
2553
2554 result = 1;
2555
2556 __END__;
2557
2558 cvReleaseMat( &vout_hin );
2559 cvReleaseMat( &dst_buf );
2560 cvReleaseMat( &top_bottom );
2561
2562 return result;
2563 }
2564
2565 //////////////////////////////////////////////////////////////////////////////////////////
2566
2567 //////////////////////////////// IPP generic filter functions ////////////////////////////
2568
2569 icvFilter_8u_C1R_t icvFilter_8u_C1R_p = 0;
2570 icvFilter_8u_C3R_t icvFilter_8u_C3R_p = 0;
2571 icvFilter_8u_C4R_t icvFilter_8u_C4R_p = 0;
2572 icvFilter_16s_C1R_t icvFilter_16s_C1R_p = 0;
2573 icvFilter_16s_C3R_t icvFilter_16s_C3R_p = 0;
2574 icvFilter_16s_C4R_t icvFilter_16s_C4R_p = 0;
2575 icvFilter_32f_C1R_t icvFilter_32f_C1R_p = 0;
2576 icvFilter_32f_C3R_t icvFilter_32f_C3R_p = 0;
2577 icvFilter_32f_C4R_t icvFilter_32f_C4R_p = 0;
2578
2579
2580 typedef CvStatus (CV_STDCALL * CvFilterIPPFunc)
2581 ( const void* src, int srcstep, void* dst, int dststep, CvSize size,
2582 const float* kernel, CvSize ksize, CvPoint anchor );
2583
2584 CV_IMPL void
cvFilter2D(const CvArr * _src,CvArr * _dst,const CvMat * kernel,CvPoint anchor)2585 cvFilter2D( const CvArr* _src, CvArr* _dst, const CvMat* kernel, CvPoint anchor )
2586 {
2587 const int dft_filter_size = 100;
2588
2589 CvLinearFilter filter;
2590 CvMat* ipp_kernel = 0;
2591
2592 // below that approximate size OpenCV is faster
2593 const int ipp_lower_limit = 20;
2594 CvMat* temp = 0;
2595
2596 CV_FUNCNAME( "cvFilter2D" );
2597
2598 __BEGIN__;
2599
2600 int coi1 = 0, coi2 = 0;
2601 CvMat srcstub, *src = (CvMat*)_src;
2602 CvMat dststub, *dst = (CvMat*)_dst;
2603 int type;
2604
2605 CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
2606 CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
2607
2608 if( coi1 != 0 || coi2 != 0 )
2609 CV_ERROR( CV_BadCOI, "" );
2610
2611 type = CV_MAT_TYPE( src->type );
2612
2613 if( !CV_ARE_SIZES_EQ( src, dst ))
2614 CV_ERROR( CV_StsUnmatchedSizes, "" );
2615
2616 if( !CV_ARE_TYPES_EQ( src, dst ))
2617 CV_ERROR( CV_StsUnmatchedFormats, "" );
2618
2619 if( anchor.x == -1 && anchor.y == -1 )
2620 anchor = cvPoint(kernel->cols/2,kernel->rows/2);
2621
2622 if( kernel->cols*kernel->rows >= dft_filter_size &&
2623 kernel->cols <= src->cols && kernel->rows <= src->rows )
2624 {
2625 if( src->data.ptr == dst->data.ptr )
2626 {
2627 temp = cvCloneMat( src );
2628 src = temp;
2629 }
2630 icvCrossCorr( src, kernel, dst, anchor );
2631 EXIT;
2632 }
2633
2634 if( icvFilter_8u_C1R_p && (src->rows >= ipp_lower_limit || src->cols >= ipp_lower_limit) )
2635 {
2636 CvFilterIPPFunc ipp_func =
2637 type == CV_8UC1 ? (CvFilterIPPFunc)icvFilter_8u_C1R_p :
2638 type == CV_8UC3 ? (CvFilterIPPFunc)icvFilter_8u_C3R_p :
2639 type == CV_8UC4 ? (CvFilterIPPFunc)icvFilter_8u_C4R_p :
2640 type == CV_16SC1 ? (CvFilterIPPFunc)icvFilter_16s_C1R_p :
2641 type == CV_16SC3 ? (CvFilterIPPFunc)icvFilter_16s_C3R_p :
2642 type == CV_16SC4 ? (CvFilterIPPFunc)icvFilter_16s_C4R_p :
2643 type == CV_32FC1 ? (CvFilterIPPFunc)icvFilter_32f_C1R_p :
2644 type == CV_32FC3 ? (CvFilterIPPFunc)icvFilter_32f_C3R_p :
2645 type == CV_32FC4 ? (CvFilterIPPFunc)icvFilter_32f_C4R_p : 0;
2646
2647 if( ipp_func )
2648 {
2649 CvSize el_size = { kernel->cols, kernel->rows };
2650 CvPoint el_anchor;
2651 int stripe_size = 1 << 16; // the optimal value may depend on CPU cache,
2652 // overhead of current IPP code etc.
2653 const uchar* shifted_ptr;
2654 int i, j, y, dy = 0;
2655 int temp_step, dst_step = dst->step ? dst->step : CV_STUB_STEP;
2656
2657 if( (unsigned)anchor.x >= (unsigned)kernel->cols ||
2658 (unsigned)anchor.y >= (unsigned)kernel->rows )
2659 CV_ERROR( CV_StsOutOfRange, "anchor point is out of kernel" );
2660
2661 el_anchor = cvPoint( el_size.width - anchor.x - 1, el_size.height - anchor.y - 1 );
2662
2663 CV_CALL( ipp_kernel = cvCreateMat( kernel->rows, kernel->cols, CV_32FC1 ));
2664 CV_CALL( cvConvert( kernel, ipp_kernel ));
2665
2666 // mirror the kernel around the center
2667 for( i = 0; i < (el_size.height+1)/2; i++ )
2668 {
2669 float* top_row = ipp_kernel->data.fl + el_size.width*i;
2670 float* bottom_row = ipp_kernel->data.fl + el_size.width*(el_size.height - i - 1);
2671
2672 for( j = 0; j < (el_size.width+1)/2; j++ )
2673 {
2674 float a = top_row[j], b = top_row[el_size.width - j - 1];
2675 float c = bottom_row[j], d = bottom_row[el_size.width - j - 1];
2676 top_row[j] = d;
2677 top_row[el_size.width - j - 1] = c;
2678 bottom_row[j] = b;
2679 bottom_row[el_size.width - j - 1] = a;
2680 }
2681 }
2682
2683 CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
2684
2685 shifted_ptr = temp->data.ptr +
2686 anchor.y*temp->step + anchor.x*CV_ELEM_SIZE(type);
2687 temp_step = temp->step ? temp->step : CV_STUB_STEP;
2688
2689 for( y = 0; y < src->rows; y += dy )
2690 {
2691 dy = icvIPPFilterNextStripe( src, temp, y, el_size, anchor );
2692 IPPI_CALL( ipp_func( shifted_ptr, temp_step,
2693 dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
2694 ipp_kernel->data.fl, el_size, el_anchor ));
2695 }
2696 EXIT;
2697 }
2698 }
2699
2700 CV_CALL( filter.init( src->cols, type, type, kernel, anchor ));
2701 CV_CALL( filter.process( src, dst ));
2702
2703 __END__;
2704
2705 cvReleaseMat( &temp );
2706 cvReleaseMat( &ipp_kernel );
2707 }
2708
2709
2710 /* End of file. */
2711