1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "precomp.hpp"
43 #include "opencl_kernels_imgproc.hpp"
44
45 ////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
46
47 namespace cv
48 {
49
50 #ifdef HAVE_OPENCL
51
52 /////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
53
54 enum
55 {
56 SUM_1 = 0, SUM_2 = 1
57 };
58
extractFirstChannel_32F(InputArray _image,OutputArray _result,int cn)59 static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn)
60 {
61 int depth = _image.depth();
62
63 ocl::Device dev = ocl::Device::getDefault();
64 int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
65
66 ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc, format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d",
67 ocl::typeToStr(depth), cn, pxPerWIy));
68 if (k.empty())
69 return false;
70
71 UMat image = _image.getUMat();
72 UMat result = _result.getUMat();
73
74
75 size_t globalsize[2] = {result.cols, (result.rows+pxPerWIy-1)/pxPerWIy};
76 return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false);
77 }
78
sumTemplate(InputArray _src,UMat & result)79 static bool sumTemplate(InputArray _src, UMat & result)
80 {
81 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
82 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
83 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
84
85 int wgs2_aligned = 1;
86 while (wgs2_aligned < (int)wgs)
87 wgs2_aligned <<= 1;
88 wgs2_aligned >>= 1;
89
90 char cvt[40];
91 ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
92 format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
93 ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
94 ocl::convertTypeStr(depth, wdepth, cn, cvt),
95 (int)wgs, wgs2_aligned));
96 if (k.empty())
97 return false;
98
99 UMat src = _src.getUMat();
100 result.create(1, 1, CV_32FC1);
101
102 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
103 resarg = ocl::KernelArg::PtrWriteOnly(result);
104
105 k.args(srcarg, src.cols, (int)src.total(), resarg);
106
107 size_t globalsize = wgs;
108 return k.run(1, &globalsize, &wgs, false);
109 }
110
useNaive(Size size)111 static bool useNaive(Size size)
112 {
113 int dft_size = 18;
114 return size.height < dft_size && size.width < dft_size;
115 }
116
117 struct ConvolveBuf
118 {
119 Size result_size;
120 Size block_size;
121 Size user_block_size;
122 Size dft_size;
123
124 UMat image_spect, templ_spect, result_spect;
125 UMat image_block, templ_block, result_data;
126
127 void create(Size image_size, Size templ_size);
128 };
129
create(Size image_size,Size templ_size)130 void ConvolveBuf::create(Size image_size, Size templ_size)
131 {
132 result_size = Size(image_size.width - templ_size.width + 1,
133 image_size.height - templ_size.height + 1);
134
135 const double blockScale = 4.5;
136 const int minBlockSize = 256;
137
138 block_size.width = cvRound(result_size.width*blockScale);
139 block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
140 block_size.width = std::min( block_size.width, result_size.width );
141 block_size.height = cvRound(templ_size.height*blockScale);
142 block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
143 block_size.height = std::min( block_size.height, result_size.height );
144
145 dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2);
146 dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
147 if( dft_size.width <= 0 || dft_size.height <= 0 )
148 CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
149
150 // recompute block size
151 block_size.width = dft_size.width - templ_size.width + 1;
152 block_size.width = std::min( block_size.width, result_size.width);
153 block_size.height = dft_size.height - templ_size.height + 1;
154 block_size.height = std::min( block_size.height, result_size.height );
155
156 image_block.create(dft_size, CV_32F);
157 templ_block.create(dft_size, CV_32F);
158 result_data.create(dft_size, CV_32F);
159
160 image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
161 templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
162 result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
163
164 // Use maximum result matrix block size for the estimated DFT block size
165 block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
166 block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
167 }
168
convolve_dft(InputArray _image,InputArray _templ,OutputArray _result)169 static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
170 {
171 ConvolveBuf buf;
172 CV_Assert(_image.type() == CV_32F);
173 CV_Assert(_templ.type() == CV_32F);
174
175 buf.create(_image.size(), _templ.size());
176 _result.create(buf.result_size, CV_32F);
177
178 UMat image = _image.getUMat();
179 UMat templ = _templ.getUMat();
180
181 UMat result = _result.getUMat();
182
183 Size& block_size = buf.block_size;
184 Size& dft_size = buf.dft_size;
185
186 UMat& image_block = buf.image_block;
187 UMat& templ_block = buf.templ_block;
188 UMat& result_data = buf.result_data;
189
190 UMat& image_spect = buf.image_spect;
191 UMat& templ_spect = buf.templ_spect;
192 UMat& result_spect = buf.result_spect;
193
194 UMat templ_roi = templ;
195 copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
196 templ_block.cols - templ_roi.cols, BORDER_ISOLATED);
197
198 dft(templ_block, templ_spect, 0, templ.rows);
199
200 // Process all blocks of the result matrix
201 for (int y = 0; y < result.rows; y += block_size.height)
202 {
203 for (int x = 0; x < result.cols; x += block_size.width)
204 {
205 Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
206 std::min(y + dft_size.height, image.rows) - y);
207 Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
208
209 UMat image_roi(image, roi0);
210
211 copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
212 0, image_block.cols - image_roi.cols, BORDER_ISOLATED);
213
214 dft(image_block, image_spect, 0);
215
216 mulSpectrums(image_spect, templ_spect, result_spect, 0, true);
217
218 dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
219
220 Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
221 std::min(y + block_size.height, result.rows) - y);
222
223 Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
224 Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
225
226 UMat result_roi(result, roi1);
227 UMat result_block(result_data, roi2);
228
229 result_block.copyTo(result_roi);
230 }
231 }
232 return true;
233 }
234
convolve_32F(InputArray _image,InputArray _templ,OutputArray _result)235 static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
236 {
237 _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);
238
239 if (_image.channels() == 1)
240 return(convolve_dft(_image, _templ, _result));
241 else
242 {
243 UMat image = _image.getUMat();
244 UMat templ = _templ.getUMat();
245 UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
246 bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
247 if (ok==false)
248 return false;
249 UMat result = _result.getUMat();
250 return (extractFirstChannel_32F(result_, _result, _image.channels()));
251 }
252 }
253
matchTemplateNaive_CCORR(InputArray _image,InputArray _templ,OutputArray _result)254 static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
255 {
256 int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
257 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
258
259 ocl::Device dev = ocl::Device::getDefault();
260 int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
261 int rated_cn = cn;
262 int wtype1 = wtype;
263
264 if (pxPerWIx!=1)
265 {
266 rated_cn = pxPerWIx;
267 type = CV_MAKE_TYPE(depth, rated_cn);
268 wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
269 }
270
271 char cvt[40];
272 char cvt1[40];
273 const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
274 const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);
275
276 ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
277 format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
278 convertToWT, convertToWT1, cn, pxPerWIx));
279 if (k.empty())
280 return false;
281
282 UMat image = _image.getUMat(), templ = _templ.getUMat();
283 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
284 UMat result = _result.getUMat();
285
286 k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
287 ocl::KernelArg::WriteOnly(result));
288
289 size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows};
290 return k.run(2, globalsize, NULL, false);
291 }
292
293
matchTemplate_CCORR(InputArray _image,InputArray _templ,OutputArray _result)294 static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
295 {
296 if (useNaive(_templ.size()))
297 return( matchTemplateNaive_CCORR(_image, _templ, _result));
298 else
299 {
300 if(_image.depth() == CV_8U)
301 {
302 UMat imagef, templf;
303 UMat image = _image.getUMat();
304 UMat templ = _templ.getUMat();
305 image.convertTo(imagef, CV_32F);
306 templ.convertTo(templf, CV_32F);
307 return(convolve_32F(imagef, templf, _result));
308 }
309 else
310 {
311 return(convolve_32F(_image, _templ, _result));
312 }
313 }
314 }
315
matchTemplate_CCORR_NORMED(InputArray _image,InputArray _templ,OutputArray _result)316 static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
317 {
318 matchTemplate(_image, _templ, _result, CV_TM_CCORR);
319
320 int type = _image.type(), cn = CV_MAT_CN(type);
321
322 ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
323 format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
324 if (k.empty())
325 return false;
326
327 UMat image = _image.getUMat(), templ = _templ.getUMat();
328 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
329 UMat result = _result.getUMat();
330
331 UMat image_sums, image_sqsums;
332 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
333
334 UMat templ_sqsum;
335 if (!sumTemplate(templ, templ_sqsum))
336 return false;
337
338 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
339 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
340
341 size_t globalsize[2] = { result.cols, result.rows };
342 return k.run(2, globalsize, NULL, false);
343 }
344
345 ////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
346
matchTemplateNaive_SQDIFF(InputArray _image,InputArray _templ,OutputArray _result)347 static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
348 {
349 int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
350 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
351
352 char cvt[40];
353 ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
354 format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
355 ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
356 if (k.empty())
357 return false;
358
359 UMat image = _image.getUMat(), templ = _templ.getUMat();
360 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
361 UMat result = _result.getUMat();
362
363 k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
364 ocl::KernelArg::WriteOnly(result));
365
366 size_t globalsize[2] = { result.cols, result.rows };
367 return k.run(2, globalsize, NULL, false);
368 }
369
matchTemplate_SQDIFF(InputArray _image,InputArray _templ,OutputArray _result)370 static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
371 {
372 if (useNaive(_templ.size()))
373 return( matchTemplateNaive_SQDIFF(_image, _templ, _result));
374 else
375 {
376 matchTemplate(_image, _templ, _result, CV_TM_CCORR);
377
378 int type = _image.type(), cn = CV_MAT_CN(type);
379
380 ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc,
381 format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
382 if (k.empty())
383 return false;
384
385 UMat image = _image.getUMat(), templ = _templ.getUMat();
386 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
387 UMat result = _result.getUMat();
388
389 UMat image_sums, image_sqsums;
390 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
391
392 UMat templ_sqsum;
393 if (!sumTemplate(_templ, templ_sqsum))
394 return false;
395
396 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
397 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
398
399 size_t globalsize[2] = { result.cols, result.rows };
400
401 return k.run(2, globalsize, NULL, false);
402 }
403 }
404
matchTemplate_SQDIFF_NORMED(InputArray _image,InputArray _templ,OutputArray _result)405 static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
406 {
407 matchTemplate(_image, _templ, _result, CV_TM_CCORR);
408
409 int type = _image.type(), cn = CV_MAT_CN(type);
410
411 ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
412 format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
413 if (k.empty())
414 return false;
415
416 UMat image = _image.getUMat(), templ = _templ.getUMat();
417 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
418 UMat result = _result.getUMat();
419
420 UMat image_sums, image_sqsums;
421 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
422
423 UMat templ_sqsum;
424 if (!sumTemplate(_templ, templ_sqsum))
425 return false;
426
427 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
428 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
429
430 size_t globalsize[2] = { result.cols, result.rows };
431
432 return k.run(2, globalsize, NULL, false);
433 }
434
435 ///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
436
matchTemplate_CCOEFF(InputArray _image,InputArray _templ,OutputArray _result)437 static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
438 {
439 matchTemplate(_image, _templ, _result, CV_TM_CCORR);
440
441 UMat image_sums, temp;
442 integral(_image, image_sums, CV_32F);
443
444 int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
445
446 ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
447 format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
448 if (k.empty())
449 return false;
450
451 UMat templ = _templ.getUMat();
452 UMat result = _result.getUMat();
453
454 if (cn==1)
455 {
456 Scalar templMean = mean(templ);
457 float templ_sum = (float)templMean[0];
458
459 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
460 }
461 else
462 {
463 Vec4f templ_sum = Vec4f::all(0);
464 templ_sum = (Vec4f)mean(templ);
465
466 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); }
467
468 size_t globalsize[2] = { result.cols, result.rows };
469 return k.run(2, globalsize, NULL, false);
470 }
471
matchTemplate_CCOEFF_NORMED(InputArray _image,InputArray _templ,OutputArray _result)472 static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
473 {
474 matchTemplate(_image, _templ, _result, CV_TM_CCORR);
475
476 UMat temp, image_sums, image_sqsums;
477 integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);
478
479 int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
480
481 ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
482 format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
483 if (k.empty())
484 return false;
485
486 UMat templ = _templ.getUMat();
487 Size size = _image.size(), tsize = templ.size();
488 _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
489 UMat result = _result.getUMat();
490
491 float scale = 1.f / tsize.area();
492
493 if (cn == 1)
494 {
495 float templ_sum = (float)sum(templ)[0];
496
497 multiply(templ, templ, temp, 1, CV_32F);
498 float templ_sqsum = (float)sum(temp)[0];
499
500 templ_sqsum -= scale * templ_sum * templ_sum;
501 templ_sum *= scale;
502
503 if (templ_sqsum < DBL_EPSILON)
504 {
505 result = Scalar::all(1);
506 return true;
507 }
508
509 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
510 ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
511 }
512 else
513 {
514 Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
515 templ_sum = sum(templ);
516
517 multiply(templ, templ, temp, 1, CV_32F);
518 templ_sqsum = sum(temp);
519
520 float templ_sqsum_sum = 0;
521 for (int i = 0; i < cn; i ++)
522 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
523
524 templ_sum *= scale;
525
526 if (templ_sqsum_sum < DBL_EPSILON)
527 {
528 result = Scalar::all(1);
529 return true;
530 }
531
532 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
533 ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
534 templ_sum, templ_sqsum_sum); }
535
536 size_t globalsize[2] = { result.cols, result.rows };
537 return k.run(2, globalsize, NULL, false);
538 }
539
540 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
541
ocl_matchTemplate(InputArray _img,InputArray _templ,OutputArray _result,int method)542 static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
543 {
544 int cn = _img.channels();
545
546 if (cn > 4)
547 return false;
548
549 typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);
550
551 static const Caller callers[] =
552 {
553 matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
554 matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
555 };
556 const Caller caller = callers[method];
557
558 return caller(_img, _templ, _result);
559 }
560
561 #endif
562
563 #if defined HAVE_IPP
564
565 typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);
566
ipp_crossCorr(const Mat & src,const Mat & tpl,Mat & dst)567 static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst)
568 {
569 IppStatus status;
570
571 IppiSize srcRoiSize = {src.cols,src.rows};
572 IppiSize tplRoiSize = {tpl.cols,tpl.rows};
573
574 Ipp8u *pBuffer;
575 int bufSize=0;
576
577 int depth = src.depth();
578
579 ippimatchTemplate ippFunc =
580 depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
581 depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;
582
583 if (ippFunc==0)
584 return false;
585
586 IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
587
588 status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
589 if ( status < 0 )
590 return false;
591
592 pBuffer = ippsMalloc_8u( bufSize );
593
594 status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
595
596 ippsFree( pBuffer );
597 return status >= 0;
598 }
599
ipp_sqrDistance(const Mat & src,const Mat & tpl,Mat & dst)600 static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
601 {
602 IppStatus status;
603
604 IppiSize srcRoiSize = {src.cols,src.rows};
605 IppiSize tplRoiSize = {tpl.cols,tpl.rows};
606
607 Ipp8u *pBuffer;
608 int bufSize=0;
609
610 int depth = src.depth();
611
612 ippimatchTemplate ippFunc =
613 depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
614 depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;
615
616 if (ippFunc==0)
617 return false;
618
619 IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
620
621 status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
622 if ( status < 0 )
623 return false;
624
625 pBuffer = ippsMalloc_8u( bufSize );
626
627 status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
628
629 ippsFree( pBuffer );
630 return status >= 0;
631 }
632
633 #endif
634
crossCorr(const Mat & img,const Mat & _templ,Mat & corr,Size corrsize,int ctype,Point anchor,double delta,int borderType)635 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
636 Size corrsize, int ctype,
637 Point anchor, double delta, int borderType )
638 {
639 const double blockScale = 4.5;
640 const int minBlockSize = 256;
641 std::vector<uchar> buf;
642
643 Mat templ = _templ;
644 int depth = img.depth(), cn = img.channels();
645 int tdepth = templ.depth(), tcn = templ.channels();
646 int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
647
648 CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
649
650 if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
651 {
652 _templ.convertTo(templ, std::max(CV_32F, depth));
653 tdepth = templ.depth();
654 }
655
656 CV_Assert( depth == tdepth || tdepth == CV_32F);
657 CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
658 corrsize.width <= img.cols + templ.cols - 1 );
659
660 CV_Assert( ccn == 1 || delta == 0 );
661
662 corr.create(corrsize, ctype);
663
664 int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
665 Size blocksize, dftsize;
666
667 blocksize.width = cvRound(templ.cols*blockScale);
668 blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
669 blocksize.width = std::min( blocksize.width, corr.cols );
670 blocksize.height = cvRound(templ.rows*blockScale);
671 blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
672 blocksize.height = std::min( blocksize.height, corr.rows );
673
674 dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
675 dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
676 if( dftsize.width <= 0 || dftsize.height <= 0 )
677 CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
678
679 // recompute block size
680 blocksize.width = dftsize.width - templ.cols + 1;
681 blocksize.width = MIN( blocksize.width, corr.cols );
682 blocksize.height = dftsize.height - templ.rows + 1;
683 blocksize.height = MIN( blocksize.height, corr.rows );
684
685 Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
686 Mat dftImg( dftsize, maxDepth );
687
688 int i, k, bufSize = 0;
689 if( tcn > 1 && tdepth != maxDepth )
690 bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
691
692 if( cn > 1 && depth != maxDepth )
693 bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
694 (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
695
696 if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
697 bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
698
699 buf.resize(bufSize);
700
701 // compute DFT of each template plane
702 for( k = 0; k < tcn; k++ )
703 {
704 int yofs = k*dftsize.height;
705 Mat src = templ;
706 Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
707 Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
708
709 if( tcn > 1 )
710 {
711 src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
712 int pairs[] = {k, 0};
713 mixChannels(&templ, 1, &src, 1, pairs, 1);
714 }
715
716 if( dst1.data != src.data )
717 src.convertTo(dst1, dst1.depth());
718
719 if( dst.cols > templ.cols )
720 {
721 Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
722 part = Scalar::all(0);
723 }
724 dft(dst, dst, 0, templ.rows);
725 }
726
727 int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
728 int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
729 int tileCount = tileCountX * tileCountY;
730
731 Size wholeSize = img.size();
732 Point roiofs(0,0);
733 Mat img0 = img;
734
735 if( !(borderType & BORDER_ISOLATED) )
736 {
737 img.locateROI(wholeSize, roiofs);
738 img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
739 roiofs.x, wholeSize.width-img.cols-roiofs.x);
740 }
741 borderType |= BORDER_ISOLATED;
742
743 // calculate correlation by blocks
744 for( i = 0; i < tileCount; i++ )
745 {
746 int x = (i%tileCountX)*blocksize.width;
747 int y = (i/tileCountX)*blocksize.height;
748
749 Size bsz(std::min(blocksize.width, corr.cols - x),
750 std::min(blocksize.height, corr.rows - y));
751 Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
752 int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
753 int x1 = std::max(0, x0), y1 = std::max(0, y0);
754 int x2 = std::min(img0.cols, x0 + dsz.width);
755 int y2 = std::min(img0.rows, y0 + dsz.height);
756 Mat src0(img0, Range(y1, y2), Range(x1, x2));
757 Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
758 Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
759 Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
760
761 for( k = 0; k < cn; k++ )
762 {
763 Mat src = src0;
764 dftImg = Scalar::all(0);
765
766 if( cn > 1 )
767 {
768 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
769 int pairs[] = {k, 0};
770 mixChannels(&src0, 1, &src, 1, pairs, 1);
771 }
772
773 if( dst1.data != src.data )
774 src.convertTo(dst1, dst1.depth());
775
776 if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
777 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
778 x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
779
780 dft( dftImg, dftImg, 0, dsz.height );
781 Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
782 dftsize.width, dftsize.height));
783 mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
784 dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
785
786 src = dftImg(Rect(0, 0, bsz.width, bsz.height));
787
788 if( ccn > 1 )
789 {
790 if( cdepth != maxDepth )
791 {
792 Mat plane(bsz, cdepth, &buf[0]);
793 src.convertTo(plane, cdepth, 1, delta);
794 src = plane;
795 }
796 int pairs[] = {0, k};
797 mixChannels(&src, 1, &cdst, 1, pairs, 1);
798 }
799 else
800 {
801 if( k == 0 )
802 src.convertTo(cdst, cdepth, 1, delta);
803 else
804 {
805 if( maxDepth != cdepth )
806 {
807 Mat plane(bsz, cdepth, &buf[0]);
808 src.convertTo(plane, cdepth);
809 src = plane;
810 }
811 add(src, cdst, cdst);
812 }
813 }
814 }
815 }
816 }
817
matchTemplateMask(InputArray _img,InputArray _templ,OutputArray _result,int method,InputArray _mask)818 static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
819 {
820 int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
821 CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
822 CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
823
824 Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();
825 int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
826 int mtype = img.type(), mdepth = CV_MAT_DEPTH(type), mcn = CV_MAT_CN(mtype);
827
828 if (depth == CV_8U)
829 {
830 depth = CV_32F;
831 type = CV_MAKETYPE(CV_32F, cn);
832 img.convertTo(img, type, 1.0 / 255);
833 }
834
835 if (tdepth == CV_8U)
836 {
837 tdepth = CV_32F;
838 ttype = CV_MAKETYPE(CV_32F, tcn);
839 templ.convertTo(templ, ttype, 1.0 / 255);
840 }
841
842 if (mdepth == CV_8U)
843 {
844 mdepth = CV_32F;
845 mtype = CV_MAKETYPE(CV_32F, mcn);
846 compare(mask, Scalar::all(0), mask, CMP_NE);
847 mask.convertTo(mask, mtype, 1.0 / 255);
848 }
849
850 Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
851 _result.create(corrSize, CV_32F);
852 Mat result = _result.getMat();
853
854 Mat img2 = img.mul(img);
855 Mat mask2 = mask.mul(mask);
856 Mat mask_templ = templ.mul(mask);
857 Scalar templMean, templSdv;
858
859 double templSum2 = 0;
860 meanStdDev( mask_templ, templMean, templSdv );
861
862 templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
863 templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
864 templSum2 *= ((double)templ.rows * templ.cols);
865
866 if (method == CV_TM_SQDIFF)
867 {
868 Mat mask2_templ = templ.mul(mask2);
869
870 Mat corr(corrSize, CV_32F);
871 crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
872 crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );
873
874 result -= corr * 2;
875 result += templSum2;
876 }
877 else if (method == CV_TM_CCORR_NORMED)
878 {
879 if (templSum2 < DBL_EPSILON)
880 {
881 result = Scalar::all(1);
882 return;
883 }
884
885 Mat corr(corrSize, CV_32F);
886 crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
887 crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );
888
889 sqrt(corr, corr);
890 result = result.mul(1/corr);
891 result /= std::sqrt(templSum2);
892 }
893 else
894 CV_Error(Error::StsNotImplemented, "");
895 }
896 }
897
898 ////////////////////////////////////////////////////////////////////////////////////////////////////////
899
matchTemplate(InputArray _img,InputArray _templ,OutputArray _result,int method,InputArray _mask)900 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
901 {
902 if (!_mask.empty())
903 {
904 cv::matchTemplateMask(_img, _templ, _result, method, _mask);
905 return;
906 }
907
908 int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
909 CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
910 CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
911
912 bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width;
913 if (needswap)
914 {
915 CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
916 }
917
918 CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
919 (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))
920
921 int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
922 method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
923 bool isNormed = method == CV_TM_CCORR_NORMED ||
924 method == CV_TM_SQDIFF_NORMED ||
925 method == CV_TM_CCOEFF_NORMED;
926
927 Mat img = _img.getMat(), templ = _templ.getMat();
928 if (needswap)
929 std::swap(img, templ);
930
931 Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
932 _result.create(corrSize, CV_32F);
933 Mat result = _result.getMat();
934
935 #ifdef HAVE_TEGRA_OPTIMIZATION
936 if (tegra::useTegra() && tegra::matchTemplate(img, templ, result, method))
937 return;
938 #endif
939
940 #if defined HAVE_IPP
941 bool useIppMT = false;
942 CV_IPP_CHECK()
943 {
944 useIppMT = (templ.rows < img.rows/2 && templ.cols < img.cols/2);
945
946 if (method == CV_TM_SQDIFF && cn == 1 && useIppMT)
947 {
948 if (ipp_sqrDistance(img, templ, result))
949 {
950 CV_IMPL_ADD(CV_IMPL_IPP);
951 return;
952 }
953 setIppErrorStatus();
954 }
955 }
956 #endif
957
958 #if defined HAVE_IPP
959 if (cn == 1 && useIppMT)
960 {
961 if (!ipp_crossCorr(img, templ, result))
962 {
963 setIppErrorStatus();
964 crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
965 }
966 else
967 {
968 CV_IMPL_ADD(CV_IMPL_IPP);
969 }
970 }
971 else
972 #endif
973 crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
974
975 if( method == CV_TM_CCORR )
976 return;
977
978 double invArea = 1./((double)templ.rows * templ.cols);
979
980 Mat sum, sqsum;
981 Scalar templMean, templSdv;
982 double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
983 double templNorm = 0, templSum2 = 0;
984
985 if( method == CV_TM_CCOEFF )
986 {
987 integral(img, sum, CV_64F);
988 templMean = mean(templ);
989 }
990 else
991 {
992 integral(img, sum, sqsum, CV_64F);
993 meanStdDev( templ, templMean, templSdv );
994
995 templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
996
997 if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
998 {
999 result = Scalar::all(1);
1000 return;
1001 }
1002
1003 templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
1004
1005 if( numType != 1 )
1006 {
1007 templMean = Scalar::all(0);
1008 templNorm = templSum2;
1009 }
1010
1011 templSum2 /= invArea;
1012 templNorm = std::sqrt(templNorm);
1013 templNorm /= std::sqrt(invArea); // care of accuracy here
1014
1015 q0 = (double*)sqsum.data;
1016 q1 = q0 + templ.cols*cn;
1017 q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
1018 q3 = q2 + templ.cols*cn;
1019 }
1020
1021 double* p0 = (double*)sum.data;
1022 double* p1 = p0 + templ.cols*cn;
1023 double* p2 = (double*)(sum.data + templ.rows*sum.step);
1024 double* p3 = p2 + templ.cols*cn;
1025
1026 int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
1027 int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
1028
1029 int i, j, k;
1030
1031 for( i = 0; i < result.rows; i++ )
1032 {
1033 float* rrow = result.ptr<float>(i);
1034 int idx = i * sumstep;
1035 int idx2 = i * sqstep;
1036
1037 for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
1038 {
1039 double num = rrow[j], t;
1040 double wndMean2 = 0, wndSum2 = 0;
1041
1042 if( numType == 1 )
1043 {
1044 for( k = 0; k < cn; k++ )
1045 {
1046 t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
1047 wndMean2 += t*t;
1048 num -= t*templMean[k];
1049 }
1050
1051 wndMean2 *= invArea;
1052 }
1053
1054 if( isNormed || numType == 2 )
1055 {
1056 for( k = 0; k < cn; k++ )
1057 {
1058 t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
1059 wndSum2 += t;
1060 }
1061
1062 if( numType == 2 )
1063 {
1064 num = wndSum2 - 2*num + templSum2;
1065 num = MAX(num, 0.);
1066 }
1067 }
1068
1069 if( isNormed )
1070 {
1071 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
1072 if( fabs(num) < t )
1073 num /= t;
1074 else if( fabs(num) < t*1.125 )
1075 num = num > 0 ? 1 : -1;
1076 else
1077 num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
1078 }
1079
1080 rrow[j] = (float)num;
1081 }
1082 }
1083 }
1084
1085
1086 CV_IMPL void
cvMatchTemplate(const CvArr * _img,const CvArr * _templ,CvArr * _result,int method)1087 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
1088 {
1089 cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
1090 result = cv::cvarrToMat(_result);
1091 CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
1092 std::abs(img.rows - templ.rows) + 1) &&
1093 result.type() == CV_32F );
1094 matchTemplate(img, templ, result, method);
1095 }
1096
1097 /* End of file. */
1098