1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #include "precomp.hpp"
45 #include "opencl_kernels_imgproc.hpp"
46
47 namespace cv
48 {
49
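// Generic (scalar fallback) SIMD functors. operator() returns the number of elements
// already processed with vector instructions; these default versions process nothing
// (return 0), so the scalar loops in acc_/accSqr_/accProd_/accW_ below handle the
// whole row. Platform-specific specializations (e.g. the NEON ones further down)
// override them.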
50 template <typename T, typename AT>
51 struct Acc_SIMD
52 {
53 int operator() (const T *, AT *, const uchar *, int, int) const
54 {
55 return 0;
56 }
57 };
58
59 template <typename T, typename AT>
60 struct AccSqr_SIMD
61 {
62 int operator() (const T *, AT *, const uchar *, int, int) const
63 {
64 return 0;
65 }
66 };
67
68 template <typename T, typename AT>
69 struct AccProd_SIMD
70 {
71 int operator() (const T *, const T *, AT *, const uchar *, int, int) const
72 {
73 return 0;
74 }
75 };
76
77 template <typename T, typename AT>
78 struct AccW_SIMD
79 {
80 int operator() (const T *, AT *, const uchar *, int, int, AT) const
81 {
82 return 0;
83 }
84 };
85
86 #if CV_NEON
87
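// NEON specializations. The unmasked branches flatten the row (len *= cn) and process
// full vectors; the masked cn == 1 branches (where present) build an all-ones/all-zeros
// byte mask via veor(0xFF, vceq(mask, 0)) and AND it with the source, so masked-out
// pixels contribute nothing to the accumulator.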
88 template <>
89 struct Acc_SIMD<uchar, float>
90 {
91 int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
92 {
93 int x = 0;
94
95 if (!mask)
96 {
97 len *= cn;
98 for ( ; x <= len - 16; x += 16)
99 {
100 uint8x16_t v_src = vld1q_u8(src + x);
101 uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));
102
103 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
104 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
105 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
106 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
107 }
108 }
109 else if (cn == 1)
110 {
111 uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);
112
113 for ( ; x <= len - 16; x += 16)
114 {
115 uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
116 uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));
117
118 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
119 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
120 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
121 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
122 }
123 }
124
125 return x;
126 }
127 };
128
129 template <>
130 struct Acc_SIMD<ushort, float>
131 {
132 int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
133 {
134 int x = 0;
135
136 if (!mask)
137 {
138 len *= cn;
139 for ( ; x <= len - 8; x += 8)
140 {
141 uint16x8_t v_src = vld1q_u16(src + x);
142 uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));
143
144 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
145 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
146 }
147 }
148
149 return x;
150 }
151 };
152
153 template <>
154 struct Acc_SIMD<float, float>
155 {
156 int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
157 {
158 int x = 0;
159
160 if (!mask)
161 {
162 len *= cn;
163 for ( ; x <= len - 8; x += 8)
164 {
165 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vld1q_f32(src + x)));
166 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src + x + 4)));
167 }
168 }
169
170 return x;
171 }
172 };
173
174 template <>
175 struct AccSqr_SIMD<uchar, float>
176 {
177 int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
178 {
179 int x = 0;
180
181 if (!mask)
182 {
183 len *= cn;
184 for ( ; x <= len - 16; x += 16)
185 {
186 uint8x16_t v_src = vld1q_u8(src + x);
187 uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
188 uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);
189
190 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
191 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
192 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
193 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
194 }
195 }
196 else if (cn == 1)
197 {
198 uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);
199
200 for ( ; x <= len - 16; x += 16)
201 {
202 uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
203 uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
204 uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);
205
206 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
207 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
208 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
209 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
210 }
211 }
212
213 return x;
214 }
215 };
216
217 template <>
218 struct AccSqr_SIMD<ushort, float>
219 {
220 int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
221 {
222 int x = 0;
223
224 if (!mask)
225 {
226 len *= cn;
227 for ( ; x <= len - 8; x += 8)
228 {
229 uint16x8_t v_src = vld1q_u16(src + x);
230 uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
231 uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);
232
233 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
234 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
235 }
236 }
237 else if (cn == 1)
238 {
239 uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);
240
241 for ( ; x <= len - 8; x += 8)
242 {
243 uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
244 uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
245 uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
246 v_src = vandq_u16(vld1q_u16(src + x), v_mask);
247
248 uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
249 uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);
250
251 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
252 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
253 }
254 }
255
256 return x;
257 }
258 };
259
260 template <>
261 struct AccSqr_SIMD<float, float>
262 {
263 int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
264 {
265 int x = 0;
266
267 if (!mask)
268 {
269 len *= cn;
270 for ( ; x <= len - 8; x += 8)
271 {
272 float32x4_t v_src = vld1q_f32(src + x);
273 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), v_src, v_src));
274
275 v_src = vld1q_f32(src + x + 4);
276 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), v_src, v_src));
277 }
278 }
279
280 return x;
281 }
282 };
283
284 template <>
285 struct AccProd_SIMD<uchar, float>
286 {
287 int operator() (const uchar * src1, const uchar * src2, float * dst, const uchar * mask, int len, int cn) const
288 {
289 int x = 0;
290
291 if (!mask)
292 {
293 len *= cn;
294 for ( ; x <= len - 16; x += 16)
295 {
296 uint8x16_t v_1src = vld1q_u8(src1 + x), v_2src = vld1q_u8(src2 + x);
297 uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
298 v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));
299
300 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
301 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
302 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
303 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
304 }
305 }
306 else if (cn == 1)
307 {
308 uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);
309
310 for ( ; x <= len - 16; x += 16)
311 {
312 uint8x16_t v_mask = veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0));
313 uint8x16_t v_1src = vandq_u8(vld1q_u8(src1 + x), v_mask), v_2src = vandq_u8(vld1q_u8(src2 + x), v_mask);
314 uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
315 v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));
316
317 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
318 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
319 vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
320 vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
321 }
322 }
323
324 return x;
325 }
326 };
327
328 template <>
329 struct AccProd_SIMD<ushort, float>
330 {
331 int operator() (const ushort * src1, const ushort * src2, float * dst, const uchar * mask, int len, int cn) const
332 {
333 int x = 0;
334
335 if (!mask)
336 {
337 len *= cn;
338 for ( ; x <= len - 8; x += 8)
339 {
340 uint16x8_t v_1src = vld1q_u16(src1 + x), v_2src = vld1q_u16(src2 + x);
341 uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
342 v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));
343
344 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
345 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
346 }
347 }
348 else if (cn == 1)
349 {
350 uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);
351
352 for ( ; x <= len - 8; x += 8)
353 {
354 uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
355 uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
356 uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
357 v_1src = vandq_u16(vld1q_u16(src1 + x), v_mask),
358 v_2src = vandq_u16(vld1q_u16(src2 + x), v_mask);
359
360 uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
361 v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));
362
363 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
364 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
365 }
366 }
367
368 return x;
369 }
370 };
371
372 template <>
373 struct AccProd_SIMD<float, float>
374 {
375 int operator() (const float * src1, const float * src2, float * dst, const uchar * mask, int len, int cn) const
376 {
377 int x = 0;
378
379 if (!mask)
380 {
381 len *= cn;
382 for ( ; x <= len - 8; x += 8)
383 {
384 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), vld1q_f32(src1 + x), vld1q_f32(src2 + x)));
385 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src1 + x + 4), vld1q_f32(src2 + x + 4)));
386 }
387 }
388
389 return x;
390 }
391 };
392
393 template <>
394 struct AccW_SIMD<uchar, float>
395 {
396 int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
397 {
398 int x = 0;
399 float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);
400
401 if (!mask)
402 {
403 len *= cn;
404 for ( ; x <= len - 16; x += 16)
405 {
406 uint8x16_t v_src = vld1q_u8(src + x);
407 uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));
408
409 vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta),
410 vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0))), v_alpha));
411 vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta),
412 vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0))), v_alpha));
413 vst1q_f32(dst + x + 8, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 8), v_beta),
414 vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1))), v_alpha));
415 vst1q_f32(dst + x + 12, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 12), v_beta),
416 vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1))), v_alpha));
417 }
418 }
419
420 return x;
421 }
422 };
423
424 template <>
425 struct AccW_SIMD<ushort, float>
426 {
427 int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
428 {
429 int x = 0;
430 float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);
431
432 if (!mask)
433 {
434 len *= cn;
435 for ( ; x <= len - 8; x += 8)
436 {
437 uint16x8_t v_src = vld1q_u16(src + x);
438 uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));
439
440 vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vcvtq_f32_u32(v_src0), v_alpha));
441 vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vcvtq_f32_u32(v_src1), v_alpha));
442 }
443 }
444
445 return x;
446 }
447 };
448
449 template <>
450 struct AccW_SIMD<float, float>
451 {
452 int operator() (const float * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
453 {
454 int x = 0;
455 float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);
456
457 if (!mask)
458 {
459 len *= cn;
460 for ( ; x <= len - 8; x += 8)
461 {
462 vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vld1q_f32(src + x), v_alpha));
463 vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vld1q_f32(src + x + 4), v_alpha));
464 }
465 }
466
467 return x;
468 }
469 };
470
471 #endif
472
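// Scalar accumulation kernels. The index i starts at the value returned by the SIMD
// functor, so only the remaining tail (or the whole row when no SIMD path applies) is
// processed here, with dedicated branches for mask == NULL, cn == 1, cn == 3 and the
// generic multi-channel case.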
473 template<typename T, typename AT> void
474 acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
475 {
476 int i = Acc_SIMD<T, AT>()(src, dst, mask, len, cn);
477
478 if( !mask )
479 {
480 len *= cn;
481 #if CV_ENABLE_UNROLLED
482 for( ; i <= len - 4; i += 4 )
483 {
484 AT t0, t1;
485 t0 = src[i] + dst[i];
486 t1 = src[i+1] + dst[i+1];
487 dst[i] = t0; dst[i+1] = t1;
488
489 t0 = src[i+2] + dst[i+2];
490 t1 = src[i+3] + dst[i+3];
491 dst[i+2] = t0; dst[i+3] = t1;
492 }
493 #endif
494 for( ; i < len; i++ )
495 dst[i] += src[i];
496 }
497 else if( cn == 1 )
498 {
499 for( ; i < len; i++ )
500 {
501 if( mask[i] )
502 dst[i] += src[i];
503 }
504 }
505 else if( cn == 3 )
506 {
507 for( ; i < len; i++, src += 3, dst += 3 )
508 {
509 if( mask[i] )
510 {
511 AT t0 = src[0] + dst[0];
512 AT t1 = src[1] + dst[1];
513 AT t2 = src[2] + dst[2];
514
515 dst[0] = t0; dst[1] = t1; dst[2] = t2;
516 }
517 }
518 }
519 else
520 {
521 for( ; i < len; i++, src += cn, dst += cn )
522 if( mask[i] )
523 {
524 for( int k = 0; k < cn; k++ )
525 dst[k] += src[k];
526 }
527 }
528 }
529
530
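// Same structure as acc_, but accumulates the square: dst[i] += (AT)src[i]*src[i].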
531 template<typename T, typename AT> void
532 accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
533 {
534 int i = AccSqr_SIMD<T, AT>()(src, dst, mask, len, cn);
535
536 if( !mask )
537 {
538 len *= cn;
539 #if CV_ENABLE_UNROLLED
540 for( ; i <= len - 4; i += 4 )
541 {
542 AT t0, t1;
543 t0 = (AT)src[i]*src[i] + dst[i];
544 t1 = (AT)src[i+1]*src[i+1] + dst[i+1];
545 dst[i] = t0; dst[i+1] = t1;
546
547 t0 = (AT)src[i+2]*src[i+2] + dst[i+2];
548 t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
549 dst[i+2] = t0; dst[i+3] = t1;
550 }
551 #endif
552 for( ; i < len; i++ )
553 dst[i] += (AT)src[i]*src[i];
554 }
555 else if( cn == 1 )
556 {
557 for( ; i < len; i++ )
558 {
559 if( mask[i] )
560 dst[i] += (AT)src[i]*src[i];
561 }
562 }
563 else if( cn == 3 )
564 {
565 for( ; i < len; i++, src += 3, dst += 3 )
566 {
567 if( mask[i] )
568 {
569 AT t0 = (AT)src[0]*src[0] + dst[0];
570 AT t1 = (AT)src[1]*src[1] + dst[1];
571 AT t2 = (AT)src[2]*src[2] + dst[2];
572
573 dst[0] = t0; dst[1] = t1; dst[2] = t2;
574 }
575 }
576 }
577 else
578 {
579 for( ; i < len; i++, src += cn, dst += cn )
580 if( mask[i] )
581 {
582 for( int k = 0; k < cn; k++ )
583 dst[k] += (AT)src[k]*src[k];
584 }
585 }
586 }
587
588
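// Accumulates the element-wise product of two sources: dst[i] += (AT)src1[i]*src2[i].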
589 template<typename T, typename AT> void
590 accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int cn )
591 {
592 int i = AccProd_SIMD<T, AT>()(src1, src2, dst, mask, len, cn);
593
594 if( !mask )
595 {
596 len *= cn;
597 #if CV_ENABLE_UNROLLED
598 for( ; i <= len - 4; i += 4 )
599 {
600 AT t0, t1;
601 t0 = (AT)src1[i]*src2[i] + dst[i];
602 t1 = (AT)src1[i+1]*src2[i+1] + dst[i+1];
603 dst[i] = t0; dst[i+1] = t1;
604
605 t0 = (AT)src1[i+2]*src2[i+2] + dst[i+2];
606 t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
607 dst[i+2] = t0; dst[i+3] = t1;
608 }
609 #endif
610 for( ; i < len; i++ )
611 dst[i] += (AT)src1[i]*src2[i];
612 }
613 else if( cn == 1 )
614 {
615 for( ; i < len; i++ )
616 {
617 if( mask[i] )
618 dst[i] += (AT)src1[i]*src2[i];
619 }
620 }
621 else if( cn == 3 )
622 {
623 for( ; i < len; i++, src1 += 3, src2 += 3, dst += 3 )
624 {
625 if( mask[i] )
626 {
627 AT t0 = (AT)src1[0]*src2[0] + dst[0];
628 AT t1 = (AT)src1[1]*src2[1] + dst[1];
629 AT t2 = (AT)src1[2]*src2[2] + dst[2];
630
631 dst[0] = t0; dst[1] = t1; dst[2] = t2;
632 }
633 }
634 }
635 else
636 {
637 for( ; i < len; i++, src1 += cn, src2 += cn, dst += cn )
638 if( mask[i] )
639 {
640 for( int k = 0; k < cn; k++ )
641 dst[k] += (AT)src1[k]*src2[k];
642 }
643 }
644 }
645
646
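// Running weighted average: dst[i] = src[i]*alpha + dst[i]*(1 - alpha), applied only
// where the mask is non-zero when a mask is given.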
647 template<typename T, typename AT> void
648 accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
649 {
650 AT a = (AT)alpha, b = 1 - a;
651 int i = AccW_SIMD<T, AT>()(src, dst, mask, len, cn, a);
652
653 if( !mask )
654 {
655 len *= cn;
656 #if CV_ENABLE_UNROLLED
657 for( ; i <= len - 4; i += 4 )
658 {
659 AT t0, t1;
660 t0 = src[i]*a + dst[i]*b;
661 t1 = src[i+1]*a + dst[i+1]*b;
662 dst[i] = t0; dst[i+1] = t1;
663
664 t0 = src[i+2]*a + dst[i+2]*b;
665 t1 = src[i+3]*a + dst[i+3]*b;
666 dst[i+2] = t0; dst[i+3] = t1;
667 }
668 #endif
669 for( ; i < len; i++ )
670 dst[i] = src[i]*a + dst[i]*b;
671 }
672 else if( cn == 1 )
673 {
674 for( ; i < len; i++ )
675 {
676 if( mask[i] )
677 dst[i] = src[i]*a + dst[i]*b;
678 }
679 }
680 else if( cn == 3 )
681 {
682 for( ; i < len; i++, src += 3, dst += 3 )
683 {
684 if( mask[i] )
685 {
686 AT t0 = src[0]*a + dst[0]*b;
687 AT t1 = src[1]*a + dst[1]*b;
688 AT t2 = src[2]*a + dst[2]*b;
689
690 dst[0] = t0; dst[1] = t1; dst[2] = t2;
691 }
692 }
693 }
694 else
695 {
696 for( ; i < len; i++, src += cn, dst += cn )
697 if( mask[i] )
698 {
699 for( int k = 0; k < cn; k++ )
700 dst[k] = src[k]*a + dst[k]*b;
701 }
702 }
703 }
704
705
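// Instantiates non-template wrappers (acc_8u32f, accSqr_8u32f, ...) for every supported
// (source depth, accumulator depth) combination, so they can be stored in the
// function-pointer tables below.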
706 #define DEF_ACC_FUNCS(suffix, type, acctype) \
707 static void acc_##suffix(const type* src, acctype* dst, \
708 const uchar* mask, int len, int cn) \
709 { acc_(src, dst, mask, len, cn); } \
710 \
711 static void accSqr_##suffix(const type* src, acctype* dst, \
712 const uchar* mask, int len, int cn) \
713 { accSqr_(src, dst, mask, len, cn); } \
714 \
715 static void accProd_##suffix(const type* src1, const type* src2, \
716 acctype* dst, const uchar* mask, int len, int cn) \
717 { accProd_(src1, src2, dst, mask, len, cn); } \
718 \
719 static void accW_##suffix(const type* src, acctype* dst, \
720 const uchar* mask, int len, int cn, double alpha) \
721 { accW_(src, dst, mask, len, cn, alpha); }
722
723
724 DEF_ACC_FUNCS(8u32f, uchar, float)
725 DEF_ACC_FUNCS(8u64f, uchar, double)
726 DEF_ACC_FUNCS(16u32f, ushort, float)
727 DEF_ACC_FUNCS(16u64f, ushort, double)
728 DEF_ACC_FUNCS(32f, float, float)
729 DEF_ACC_FUNCS(32f64f, float, double)
730 DEF_ACC_FUNCS(64f, double, double)
731
732
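// Type-erased dispatch tables; getAccTabIdx() maps a (sdepth, ddepth) pair to an index here.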
733 typedef void (*AccFunc)(const uchar*, uchar*, const uchar*, int, int);
734 typedef void (*AccProdFunc)(const uchar*, const uchar*, uchar*, const uchar*, int, int);
735 typedef void (*AccWFunc)(const uchar*, uchar*, const uchar*, int, int, double);
736
737 static AccFunc accTab[] =
738 {
739 (AccFunc)acc_8u32f, (AccFunc)acc_8u64f,
740 (AccFunc)acc_16u32f, (AccFunc)acc_16u64f,
741 (AccFunc)acc_32f, (AccFunc)acc_32f64f,
742 (AccFunc)acc_64f
743 };
744
745 static AccFunc accSqrTab[] =
746 {
747 (AccFunc)accSqr_8u32f, (AccFunc)accSqr_8u64f,
748 (AccFunc)accSqr_16u32f, (AccFunc)accSqr_16u64f,
749 (AccFunc)accSqr_32f, (AccFunc)accSqr_32f64f,
750 (AccFunc)accSqr_64f
751 };
752
753 static AccProdFunc accProdTab[] =
754 {
755 (AccProdFunc)accProd_8u32f, (AccProdFunc)accProd_8u64f,
756 (AccProdFunc)accProd_16u32f, (AccProdFunc)accProd_16u64f,
757 (AccProdFunc)accProd_32f, (AccProdFunc)accProd_32f64f,
758 (AccProdFunc)accProd_64f
759 };
760
761 static AccWFunc accWTab[] =
762 {
763 (AccWFunc)accW_8u32f, (AccWFunc)accW_8u64f,
764 (AccWFunc)accW_16u32f, (AccWFunc)accW_16u64f,
765 (AccWFunc)accW_32f, (AccWFunc)accW_32f64f,
766 (AccWFunc)accW_64f
767 };
768
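// Returns the dispatch-table index for a (source depth, accumulator depth) pair,
// or -1 if the combination is unsupported.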
769 inline int getAccTabIdx(int sdepth, int ddepth)
770 {
771 return sdepth == CV_8U && ddepth == CV_32F ? 0 :
772 sdepth == CV_8U && ddepth == CV_64F ? 1 :
773 sdepth == CV_16U && ddepth == CV_32F ? 2 :
774 sdepth == CV_16U && ddepth == CV_64F ? 3 :
775 sdepth == CV_32F && ddepth == CV_32F ? 4 :
776 sdepth == CV_32F && ddepth == CV_64F ? 5 :
777 sdepth == CV_64F && ddepth == CV_64F ? 6 : -1;
778 }
779
780 #ifdef HAVE_OPENCL
781
782 enum
783 {
784 ACCUMULATE = 0,
785 ACCUMULATE_SQUARE = 1,
786 ACCUMULATE_PRODUCT = 2,
787 ACCUMULATE_WEIGHTED = 3
788 };
789
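// OpenCL path shared by the four accumulate* functions: builds the "accumulate" kernel
// from opencl_kernels_imgproc with op- and type-specific defines and runs it on the
// UMat data. Returns false (callers then fall back to the CPU path) when the kernel
// cannot be created or double precision is required but not supported by the device.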
790 static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha,
791 InputArray _mask, int op_type )
792 {
793 CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE ||
794 op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);
795
796 const ocl::Device & dev = ocl::Device::getDefault();
797 bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0;
798 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
799 int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
800
801 if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
802 return false;
803
804 const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT",
805 "ACCUMULATE_WEIGHTED" };
806
807 char cvt[40];
808 ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc,
809 format("-D %s%s -D srcT1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=%s",
810 opMap[op_type], haveMask ? " -D HAVE_MASK" : "",
811 ocl::typeToStr(sdepth), kercn, ocl::typeToStr(ddepth),
812 doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI,
813 ocl::convertTypeStr(sdepth, ddepth, 1, cvt)));
814 if (k.empty())
815 return false;
816
817 UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat();
818
819 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
820 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
821 dstarg = ocl::KernelArg::ReadWrite(dst, cn, kercn),
822 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
823
824 int argidx = k.set(0, srcarg);
825 if (op_type == ACCUMULATE_PRODUCT)
826 argidx = k.set(argidx, src2arg);
827 argidx = k.set(argidx, dstarg);
828 if (op_type == ACCUMULATE_WEIGHTED)
829 {
830 if (ddepth == CV_32F)
831 argidx = k.set(argidx, (float)alpha);
832 else
833 argidx = k.set(argidx, alpha);
834 }
835 if (haveMask)
836 k.set(argidx, maskarg);
837
838 size_t globalsize[2] = { src.cols * cn / kercn, (src.rows + rowsPerWI - 1) / rowsPerWI };
839 return k.run(2, globalsize, NULL, false);
840 }
841
842 #endif
843
844 }
845
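// dst += src, restricted to pixels where mask != 0 when a mask is supplied; dst must be
// a CV_32F or CV_64F accumulator with the same size and channel count as src.
// Typical use, e.g. a running sum of frames (hypothetical variable names):
//     Mat acc(frame.size(), CV_32FC3, Scalar::all(0));
//     accumulate(frame, acc);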
846 void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
847 {
848 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
849 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
850
851 CV_Assert( _src.sameSize(_dst) && dcn == scn );
852 CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
853
854 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
855 ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE))
856
857 Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
858
859 #if defined HAVE_IPP
860 CV_IPP_CHECK()
861 {
862 if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
863 {
864 typedef IppStatus (CV_STDCALL * ippiAdd)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
865 typedef IppStatus (CV_STDCALL * ippiAddMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
866 int srcDstStep, IppiSize roiSize);
867 ippiAdd ippFunc = 0;
868 ippiAddMask ippFuncMask = 0;
869
870 if (mask.empty())
871 {
872 CV_SUPPRESS_DEPRECATED_START
873 ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAdd)ippiAdd_8u32f_C1IR :
874 sdepth == CV_16U && ddepth == CV_32F ? (ippiAdd)ippiAdd_16u32f_C1IR :
875 sdepth == CV_32F && ddepth == CV_32F ? (ippiAdd)ippiAdd_32f_C1IR : 0;
876 CV_SUPPRESS_DEPRECATED_END
877 }
878 else if (scn == 1)
879 {
880 ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_8u32f_C1IMR :
881 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_16u32f_C1IMR :
882 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddMask)ippiAdd_32f_C1IMR : 0;
883 }
884
885 if (ippFunc || ippFuncMask)
886 {
887 IppStatus status = ippStsNoErr;
888
889 Size size = src.size();
890 int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
891 if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
892 {
893 srcstep = static_cast<int>(src.total() * src.elemSize());
894 dststep = static_cast<int>(dst.total() * dst.elemSize());
895 maskstep = static_cast<int>(mask.total() * mask.elemSize());
896 size.width = static_cast<int>(src.total());
897 size.height = 1;
898 }
899 size.width *= scn;
900
901 if (mask.empty())
902 status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
903 else
904 status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
905 dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
906
907 if (status >= 0)
908 {
909 CV_IMPL_ADD(CV_IMPL_IPP);
910 return;
911 }
912 setIppErrorStatus();
913 }
914 }
915 }
916 #endif
917
918 int fidx = getAccTabIdx(sdepth, ddepth);
919 AccFunc func = fidx >= 0 ? accTab[fidx] : 0;
920 CV_Assert( func != 0 );
921
922 const Mat* arrays[] = {&src, &dst, &mask, 0};
923 uchar* ptrs[3];
924 NAryMatIterator it(arrays, ptrs);
925 int len = (int)it.size;
926
927 for( size_t i = 0; i < it.nplanes; i++, ++it )
928 func(ptrs[0], ptrs[1], ptrs[2], len, scn);
929 }
930
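// dst += src*src (element-wise); together with accumulate() this can be used, e.g.,
// to collect the first and second moments needed for a per-pixel mean/variance estimate.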
931 void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _mask )
932 {
933 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
934 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
935
936 CV_Assert( _src.sameSize(_dst) && dcn == scn );
937 CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
938
939 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
940 ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE_SQUARE))
941
942 Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
943
944 #if defined(HAVE_IPP)
945 CV_IPP_CHECK()
946 {
947 if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
948 {
949 typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
950 typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
951 int srcDstStep, IppiSize roiSize);
952 ippiAddSquare ippFunc = 0;
953 ippiAddSquareMask ippFuncMask = 0;
954
955 if (mask.empty())
956 {
957 ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
958 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
959 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
960 }
961 else if (scn == 1)
962 {
963 ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
964 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
965 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
966 }
967
968 if (ippFunc || ippFuncMask)
969 {
970 IppStatus status = ippStsNoErr;
971
972 Size size = src.size();
973 int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
974 if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
975 {
976 srcstep = static_cast<int>(src.total() * src.elemSize());
977 dststep = static_cast<int>(dst.total() * dst.elemSize());
978 maskstep = static_cast<int>(mask.total() * mask.elemSize());
979 size.width = static_cast<int>(src.total());
980 size.height = 1;
981 }
982 size.width *= scn;
983
984 if (mask.empty())
985 status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
986 else
987 status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
988 dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
989
990 if (status >= 0)
991 {
992 CV_IMPL_ADD(CV_IMPL_IPP);
993 return;
994 }
995 setIppErrorStatus();
996 }
997 }
998 }
999 #endif
1000
1001 int fidx = getAccTabIdx(sdepth, ddepth);
1002 AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
1003 CV_Assert( func != 0 );
1004
1005 const Mat* arrays[] = {&src, &dst, &mask, 0};
1006 uchar* ptrs[3];
1007 NAryMatIterator it(arrays, ptrs);
1008 int len = (int)it.size;
1009
1010 for( size_t i = 0; i < it.nplanes; i++, ++it )
1011 func(ptrs[0], ptrs[1], ptrs[2], len, scn);
1012 }
1013
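// dst += src1*src2 (element-wise); src1 and src2 must have the same size and type.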
1014 void cv::accumulateProduct( InputArray _src1, InputArray _src2,
1015 InputOutputArray _dst, InputArray _mask )
1016 {
1017 int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
1018 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
1019
1020 CV_Assert( _src1.sameSize(_src2) && stype == _src2.type() );
1021 CV_Assert( _src1.sameSize(_dst) && dcn == scn );
1022 CV_Assert( _mask.empty() || (_src1.sameSize(_mask) && _mask.type() == CV_8U) );
1023
1024 CV_OCL_RUN(_src1.dims() <= 2 && _dst.isUMat(),
1025 ocl_accumulate(_src1, _src2, _dst, 0.0, _mask, ACCUMULATE_PRODUCT))
1026
1027 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
1028
1029 #if defined(HAVE_IPP)
1030 CV_IPP_CHECK()
1031 {
1032 if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()))
1033 {
1034 typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
1035 int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
1036 typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
1037 const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
1038 ippiAddProduct ippFunc = 0;
1039 ippiAddProductMask ippFuncMask = 0;
1040
1041 if (mask.empty())
1042 {
1043 ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
1044 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
1045 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
1046 }
1047 else if (scn == 1)
1048 {
1049 ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
1050 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
1051 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
1052 }
1053
1054 if (ippFunc || ippFuncMask)
1055 {
1056 IppStatus status = ippStsNoErr;
1057
1058 Size size = src1.size();
1059 int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
1060 if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous())
1061 {
1062 src1step = static_cast<int>(src1.total() * src1.elemSize());
1063 src2step = static_cast<int>(src2.total() * src2.elemSize());
1064 dststep = static_cast<int>(dst.total() * dst.elemSize());
1065 maskstep = static_cast<int>(mask.total() * mask.elemSize());
1066 size.width = static_cast<int>(src1.total());
1067 size.height = 1;
1068 }
1069 size.width *= scn;
1070
1071 if (mask.empty())
1072 status = ippFunc(src1.ptr(), src1step, src2.ptr(), src2step, dst.ptr<Ipp32f>(),
1073 dststep, ippiSize(size.width, size.height));
1074 else
1075 status = ippFuncMask(src1.ptr(), src1step, src2.ptr(), src2step, mask.ptr<Ipp8u>(), maskstep,
1076 dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
1077
1078 if (status >= 0)
1079 {
1080 CV_IMPL_ADD(CV_IMPL_IPP);
1081 return;
1082 }
1083 setIppErrorStatus();
1084 }
1085 }
1086 }
1087 #endif
1088
1089 int fidx = getAccTabIdx(sdepth, ddepth);
1090 AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
1091 CV_Assert( func != 0 );
1092
1093 const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
1094 uchar* ptrs[4];
1095 NAryMatIterator it(arrays, ptrs);
1096 int len = (int)it.size;
1097
1098 for( size_t i = 0; i < it.nplanes; i++, ++it )
1099 func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, scn);
1100 }
1101
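// Exponential running average: dst = (1 - alpha)*dst + alpha*src, e.g. a simple
// background model updated once per frame (hypothetical names):
//     accumulateWeighted(frame, background, 0.05);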
1102 void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
1103 double alpha, InputArray _mask )
1104 {
1105 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
1106 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
1107
1108 CV_Assert( _src.sameSize(_dst) && dcn == scn );
1109 CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
1110
1111 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
1112 ocl_accumulate(_src, noArray(), _dst, alpha, _mask, ACCUMULATE_WEIGHTED))
1113
1114 Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
1115
1116 #if defined(HAVE_IPP)
1117 CV_IPP_CHECK()
1118 {
1119 if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
1120 {
1121 typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
1122 IppiSize roiSize, Ipp32f alpha);
1123 typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
1124 int maskStep, Ipp32f * pSrcDst,
1125 int srcDstStep, IppiSize roiSize, Ipp32f alpha);
1126 ippiAddWeighted ippFunc = 0;
1127 ippiAddWeightedMask ippFuncMask = 0;
1128
1129 if (mask.empty())
1130 {
1131 ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
1132 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
1133 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
1134 }
1135 else if (scn == 1)
1136 {
1137 ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
1138 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
1139 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
1140 }
1141
1142 if (ippFunc || ippFuncMask)
1143 {
1144 IppStatus status = ippStsNoErr;
1145
1146 Size size = src.size();
1147 int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
1148 if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
1149 {
1150 srcstep = static_cast<int>(src.total() * src.elemSize());
1151 dststep = static_cast<int>(dst.total() * dst.elemSize());
1152 maskstep = static_cast<int>(mask.total() * mask.elemSize());
1153 size.width = static_cast<int>(src.total());
1154 size.height = 1;
1155 }
1156 size.width *= scn;
1157
1158 if (mask.empty())
1159 status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
1160 else
1161 status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
1162 dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
1163
1164 if (status >= 0)
1165 {
1166 CV_IMPL_ADD(CV_IMPL_IPP);
1167 return;
1168 }
1169 setIppErrorStatus();
1170 }
1171 }
1172 }
1173 #endif
1174
1175 int fidx = getAccTabIdx(sdepth, ddepth);
1176 AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
1177 CV_Assert( func != 0 );
1178
1179 const Mat* arrays[] = {&src, &dst, &mask, 0};
1180 uchar* ptrs[3];
1181 NAryMatIterator it(arrays, ptrs);
1182 int len = (int)it.size;
1183
1184 for( size_t i = 0; i < it.nplanes; i++, ++it )
1185 func(ptrs[0], ptrs[1], ptrs[2], len, scn, alpha);
1186 }
1187
1188
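// Legacy C API wrappers: forward the CvArr*-based calls to the C++ implementations above.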
1189 CV_IMPL void
1190 cvAcc( const void* arr, void* sumarr, const void* maskarr )
1191 {
1192 cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
1193 if( maskarr )
1194 mask = cv::cvarrToMat(maskarr);
1195 cv::accumulate( src, dst, mask );
1196 }
1197
1198 CV_IMPL void
1199 cvSquareAcc( const void* arr, void* sumarr, const void* maskarr )
1200 {
1201 cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
1202 if( maskarr )
1203 mask = cv::cvarrToMat(maskarr);
1204 cv::accumulateSquare( src, dst, mask );
1205 }
1206
1207 CV_IMPL void
1208 cvMultiplyAcc( const void* arr1, const void* arr2,
1209 void* sumarr, const void* maskarr )
1210 {
1211 cv::Mat src1 = cv::cvarrToMat(arr1), src2 = cv::cvarrToMat(arr2);
1212 cv::Mat dst = cv::cvarrToMat(sumarr), mask;
1213 if( maskarr )
1214 mask = cv::cvarrToMat(maskarr);
1215 cv::accumulateProduct( src1, src2, dst, mask );
1216 }
1217
1218 CV_IMPL void
1219 cvRunningAvg( const void* arr, void* sumarr, double alpha, const void* maskarr )
1220 {
1221 cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
1222 if( maskarr )
1223 mask = cv::cvarrToMat(maskarr);
1224 cv::accumulateWeighted( src, dst, alpha, mask );
1225 }
1226
1227 /* End of file. */
1228