1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #include "precomp.hpp"
45 #include "opencl_kernels_core.hpp"
46 #include <limits>
47
48 namespace cv
49 {
50
51 typedef void (*MathFunc)(const void* src, void* dst, int len);
52
53 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
54 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
55 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
56 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
57
58 #ifdef HAVE_OPENCL
59
60 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
61
62 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
63
ocl_math_op(InputArray _src1,InputArray _src2,OutputArray _dst,int oclop)64 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
65 {
66 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
67 int kercn = oclop == OCL_OP_PHASE_DEGREES ||
68 oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
69
70 const ocl::Device d = ocl::Device::getDefault();
71 bool double_support = d.doubleFPConfig() > 0;
72 if (!double_support && depth == CV_64F)
73 return false;
74 int rowsPerWI = d.isIntel() ? 4 : 1;
75
76 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
77 format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
78 oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
79 double_support ? " -D DOUBLE_SUPPORT" : ""));
80 if (k.empty())
81 return false;
82
83 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
84 _dst.create(src1.size(), type);
85 UMat dst = _dst.getUMat();
86
87 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
88 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
89 dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
90
91 if (src2.empty())
92 k.args(src1arg, dstarg);
93 else
94 k.args(src1arg, src2arg, dstarg);
95
96 size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
97 return k.run(2, globalsize, 0, false);
98 }
99
100 #endif
101
fastAtan2(float y,float x)102 float fastAtan2( float y, float x )
103 {
104 float ax = std::abs(x), ay = std::abs(y);
105 float a, c, c2;
106 if( ax >= ay )
107 {
108 c = ay/(ax + (float)DBL_EPSILON);
109 c2 = c*c;
110 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
111 }
112 else
113 {
114 c = ax/(ay + (float)DBL_EPSILON);
115 c2 = c*c;
116 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
117 }
118 if( x < 0 )
119 a = 180.f - a;
120 if( y < 0 )
121 a = 360.f - a;
122 return a;
123 }
124
125 /* ************************************************************************** *\
126 Fast cube root by Ken Turkowski
127 (http://www.worldserver.com/turk/computergraphics/papers.html)
128 \* ************************************************************************** */
cubeRoot(float value)129 float cubeRoot( float value )
130 {
131 float fr;
132 Cv32suf v, m;
133 int ix, s;
134 int ex, shx;
135
136 v.f = value;
137 ix = v.i & 0x7fffffff;
138 s = v.i & 0x80000000;
139 ex = (ix >> 23) - 127;
140 shx = ex % 3;
141 shx -= shx >= 0 ? 3 : 0;
142 ex = (ex - shx) / 3; /* exponent of cube root */
143 v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
144 fr = v.f;
145
146 /* 0.125 <= fr < 1.0 */
147 /* Use quartic rational polynomial with error < 2^(-24) */
148 fr = (float)(((((45.2548339756803022511987494 * fr +
149 192.2798368355061050458134625) * fr +
150 119.1654824285581628956914143) * fr +
151 13.43250139086239872172837314) * fr +
152 0.1636161226585754240958355063)/
153 ((((14.80884093219134573786480845 * fr +
154 151.9714051044435648658557668) * fr +
155 168.5254414101568283957668343) * fr +
156 33.9905941350215598754191872) * fr +
157 1.0));
158
159 /* fr *= 2^ex * sign */
160 m.f = value;
161 v.f = fr;
162 v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
163 return v.f;
164 }
165
166 /****************************************************************************************\
167 * Cartezian -> Polar *
168 \****************************************************************************************/
169
magnitude(InputArray src1,InputArray src2,OutputArray dst)170 void magnitude( InputArray src1, InputArray src2, OutputArray dst )
171 {
172 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
173 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
174
175 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
176 ocl_math_op(src1, src2, dst, OCL_OP_MAG))
177
178 Mat X = src1.getMat(), Y = src2.getMat();
179 dst.create(X.dims, X.size, X.type());
180 Mat Mag = dst.getMat();
181
182 const Mat* arrays[] = {&X, &Y, &Mag, 0};
183 uchar* ptrs[3];
184 NAryMatIterator it(arrays, ptrs);
185 int len = (int)it.size*cn;
186
187 for( size_t i = 0; i < it.nplanes; i++, ++it )
188 {
189 if( depth == CV_32F )
190 {
191 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
192 float *mag = (float*)ptrs[2];
193 hal::magnitude( x, y, mag, len );
194 }
195 else
196 {
197 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
198 double *mag = (double*)ptrs[2];
199 hal::magnitude( x, y, mag, len );
200 }
201 }
202 }
203
204
phase(InputArray src1,InputArray src2,OutputArray dst,bool angleInDegrees)205 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
206 {
207 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
208 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
209
210 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
211 ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
212
213 Mat X = src1.getMat(), Y = src2.getMat();
214 dst.create( X.dims, X.size, type );
215 Mat Angle = dst.getMat();
216
217 const Mat* arrays[] = {&X, &Y, &Angle, 0};
218 uchar* ptrs[3];
219 NAryMatIterator it(arrays, ptrs);
220 cv::AutoBuffer<float> _buf;
221 float* buf[2] = {0, 0};
222 int j, k, total = (int)(it.size*cn), blockSize = total;
223 size_t esz1 = X.elemSize1();
224
225 if( depth == CV_64F )
226 {
227 blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
228 _buf.allocate(blockSize*2);
229 buf[0] = _buf;
230 buf[1] = buf[0] + blockSize;
231 }
232
233 for( size_t i = 0; i < it.nplanes; i++, ++it )
234 {
235 for( j = 0; j < total; j += blockSize )
236 {
237 int len = std::min(total - j, blockSize);
238 if( depth == CV_32F )
239 {
240 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
241 float *angle = (float*)ptrs[2];
242 hal::fastAtan2( y, x, angle, len, angleInDegrees );
243 }
244 else
245 {
246 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
247 double *angle = (double*)ptrs[2];
248 k = 0;
249
250 #if CV_SSE2
251 if (USE_SSE2)
252 {
253 for ( ; k <= len - 4; k += 4)
254 {
255 __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
256 _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
257 __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
258 _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
259
260 _mm_storeu_ps(buf[0] + k, v_dst0);
261 _mm_storeu_ps(buf[1] + k, v_dst1);
262 }
263 }
264 #endif
265
266 for( ; k < len; k++ )
267 {
268 buf[0][k] = (float)x[k];
269 buf[1][k] = (float)y[k];
270 }
271
272 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
273 k = 0;
274
275 #if CV_SSE2
276 if (USE_SSE2)
277 {
278 for ( ; k <= len - 4; k += 4)
279 {
280 __m128 v_src = _mm_loadu_ps(buf[0] + k);
281 _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
282 _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
283 }
284 }
285 #endif
286
287 for( ; k < len; k++ )
288 angle[k] = buf[0][k];
289 }
290 ptrs[0] += len*esz1;
291 ptrs[1] += len*esz1;
292 ptrs[2] += len*esz1;
293 }
294 }
295 }
296
297 #ifdef HAVE_OPENCL
298
ocl_cartToPolar(InputArray _src1,InputArray _src2,OutputArray _dst1,OutputArray _dst2,bool angleInDegrees)299 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
300 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
301 {
302 const ocl::Device & d = ocl::Device::getDefault();
303 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
304 rowsPerWI = d.isIntel() ? 4 : 1;
305 bool doubleSupport = d.doubleFPConfig() > 0;
306
307 if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
308 (depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
309 (depth == CV_64F && !doubleSupport) )
310 return false;
311
312 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
313 format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
314 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
315 depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
316 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
317 if (k.empty())
318 return false;
319
320 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
321 Size size = src1.size();
322 CV_Assert( size == src2.size() );
323
324 _dst1.create(size, type);
325 _dst2.create(size, type);
326 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
327
328 k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
329 ocl::KernelArg::ReadOnlyNoSize(src2),
330 ocl::KernelArg::WriteOnly(dst1, cn),
331 ocl::KernelArg::WriteOnlyNoSize(dst2));
332
333 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
334 return k.run(2, globalsize, NULL, false);
335 }
336
337 #endif
338
cartToPolar(InputArray src1,InputArray src2,OutputArray dst1,OutputArray dst2,bool angleInDegrees)339 void cartToPolar( InputArray src1, InputArray src2,
340 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
341 {
342 CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
343 ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
344
345 Mat X = src1.getMat(), Y = src2.getMat();
346 int type = X.type(), depth = X.depth(), cn = X.channels();
347 CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
348 dst1.create( X.dims, X.size, type );
349 dst2.create( X.dims, X.size, type );
350 Mat Mag = dst1.getMat(), Angle = dst2.getMat();
351
352 const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
353 uchar* ptrs[4];
354 NAryMatIterator it(arrays, ptrs);
355 cv::AutoBuffer<float> _buf;
356 float* buf[2] = {0, 0};
357 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
358 size_t esz1 = X.elemSize1();
359
360 if( depth == CV_64F )
361 {
362 _buf.allocate(blockSize*2);
363 buf[0] = _buf;
364 buf[1] = buf[0] + blockSize;
365 }
366
367 for( size_t i = 0; i < it.nplanes; i++, ++it )
368 {
369 for( j = 0; j < total; j += blockSize )
370 {
371 int len = std::min(total - j, blockSize);
372 if( depth == CV_32F )
373 {
374 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
375 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
376 hal::magnitude( x, y, mag, len );
377 hal::fastAtan2( y, x, angle, len, angleInDegrees );
378 }
379 else
380 {
381 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
382 double *angle = (double*)ptrs[3];
383
384 hal::magnitude(x, y, (double*)ptrs[2], len);
385 k = 0;
386
387 #if CV_SSE2
388 if (USE_SSE2)
389 {
390 for ( ; k <= len - 4; k += 4)
391 {
392 __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
393 _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
394 __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
395 _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
396
397 _mm_storeu_ps(buf[0] + k, v_dst0);
398 _mm_storeu_ps(buf[1] + k, v_dst1);
399 }
400 }
401 #endif
402
403 for( ; k < len; k++ )
404 {
405 buf[0][k] = (float)x[k];
406 buf[1][k] = (float)y[k];
407 }
408
409 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
410 k = 0;
411
412 #if CV_SSE2
413 if (USE_SSE2)
414 {
415 for ( ; k <= len - 4; k += 4)
416 {
417 __m128 v_src = _mm_loadu_ps(buf[0] + k);
418 _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
419 _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
420 }
421 }
422 #endif
423
424 for( ; k < len; k++ )
425 angle[k] = buf[0][k];
426 }
427 ptrs[0] += len*esz1;
428 ptrs[1] += len*esz1;
429 ptrs[2] += len*esz1;
430 ptrs[3] += len*esz1;
431 }
432 }
433 }
434
435
436 /****************************************************************************************\
437 * Polar -> Cartezian *
438 \****************************************************************************************/
439
SinCos_32f(const float * angle,float * sinval,float * cosval,int len,int angle_in_degrees)440 static void SinCos_32f( const float *angle, float *sinval, float* cosval,
441 int len, int angle_in_degrees )
442 {
443 const int N = 64;
444
445 static const double sin_table[] =
446 {
447 0.00000000000000000000, 0.09801714032956060400,
448 0.19509032201612825000, 0.29028467725446233000,
449 0.38268343236508978000, 0.47139673682599764000,
450 0.55557023301960218000, 0.63439328416364549000,
451 0.70710678118654746000, 0.77301045336273699000,
452 0.83146961230254524000, 0.88192126434835494000,
453 0.92387953251128674000, 0.95694033573220894000,
454 0.98078528040323043000, 0.99518472667219682000,
455 1.00000000000000000000, 0.99518472667219693000,
456 0.98078528040323043000, 0.95694033573220894000,
457 0.92387953251128674000, 0.88192126434835505000,
458 0.83146961230254546000, 0.77301045336273710000,
459 0.70710678118654757000, 0.63439328416364549000,
460 0.55557023301960218000, 0.47139673682599786000,
461 0.38268343236508989000, 0.29028467725446239000,
462 0.19509032201612861000, 0.09801714032956082600,
463 0.00000000000000012246, -0.09801714032956059000,
464 -0.19509032201612836000, -0.29028467725446211000,
465 -0.38268343236508967000, -0.47139673682599764000,
466 -0.55557023301960196000, -0.63439328416364527000,
467 -0.70710678118654746000, -0.77301045336273666000,
468 -0.83146961230254524000, -0.88192126434835494000,
469 -0.92387953251128652000, -0.95694033573220882000,
470 -0.98078528040323032000, -0.99518472667219693000,
471 -1.00000000000000000000, -0.99518472667219693000,
472 -0.98078528040323043000, -0.95694033573220894000,
473 -0.92387953251128663000, -0.88192126434835505000,
474 -0.83146961230254546000, -0.77301045336273688000,
475 -0.70710678118654768000, -0.63439328416364593000,
476 -0.55557023301960218000, -0.47139673682599792000,
477 -0.38268343236509039000, -0.29028467725446250000,
478 -0.19509032201612872000, -0.09801714032956050600,
479 };
480
481 static const double k2 = (2*CV_PI)/N;
482
483 static const double sin_a0 = -0.166630293345647*k2*k2*k2;
484 static const double sin_a2 = k2;
485
486 static const double cos_a0 = -0.499818138450326*k2*k2;
487 /*static const double cos_a2 = 1;*/
488
489 double k1;
490 int i = 0;
491
492 if( !angle_in_degrees )
493 k1 = N/(2*CV_PI);
494 else
495 k1 = N/360.;
496
497 #if CV_AVX2
498 if (USE_AVX2)
499 {
500 __m128d v_k1 = _mm_set1_pd(k1);
501 __m128d v_1 = _mm_set1_pd(1);
502 __m128i v_N1 = _mm_set1_epi32(N - 1);
503 __m128i v_N4 = _mm_set1_epi32(N >> 2);
504 __m128d v_sin_a0 = _mm_set1_pd(sin_a0);
505 __m128d v_sin_a2 = _mm_set1_pd(sin_a2);
506 __m128d v_cos_a0 = _mm_set1_pd(cos_a0);
507
508 for ( ; i <= len - 4; i += 4)
509 {
510 __m128 v_angle = _mm_loadu_ps(angle + i);
511
512 // 0-1
513 __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1);
514 __m128i v_it = _mm_cvtpd_epi32(v_t);
515 v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
516
517 __m128i v_sin_idx = _mm_and_si128(v_it, v_N1);
518 __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
519
520 __m128d v_t2 = _mm_mul_pd(v_t, v_t);
521 __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
522 __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
523
524 __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
525 __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
526
527 __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
528 _mm_mul_pd(v_cos_a, v_sin_b));
529 __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
530 _mm_mul_pd(v_sin_a, v_sin_b));
531
532 // 2-3
533 v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1);
534 v_it = _mm_cvtpd_epi32(v_t);
535 v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
536
537 v_sin_idx = _mm_and_si128(v_it, v_N1);
538 v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
539
540 v_t2 = _mm_mul_pd(v_t, v_t);
541 v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
542 v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
543
544 v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
545 v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
546
547 __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
548 _mm_mul_pd(v_cos_a, v_sin_b));
549 __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
550 _mm_mul_pd(v_sin_a, v_sin_b));
551
552 _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0),
553 _mm_cvtpd_ps(v_sin_val_1)));
554 _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0),
555 _mm_cvtpd_ps(v_cos_val_1)));
556 }
557 }
558 #endif
559
560 for( ; i < len; i++ )
561 {
562 double t = angle[i]*k1;
563 int it = cvRound(t);
564 t -= it;
565 int sin_idx = it & (N - 1);
566 int cos_idx = (N/4 - sin_idx) & (N - 1);
567
568 double sin_b = (sin_a0*t*t + sin_a2)*t;
569 double cos_b = cos_a0*t*t + 1;
570
571 double sin_a = sin_table[sin_idx];
572 double cos_a = sin_table[cos_idx];
573
574 double sin_val = sin_a*cos_b + cos_a*sin_b;
575 double cos_val = cos_a*cos_b - sin_a*sin_b;
576
577 sinval[i] = (float)sin_val;
578 cosval[i] = (float)cos_val;
579 }
580 }
581
582
583 #ifdef HAVE_OPENCL
584
ocl_polarToCart(InputArray _mag,InputArray _angle,OutputArray _dst1,OutputArray _dst2,bool angleInDegrees)585 static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
586 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
587 {
588 const ocl::Device & d = ocl::Device::getDefault();
589 int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
590 rowsPerWI = d.isIntel() ? 4 : 1;
591 bool doubleSupport = d.doubleFPConfig() > 0;
592
593 if ( !doubleSupport && depth == CV_64F )
594 return false;
595
596 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
597 format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
598 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
599 depth, angleInDegrees ? "AD" : "AR",
600 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
601 if (k.empty())
602 return false;
603
604 UMat mag = _mag.getUMat(), angle = _angle.getUMat();
605 Size size = angle.size();
606 CV_Assert(mag.size() == size);
607
608 _dst1.create(size, type);
609 _dst2.create(size, type);
610 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
611
612 k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
613 ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
614
615 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
616 return k.run(2, globalsize, NULL, false);
617 }
618
619 #endif
620
polarToCart(InputArray src1,InputArray src2,OutputArray dst1,OutputArray dst2,bool angleInDegrees)621 void polarToCart( InputArray src1, InputArray src2,
622 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
623 {
624 int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
625 CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
626
627 CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
628 ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
629
630 Mat Mag = src1.getMat(), Angle = src2.getMat();
631 CV_Assert( Mag.empty() || Angle.size == Mag.size);
632 dst1.create( Angle.dims, Angle.size, type );
633 dst2.create( Angle.dims, Angle.size, type );
634 Mat X = dst1.getMat(), Y = dst2.getMat();
635
636 #if defined(HAVE_IPP)
637 CV_IPP_CHECK()
638 {
639 if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
640 {
641 typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
642 void * pDstRe, void * pDstIm, int len);
643 ippsPolarToCart ippFunc =
644 depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
645 depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
646 CV_Assert(ippFunc != 0);
647
648 IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
649 if (status >= 0)
650 {
651 CV_IMPL_ADD(CV_IMPL_IPP);
652 return;
653 }
654 setIppErrorStatus();
655 }
656 }
657 #endif
658
659 const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
660 uchar* ptrs[4];
661 NAryMatIterator it(arrays, ptrs);
662 cv::AutoBuffer<float> _buf;
663 float* buf[2] = {0, 0};
664 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
665 size_t esz1 = Angle.elemSize1();
666
667 if( depth == CV_64F )
668 {
669 _buf.allocate(blockSize*2);
670 buf[0] = _buf;
671 buf[1] = buf[0] + blockSize;
672 }
673
674 for( size_t i = 0; i < it.nplanes; i++, ++it )
675 {
676 for( j = 0; j < total; j += blockSize )
677 {
678 int len = std::min(total - j, blockSize);
679 if( depth == CV_32F )
680 {
681 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
682 float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
683
684 SinCos_32f( angle, y, x, len, angleInDegrees );
685 if( mag )
686 {
687 k = 0;
688
689 #if CV_NEON
690 for( ; k <= len - 4; k += 4 )
691 {
692 float32x4_t v_m = vld1q_f32(mag + k);
693 vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
694 vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
695 }
696 #elif CV_SSE2
697 if (USE_SSE2)
698 {
699 for( ; k <= len - 4; k += 4 )
700 {
701 __m128 v_m = _mm_loadu_ps(mag + k);
702 _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m));
703 _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m));
704 }
705 }
706 #endif
707
708 for( ; k < len; k++ )
709 {
710 float m = mag[k];
711 x[k] *= m; y[k] *= m;
712 }
713 }
714 }
715 else
716 {
717 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
718 double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
719
720 for( k = 0; k < len; k++ )
721 buf[0][k] = (float)angle[k];
722
723 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
724 if( mag )
725 for( k = 0; k < len; k++ )
726 {
727 double m = mag[k];
728 x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
729 }
730 else
731 {
732 std::memcpy(x, buf[0], sizeof(float) * len);
733 std::memcpy(y, buf[1], sizeof(float) * len);
734 }
735 }
736
737 if( ptrs[0] )
738 ptrs[0] += len*esz1;
739 ptrs[1] += len*esz1;
740 ptrs[2] += len*esz1;
741 ptrs[3] += len*esz1;
742 }
743 }
744 }
745
746 /****************************************************************************************\
747 * E X P *
748 \****************************************************************************************/
749
750 #ifdef HAVE_IPP
Exp_32f_ipp(const float * x,float * y,int n)751 static void Exp_32f_ipp(const float *x, float *y, int n)
752 {
753 CV_IPP_CHECK()
754 {
755 if (0 <= ippsExp_32f_A21(x, y, n))
756 {
757 CV_IMPL_ADD(CV_IMPL_IPP);
758 return;
759 }
760 setIppErrorStatus();
761 }
762 hal::exp(x, y, n);
763 }
764
Exp_64f_ipp(const double * x,double * y,int n)765 static void Exp_64f_ipp(const double *x, double *y, int n)
766 {
767 CV_IPP_CHECK()
768 {
769 if (0 <= ippsExp_64f_A50(x, y, n))
770 {
771 CV_IMPL_ADD(CV_IMPL_IPP);
772 return;
773 }
774 setIppErrorStatus();
775 }
776 hal::exp(x, y, n);
777 }
778
779 #define Exp_32f Exp_32f_ipp
780 #define Exp_64f Exp_64f_ipp
781 #else
782 #define Exp_32f hal::exp
783 #define Exp_64f hal::exp
784 #endif
785
786
exp(InputArray _src,OutputArray _dst)787 void exp( InputArray _src, OutputArray _dst )
788 {
789 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
790 CV_Assert( depth == CV_32F || depth == CV_64F );
791
792 CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
793 ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
794
795 Mat src = _src.getMat();
796 _dst.create( src.dims, src.size, type );
797 Mat dst = _dst.getMat();
798
799 const Mat* arrays[] = {&src, &dst, 0};
800 uchar* ptrs[2];
801 NAryMatIterator it(arrays, ptrs);
802 int len = (int)(it.size*cn);
803
804 for( size_t i = 0; i < it.nplanes; i++, ++it )
805 {
806 if( depth == CV_32F )
807 Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
808 else
809 Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
810 }
811 }
812
813
814 /****************************************************************************************\
815 * L O G *
816 \****************************************************************************************/
817
818 #ifdef HAVE_IPP
Log_32f_ipp(const float * x,float * y,int n)819 static void Log_32f_ipp(const float *x, float *y, int n)
820 {
821 CV_IPP_CHECK()
822 {
823 if (0 <= ippsLn_32f_A21(x, y, n))
824 {
825 CV_IMPL_ADD(CV_IMPL_IPP);
826 return;
827 }
828 setIppErrorStatus();
829 }
830 hal::log(x, y, n);
831 }
832
Log_64f_ipp(const double * x,double * y,int n)833 static void Log_64f_ipp(const double *x, double *y, int n)
834 {
835 CV_IPP_CHECK()
836 {
837 if (0 <= ippsLn_64f_A50(x, y, n))
838 {
839 CV_IMPL_ADD(CV_IMPL_IPP);
840 return;
841 }
842 setIppErrorStatus();
843 }
844 hal::log(x, y, n);
845 }
846
847 #define Log_32f Log_32f_ipp
848 #define Log_64f Log_64f_ipp
849 #else
850 #define Log_32f hal::log
851 #define Log_64f hal::log
852 #endif
853
log(InputArray _src,OutputArray _dst)854 void log( InputArray _src, OutputArray _dst )
855 {
856 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
857 CV_Assert( depth == CV_32F || depth == CV_64F );
858
859 CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
860 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
861
862 Mat src = _src.getMat();
863 _dst.create( src.dims, src.size, type );
864 Mat dst = _dst.getMat();
865
866 const Mat* arrays[] = {&src, &dst, 0};
867 uchar* ptrs[2];
868 NAryMatIterator it(arrays, ptrs);
869 int len = (int)(it.size*cn);
870
871 for( size_t i = 0; i < it.nplanes; i++, ++it )
872 {
873 if( depth == CV_32F )
874 Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
875 else
876 Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
877 }
878 }
879
880 /****************************************************************************************\
881 * P O W E R *
882 \****************************************************************************************/
883
884 template <typename T, typename WT>
885 struct iPow_SIMD
886 {
operator ()cv::iPow_SIMD887 int operator() ( const T *, T *, int, int)
888 {
889 return 0;
890 }
891 };
892
893 #if CV_SIMD128
894
895 template <>
896 struct iPow_SIMD<uchar, int>
897 {
operator ()cv::iPow_SIMD898 int operator() ( const uchar * src, uchar * dst, int len, int power )
899 {
900 int i = 0;
901 v_uint32x4 v_1 = v_setall_u32(1u);
902
903 for ( ; i <= len - 8; i += 8)
904 {
905 v_uint32x4 v_a1 = v_1, v_a2 = v_1;
906 v_uint16x8 v = v_load_expand(src + i);
907 v_uint32x4 v_b1, v_b2;
908 v_expand(v, v_b1, v_b2);
909 int p = power;
910
911 while( p > 1 )
912 {
913 if (p & 1)
914 {
915 v_a1 *= v_b1;
916 v_a2 *= v_b2;
917 }
918 v_b1 *= v_b1;
919 v_b2 *= v_b2;
920 p >>= 1;
921 }
922
923 v_a1 *= v_b1;
924 v_a2 *= v_b2;
925
926 v = v_pack(v_a1, v_a2);
927 v_pack_store(dst + i, v);
928 }
929
930 return i;
931 }
932 };
933
934 template <>
935 struct iPow_SIMD<schar, int>
936 {
operator ()cv::iPow_SIMD937 int operator() ( const schar * src, schar * dst, int len, int power)
938 {
939 int i = 0;
940 v_int32x4 v_1 = v_setall_s32(1);
941
942 for ( ; i <= len - 8; i += 8)
943 {
944 v_int32x4 v_a1 = v_1, v_a2 = v_1;
945 v_int16x8 v = v_load_expand(src + i);
946 v_int32x4 v_b1, v_b2;
947 v_expand(v, v_b1, v_b2);
948 int p = power;
949
950 while( p > 1 )
951 {
952 if (p & 1)
953 {
954 v_a1 *= v_b1;
955 v_a2 *= v_b2;
956 }
957 v_b1 *= v_b1;
958 v_b2 *= v_b2;
959 p >>= 1;
960 }
961
962 v_a1 *= v_b1;
963 v_a2 *= v_b2;
964
965 v = v_pack(v_a1, v_a2);
966 v_pack_store(dst + i, v);
967 }
968
969 return i;
970 }
971 };
972
973 template <>
974 struct iPow_SIMD<ushort, int>
975 {
operator ()cv::iPow_SIMD976 int operator() ( const ushort * src, ushort * dst, int len, int power)
977 {
978 int i = 0;
979 v_uint32x4 v_1 = v_setall_u32(1u);
980
981 for ( ; i <= len - 8; i += 8)
982 {
983 v_uint32x4 v_a1 = v_1, v_a2 = v_1;
984 v_uint16x8 v = v_load(src + i);
985 v_uint32x4 v_b1, v_b2;
986 v_expand(v, v_b1, v_b2);
987 int p = power;
988
989 while( p > 1 )
990 {
991 if (p & 1)
992 {
993 v_a1 *= v_b1;
994 v_a2 *= v_b2;
995 }
996 v_b1 *= v_b1;
997 v_b2 *= v_b2;
998 p >>= 1;
999 }
1000
1001 v_a1 *= v_b1;
1002 v_a2 *= v_b2;
1003
1004 v = v_pack(v_a1, v_a2);
1005 v_store(dst + i, v);
1006 }
1007
1008 return i;
1009 }
1010 };
1011
1012 template <>
1013 struct iPow_SIMD<short, int>
1014 {
operator ()cv::iPow_SIMD1015 int operator() ( const short * src, short * dst, int len, int power)
1016 {
1017 int i = 0;
1018 v_int32x4 v_1 = v_setall_s32(1);
1019
1020 for ( ; i <= len - 8; i += 8)
1021 {
1022 v_int32x4 v_a1 = v_1, v_a2 = v_1;
1023 v_int16x8 v = v_load(src + i);
1024 v_int32x4 v_b1, v_b2;
1025 v_expand(v, v_b1, v_b2);
1026 int p = power;
1027
1028 while( p > 1 )
1029 {
1030 if (p & 1)
1031 {
1032 v_a1 *= v_b1;
1033 v_a2 *= v_b2;
1034 }
1035 v_b1 *= v_b1;
1036 v_b2 *= v_b2;
1037 p >>= 1;
1038 }
1039
1040 v_a1 *= v_b1;
1041 v_a2 *= v_b2;
1042
1043 v = v_pack(v_a1, v_a2);
1044 v_store(dst + i, v);
1045 }
1046
1047 return i;
1048 }
1049 };
1050
1051 template <>
1052 struct iPow_SIMD<int, int>
1053 {
operator ()cv::iPow_SIMD1054 int operator() ( const int * src, int * dst, int len, int power)
1055 {
1056 int i = 0;
1057 v_int32x4 v_1 = v_setall_s32(1);
1058
1059 for ( ; i <= len - 8; i += 8)
1060 {
1061 v_int32x4 v_a1 = v_1, v_a2 = v_1;
1062 v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
1063 int p = power;
1064
1065 while( p > 1 )
1066 {
1067 if (p & 1)
1068 {
1069 v_a1 *= v_b1;
1070 v_a2 *= v_b2;
1071 }
1072 v_b1 *= v_b1;
1073 v_b2 *= v_b2;
1074 p >>= 1;
1075 }
1076
1077 v_a1 *= v_b1;
1078 v_a2 *= v_b2;
1079
1080 v_store(dst + i, v_a1);
1081 v_store(dst + i + 4, v_a2);
1082 }
1083
1084 return i;
1085 }
1086 };
1087
1088 template <>
1089 struct iPow_SIMD<float, float>
1090 {
operator ()cv::iPow_SIMD1091 int operator() ( const float * src, float * dst, int len, int power)
1092 {
1093 int i = 0;
1094 v_float32x4 v_1 = v_setall_f32(1.f);
1095
1096 for ( ; i <= len - 8; i += 8)
1097 {
1098 v_float32x4 v_a1 = v_1, v_a2 = v_1;
1099 v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
1100 int p = std::abs(power);
1101 if( power < 0 )
1102 {
1103 v_b1 = v_1 / v_b1;
1104 v_b2 = v_1 / v_b2;
1105 }
1106
1107 while( p > 1 )
1108 {
1109 if (p & 1)
1110 {
1111 v_a1 *= v_b1;
1112 v_a2 *= v_b2;
1113 }
1114 v_b1 *= v_b1;
1115 v_b2 *= v_b2;
1116 p >>= 1;
1117 }
1118
1119 v_a1 *= v_b1;
1120 v_a2 *= v_b2;
1121
1122 v_store(dst + i, v_a1);
1123 v_store(dst + i + 4, v_a2);
1124 }
1125
1126 return i;
1127 }
1128 };
1129
1130 #if CV_SIMD128_64F
1131 template <>
1132 struct iPow_SIMD<double, double>
1133 {
operator ()cv::iPow_SIMD1134 int operator() ( const double * src, double * dst, int len, int power)
1135 {
1136 int i = 0;
1137 v_float64x2 v_1 = v_setall_f64(1.);
1138
1139 for ( ; i <= len - 4; i += 4)
1140 {
1141 v_float64x2 v_a1 = v_1, v_a2 = v_1;
1142 v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2);
1143 int p = std::abs(power);
1144 if( power < 0 )
1145 {
1146 v_b1 = v_1 / v_b1;
1147 v_b2 = v_1 / v_b2;
1148 }
1149
1150 while( p > 1 )
1151 {
1152 if (p & 1)
1153 {
1154 v_a1 *= v_b1;
1155 v_a2 *= v_b2;
1156 }
1157 v_b1 *= v_b1;
1158 v_b2 *= v_b2;
1159 p >>= 1;
1160 }
1161
1162 v_a1 *= v_b1;
1163 v_a2 *= v_b2;
1164
1165 v_store(dst + i, v_a1);
1166 v_store(dst + i + 2, v_a2);
1167 }
1168
1169 return i;
1170 }
1171 };
1172 #endif
1173
1174 #endif
1175
1176 template<typename T, typename WT>
1177 static void
iPow_i(const T * src,T * dst,int len,int power)1178 iPow_i( const T* src, T* dst, int len, int power )
1179 {
1180 if( power < 0 )
1181 {
1182 T tab[5] =
1183 {
1184 power == -1 ? saturate_cast<T>(-1) : 0, (power & 1) ? -1 : 1,
1185 std::numeric_limits<T>::max(), 1, power == -1 ? 1 : 0
1186 };
1187 for( int i = 0; i < len; i++ )
1188 {
1189 T val = src[i];
1190 dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0;
1191 }
1192 }
1193 else
1194 {
1195 iPow_SIMD<T, WT> vop;
1196 int i = vop(src, dst, len, power);
1197
1198 for( ; i < len; i++ )
1199 {
1200 WT a = 1, b = src[i];
1201 int p = power;
1202 while( p > 1 )
1203 {
1204 if( p & 1 )
1205 a *= b;
1206 b *= b;
1207 p >>= 1;
1208 }
1209
1210 a *= b;
1211 dst[i] = saturate_cast<T>(a);
1212 }
1213 }
1214 }
1215
1216 template<typename T>
1217 static void
iPow_f(const T * src,T * dst,int len,int power0)1218 iPow_f( const T* src, T* dst, int len, int power0 )
1219 {
1220 iPow_SIMD<T, T> vop;
1221 int i = vop(src, dst, len, power0);
1222 int power = std::abs(power0);
1223
1224 for( ; i < len; i++ )
1225 {
1226 T a = 1, b = src[i];
1227 int p = power;
1228 if( power0 < 0 )
1229 b = 1/b;
1230
1231 while( p > 1 )
1232 {
1233 if( p & 1 )
1234 a *= b;
1235 b *= b;
1236 p >>= 1;
1237 }
1238
1239 a *= b;
1240 dst[i] = a;
1241 }
1242 }
1243
iPow8u(const uchar * src,uchar * dst,int len,int power)1244 static void iPow8u(const uchar* src, uchar* dst, int len, int power)
1245 {
1246 iPow_i<uchar, unsigned>(src, dst, len, power);
1247 }
1248
iPow8s(const schar * src,schar * dst,int len,int power)1249 static void iPow8s(const schar* src, schar* dst, int len, int power)
1250 {
1251 iPow_i<schar, int>(src, dst, len, power);
1252 }
1253
iPow16u(const ushort * src,ushort * dst,int len,int power)1254 static void iPow16u(const ushort* src, ushort* dst, int len, int power)
1255 {
1256 iPow_i<ushort, unsigned>(src, dst, len, power);
1257 }
1258
iPow16s(const short * src,short * dst,int len,int power)1259 static void iPow16s(const short* src, short* dst, int len, int power)
1260 {
1261 iPow_i<short, int>(src, dst, len, power);
1262 }
1263
iPow32s(const int * src,int * dst,int len,int power)1264 static void iPow32s(const int* src, int* dst, int len, int power)
1265 {
1266 iPow_i<int, int>(src, dst, len, power);
1267 }
1268
iPow32f(const float * src,float * dst,int len,int power)1269 static void iPow32f(const float* src, float* dst, int len, int power)
1270 {
1271 iPow_f<float>(src, dst, len, power);
1272 }
1273
iPow64f(const double * src,double * dst,int len,int power)1274 static void iPow64f(const double* src, double* dst, int len, int power)
1275 {
1276 iPow_f<double>(src, dst, len, power);
1277 }
1278
1279
1280 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
1281
1282 static IPowFunc ipowTab[] =
1283 {
1284 (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
1285 (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
1286 };
1287
1288 #ifdef HAVE_OPENCL
1289
ocl_pow(InputArray _src,double power,OutputArray _dst,bool is_ipower,int ipower)1290 static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
1291 bool is_ipower, int ipower)
1292 {
1293 const ocl::Device & d = ocl::Device::getDefault();
1294 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1295 rowsPerWI = d.isIntel() ? 4 : 1;
1296 bool doubleSupport = d.doubleFPConfig() > 0;
1297
1298 _dst.createSameSize(_src, type);
1299 if (is_ipower)
1300 {
1301 if (ipower == 0)
1302 {
1303 _dst.setTo(Scalar::all(1));
1304 return true;
1305 }
1306 if (ipower == 1)
1307 {
1308 _src.copyTo(_dst);
1309 return true;
1310 }
1311 if( ipower < 0 )
1312 {
1313 if( depth == CV_32F || depth == CV_64F )
1314 is_ipower = false;
1315 else
1316 return false;
1317 }
1318 }
1319
1320 if (depth == CV_64F && !doubleSupport)
1321 return false;
1322
1323 bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
1324 const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
1325
1326 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
1327 format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
1328 ocl::typeToStr(depth), depth, rowsPerWI, op,
1329 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
1330 if (k.empty())
1331 return false;
1332
1333 UMat src = _src.getUMat();
1334 _dst.create(src.size(), type);
1335 UMat dst = _dst.getUMat();
1336
1337 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
1338 dstarg = ocl::KernelArg::WriteOnly(dst, cn);
1339
1340 if (issqrt)
1341 k.args(srcarg, dstarg);
1342 else if (is_ipower)
1343 k.args(srcarg, dstarg, ipower);
1344 else
1345 {
1346 if (depth == CV_32F)
1347 k.args(srcarg, dstarg, (float)power);
1348 else
1349 k.args(srcarg, dstarg, power);
1350 }
1351
1352 size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
1353 return k.run(2, globalsize, NULL, false);
1354 }
1355
1356 #endif
1357
InvSqrt_32f(const float * src,float * dst,int n)1358 static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt(src, dst, n); }
InvSqrt_64f(const double * src,double * dst,int n)1359 static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt(src, dst, n); }
Sqrt_32f(const float * src,float * dst,int n)1360 static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt(src, dst, n); }
Sqrt_64f(const double * src,double * dst,int n)1361 static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt(src, dst, n); }
1362
pow(InputArray _src,double power,OutputArray _dst)1363 void pow( InputArray _src, double power, OutputArray _dst )
1364 {
1365 int type = _src.type(), depth = CV_MAT_DEPTH(type),
1366 cn = CV_MAT_CN(type), ipower = cvRound(power);
1367 bool is_ipower = fabs(ipower - power) < DBL_EPSILON,
1368 useOpenCL = _dst.isUMat() && _src.dims() <= 2;
1369
1370 if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
1371 {
1372 switch( ipower )
1373 {
1374 case 0:
1375 _dst.createSameSize(_src, type);
1376 _dst.setTo(Scalar::all(1));
1377 return;
1378 case 1:
1379 _src.copyTo(_dst);
1380 return;
1381 case 2:
1382 multiply(_src, _src, _dst);
1383 return;
1384 }
1385 }
1386 else
1387 CV_Assert( depth == CV_32F || depth == CV_64F );
1388
1389 CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower))
1390
1391 Mat src = _src.getMat();
1392 _dst.create( src.dims, src.size, type );
1393 Mat dst = _dst.getMat();
1394
1395 const Mat* arrays[] = {&src, &dst, 0};
1396 uchar* ptrs[2];
1397 NAryMatIterator it(arrays, ptrs);
1398 int len = (int)(it.size*cn);
1399
1400 if( is_ipower )
1401 {
1402 IPowFunc func = ipowTab[depth];
1403 CV_Assert( func != 0 );
1404
1405 for( size_t i = 0; i < it.nplanes; i++, ++it )
1406 func( ptrs[0], ptrs[1], len, ipower );
1407 }
1408 else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
1409 {
1410 MathFunc func = power < 0 ?
1411 (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
1412 (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
1413
1414 for( size_t i = 0; i < it.nplanes; i++, ++it )
1415 func( ptrs[0], ptrs[1], len );
1416 }
1417 else
1418 {
1419 int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
1420 size_t esz1 = src.elemSize1();
1421 AutoBuffer<uchar> buf;
1422 Cv32suf inf32, nan32;
1423 Cv64suf inf64, nan64;
1424 float* fbuf = 0;
1425 double* dbuf = 0;
1426 inf32.i = 0x7f800000;
1427 nan32.i = 0x7fffffff;
1428 inf64.i = CV_BIG_INT(0x7FF0000000000000);
1429 nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
1430
1431 if( src.ptr() == dst.ptr() )
1432 {
1433 buf.allocate(blockSize*esz1);
1434 fbuf = (float*)(uchar*)buf;
1435 dbuf = (double*)(uchar*)buf;
1436 }
1437
1438 for( size_t i = 0; i < it.nplanes; i++, ++it )
1439 {
1440 for( j = 0; j < len; j += blockSize )
1441 {
1442 int bsz = std::min(len - j, blockSize);
1443
1444 #if defined(HAVE_IPP)
1445 CV_IPP_CHECK()
1446 {
1447 IppStatus status = depth == CV_32F ?
1448 ippsPowx_32f_A21((const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) :
1449 ippsPowx_64f_A50((const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz);
1450
1451 if (status >= 0)
1452 {
1453 CV_IMPL_ADD(CV_IMPL_IPP);
1454 ptrs[0] += bsz*esz1;
1455 ptrs[1] += bsz*esz1;
1456 continue;
1457 }
1458 setIppErrorStatus();
1459 }
1460 #endif
1461
1462 if( depth == CV_32F )
1463 {
1464 float* x0 = (float*)ptrs[0];
1465 float* x = fbuf ? fbuf : x0;
1466 float* y = (float*)ptrs[1];
1467
1468 if( x != x0 )
1469 memcpy(x, x0, bsz*esz1);
1470
1471 Log_32f(x, y, bsz);
1472 for( k = 0; k < bsz; k++ )
1473 y[k] = (float)(y[k]*power);
1474 Exp_32f(y, y, bsz);
1475 for( k = 0; k < bsz; k++ )
1476 {
1477 if( x0[k] <= 0 )
1478 {
1479 if( x0[k] == 0.f )
1480 {
1481 if( power < 0 )
1482 y[k] = inf32.f;
1483 }
1484 else
1485 y[k] = nan32.f;
1486 }
1487 }
1488 }
1489 else
1490 {
1491 double* x0 = (double*)ptrs[0];
1492 double* x = dbuf ? dbuf : x0;
1493 double* y = (double*)ptrs[1];
1494
1495 if( x != x0 )
1496 memcpy(x, x0, bsz*esz1);
1497
1498 Log_64f(x, y, bsz);
1499 for( k = 0; k < bsz; k++ )
1500 y[k] *= power;
1501 Exp_64f(y, y, bsz);
1502
1503 for( k = 0; k < bsz; k++ )
1504 {
1505 if( x0[k] <= 0 )
1506 {
1507 if( x0[k] == 0. )
1508 {
1509 if( power < 0 )
1510 y[k] = inf64.f;
1511 }
1512 else
1513 y[k] = nan64.f;
1514 }
1515 }
1516 }
1517 ptrs[0] += bsz*esz1;
1518 ptrs[1] += bsz*esz1;
1519 }
1520 }
1521 }
1522 }
1523
sqrt(InputArray a,OutputArray b)1524 void sqrt(InputArray a, OutputArray b)
1525 {
1526 cv::pow(a, 0.5, b);
1527 }
1528
1529 /************************** CheckArray for NaN's, Inf's *********************************/
1530
1531 template<int cv_mat_type> struct mat_type_assotiations{};
1532
1533 template<> struct mat_type_assotiations<CV_8U>
1534 {
1535 typedef unsigned char type;
1536 static const type min_allowable = 0x0;
1537 static const type max_allowable = 0xFF;
1538 };
1539
1540 template<> struct mat_type_assotiations<CV_8S>
1541 {
1542 typedef signed char type;
1543 static const type min_allowable = SCHAR_MIN;
1544 static const type max_allowable = SCHAR_MAX;
1545 };
1546
1547 template<> struct mat_type_assotiations<CV_16U>
1548 {
1549 typedef unsigned short type;
1550 static const type min_allowable = 0x0;
1551 static const type max_allowable = USHRT_MAX;
1552 };
1553 template<> struct mat_type_assotiations<CV_16S>
1554 {
1555 typedef signed short type;
1556 static const type min_allowable = SHRT_MIN;
1557 static const type max_allowable = SHRT_MAX;
1558 };
1559
1560 template<> struct mat_type_assotiations<CV_32S>
1561 {
1562 typedef int type;
1563 static const type min_allowable = (-INT_MAX - 1);
1564 static const type max_allowable = INT_MAX;
1565 };
1566
1567 // inclusive maxVal !!!
1568 template<int depth>
checkIntegerRange(cv::Mat src,Point & bad_pt,int minVal,int maxVal,double & bad_value)1569 bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value)
1570 {
1571 typedef mat_type_assotiations<depth> type_ass;
1572
1573 if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
1574 {
1575 return true;
1576 }
1577 else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
1578 {
1579 bad_pt = cv::Point(0,0);
1580 return false;
1581 }
1582 cv::Mat as_one_channel = src.reshape(1,0);
1583
1584 for (int j = 0; j < as_one_channel.rows; ++j)
1585 for (int i = 0; i < as_one_channel.cols; ++i)
1586 {
1587 if (as_one_channel.at<typename type_ass::type>(j ,i) < minVal || as_one_channel.at<typename type_ass::type>(j ,i) > maxVal)
1588 {
1589 bad_pt.y = j ;
1590 bad_pt.x = i % src.channels();
1591 bad_value = as_one_channel.at<typename type_ass::type>(j ,i);
1592 return false;
1593 }
1594 }
1595 bad_value = 0.0;
1596
1597 return true;
1598 }
1599
1600 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value);
1601
1602 check_range_function check_range_functions[] =
1603 {
1604 &checkIntegerRange<CV_8U>,
1605 &checkIntegerRange<CV_8S>,
1606 &checkIntegerRange<CV_16U>,
1607 &checkIntegerRange<CV_16S>,
1608 &checkIntegerRange<CV_32S>
1609 };
1610
checkRange(InputArray _src,bool quiet,Point * pt,double minVal,double maxVal)1611 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
1612 {
1613 Mat src = _src.getMat();
1614
1615 if ( src.dims > 2 )
1616 {
1617 const Mat* arrays[] = {&src, 0};
1618 Mat planes[1];
1619 NAryMatIterator it(arrays, planes);
1620
1621 for ( size_t i = 0; i < it.nplanes; i++, ++it )
1622 {
1623 if (!checkRange( it.planes[0], quiet, pt, minVal, maxVal ))
1624 {
1625 // todo: set index properly
1626 return false;
1627 }
1628 }
1629 return true;
1630 }
1631
1632 int depth = src.depth();
1633 Point badPt(-1, -1);
1634 double badValue = 0;
1635
1636 if (depth < CV_32F)
1637 {
1638 // see "Bug #1784"
1639 int minVali = minVal<(-INT_MAX - 1) ? (-INT_MAX - 1) : cvFloor(minVal);
1640 int maxVali = maxVal>INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; // checkIntegerRang() use inclusive maxVal
1641
1642 (check_range_functions[depth])(src, badPt, minVali, maxVali, badValue);
1643 }
1644 else
1645 {
1646 int i, loc = 0;
1647 Size size = getContinuousSize( src, src.channels() );
1648
1649 if( depth == CV_32F )
1650 {
1651 Cv32suf a, b;
1652 int ia, ib;
1653 const int* isrc = src.ptr<int>();
1654 size_t step = src.step/sizeof(isrc[0]);
1655
1656 a.f = (float)std::max(minVal, (double)-FLT_MAX);
1657 b.f = (float)std::min(maxVal, (double)FLT_MAX);
1658
1659 ia = CV_TOGGLE_FLT(a.i);
1660 ib = CV_TOGGLE_FLT(b.i);
1661
1662 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
1663 {
1664 for( i = 0; i < size.width; i++ )
1665 {
1666 int val = isrc[i];
1667 val = CV_TOGGLE_FLT(val);
1668
1669 if( val < ia || val >= ib )
1670 {
1671 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
1672 badValue = ((const float*)isrc)[i];
1673 break;
1674 }
1675 }
1676 }
1677 }
1678 else
1679 {
1680 Cv64suf a, b;
1681 int64 ia, ib;
1682 const int64* isrc = src.ptr<int64>();
1683 size_t step = src.step/sizeof(isrc[0]);
1684
1685 a.f = minVal;
1686 b.f = maxVal;
1687
1688 ia = CV_TOGGLE_DBL(a.i);
1689 ib = CV_TOGGLE_DBL(b.i);
1690
1691 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
1692 {
1693 for( i = 0; i < size.width; i++ )
1694 {
1695 int64 val = isrc[i];
1696 val = CV_TOGGLE_DBL(val);
1697
1698 if( val < ia || val >= ib )
1699 {
1700 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
1701 badValue = ((const double*)isrc)[i];
1702 break;
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 if( badPt.x >= 0 )
1710 {
1711 if( pt )
1712 *pt = badPt;
1713 if( !quiet )
1714 CV_Error_( CV_StsOutOfRange,
1715 ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
1716 }
1717 return badPt.x < 0;
1718 }
1719
1720 #ifdef HAVE_OPENCL
1721
ocl_patchNaNs(InputOutputArray _a,float value)1722 static bool ocl_patchNaNs( InputOutputArray _a, float value )
1723 {
1724 int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
1725 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
1726 format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d",
1727 rowsPerWI));
1728 if (k.empty())
1729 return false;
1730
1731 UMat a = _a.getUMat();
1732 int cn = a.channels();
1733
1734 k.args(ocl::KernelArg::ReadOnlyNoSize(a),
1735 ocl::KernelArg::WriteOnly(a, cn), (float)value);
1736
1737 size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
1738 return k.run(2, globalsize, NULL, false);
1739 }
1740
1741 #endif
1742
patchNaNs(InputOutputArray _a,double _val)1743 void patchNaNs( InputOutputArray _a, double _val )
1744 {
1745 CV_Assert( _a.depth() == CV_32F );
1746
1747 CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
1748 ocl_patchNaNs(_a, (float)_val))
1749
1750 Mat a = _a.getMat();
1751 const Mat* arrays[] = {&a, 0};
1752 int* ptrs[1];
1753 NAryMatIterator it(arrays, (uchar**)ptrs);
1754 size_t len = it.size*a.channels();
1755 Cv32suf val;
1756 val.f = (float)_val;
1757
1758 #if CV_SSE2
1759 __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
1760 __m128i v_val = _mm_set1_epi32(val.i);
1761 #elif CV_NEON
1762 int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
1763 v_val = vdupq_n_s32(val.i);
1764 #endif
1765
1766 for( size_t i = 0; i < it.nplanes; i++, ++it )
1767 {
1768 int* tptr = ptrs[0];
1769 size_t j = 0;
1770
1771 #if CV_SSE2
1772 if (USE_SSE2)
1773 {
1774 for ( ; j + 4 <= len; j += 4)
1775 {
1776 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
1777 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
1778 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
1779 _mm_storeu_si128((__m128i *)(tptr + j), v_res);
1780 }
1781 }
1782 #elif CV_NEON
1783 for ( ; j + 4 <= len; j += 4)
1784 {
1785 int32x4_t v_src = vld1q_s32(tptr + j);
1786 uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
1787 int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
1788 vst1q_s32(tptr + j, v_dst);
1789 }
1790 #endif
1791
1792 for( ; j < len; j++ )
1793 if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
1794 tptr[j] = val.i;
1795 }
1796 }
1797
1798 }
1799
cvCbrt(float value)1800 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
cvFastArctan(float y,float x)1801 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
1802
1803 CV_IMPL void
cvCartToPolar(const CvArr * xarr,const CvArr * yarr,CvArr * magarr,CvArr * anglearr,int angle_in_degrees)1804 cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
1805 CvArr* magarr, CvArr* anglearr,
1806 int angle_in_degrees )
1807 {
1808 cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
1809 if( magarr )
1810 {
1811 Mag = cv::cvarrToMat(magarr);
1812 CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
1813 }
1814 if( anglearr )
1815 {
1816 Angle = cv::cvarrToMat(anglearr);
1817 CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
1818 }
1819 if( magarr )
1820 {
1821 if( anglearr )
1822 cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
1823 else
1824 cv::magnitude( X, Y, Mag );
1825 }
1826 else
1827 cv::phase( X, Y, Angle, angle_in_degrees != 0 );
1828 }
1829
1830 CV_IMPL void
cvPolarToCart(const CvArr * magarr,const CvArr * anglearr,CvArr * xarr,CvArr * yarr,int angle_in_degrees)1831 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
1832 CvArr* xarr, CvArr* yarr, int angle_in_degrees )
1833 {
1834 cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
1835 if( magarr )
1836 {
1837 Mag = cv::cvarrToMat(magarr);
1838 CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
1839 }
1840 if( xarr )
1841 {
1842 X = cv::cvarrToMat(xarr);
1843 CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
1844 }
1845 if( yarr )
1846 {
1847 Y = cv::cvarrToMat(yarr);
1848 CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
1849 }
1850
1851 cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
1852 }
1853
cvExp(const CvArr * srcarr,CvArr * dstarr)1854 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
1855 {
1856 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1857 CV_Assert( src.type() == dst.type() && src.size == dst.size );
1858 cv::exp( src, dst );
1859 }
1860
cvLog(const CvArr * srcarr,CvArr * dstarr)1861 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
1862 {
1863 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1864 CV_Assert( src.type() == dst.type() && src.size == dst.size );
1865 cv::log( src, dst );
1866 }
1867
cvPow(const CvArr * srcarr,CvArr * dstarr,double power)1868 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
1869 {
1870 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1871 CV_Assert( src.type() == dst.type() && src.size == dst.size );
1872 cv::pow( src, power, dst );
1873 }
1874
cvCheckArr(const CvArr * arr,int flags,double minVal,double maxVal)1875 CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
1876 double minVal, double maxVal )
1877 {
1878 if( (flags & CV_CHECK_RANGE) == 0 )
1879 minVal = -DBL_MAX, maxVal = DBL_MAX;
1880 return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
1881 }
1882
1883
1884 /*
1885 Finds real roots of cubic, quadratic or linear equation.
1886 The original code has been taken from Ken Turkowski web page
1887 (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV.
1888 Here is the copyright notice.
1889
1890 -----------------------------------------------------------------------
1891 Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
1892
1893 All rights reserved.
1894
1895 Warranty Information
1896 Even though I have reviewed this software, I make no warranty
1897 or representation, either express or implied, with respect to this
1898 software, its quality, accuracy, merchantability, or fitness for a
1899 particular purpose. As a result, this software is provided "as is,"
1900 and you, its user, are assuming the entire risk as to its quality
1901 and accuracy.
1902
1903 This code may be used and freely distributed as long as it includes
1904 this copyright notice and the above warranty information.
1905 -----------------------------------------------------------------------
1906 */
1907
solveCubic(InputArray _coeffs,OutputArray _roots)1908 int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
1909 {
1910 const int n0 = 3;
1911 Mat coeffs = _coeffs.getMat();
1912 int ctype = coeffs.type();
1913
1914 CV_Assert( ctype == CV_32F || ctype == CV_64F );
1915 CV_Assert( (coeffs.size() == Size(n0, 1) ||
1916 coeffs.size() == Size(n0+1, 1) ||
1917 coeffs.size() == Size(1, n0) ||
1918 coeffs.size() == Size(1, n0+1)) );
1919
1920 _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
1921 Mat roots = _roots.getMat();
1922
1923 int i = -1, n = 0;
1924 double a0 = 1., a1, a2, a3;
1925 double x0 = 0., x1 = 0., x2 = 0.;
1926 int ncoeffs = coeffs.rows + coeffs.cols - 1;
1927
1928 if( ctype == CV_32FC1 )
1929 {
1930 if( ncoeffs == 4 )
1931 a0 = coeffs.at<float>(++i);
1932
1933 a1 = coeffs.at<float>(i+1);
1934 a2 = coeffs.at<float>(i+2);
1935 a3 = coeffs.at<float>(i+3);
1936 }
1937 else
1938 {
1939 if( ncoeffs == 4 )
1940 a0 = coeffs.at<double>(++i);
1941
1942 a1 = coeffs.at<double>(i+1);
1943 a2 = coeffs.at<double>(i+2);
1944 a3 = coeffs.at<double>(i+3);
1945 }
1946
1947 if( a0 == 0 )
1948 {
1949 if( a1 == 0 )
1950 {
1951 if( a2 == 0 )
1952 n = a3 == 0 ? -1 : 0;
1953 else
1954 {
1955 // linear equation
1956 x0 = -a3/a2;
1957 n = 1;
1958 }
1959 }
1960 else
1961 {
1962 // quadratic equation
1963 double d = a2*a2 - 4*a1*a3;
1964 if( d >= 0 )
1965 {
1966 d = std::sqrt(d);
1967 double q1 = (-a2 + d) * 0.5;
1968 double q2 = (a2 + d) * -0.5;
1969 if( fabs(q1) > fabs(q2) )
1970 {
1971 x0 = q1 / a1;
1972 x1 = a3 / q1;
1973 }
1974 else
1975 {
1976 x0 = q2 / a1;
1977 x1 = a3 / q2;
1978 }
1979 n = d > 0 ? 2 : 1;
1980 }
1981 }
1982 }
1983 else
1984 {
1985 a0 = 1./a0;
1986 a1 *= a0;
1987 a2 *= a0;
1988 a3 *= a0;
1989
1990 double Q = (a1 * a1 - 3 * a2) * (1./9);
1991 double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
1992 double Qcubed = Q * Q * Q;
1993 double d = Qcubed - R * R;
1994
1995 if( d >= 0 )
1996 {
1997 double theta = acos(R / std::sqrt(Qcubed));
1998 double sqrtQ = std::sqrt(Q);
1999 double t0 = -2 * sqrtQ;
2000 double t1 = theta * (1./3);
2001 double t2 = a1 * (1./3);
2002 x0 = t0 * cos(t1) - t2;
2003 x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
2004 x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
2005 n = 3;
2006 }
2007 else
2008 {
2009 double e;
2010 d = std::sqrt(-d);
2011 e = std::pow(d + fabs(R), 0.333333333333);
2012 if( R > 0 )
2013 e = -e;
2014 x0 = (e + Q / e) - a1 * (1./3);
2015 n = 1;
2016 }
2017 }
2018
2019 if( roots.type() == CV_32FC1 )
2020 {
2021 roots.at<float>(0) = (float)x0;
2022 roots.at<float>(1) = (float)x1;
2023 roots.at<float>(2) = (float)x2;
2024 }
2025 else
2026 {
2027 roots.at<double>(0) = x0;
2028 roots.at<double>(1) = x1;
2029 roots.at<double>(2) = x2;
2030 }
2031
2032 return n;
2033 }
2034
2035 /* finds complex roots of a polynomial using Durand-Kerner method:
2036 http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */
solvePoly(InputArray _coeffs0,OutputArray _roots0,int maxIters)2037 double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
2038 {
2039 typedef Complex<double> C;
2040
2041 double maxDiff = 0;
2042 int iter, i, j;
2043 Mat coeffs0 = _coeffs0.getMat();
2044 int ctype = _coeffs0.type();
2045 int cdepth = CV_MAT_DEPTH(ctype);
2046
2047 CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
2048 CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
2049
2050 int n0 = coeffs0.cols + coeffs0.rows - 2, n = n0;
2051
2052 _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
2053 Mat roots0 = _roots0.getMat();
2054
2055 AutoBuffer<C> buf(n*2+2);
2056 C *coeffs = buf, *roots = coeffs + n + 1;
2057 Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
2058 coeffs0.convertTo(coeffs1, coeffs1.type());
2059 if( coeffs0.channels() == 1 )
2060 {
2061 const double* rcoeffs = (const double*)roots;
2062 for( i = 0; i <= n; i++ )
2063 coeffs[i] = C(rcoeffs[i], 0);
2064 }
2065
2066 for( ; n > 1; n-- )
2067 {
2068 if( std::abs(coeffs[n].re) + std::abs(coeffs[n].im) > DBL_EPSILON )
2069 break;
2070 }
2071
2072 C p(1, 0), r(1, 1);
2073
2074 for( i = 0; i < n; i++ )
2075 {
2076 roots[i] = p;
2077 p = p * r;
2078 }
2079
2080 maxIters = maxIters <= 0 ? 1000 : maxIters;
2081 for( iter = 0; iter < maxIters; iter++ )
2082 {
2083 maxDiff = 0;
2084 for( i = 0; i < n; i++ )
2085 {
2086 p = roots[i];
2087 C num = coeffs[n], denom = coeffs[n];
2088 for( j = 0; j < n; j++ )
2089 {
2090 num = num*p + coeffs[n-j-1];
2091 if( j != i ) denom = denom * (p - roots[j]);
2092 }
2093 num /= denom;
2094 roots[i] = p - num;
2095 maxDiff = std::max(maxDiff, cv::abs(num));
2096 }
2097 if( maxDiff <= 0 )
2098 break;
2099 }
2100
2101 if( coeffs0.channels() == 1 )
2102 {
2103 const double verySmallEps = 1e-100;
2104 for( i = 0; i < n; i++ )
2105 if( fabs(roots[i].im) < verySmallEps )
2106 roots[i].im = 0;
2107 }
2108
2109 for( ; n < n0; n++ )
2110 roots[n+1] = roots[n];
2111
2112 Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
2113 return maxDiff;
2114 }
2115
2116
2117 CV_IMPL int
cvSolveCubic(const CvMat * coeffs,CvMat * roots)2118 cvSolveCubic( const CvMat* coeffs, CvMat* roots )
2119 {
2120 cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
2121 int nroots = cv::solveCubic(_coeffs, _roots);
2122 CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
2123 return nroots;
2124 }
2125
2126
cvSolvePoly(const CvMat * a,CvMat * r,int maxiter,int)2127 void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
2128 {
2129 cv::Mat _a = cv::cvarrToMat(a);
2130 cv::Mat _r = cv::cvarrToMat(r);
2131 cv::Mat _r0 = _r;
2132 cv::solvePoly(_a, _r, maxiter);
2133 CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
2134 }
2135
2136
2137 /* End of file. */
2138