• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 //   * Redistribution's of source code must retain the above copyright notice,
22 //     this list of conditions and the following disclaimer.
23 //
24 //   * Redistribution's in binary form must reproduce the above copyright notice,
25 //     this list of conditions and the following disclaimer in the documentation
26 //     and/or other materials provided with the distribution.
27 //
28 //   * The name of the copyright holders may not be used to endorse or promote products
29 //     derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43 
44 #include "precomp.hpp"
45 #include "opencl_kernels_core.hpp"
46 #include <limits>
47 
48 namespace cv
49 {
50 
51 typedef void (*MathFunc)(const void* src, void* dst, int len);
52 
53 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
54 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
55 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
56 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
57 
58 #ifdef HAVE_OPENCL
59 
60 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
61 
62 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
63 
ocl_math_op(InputArray _src1,InputArray _src2,OutputArray _dst,int oclop)64 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
65 {
66     int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
67     int kercn = oclop == OCL_OP_PHASE_DEGREES ||
68             oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
69 
70     const ocl::Device d = ocl::Device::getDefault();
71     bool double_support = d.doubleFPConfig() > 0;
72     if (!double_support && depth == CV_64F)
73         return false;
74     int rowsPerWI = d.isIntel() ? 4 : 1;
75 
76     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
77                   format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
78                          oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
79                          double_support ? " -D DOUBLE_SUPPORT" : ""));
80     if (k.empty())
81         return false;
82 
83     UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
84     _dst.create(src1.size(), type);
85     UMat dst = _dst.getUMat();
86 
87     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
88             src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
89             dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
90 
91     if (src2.empty())
92         k.args(src1arg, dstarg);
93     else
94         k.args(src1arg, src2arg, dstarg);
95 
96     size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
97     return k.run(2, globalsize, 0, false);
98 }
99 
100 #endif
101 
fastAtan2(float y,float x)102 float fastAtan2( float y, float x )
103 {
104     float ax = std::abs(x), ay = std::abs(y);
105     float a, c, c2;
106     if( ax >= ay )
107     {
108         c = ay/(ax + (float)DBL_EPSILON);
109         c2 = c*c;
110         a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
111     }
112     else
113     {
114         c = ax/(ay + (float)DBL_EPSILON);
115         c2 = c*c;
116         a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
117     }
118     if( x < 0 )
119         a = 180.f - a;
120     if( y < 0 )
121         a = 360.f - a;
122     return a;
123 }
124 
125 /* ************************************************************************** *\
126    Fast cube root by Ken Turkowski
127    (http://www.worldserver.com/turk/computergraphics/papers.html)
128 \* ************************************************************************** */
cubeRoot(float value)129 float  cubeRoot( float value )
130 {
131     float fr;
132     Cv32suf v, m;
133     int ix, s;
134     int ex, shx;
135 
136     v.f = value;
137     ix = v.i & 0x7fffffff;
138     s = v.i & 0x80000000;
139     ex = (ix >> 23) - 127;
140     shx = ex % 3;
141     shx -= shx >= 0 ? 3 : 0;
142     ex = (ex - shx) / 3; /* exponent of cube root */
143     v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
144     fr = v.f;
145 
146     /* 0.125 <= fr < 1.0 */
147     /* Use quartic rational polynomial with error < 2^(-24) */
148     fr = (float)(((((45.2548339756803022511987494 * fr +
149     192.2798368355061050458134625) * fr +
150     119.1654824285581628956914143) * fr +
151     13.43250139086239872172837314) * fr +
152     0.1636161226585754240958355063)/
153     ((((14.80884093219134573786480845 * fr +
154     151.9714051044435648658557668) * fr +
155     168.5254414101568283957668343) * fr +
156     33.9905941350215598754191872) * fr +
157     1.0));
158 
159     /* fr *= 2^ex * sign */
160     m.f = value;
161     v.f = fr;
162     v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
163     return v.f;
164 }
165 
166 /****************************************************************************************\
167 *                                  Cartezian -> Polar                                    *
168 \****************************************************************************************/
169 
magnitude(InputArray src1,InputArray src2,OutputArray dst)170 void magnitude( InputArray src1, InputArray src2, OutputArray dst )
171 {
172     int type = src1.type(), depth = src1.depth(), cn = src1.channels();
173     CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
174 
175     CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
176                ocl_math_op(src1, src2, dst, OCL_OP_MAG))
177 
178     Mat X = src1.getMat(), Y = src2.getMat();
179     dst.create(X.dims, X.size, X.type());
180     Mat Mag = dst.getMat();
181 
182     const Mat* arrays[] = {&X, &Y, &Mag, 0};
183     uchar* ptrs[3];
184     NAryMatIterator it(arrays, ptrs);
185     int len = (int)it.size*cn;
186 
187     for( size_t i = 0; i < it.nplanes; i++, ++it )
188     {
189         if( depth == CV_32F )
190         {
191             const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
192             float *mag = (float*)ptrs[2];
193             hal::magnitude( x, y, mag, len );
194         }
195         else
196         {
197             const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
198             double *mag = (double*)ptrs[2];
199             hal::magnitude( x, y, mag, len );
200         }
201     }
202 }
203 
204 
phase(InputArray src1,InputArray src2,OutputArray dst,bool angleInDegrees)205 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
206 {
207     int type = src1.type(), depth = src1.depth(), cn = src1.channels();
208     CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
209 
210     CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
211                ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
212 
213     Mat X = src1.getMat(), Y = src2.getMat();
214     dst.create( X.dims, X.size, type );
215     Mat Angle = dst.getMat();
216 
217     const Mat* arrays[] = {&X, &Y, &Angle, 0};
218     uchar* ptrs[3];
219     NAryMatIterator it(arrays, ptrs);
220     cv::AutoBuffer<float> _buf;
221     float* buf[2] = {0, 0};
222     int j, k, total = (int)(it.size*cn), blockSize = total;
223     size_t esz1 = X.elemSize1();
224 
225     if( depth == CV_64F )
226     {
227         blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
228         _buf.allocate(blockSize*2);
229         buf[0] = _buf;
230         buf[1] = buf[0] + blockSize;
231     }
232 
233     for( size_t i = 0; i < it.nplanes; i++, ++it )
234     {
235         for( j = 0; j < total; j += blockSize )
236         {
237             int len = std::min(total - j, blockSize);
238             if( depth == CV_32F )
239             {
240                 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
241                 float *angle = (float*)ptrs[2];
242                 hal::fastAtan2( y, x, angle, len, angleInDegrees );
243             }
244             else
245             {
246                 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
247                 double *angle = (double*)ptrs[2];
248                 k = 0;
249 
250 #if CV_SSE2
251                 if (USE_SSE2)
252                 {
253                     for ( ; k <= len - 4; k += 4)
254                     {
255                         __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
256                                                       _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
257                         __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
258                                                       _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
259 
260                         _mm_storeu_ps(buf[0] + k, v_dst0);
261                         _mm_storeu_ps(buf[1] + k, v_dst1);
262                     }
263                 }
264 #endif
265 
266                 for( ; k < len; k++ )
267                 {
268                     buf[0][k] = (float)x[k];
269                     buf[1][k] = (float)y[k];
270                 }
271 
272                 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
273                 k = 0;
274 
275 #if CV_SSE2
276                 if (USE_SSE2)
277                 {
278                     for ( ; k <= len - 4; k += 4)
279                     {
280                         __m128 v_src = _mm_loadu_ps(buf[0] + k);
281                         _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
282                         _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
283                     }
284                 }
285 #endif
286 
287                 for( ; k < len; k++ )
288                     angle[k] = buf[0][k];
289             }
290             ptrs[0] += len*esz1;
291             ptrs[1] += len*esz1;
292             ptrs[2] += len*esz1;
293         }
294     }
295 }
296 
297 #ifdef HAVE_OPENCL
298 
ocl_cartToPolar(InputArray _src1,InputArray _src2,OutputArray _dst1,OutputArray _dst2,bool angleInDegrees)299 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
300                              OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
301 {
302     const ocl::Device & d = ocl::Device::getDefault();
303     int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
304             rowsPerWI = d.isIntel() ? 4 : 1;
305     bool doubleSupport = d.doubleFPConfig() > 0;
306 
307     if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
308            (depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
309          (depth == CV_64F && !doubleSupport) )
310         return false;
311 
312     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
313                   format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
314                          ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
315                          depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
316                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
317     if (k.empty())
318         return false;
319 
320     UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
321     Size size = src1.size();
322     CV_Assert( size == src2.size() );
323 
324     _dst1.create(size, type);
325     _dst2.create(size, type);
326     UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
327 
328     k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
329            ocl::KernelArg::ReadOnlyNoSize(src2),
330            ocl::KernelArg::WriteOnly(dst1, cn),
331            ocl::KernelArg::WriteOnlyNoSize(dst2));
332 
333     size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
334     return k.run(2, globalsize, NULL, false);
335 }
336 
337 #endif
338 
cartToPolar(InputArray src1,InputArray src2,OutputArray dst1,OutputArray dst2,bool angleInDegrees)339 void cartToPolar( InputArray src1, InputArray src2,
340                   OutputArray dst1, OutputArray dst2, bool angleInDegrees )
341 {
342     CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
343             ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
344 
345     Mat X = src1.getMat(), Y = src2.getMat();
346     int type = X.type(), depth = X.depth(), cn = X.channels();
347     CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
348     dst1.create( X.dims, X.size, type );
349     dst2.create( X.dims, X.size, type );
350     Mat Mag = dst1.getMat(), Angle = dst2.getMat();
351 
352     const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
353     uchar* ptrs[4];
354     NAryMatIterator it(arrays, ptrs);
355     cv::AutoBuffer<float> _buf;
356     float* buf[2] = {0, 0};
357     int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
358     size_t esz1 = X.elemSize1();
359 
360     if( depth == CV_64F )
361     {
362         _buf.allocate(blockSize*2);
363         buf[0] = _buf;
364         buf[1] = buf[0] + blockSize;
365     }
366 
367     for( size_t i = 0; i < it.nplanes; i++, ++it )
368     {
369         for( j = 0; j < total; j += blockSize )
370         {
371             int len = std::min(total - j, blockSize);
372             if( depth == CV_32F )
373             {
374                 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
375                 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
376                 hal::magnitude( x, y, mag, len );
377                 hal::fastAtan2( y, x, angle, len, angleInDegrees );
378             }
379             else
380             {
381                 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
382                 double *angle = (double*)ptrs[3];
383 
384                 hal::magnitude(x, y, (double*)ptrs[2], len);
385                 k = 0;
386 
387 #if CV_SSE2
388                 if (USE_SSE2)
389                 {
390                     for ( ; k <= len - 4; k += 4)
391                     {
392                         __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
393                                                       _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
394                         __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
395                                                       _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
396 
397                         _mm_storeu_ps(buf[0] + k, v_dst0);
398                         _mm_storeu_ps(buf[1] + k, v_dst1);
399                     }
400                 }
401 #endif
402 
403                 for( ; k < len; k++ )
404                 {
405                     buf[0][k] = (float)x[k];
406                     buf[1][k] = (float)y[k];
407                 }
408 
409                 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
410                 k = 0;
411 
412 #if CV_SSE2
413                 if (USE_SSE2)
414                 {
415                     for ( ; k <= len - 4; k += 4)
416                     {
417                         __m128 v_src = _mm_loadu_ps(buf[0] + k);
418                         _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
419                         _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
420                     }
421                 }
422 #endif
423 
424                 for( ; k < len; k++ )
425                     angle[k] = buf[0][k];
426             }
427             ptrs[0] += len*esz1;
428             ptrs[1] += len*esz1;
429             ptrs[2] += len*esz1;
430             ptrs[3] += len*esz1;
431         }
432     }
433 }
434 
435 
436 /****************************************************************************************\
437 *                                  Polar -> Cartezian                                    *
438 \****************************************************************************************/
439 
SinCos_32f(const float * angle,float * sinval,float * cosval,int len,int angle_in_degrees)440 static void SinCos_32f( const float *angle, float *sinval, float* cosval,
441                         int len, int angle_in_degrees )
442 {
443     const int N = 64;
444 
445     static const double sin_table[] =
446     {
447      0.00000000000000000000,     0.09801714032956060400,
448      0.19509032201612825000,     0.29028467725446233000,
449      0.38268343236508978000,     0.47139673682599764000,
450      0.55557023301960218000,     0.63439328416364549000,
451      0.70710678118654746000,     0.77301045336273699000,
452      0.83146961230254524000,     0.88192126434835494000,
453      0.92387953251128674000,     0.95694033573220894000,
454      0.98078528040323043000,     0.99518472667219682000,
455      1.00000000000000000000,     0.99518472667219693000,
456      0.98078528040323043000,     0.95694033573220894000,
457      0.92387953251128674000,     0.88192126434835505000,
458      0.83146961230254546000,     0.77301045336273710000,
459      0.70710678118654757000,     0.63439328416364549000,
460      0.55557023301960218000,     0.47139673682599786000,
461      0.38268343236508989000,     0.29028467725446239000,
462      0.19509032201612861000,     0.09801714032956082600,
463      0.00000000000000012246,    -0.09801714032956059000,
464     -0.19509032201612836000,    -0.29028467725446211000,
465     -0.38268343236508967000,    -0.47139673682599764000,
466     -0.55557023301960196000,    -0.63439328416364527000,
467     -0.70710678118654746000,    -0.77301045336273666000,
468     -0.83146961230254524000,    -0.88192126434835494000,
469     -0.92387953251128652000,    -0.95694033573220882000,
470     -0.98078528040323032000,    -0.99518472667219693000,
471     -1.00000000000000000000,    -0.99518472667219693000,
472     -0.98078528040323043000,    -0.95694033573220894000,
473     -0.92387953251128663000,    -0.88192126434835505000,
474     -0.83146961230254546000,    -0.77301045336273688000,
475     -0.70710678118654768000,    -0.63439328416364593000,
476     -0.55557023301960218000,    -0.47139673682599792000,
477     -0.38268343236509039000,    -0.29028467725446250000,
478     -0.19509032201612872000,    -0.09801714032956050600,
479     };
480 
481     static const double k2 = (2*CV_PI)/N;
482 
483     static const double sin_a0 = -0.166630293345647*k2*k2*k2;
484     static const double sin_a2 = k2;
485 
486     static const double cos_a0 = -0.499818138450326*k2*k2;
487     /*static const double cos_a2 =  1;*/
488 
489     double k1;
490     int i = 0;
491 
492     if( !angle_in_degrees )
493         k1 = N/(2*CV_PI);
494     else
495         k1 = N/360.;
496 
497 #if CV_AVX2
498     if (USE_AVX2)
499     {
500         __m128d v_k1 = _mm_set1_pd(k1);
501         __m128d v_1 = _mm_set1_pd(1);
502         __m128i v_N1 = _mm_set1_epi32(N - 1);
503         __m128i v_N4 = _mm_set1_epi32(N >> 2);
504         __m128d v_sin_a0 = _mm_set1_pd(sin_a0);
505         __m128d v_sin_a2 = _mm_set1_pd(sin_a2);
506         __m128d v_cos_a0 = _mm_set1_pd(cos_a0);
507 
508         for ( ; i <= len - 4; i += 4)
509         {
510             __m128 v_angle = _mm_loadu_ps(angle + i);
511 
512             // 0-1
513             __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1);
514             __m128i v_it = _mm_cvtpd_epi32(v_t);
515             v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
516 
517             __m128i v_sin_idx = _mm_and_si128(v_it, v_N1);
518             __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
519 
520             __m128d v_t2 = _mm_mul_pd(v_t, v_t);
521             __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
522             __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
523 
524             __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
525             __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
526 
527             __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
528                                              _mm_mul_pd(v_cos_a, v_sin_b));
529             __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
530                                              _mm_mul_pd(v_sin_a, v_sin_b));
531 
532             // 2-3
533             v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1);
534             v_it = _mm_cvtpd_epi32(v_t);
535             v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
536 
537             v_sin_idx = _mm_and_si128(v_it, v_N1);
538             v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
539 
540             v_t2 = _mm_mul_pd(v_t, v_t);
541             v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
542             v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
543 
544             v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
545             v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
546 
547             __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
548                                              _mm_mul_pd(v_cos_a, v_sin_b));
549             __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
550                                              _mm_mul_pd(v_sin_a, v_sin_b));
551 
552             _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0),
553                                                     _mm_cvtpd_ps(v_sin_val_1)));
554             _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0),
555                                                     _mm_cvtpd_ps(v_cos_val_1)));
556         }
557     }
558 #endif
559 
560     for( ; i < len; i++ )
561     {
562         double t = angle[i]*k1;
563         int it = cvRound(t);
564         t -= it;
565         int sin_idx = it & (N - 1);
566         int cos_idx = (N/4 - sin_idx) & (N - 1);
567 
568         double sin_b = (sin_a0*t*t + sin_a2)*t;
569         double cos_b = cos_a0*t*t + 1;
570 
571         double sin_a = sin_table[sin_idx];
572         double cos_a = sin_table[cos_idx];
573 
574         double sin_val = sin_a*cos_b + cos_a*sin_b;
575         double cos_val = cos_a*cos_b - sin_a*sin_b;
576 
577         sinval[i] = (float)sin_val;
578         cosval[i] = (float)cos_val;
579     }
580 }
581 
582 
583 #ifdef HAVE_OPENCL
584 
ocl_polarToCart(InputArray _mag,InputArray _angle,OutputArray _dst1,OutputArray _dst2,bool angleInDegrees)585 static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
586                              OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
587 {
588     const ocl::Device & d = ocl::Device::getDefault();
589     int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
590             rowsPerWI = d.isIntel() ? 4 : 1;
591     bool doubleSupport = d.doubleFPConfig() > 0;
592 
593     if ( !doubleSupport && depth == CV_64F )
594         return false;
595 
596     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
597                   format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
598                          ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
599                          depth, angleInDegrees ? "AD" : "AR",
600                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
601     if (k.empty())
602         return false;
603 
604     UMat mag = _mag.getUMat(), angle = _angle.getUMat();
605     Size size = angle.size();
606     CV_Assert(mag.size() == size);
607 
608     _dst1.create(size, type);
609     _dst2.create(size, type);
610     UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
611 
612     k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
613            ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
614 
615     size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
616     return k.run(2, globalsize, NULL, false);
617 }
618 
619 #endif
620 
polarToCart(InputArray src1,InputArray src2,OutputArray dst1,OutputArray dst2,bool angleInDegrees)621 void polarToCart( InputArray src1, InputArray src2,
622                   OutputArray dst1, OutputArray dst2, bool angleInDegrees )
623 {
624     int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
625     CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
626 
627     CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
628                ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
629 
630     Mat Mag = src1.getMat(), Angle = src2.getMat();
631     CV_Assert( Mag.empty() || Angle.size == Mag.size);
632     dst1.create( Angle.dims, Angle.size, type );
633     dst2.create( Angle.dims, Angle.size, type );
634     Mat X = dst1.getMat(), Y = dst2.getMat();
635 
636 #if defined(HAVE_IPP)
637     CV_IPP_CHECK()
638     {
639         if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
640         {
641             typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
642                                                              void * pDstRe, void * pDstIm, int len);
643             ippsPolarToCart ippFunc =
644             depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
645             depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
646             CV_Assert(ippFunc != 0);
647 
648             IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
649             if (status >= 0)
650             {
651                 CV_IMPL_ADD(CV_IMPL_IPP);
652                 return;
653             }
654             setIppErrorStatus();
655         }
656     }
657 #endif
658 
659     const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
660     uchar* ptrs[4];
661     NAryMatIterator it(arrays, ptrs);
662     cv::AutoBuffer<float> _buf;
663     float* buf[2] = {0, 0};
664     int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
665     size_t esz1 = Angle.elemSize1();
666 
667     if( depth == CV_64F )
668     {
669         _buf.allocate(blockSize*2);
670         buf[0] = _buf;
671         buf[1] = buf[0] + blockSize;
672     }
673 
674     for( size_t i = 0; i < it.nplanes; i++, ++it )
675     {
676         for( j = 0; j < total; j += blockSize )
677         {
678             int len = std::min(total - j, blockSize);
679             if( depth == CV_32F )
680             {
681                 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
682                 float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
683 
684                 SinCos_32f( angle, y, x, len, angleInDegrees );
685                 if( mag )
686                 {
687                     k = 0;
688 
689                     #if CV_NEON
690                     for( ; k <= len - 4; k += 4 )
691                     {
692                         float32x4_t v_m = vld1q_f32(mag + k);
693                         vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
694                         vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
695                     }
696                     #elif CV_SSE2
697                     if (USE_SSE2)
698                     {
699                         for( ; k <= len - 4; k += 4 )
700                         {
701                             __m128 v_m = _mm_loadu_ps(mag + k);
702                             _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m));
703                             _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m));
704                         }
705                     }
706                     #endif
707 
708                     for( ; k < len; k++ )
709                     {
710                         float m = mag[k];
711                         x[k] *= m; y[k] *= m;
712                     }
713                 }
714             }
715             else
716             {
717                 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
718                 double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
719 
720                 for( k = 0; k < len; k++ )
721                     buf[0][k] = (float)angle[k];
722 
723                 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
724                 if( mag )
725                     for( k = 0; k < len; k++ )
726                     {
727                         double m = mag[k];
728                         x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
729                     }
730                 else
731                 {
732                     std::memcpy(x, buf[0], sizeof(float) * len);
733                     std::memcpy(y, buf[1], sizeof(float) * len);
734                 }
735             }
736 
737             if( ptrs[0] )
738                 ptrs[0] += len*esz1;
739             ptrs[1] += len*esz1;
740             ptrs[2] += len*esz1;
741             ptrs[3] += len*esz1;
742         }
743     }
744 }
745 
746 /****************************************************************************************\
747 *                                          E X P                                         *
748 \****************************************************************************************/
749 
750 #ifdef HAVE_IPP
Exp_32f_ipp(const float * x,float * y,int n)751 static void Exp_32f_ipp(const float *x, float *y, int n)
752 {
753     CV_IPP_CHECK()
754     {
755         if (0 <= ippsExp_32f_A21(x, y, n))
756         {
757             CV_IMPL_ADD(CV_IMPL_IPP);
758             return;
759         }
760         setIppErrorStatus();
761     }
762     hal::exp(x, y, n);
763 }
764 
Exp_64f_ipp(const double * x,double * y,int n)765 static void Exp_64f_ipp(const double *x, double *y, int n)
766 {
767     CV_IPP_CHECK()
768     {
769         if (0 <= ippsExp_64f_A50(x, y, n))
770         {
771             CV_IMPL_ADD(CV_IMPL_IPP);
772             return;
773         }
774         setIppErrorStatus();
775     }
776     hal::exp(x, y, n);
777 }
778 
779 #define Exp_32f Exp_32f_ipp
780 #define Exp_64f Exp_64f_ipp
781 #else
782 #define Exp_32f hal::exp
783 #define Exp_64f hal::exp
784 #endif
785 
786 
exp(InputArray _src,OutputArray _dst)787 void exp( InputArray _src, OutputArray _dst )
788 {
789     int type = _src.type(), depth = _src.depth(), cn = _src.channels();
790     CV_Assert( depth == CV_32F || depth == CV_64F );
791 
792     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
793                ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
794 
795     Mat src = _src.getMat();
796     _dst.create( src.dims, src.size, type );
797     Mat dst = _dst.getMat();
798 
799     const Mat* arrays[] = {&src, &dst, 0};
800     uchar* ptrs[2];
801     NAryMatIterator it(arrays, ptrs);
802     int len = (int)(it.size*cn);
803 
804     for( size_t i = 0; i < it.nplanes; i++, ++it )
805     {
806         if( depth == CV_32F )
807             Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
808         else
809             Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
810     }
811 }
812 
813 
814 /****************************************************************************************\
815 *                                          L O G                                         *
816 \****************************************************************************************/
817 
818 #ifdef HAVE_IPP
Log_32f_ipp(const float * x,float * y,int n)819 static void Log_32f_ipp(const float *x, float *y, int n)
820 {
821     CV_IPP_CHECK()
822     {
823         if (0 <= ippsLn_32f_A21(x, y, n))
824         {
825             CV_IMPL_ADD(CV_IMPL_IPP);
826             return;
827         }
828         setIppErrorStatus();
829     }
830     hal::log(x, y, n);
831 }
832 
Log_64f_ipp(const double * x,double * y,int n)833 static void Log_64f_ipp(const double *x, double *y, int n)
834 {
835     CV_IPP_CHECK()
836     {
837         if (0 <= ippsLn_64f_A50(x, y, n))
838         {
839             CV_IMPL_ADD(CV_IMPL_IPP);
840             return;
841         }
842         setIppErrorStatus();
843     }
844     hal::log(x, y, n);
845 }
846 
847 #define Log_32f Log_32f_ipp
848 #define Log_64f Log_64f_ipp
849 #else
850 #define Log_32f hal::log
851 #define Log_64f hal::log
852 #endif
853 
log(InputArray _src,OutputArray _dst)854 void log( InputArray _src, OutputArray _dst )
855 {
856     int type = _src.type(), depth = _src.depth(), cn = _src.channels();
857     CV_Assert( depth == CV_32F || depth == CV_64F );
858 
859     CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
860                 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
861 
862     Mat src = _src.getMat();
863     _dst.create( src.dims, src.size, type );
864     Mat dst = _dst.getMat();
865 
866     const Mat* arrays[] = {&src, &dst, 0};
867     uchar* ptrs[2];
868     NAryMatIterator it(arrays, ptrs);
869     int len = (int)(it.size*cn);
870 
871     for( size_t i = 0; i < it.nplanes; i++, ++it )
872     {
873         if( depth == CV_32F )
874             Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
875         else
876             Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
877     }
878 }
879 
880 /****************************************************************************************\
881 *                                    P O W E R                                           *
882 \****************************************************************************************/
883 
884 template <typename T, typename WT>
885 struct iPow_SIMD
886 {
operator ()cv::iPow_SIMD887     int operator() ( const T *, T *, int, int)
888     {
889         return 0;
890     }
891 };
892 
893 #if CV_SIMD128
894 
895 template <>
896 struct iPow_SIMD<uchar, int>
897 {
operator ()cv::iPow_SIMD898     int operator() ( const uchar * src, uchar * dst, int len, int power )
899     {
900         int i = 0;
901         v_uint32x4 v_1 = v_setall_u32(1u);
902 
903         for ( ; i <= len - 8; i += 8)
904         {
905             v_uint32x4 v_a1 = v_1, v_a2 = v_1;
906             v_uint16x8 v = v_load_expand(src + i);
907             v_uint32x4 v_b1, v_b2;
908             v_expand(v, v_b1, v_b2);
909             int p = power;
910 
911             while( p > 1 )
912             {
913                 if (p & 1)
914                 {
915                     v_a1 *= v_b1;
916                     v_a2 *= v_b2;
917                 }
918                 v_b1 *= v_b1;
919                 v_b2 *= v_b2;
920                 p >>= 1;
921             }
922 
923             v_a1 *= v_b1;
924             v_a2 *= v_b2;
925 
926             v = v_pack(v_a1, v_a2);
927             v_pack_store(dst + i, v);
928         }
929 
930         return i;
931     }
932 };
933 
934 template <>
935 struct iPow_SIMD<schar, int>
936 {
operator ()cv::iPow_SIMD937     int operator() ( const schar * src, schar * dst, int len, int power)
938     {
939         int i = 0;
940         v_int32x4 v_1 = v_setall_s32(1);
941 
942         for ( ; i <= len - 8; i += 8)
943         {
944             v_int32x4 v_a1 = v_1, v_a2 = v_1;
945             v_int16x8 v = v_load_expand(src + i);
946             v_int32x4 v_b1, v_b2;
947             v_expand(v, v_b1, v_b2);
948             int p = power;
949 
950             while( p > 1 )
951             {
952                 if (p & 1)
953                 {
954                     v_a1 *= v_b1;
955                     v_a2 *= v_b2;
956                 }
957                 v_b1 *= v_b1;
958                 v_b2 *= v_b2;
959                 p >>= 1;
960             }
961 
962             v_a1 *= v_b1;
963             v_a2 *= v_b2;
964 
965             v = v_pack(v_a1, v_a2);
966             v_pack_store(dst + i, v);
967         }
968 
969         return i;
970     }
971 };
972 
973 template <>
974 struct iPow_SIMD<ushort, int>
975 {
operator ()cv::iPow_SIMD976     int operator() ( const ushort * src, ushort * dst, int len, int power)
977     {
978         int i = 0;
979         v_uint32x4 v_1 = v_setall_u32(1u);
980 
981         for ( ; i <= len - 8; i += 8)
982         {
983             v_uint32x4 v_a1 = v_1, v_a2 = v_1;
984             v_uint16x8 v = v_load(src + i);
985             v_uint32x4 v_b1, v_b2;
986             v_expand(v, v_b1, v_b2);
987             int p = power;
988 
989             while( p > 1 )
990             {
991                 if (p & 1)
992                 {
993                     v_a1 *= v_b1;
994                     v_a2 *= v_b2;
995                 }
996                 v_b1 *= v_b1;
997                 v_b2 *= v_b2;
998                 p >>= 1;
999             }
1000 
1001             v_a1 *= v_b1;
1002             v_a2 *= v_b2;
1003 
1004             v = v_pack(v_a1, v_a2);
1005             v_store(dst + i, v);
1006         }
1007 
1008         return i;
1009     }
1010 };
1011 
1012 template <>
1013 struct iPow_SIMD<short, int>
1014 {
operator ()cv::iPow_SIMD1015     int operator() ( const short * src, short * dst, int len, int power)
1016     {
1017         int i = 0;
1018         v_int32x4 v_1 = v_setall_s32(1);
1019 
1020         for ( ; i <= len - 8; i += 8)
1021         {
1022             v_int32x4 v_a1 = v_1, v_a2 = v_1;
1023             v_int16x8 v = v_load(src + i);
1024             v_int32x4 v_b1, v_b2;
1025             v_expand(v, v_b1, v_b2);
1026             int p = power;
1027 
1028             while( p > 1 )
1029             {
1030                 if (p & 1)
1031                 {
1032                     v_a1 *= v_b1;
1033                     v_a2 *= v_b2;
1034                 }
1035                 v_b1 *= v_b1;
1036                 v_b2 *= v_b2;
1037                 p >>= 1;
1038             }
1039 
1040             v_a1 *= v_b1;
1041             v_a2 *= v_b2;
1042 
1043             v = v_pack(v_a1, v_a2);
1044             v_store(dst + i, v);
1045         }
1046 
1047         return i;
1048     }
1049 };
1050 
1051 template <>
1052 struct iPow_SIMD<int, int>
1053 {
operator ()cv::iPow_SIMD1054     int operator() ( const int * src, int * dst, int len, int power)
1055     {
1056         int i = 0;
1057         v_int32x4 v_1 = v_setall_s32(1);
1058 
1059         for ( ; i <= len - 8; i += 8)
1060         {
1061             v_int32x4 v_a1 = v_1, v_a2 = v_1;
1062             v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
1063             int p = power;
1064 
1065             while( p > 1 )
1066             {
1067                 if (p & 1)
1068                 {
1069                     v_a1 *= v_b1;
1070                     v_a2 *= v_b2;
1071                 }
1072                 v_b1 *= v_b1;
1073                 v_b2 *= v_b2;
1074                 p >>= 1;
1075             }
1076 
1077             v_a1 *= v_b1;
1078             v_a2 *= v_b2;
1079 
1080             v_store(dst + i, v_a1);
1081             v_store(dst + i + 4, v_a2);
1082         }
1083 
1084         return i;
1085     }
1086 };
1087 
1088 template <>
1089 struct iPow_SIMD<float, float>
1090 {
operator ()cv::iPow_SIMD1091     int operator() ( const float * src, float * dst, int len, int power)
1092     {
1093         int i = 0;
1094         v_float32x4 v_1 = v_setall_f32(1.f);
1095 
1096         for ( ; i <= len - 8; i += 8)
1097         {
1098             v_float32x4 v_a1 = v_1, v_a2 = v_1;
1099             v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
1100             int p = std::abs(power);
1101             if( power < 0 )
1102             {
1103                 v_b1 = v_1 / v_b1;
1104                 v_b2 = v_1 / v_b2;
1105             }
1106 
1107             while( p > 1 )
1108             {
1109                 if (p & 1)
1110                 {
1111                     v_a1 *= v_b1;
1112                     v_a2 *= v_b2;
1113                 }
1114                 v_b1 *= v_b1;
1115                 v_b2 *= v_b2;
1116                 p >>= 1;
1117             }
1118 
1119             v_a1 *= v_b1;
1120             v_a2 *= v_b2;
1121 
1122             v_store(dst + i, v_a1);
1123             v_store(dst + i + 4, v_a2);
1124         }
1125 
1126         return i;
1127     }
1128 };
1129 
1130 #if CV_SIMD128_64F
1131 template <>
1132 struct iPow_SIMD<double, double>
1133 {
operator ()cv::iPow_SIMD1134     int operator() ( const double * src, double * dst, int len, int power)
1135     {
1136         int i = 0;
1137         v_float64x2 v_1 = v_setall_f64(1.);
1138 
1139         for ( ; i <= len - 4; i += 4)
1140         {
1141             v_float64x2 v_a1 = v_1, v_a2 = v_1;
1142             v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2);
1143             int p = std::abs(power);
1144             if( power < 0 )
1145             {
1146                 v_b1 = v_1 / v_b1;
1147                 v_b2 = v_1 / v_b2;
1148             }
1149 
1150             while( p > 1 )
1151             {
1152                 if (p & 1)
1153                 {
1154                     v_a1 *= v_b1;
1155                     v_a2 *= v_b2;
1156                 }
1157                 v_b1 *= v_b1;
1158                 v_b2 *= v_b2;
1159                 p >>= 1;
1160             }
1161 
1162             v_a1 *= v_b1;
1163             v_a2 *= v_b2;
1164 
1165             v_store(dst + i, v_a1);
1166             v_store(dst + i + 2, v_a2);
1167         }
1168 
1169         return i;
1170     }
1171 };
1172 #endif
1173 
1174 #endif
1175 
1176 template<typename T, typename WT>
1177 static void
iPow_i(const T * src,T * dst,int len,int power)1178 iPow_i( const T* src, T* dst, int len, int power )
1179 {
1180     if( power < 0 )
1181     {
1182         T tab[5] =
1183         {
1184             power == -1 ? saturate_cast<T>(-1) : 0, (power & 1) ? -1 : 1,
1185             std::numeric_limits<T>::max(), 1, power == -1 ? 1 : 0
1186         };
1187         for( int i = 0; i < len; i++ )
1188         {
1189             T val = src[i];
1190             dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0;
1191         }
1192     }
1193     else
1194     {
1195         iPow_SIMD<T, WT> vop;
1196         int i = vop(src, dst, len, power);
1197 
1198         for( ; i < len; i++ )
1199         {
1200             WT a = 1, b = src[i];
1201             int p = power;
1202             while( p > 1 )
1203             {
1204                 if( p & 1 )
1205                     a *= b;
1206                 b *= b;
1207                 p >>= 1;
1208             }
1209 
1210             a *= b;
1211             dst[i] = saturate_cast<T>(a);
1212         }
1213     }
1214 }
1215 
1216 template<typename T>
1217 static void
iPow_f(const T * src,T * dst,int len,int power0)1218 iPow_f( const T* src, T* dst, int len, int power0 )
1219 {
1220     iPow_SIMD<T, T> vop;
1221     int i = vop(src, dst, len, power0);
1222     int power = std::abs(power0);
1223 
1224     for( ; i < len; i++ )
1225     {
1226         T a = 1, b = src[i];
1227         int p = power;
1228         if( power0 < 0 )
1229             b = 1/b;
1230 
1231         while( p > 1 )
1232         {
1233             if( p & 1 )
1234                 a *= b;
1235             b *= b;
1236             p >>= 1;
1237         }
1238 
1239         a *= b;
1240         dst[i] = a;
1241     }
1242 }
1243 
iPow8u(const uchar * src,uchar * dst,int len,int power)1244 static void iPow8u(const uchar* src, uchar* dst, int len, int power)
1245 {
1246     iPow_i<uchar, unsigned>(src, dst, len, power);
1247 }
1248 
iPow8s(const schar * src,schar * dst,int len,int power)1249 static void iPow8s(const schar* src, schar* dst, int len, int power)
1250 {
1251     iPow_i<schar, int>(src, dst, len, power);
1252 }
1253 
iPow16u(const ushort * src,ushort * dst,int len,int power)1254 static void iPow16u(const ushort* src, ushort* dst, int len, int power)
1255 {
1256     iPow_i<ushort, unsigned>(src, dst, len, power);
1257 }
1258 
iPow16s(const short * src,short * dst,int len,int power)1259 static void iPow16s(const short* src, short* dst, int len, int power)
1260 {
1261     iPow_i<short, int>(src, dst, len, power);
1262 }
1263 
iPow32s(const int * src,int * dst,int len,int power)1264 static void iPow32s(const int* src, int* dst, int len, int power)
1265 {
1266     iPow_i<int, int>(src, dst, len, power);
1267 }
1268 
iPow32f(const float * src,float * dst,int len,int power)1269 static void iPow32f(const float* src, float* dst, int len, int power)
1270 {
1271     iPow_f<float>(src, dst, len, power);
1272 }
1273 
iPow64f(const double * src,double * dst,int len,int power)1274 static void iPow64f(const double* src, double* dst, int len, int power)
1275 {
1276     iPow_f<double>(src, dst, len, power);
1277 }
1278 
1279 
1280 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
1281 
1282 static IPowFunc ipowTab[] =
1283 {
1284     (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
1285     (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
1286 };
1287 
1288 #ifdef HAVE_OPENCL
1289 
ocl_pow(InputArray _src,double power,OutputArray _dst,bool is_ipower,int ipower)1290 static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
1291                     bool is_ipower, int ipower)
1292 {
1293     const ocl::Device & d = ocl::Device::getDefault();
1294     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1295             rowsPerWI = d.isIntel() ? 4 : 1;
1296     bool doubleSupport = d.doubleFPConfig() > 0;
1297 
1298     _dst.createSameSize(_src, type);
1299     if (is_ipower)
1300     {
1301         if (ipower == 0)
1302         {
1303             _dst.setTo(Scalar::all(1));
1304             return true;
1305         }
1306         if (ipower == 1)
1307         {
1308             _src.copyTo(_dst);
1309             return true;
1310         }
1311         if( ipower < 0 )
1312         {
1313             if( depth == CV_32F || depth == CV_64F )
1314                 is_ipower = false;
1315             else
1316                 return false;
1317         }
1318     }
1319 
1320     if (depth == CV_64F && !doubleSupport)
1321         return false;
1322 
1323     bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
1324     const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
1325 
1326     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
1327                   format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
1328                          ocl::typeToStr(depth), depth,  rowsPerWI, op,
1329                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
1330     if (k.empty())
1331         return false;
1332 
1333     UMat src = _src.getUMat();
1334     _dst.create(src.size(), type);
1335     UMat dst = _dst.getUMat();
1336 
1337     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
1338             dstarg = ocl::KernelArg::WriteOnly(dst, cn);
1339 
1340     if (issqrt)
1341         k.args(srcarg, dstarg);
1342     else if (is_ipower)
1343         k.args(srcarg, dstarg, ipower);
1344     else
1345     {
1346         if (depth == CV_32F)
1347             k.args(srcarg, dstarg, (float)power);
1348         else
1349             k.args(srcarg, dstarg, power);
1350     }
1351 
1352     size_t globalsize[2] = { dst.cols *  cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
1353     return k.run(2, globalsize, NULL, false);
1354 }
1355 
1356 #endif
1357 
InvSqrt_32f(const float * src,float * dst,int n)1358 static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt(src, dst, n); }
InvSqrt_64f(const double * src,double * dst,int n)1359 static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt(src, dst, n); }
Sqrt_32f(const float * src,float * dst,int n)1360 static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt(src, dst, n); }
Sqrt_64f(const double * src,double * dst,int n)1361 static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt(src, dst, n); }
1362 
pow(InputArray _src,double power,OutputArray _dst)1363 void pow( InputArray _src, double power, OutputArray _dst )
1364 {
1365     int type = _src.type(), depth = CV_MAT_DEPTH(type),
1366             cn = CV_MAT_CN(type), ipower = cvRound(power);
1367     bool is_ipower = fabs(ipower - power) < DBL_EPSILON,
1368             useOpenCL = _dst.isUMat() && _src.dims() <= 2;
1369 
1370     if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
1371     {
1372         switch( ipower )
1373         {
1374         case 0:
1375             _dst.createSameSize(_src, type);
1376             _dst.setTo(Scalar::all(1));
1377             return;
1378         case 1:
1379             _src.copyTo(_dst);
1380             return;
1381         case 2:
1382             multiply(_src, _src, _dst);
1383             return;
1384         }
1385     }
1386     else
1387         CV_Assert( depth == CV_32F || depth == CV_64F );
1388 
1389     CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower))
1390 
1391     Mat src = _src.getMat();
1392     _dst.create( src.dims, src.size, type );
1393     Mat dst = _dst.getMat();
1394 
1395     const Mat* arrays[] = {&src, &dst, 0};
1396     uchar* ptrs[2];
1397     NAryMatIterator it(arrays, ptrs);
1398     int len = (int)(it.size*cn);
1399 
1400     if( is_ipower )
1401     {
1402         IPowFunc func = ipowTab[depth];
1403         CV_Assert( func != 0 );
1404 
1405         for( size_t i = 0; i < it.nplanes; i++, ++it )
1406             func( ptrs[0], ptrs[1], len, ipower );
1407     }
1408     else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
1409     {
1410         MathFunc func = power < 0 ?
1411             (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
1412             (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
1413 
1414         for( size_t i = 0; i < it.nplanes; i++, ++it )
1415             func( ptrs[0], ptrs[1], len );
1416     }
1417     else
1418     {
1419         int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
1420         size_t esz1 = src.elemSize1();
1421         AutoBuffer<uchar> buf;
1422         Cv32suf inf32, nan32;
1423         Cv64suf inf64, nan64;
1424         float* fbuf = 0;
1425         double* dbuf = 0;
1426         inf32.i = 0x7f800000;
1427         nan32.i = 0x7fffffff;
1428         inf64.i = CV_BIG_INT(0x7FF0000000000000);
1429         nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
1430 
1431         if( src.ptr() == dst.ptr() )
1432         {
1433             buf.allocate(blockSize*esz1);
1434             fbuf = (float*)(uchar*)buf;
1435             dbuf = (double*)(uchar*)buf;
1436         }
1437 
1438         for( size_t i = 0; i < it.nplanes; i++, ++it )
1439         {
1440             for( j = 0; j < len; j += blockSize )
1441             {
1442                 int bsz = std::min(len - j, blockSize);
1443 
1444             #if defined(HAVE_IPP)
1445                 CV_IPP_CHECK()
1446                 {
1447                     IppStatus status = depth == CV_32F ?
1448                     ippsPowx_32f_A21((const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) :
1449                     ippsPowx_64f_A50((const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz);
1450 
1451                     if (status >= 0)
1452                     {
1453                         CV_IMPL_ADD(CV_IMPL_IPP);
1454                         ptrs[0] += bsz*esz1;
1455                         ptrs[1] += bsz*esz1;
1456                         continue;
1457                     }
1458                     setIppErrorStatus();
1459                 }
1460             #endif
1461 
1462                 if( depth == CV_32F )
1463                 {
1464                     float* x0 = (float*)ptrs[0];
1465                     float* x = fbuf ? fbuf : x0;
1466                     float* y = (float*)ptrs[1];
1467 
1468                     if( x != x0 )
1469                         memcpy(x, x0, bsz*esz1);
1470 
1471                     Log_32f(x, y, bsz);
1472                     for( k = 0; k < bsz; k++ )
1473                         y[k] = (float)(y[k]*power);
1474                     Exp_32f(y, y, bsz);
1475                     for( k = 0; k < bsz; k++ )
1476                     {
1477                         if( x0[k] <= 0 )
1478                         {
1479                             if( x0[k] == 0.f )
1480                             {
1481                                 if( power < 0 )
1482                                     y[k] = inf32.f;
1483                             }
1484                             else
1485                                 y[k] = nan32.f;
1486                         }
1487                     }
1488                 }
1489                 else
1490                 {
1491                     double* x0 = (double*)ptrs[0];
1492                     double* x = dbuf ? dbuf : x0;
1493                     double* y = (double*)ptrs[1];
1494 
1495                     if( x != x0 )
1496                         memcpy(x, x0, bsz*esz1);
1497 
1498                     Log_64f(x, y, bsz);
1499                     for( k = 0; k < bsz; k++ )
1500                         y[k] *= power;
1501                     Exp_64f(y, y, bsz);
1502 
1503                     for( k = 0; k < bsz; k++ )
1504                     {
1505                         if( x0[k] <= 0 )
1506                         {
1507                             if( x0[k] == 0. )
1508                             {
1509                                 if( power < 0 )
1510                                     y[k] = inf64.f;
1511                             }
1512                             else
1513                                 y[k] = nan64.f;
1514                         }
1515                     }
1516                 }
1517                 ptrs[0] += bsz*esz1;
1518                 ptrs[1] += bsz*esz1;
1519             }
1520         }
1521     }
1522 }
1523 
sqrt(InputArray a,OutputArray b)1524 void sqrt(InputArray a, OutputArray b)
1525 {
1526     cv::pow(a, 0.5, b);
1527 }
1528 
1529 /************************** CheckArray for NaN's, Inf's *********************************/
1530 
1531 template<int cv_mat_type> struct mat_type_assotiations{};
1532 
1533 template<> struct mat_type_assotiations<CV_8U>
1534 {
1535     typedef unsigned char type;
1536     static const type min_allowable = 0x0;
1537     static const type max_allowable = 0xFF;
1538 };
1539 
1540 template<> struct mat_type_assotiations<CV_8S>
1541 {
1542     typedef signed char type;
1543     static const type min_allowable = SCHAR_MIN;
1544     static const type max_allowable = SCHAR_MAX;
1545 };
1546 
1547 template<> struct mat_type_assotiations<CV_16U>
1548 {
1549     typedef unsigned short type;
1550     static const type min_allowable = 0x0;
1551     static const type max_allowable = USHRT_MAX;
1552 };
1553 template<> struct mat_type_assotiations<CV_16S>
1554 {
1555     typedef signed short type;
1556     static const type min_allowable = SHRT_MIN;
1557     static const type max_allowable = SHRT_MAX;
1558 };
1559 
1560 template<> struct mat_type_assotiations<CV_32S>
1561 {
1562     typedef int type;
1563     static const type min_allowable = (-INT_MAX - 1);
1564     static const type max_allowable = INT_MAX;
1565 };
1566 
1567 // inclusive maxVal !!!
1568 template<int depth>
checkIntegerRange(cv::Mat src,Point & bad_pt,int minVal,int maxVal,double & bad_value)1569 bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value)
1570 {
1571     typedef mat_type_assotiations<depth> type_ass;
1572 
1573     if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
1574     {
1575         return true;
1576     }
1577     else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
1578     {
1579         bad_pt = cv::Point(0,0);
1580         return false;
1581     }
1582     cv::Mat as_one_channel = src.reshape(1,0);
1583 
1584     for (int j = 0; j < as_one_channel.rows; ++j)
1585         for (int i = 0; i < as_one_channel.cols; ++i)
1586         {
1587             if (as_one_channel.at<typename type_ass::type>(j ,i) < minVal || as_one_channel.at<typename type_ass::type>(j ,i) > maxVal)
1588             {
1589                 bad_pt.y = j ;
1590                 bad_pt.x = i % src.channels();
1591                 bad_value = as_one_channel.at<typename type_ass::type>(j ,i);
1592                 return false;
1593             }
1594         }
1595     bad_value = 0.0;
1596 
1597     return true;
1598 }
1599 
1600 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value);
1601 
1602 check_range_function check_range_functions[] =
1603 {
1604     &checkIntegerRange<CV_8U>,
1605     &checkIntegerRange<CV_8S>,
1606     &checkIntegerRange<CV_16U>,
1607     &checkIntegerRange<CV_16S>,
1608     &checkIntegerRange<CV_32S>
1609 };
1610 
checkRange(InputArray _src,bool quiet,Point * pt,double minVal,double maxVal)1611 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
1612 {
1613     Mat src = _src.getMat();
1614 
1615     if ( src.dims > 2 )
1616     {
1617         const Mat* arrays[] = {&src, 0};
1618         Mat planes[1];
1619         NAryMatIterator it(arrays, planes);
1620 
1621         for ( size_t i = 0; i < it.nplanes; i++, ++it )
1622         {
1623             if (!checkRange( it.planes[0], quiet, pt, minVal, maxVal ))
1624             {
1625                 // todo: set index properly
1626                 return false;
1627             }
1628         }
1629         return true;
1630     }
1631 
1632     int depth = src.depth();
1633     Point badPt(-1, -1);
1634     double badValue = 0;
1635 
1636     if (depth < CV_32F)
1637     {
1638         // see "Bug #1784"
1639         int minVali = minVal<(-INT_MAX - 1) ? (-INT_MAX - 1) : cvFloor(minVal);
1640         int maxVali = maxVal>INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; // checkIntegerRang() use inclusive maxVal
1641 
1642         (check_range_functions[depth])(src, badPt, minVali, maxVali, badValue);
1643     }
1644     else
1645     {
1646         int i, loc = 0;
1647         Size size = getContinuousSize( src, src.channels() );
1648 
1649         if( depth == CV_32F )
1650         {
1651             Cv32suf a, b;
1652             int ia, ib;
1653             const int* isrc = src.ptr<int>();
1654             size_t step = src.step/sizeof(isrc[0]);
1655 
1656             a.f = (float)std::max(minVal, (double)-FLT_MAX);
1657             b.f = (float)std::min(maxVal, (double)FLT_MAX);
1658 
1659             ia = CV_TOGGLE_FLT(a.i);
1660             ib = CV_TOGGLE_FLT(b.i);
1661 
1662             for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
1663             {
1664                 for( i = 0; i < size.width; i++ )
1665                 {
1666                     int val = isrc[i];
1667                     val = CV_TOGGLE_FLT(val);
1668 
1669                     if( val < ia || val >= ib )
1670                     {
1671                         badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
1672                         badValue = ((const float*)isrc)[i];
1673                         break;
1674                     }
1675                 }
1676             }
1677         }
1678         else
1679         {
1680             Cv64suf a, b;
1681             int64 ia, ib;
1682             const int64* isrc = src.ptr<int64>();
1683             size_t step = src.step/sizeof(isrc[0]);
1684 
1685             a.f = minVal;
1686             b.f = maxVal;
1687 
1688             ia = CV_TOGGLE_DBL(a.i);
1689             ib = CV_TOGGLE_DBL(b.i);
1690 
1691             for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
1692             {
1693                 for( i = 0; i < size.width; i++ )
1694                 {
1695                     int64 val = isrc[i];
1696                     val = CV_TOGGLE_DBL(val);
1697 
1698                     if( val < ia || val >= ib )
1699                     {
1700                         badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
1701                         badValue = ((const double*)isrc)[i];
1702                         break;
1703                     }
1704                 }
1705             }
1706         }
1707     }
1708 
1709     if( badPt.x >= 0 )
1710     {
1711         if( pt )
1712             *pt = badPt;
1713         if( !quiet )
1714             CV_Error_( CV_StsOutOfRange,
1715             ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
1716     }
1717     return badPt.x < 0;
1718 }
1719 
1720 #ifdef HAVE_OPENCL
1721 
ocl_patchNaNs(InputOutputArray _a,float value)1722 static bool ocl_patchNaNs( InputOutputArray _a, float value )
1723 {
1724     int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
1725     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
1726                      format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d",
1727                             rowsPerWI));
1728     if (k.empty())
1729         return false;
1730 
1731     UMat a = _a.getUMat();
1732     int cn = a.channels();
1733 
1734     k.args(ocl::KernelArg::ReadOnlyNoSize(a),
1735            ocl::KernelArg::WriteOnly(a, cn), (float)value);
1736 
1737     size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
1738     return k.run(2, globalsize, NULL, false);
1739 }
1740 
1741 #endif
1742 
patchNaNs(InputOutputArray _a,double _val)1743 void patchNaNs( InputOutputArray _a, double _val )
1744 {
1745     CV_Assert( _a.depth() == CV_32F );
1746 
1747     CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
1748                ocl_patchNaNs(_a, (float)_val))
1749 
1750     Mat a = _a.getMat();
1751     const Mat* arrays[] = {&a, 0};
1752     int* ptrs[1];
1753     NAryMatIterator it(arrays, (uchar**)ptrs);
1754     size_t len = it.size*a.channels();
1755     Cv32suf val;
1756     val.f = (float)_val;
1757 
1758 #if CV_SSE2
1759     __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
1760     __m128i v_val = _mm_set1_epi32(val.i);
1761 #elif CV_NEON
1762     int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
1763         v_val = vdupq_n_s32(val.i);
1764 #endif
1765 
1766     for( size_t i = 0; i < it.nplanes; i++, ++it )
1767     {
1768         int* tptr = ptrs[0];
1769         size_t j = 0;
1770 
1771 #if CV_SSE2
1772         if (USE_SSE2)
1773         {
1774             for ( ; j + 4 <= len; j += 4)
1775             {
1776                 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
1777                 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
1778                 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
1779                 _mm_storeu_si128((__m128i *)(tptr + j), v_res);
1780             }
1781         }
1782 #elif CV_NEON
1783         for ( ; j + 4 <= len; j += 4)
1784         {
1785             int32x4_t v_src = vld1q_s32(tptr + j);
1786             uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
1787             int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
1788             vst1q_s32(tptr + j, v_dst);
1789         }
1790 #endif
1791 
1792         for( ; j < len; j++ )
1793             if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
1794                 tptr[j] = val.i;
1795     }
1796 }
1797 
1798 }
1799 
cvCbrt(float value)1800 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
cvFastArctan(float y,float x)1801 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
1802 
1803 CV_IMPL void
cvCartToPolar(const CvArr * xarr,const CvArr * yarr,CvArr * magarr,CvArr * anglearr,int angle_in_degrees)1804 cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
1805                CvArr* magarr, CvArr* anglearr,
1806                int angle_in_degrees )
1807 {
1808     cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
1809     if( magarr )
1810     {
1811         Mag = cv::cvarrToMat(magarr);
1812         CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
1813     }
1814     if( anglearr )
1815     {
1816         Angle = cv::cvarrToMat(anglearr);
1817         CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
1818     }
1819     if( magarr )
1820     {
1821         if( anglearr )
1822             cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
1823         else
1824             cv::magnitude( X, Y, Mag );
1825     }
1826     else
1827         cv::phase( X, Y, Angle, angle_in_degrees != 0 );
1828 }
1829 
1830 CV_IMPL void
cvPolarToCart(const CvArr * magarr,const CvArr * anglearr,CvArr * xarr,CvArr * yarr,int angle_in_degrees)1831 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
1832                CvArr* xarr, CvArr* yarr, int angle_in_degrees )
1833 {
1834     cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
1835     if( magarr )
1836     {
1837         Mag = cv::cvarrToMat(magarr);
1838         CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
1839     }
1840     if( xarr )
1841     {
1842         X = cv::cvarrToMat(xarr);
1843         CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
1844     }
1845     if( yarr )
1846     {
1847         Y = cv::cvarrToMat(yarr);
1848         CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
1849     }
1850 
1851     cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
1852 }
1853 
cvExp(const CvArr * srcarr,CvArr * dstarr)1854 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
1855 {
1856     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1857     CV_Assert( src.type() == dst.type() && src.size == dst.size );
1858     cv::exp( src, dst );
1859 }
1860 
cvLog(const CvArr * srcarr,CvArr * dstarr)1861 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
1862 {
1863     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1864     CV_Assert( src.type() == dst.type() && src.size == dst.size );
1865     cv::log( src, dst );
1866 }
1867 
cvPow(const CvArr * srcarr,CvArr * dstarr,double power)1868 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
1869 {
1870     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1871     CV_Assert( src.type() == dst.type() && src.size == dst.size );
1872     cv::pow( src, power, dst );
1873 }
1874 
cvCheckArr(const CvArr * arr,int flags,double minVal,double maxVal)1875 CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
1876                         double minVal, double maxVal )
1877 {
1878     if( (flags & CV_CHECK_RANGE) == 0 )
1879         minVal = -DBL_MAX, maxVal = DBL_MAX;
1880     return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
1881 }
1882 
1883 
1884 /*
1885   Finds real roots of cubic, quadratic or linear equation.
1886   The original code has been taken from Ken Turkowski web page
1887   (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV.
1888   Here is the copyright notice.
1889 
1890   -----------------------------------------------------------------------
1891   Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
1892 
1893     All rights reserved.
1894 
1895     Warranty Information
1896       Even though I have reviewed this software, I make no warranty
1897       or representation, either express or implied, with respect to this
1898       software, its quality, accuracy, merchantability, or fitness for a
1899       particular purpose.  As a result, this software is provided "as is,"
1900       and you, its user, are assuming the entire risk as to its quality
1901       and accuracy.
1902 
1903     This code may be used and freely distributed as long as it includes
1904     this copyright notice and the above warranty information.
1905   -----------------------------------------------------------------------
1906 */
1907 
solveCubic(InputArray _coeffs,OutputArray _roots)1908 int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
1909 {
1910     const int n0 = 3;
1911     Mat coeffs = _coeffs.getMat();
1912     int ctype = coeffs.type();
1913 
1914     CV_Assert( ctype == CV_32F || ctype == CV_64F );
1915     CV_Assert( (coeffs.size() == Size(n0, 1) ||
1916                 coeffs.size() == Size(n0+1, 1) ||
1917                 coeffs.size() == Size(1, n0) ||
1918                 coeffs.size() == Size(1, n0+1)) );
1919 
1920     _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
1921     Mat roots = _roots.getMat();
1922 
1923     int i = -1, n = 0;
1924     double a0 = 1., a1, a2, a3;
1925     double x0 = 0., x1 = 0., x2 = 0.;
1926     int ncoeffs = coeffs.rows + coeffs.cols - 1;
1927 
1928     if( ctype == CV_32FC1 )
1929     {
1930         if( ncoeffs == 4 )
1931             a0 = coeffs.at<float>(++i);
1932 
1933         a1 = coeffs.at<float>(i+1);
1934         a2 = coeffs.at<float>(i+2);
1935         a3 = coeffs.at<float>(i+3);
1936     }
1937     else
1938     {
1939         if( ncoeffs == 4 )
1940             a0 = coeffs.at<double>(++i);
1941 
1942         a1 = coeffs.at<double>(i+1);
1943         a2 = coeffs.at<double>(i+2);
1944         a3 = coeffs.at<double>(i+3);
1945     }
1946 
1947     if( a0 == 0 )
1948     {
1949         if( a1 == 0 )
1950         {
1951             if( a2 == 0 )
1952                 n = a3 == 0 ? -1 : 0;
1953             else
1954             {
1955                 // linear equation
1956                 x0 = -a3/a2;
1957                 n = 1;
1958             }
1959         }
1960         else
1961         {
1962             // quadratic equation
1963             double d = a2*a2 - 4*a1*a3;
1964             if( d >= 0 )
1965             {
1966                 d = std::sqrt(d);
1967                 double q1 = (-a2 + d) * 0.5;
1968                 double q2 = (a2 + d) * -0.5;
1969                 if( fabs(q1) > fabs(q2) )
1970                 {
1971                     x0 = q1 / a1;
1972                     x1 = a3 / q1;
1973                 }
1974                 else
1975                 {
1976                     x0 = q2 / a1;
1977                     x1 = a3 / q2;
1978                 }
1979                 n = d > 0 ? 2 : 1;
1980             }
1981         }
1982     }
1983     else
1984     {
1985         a0 = 1./a0;
1986         a1 *= a0;
1987         a2 *= a0;
1988         a3 *= a0;
1989 
1990         double Q = (a1 * a1 - 3 * a2) * (1./9);
1991         double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
1992         double Qcubed = Q * Q * Q;
1993         double d = Qcubed - R * R;
1994 
1995         if( d >= 0 )
1996         {
1997             double theta = acos(R / std::sqrt(Qcubed));
1998             double sqrtQ = std::sqrt(Q);
1999             double t0 = -2 * sqrtQ;
2000             double t1 = theta * (1./3);
2001             double t2 = a1 * (1./3);
2002             x0 = t0 * cos(t1) - t2;
2003             x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
2004             x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
2005             n = 3;
2006         }
2007         else
2008         {
2009             double e;
2010             d = std::sqrt(-d);
2011             e = std::pow(d + fabs(R), 0.333333333333);
2012             if( R > 0 )
2013                 e = -e;
2014             x0 = (e + Q / e) - a1 * (1./3);
2015             n = 1;
2016         }
2017     }
2018 
2019     if( roots.type() == CV_32FC1 )
2020     {
2021         roots.at<float>(0) = (float)x0;
2022         roots.at<float>(1) = (float)x1;
2023         roots.at<float>(2) = (float)x2;
2024     }
2025     else
2026     {
2027         roots.at<double>(0) = x0;
2028         roots.at<double>(1) = x1;
2029         roots.at<double>(2) = x2;
2030     }
2031 
2032     return n;
2033 }
2034 
2035 /* finds complex roots of a polynomial using Durand-Kerner method:
2036    http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */
solvePoly(InputArray _coeffs0,OutputArray _roots0,int maxIters)2037 double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
2038 {
2039     typedef Complex<double> C;
2040 
2041     double maxDiff = 0;
2042     int iter, i, j;
2043     Mat coeffs0 = _coeffs0.getMat();
2044     int ctype = _coeffs0.type();
2045     int cdepth = CV_MAT_DEPTH(ctype);
2046 
2047     CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
2048     CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
2049 
2050     int n0 = coeffs0.cols + coeffs0.rows - 2, n = n0;
2051 
2052     _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
2053     Mat roots0 = _roots0.getMat();
2054 
2055     AutoBuffer<C> buf(n*2+2);
2056     C *coeffs = buf, *roots = coeffs + n + 1;
2057     Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
2058     coeffs0.convertTo(coeffs1, coeffs1.type());
2059     if( coeffs0.channels() == 1 )
2060     {
2061         const double* rcoeffs = (const double*)roots;
2062         for( i = 0; i <= n; i++ )
2063             coeffs[i] = C(rcoeffs[i], 0);
2064     }
2065 
2066     for( ; n > 1; n-- )
2067     {
2068         if( std::abs(coeffs[n].re) + std::abs(coeffs[n].im) > DBL_EPSILON )
2069             break;
2070     }
2071 
2072     C p(1, 0), r(1, 1);
2073 
2074     for( i = 0; i < n; i++ )
2075     {
2076         roots[i] = p;
2077         p = p * r;
2078     }
2079 
2080     maxIters = maxIters <= 0 ? 1000 : maxIters;
2081     for( iter = 0; iter < maxIters; iter++ )
2082     {
2083         maxDiff = 0;
2084         for( i = 0; i < n; i++ )
2085         {
2086             p = roots[i];
2087             C num = coeffs[n], denom = coeffs[n];
2088             for( j = 0; j < n; j++ )
2089             {
2090                 num = num*p + coeffs[n-j-1];
2091                 if( j != i ) denom = denom * (p - roots[j]);
2092             }
2093             num /= denom;
2094             roots[i] = p - num;
2095             maxDiff = std::max(maxDiff, cv::abs(num));
2096         }
2097         if( maxDiff <= 0 )
2098             break;
2099     }
2100 
2101     if( coeffs0.channels() == 1 )
2102     {
2103         const double verySmallEps = 1e-100;
2104         for( i = 0; i < n; i++ )
2105             if( fabs(roots[i].im) < verySmallEps )
2106                 roots[i].im = 0;
2107     }
2108 
2109     for( ; n < n0; n++ )
2110         roots[n+1] = roots[n];
2111 
2112     Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
2113     return maxDiff;
2114 }
2115 
2116 
2117 CV_IMPL int
cvSolveCubic(const CvMat * coeffs,CvMat * roots)2118 cvSolveCubic( const CvMat* coeffs, CvMat* roots )
2119 {
2120     cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
2121     int nroots = cv::solveCubic(_coeffs, _roots);
2122     CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
2123     return nroots;
2124 }
2125 
2126 
cvSolvePoly(const CvMat * a,CvMat * r,int maxiter,int)2127 void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
2128 {
2129     cv::Mat _a = cv::cvarrToMat(a);
2130     cv::Mat _r = cv::cvarrToMat(r);
2131     cv::Mat _r0 = _r;
2132     cv::solvePoly(_a, _r, maxiter);
2133     CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
2134 }
2135 
2136 
2137 /* End of file. */
2138