• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 #include "vtransform.hpp"
42 
43 #include <cmath>
44 
45 namespace CAROTENE_NS {
46 
47 #ifdef CAROTENE_NEON
48 
49 namespace {
50 
51 struct Magnitude
52 {
53     typedef s16 type;
54 
operator ()CAROTENE_NS::__anon831bdaa60111::Magnitude55     void operator() (const int16x8_t & v_src0, const int16x8_t & v_src1,
56               int16x8_t & v_dst) const
57     {
58         int16x4_t v_src0_p = vget_low_s16(v_src0), v_src1_p = vget_low_s16(v_src1);
59         float32x4_t v_sqr0 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),
60                                        vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));
61         v_src0_p = vget_high_s16(v_src0);
62         v_src1_p = vget_high_s16(v_src1);
63         float32x4_t v_sqr1 = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0_p, v_src0_p)),
64                                        vcvtq_f32_s32(vmull_s16(v_src1_p, v_src1_p)));
65 
66         int32x4_t v_sqrt0 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr0));
67         int32x4_t v_sqrt1 = vcvtq_s32_f32(internal::vsqrtq_f32(v_sqr1));
68 
69         v_dst = vcombine_s16(vqmovn_s32(v_sqrt0), vqmovn_s32(v_sqrt1));
70     }
71 
operator ()CAROTENE_NS::__anon831bdaa60111::Magnitude72     void operator() (const int16x4_t & v_src0, const int16x4_t & v_src1,
73               int16x4_t & v_dst) const
74     {
75         float32x4_t v_tmp = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_src0, v_src0)),
76                                       vcvtq_f32_s32(vmull_s16(v_src1, v_src1)));
77         int32x4_t v_sqrt = vcvtq_s32_f32(internal::vsqrtq_f32(v_tmp));
78         v_dst = vqmovn_s32(v_sqrt);
79     }
80 
operator ()CAROTENE_NS::__anon831bdaa60111::Magnitude81     void operator() (const short * src0, const short * src1, short * dst) const
82     {
83         f32 src0val = (f32)src0[0], src1val = (f32)src1[0];
84         dst[0] = internal::saturate_cast<s16>((s32)sqrtf(src0val * src0val + src1val * src1val));
85     }
86 };
87 
88 struct MagnitudeF32
89 {
90     typedef f32 type;
91 
operator ()CAROTENE_NS::__anon831bdaa60111::MagnitudeF3292     void operator() (const float32x4_t & v_src0, const float32x4_t & v_src1,
93               float32x4_t & v_dst) const
94     {
95         v_dst = internal::vsqrtq_f32(vaddq_f32(vmulq_f32(v_src0, v_src0), vmulq_f32(v_src1, v_src1)));
96     }
97 
operator ()CAROTENE_NS::__anon831bdaa60111::MagnitudeF3298     void operator() (const float32x2_t & v_src0, const float32x2_t & v_src1,
99               float32x2_t & v_dst) const
100     {
101         v_dst = internal::vsqrt_f32(vadd_f32(vmul_f32(v_src0, v_src0), vmul_f32(v_src1, v_src1)));
102     }
103 
operator ()CAROTENE_NS::__anon831bdaa60111::MagnitudeF32104     void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
105     {
106         dst[0] = sqrtf(src0[0] * src0[0] + src1[0] * src1[0]);
107     }
108 };
109 
110 } // namespace
111 
112 #endif
113 
magnitude(const Size2D & size,const s16 * src0Base,ptrdiff_t src0Stride,const s16 * src1Base,ptrdiff_t src1Stride,s16 * dstBase,ptrdiff_t dstStride)114 void magnitude(const Size2D &size,
115                const s16 * src0Base, ptrdiff_t src0Stride,
116                const s16 * src1Base, ptrdiff_t src1Stride,
117                s16 * dstBase, ptrdiff_t dstStride)
118 {
119     internal::assertSupportedConfiguration();
120 #ifdef CAROTENE_NEON
121     internal::vtransform(size,
122                          src0Base, src0Stride,
123                          src1Base, src1Stride,
124                          dstBase, dstStride,
125                          Magnitude());
126 #else
127     (void)size;
128     (void)src0Base;
129     (void)src0Stride;
130     (void)src1Base;
131     (void)src1Stride;
132     (void)dstBase;
133     (void)dstStride;
134 #endif
135 }
136 
magnitude(const Size2D & size,const f32 * src0Base,ptrdiff_t src0Stride,const f32 * src1Base,ptrdiff_t src1Stride,f32 * dstBase,ptrdiff_t dstStride)137 void magnitude(const Size2D &size,
138                const f32 * src0Base, ptrdiff_t src0Stride,
139                const f32 * src1Base, ptrdiff_t src1Stride,
140                f32 * dstBase, ptrdiff_t dstStride)
141 {
142     internal::assertSupportedConfiguration();
143 #ifdef CAROTENE_NEON
144     internal::vtransform(size,
145                          src0Base, src0Stride,
146                          src1Base, src1Stride,
147                          dstBase, dstStride,
148                          MagnitudeF32());
149 #else
150     (void)size;
151     (void)src0Base;
152     (void)src0Stride;
153     (void)src1Base;
154     (void)src1Stride;
155     (void)dstBase;
156     (void)dstStride;
157 #endif
158 }
159 
160 } // namespace CAROTENE_NS
161