• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 #include "vtransform.hpp"
42 
43 namespace CAROTENE_NS {
44 
45 #ifdef CAROTENE_NEON
46 
47 namespace {
48 
vnst(u8 * dst,uint8x16_t v1,uint8x16_t v2)49 inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
vnst(u8 * dst,uint16x8_t v1,uint16x8_t v2)50 inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
vnst(u8 * dst,uint32x4_t v1,uint32x4_t v2)51 inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
52 
53 template <typename Op, int elsize> struct vtail
54 {
compareCAROTENE_NS::__anon819574a80111::vtail55     static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
56                                u8 * dst, const Op & op,
57                                size_t &x, size_t width)
58     {
59         //do nothing since there couldn't be enough data
60         (void)src0;
61         (void)src1;
62         (void)dst;
63         (void)op;
64         (void)x;
65         (void)width;
66     }
67 };
68 template <typename Op> struct vtail<Op, 2>
69 {
compareCAROTENE_NS::__anon819574a80111::vtail70     static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
71                                u8 * dst, const Op & op,
72                                size_t &x, size_t width)
73     {
74         typedef typename Op::type type;
75         typedef typename internal::VecTraits<type>::vec128 vec128;
76         typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
77         //There no more than 15 elements in the tail, so we could handle 8 element vector only once
78         if( x + 8 < width)
79         {
80             vec128  v_src0, v_src1;
81             uvec128 v_dst;
82 
83             v_src0 = internal::vld1q(src0 + x);
84             v_src1 = internal::vld1q(src1 + x);
85             op(v_src0, v_src1, v_dst);
86             internal::vst1(dst + x, internal::vmovn(v_dst));
87             x+=8;
88         }
89     }
90 };
91 template <typename Op> struct vtail<Op, 1>
92 {
compareCAROTENE_NS::__anon819574a80111::vtail93     static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
94                                u8 * dst, const Op & op,
95                                size_t &x, size_t width)
96     {
97         typedef typename Op::type type;
98         typedef typename internal::VecTraits<type>::vec128 vec128;
99         typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
100         typedef typename internal::VecTraits<type>::vec64 vec64;
101         typedef typename internal::VecTraits<type>::unsign::vec64 uvec64;
102         //There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
103         if( x + 16 < width)
104         {
105             vec128  v_src0, v_src1;
106             uvec128 v_dst;
107 
108             v_src0 = internal::vld1q(src0 + x);
109             v_src1 = internal::vld1q(src1 + x);
110             op(v_src0, v_src1, v_dst);
111             internal::vst1q(dst + x, v_dst);
112             x+=16;
113         }
114         if( x + 8 < width)
115         {
116             vec64  v_src0, v_src1;
117             uvec64 v_dst;
118 
119             v_src0 = internal::vld1(src0 + x);
120             v_src1 = internal::vld1(src1 + x);
121             op(v_src0, v_src1, v_dst);
122             internal::vst1(dst + x, v_dst);
123             x+=8;
124         }
125     }
126 };
127 
128 template <typename Op>
vcompare(Size2D size,const typename Op::type * src0Base,ptrdiff_t src0Stride,const typename Op::type * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride,const Op & op)129 void vcompare(Size2D size,
130               const typename Op::type * src0Base, ptrdiff_t src0Stride,
131               const typename Op::type * src1Base, ptrdiff_t src1Stride,
132               u8 * dstBase, ptrdiff_t dstStride, const Op & op)
133 {
134     typedef typename Op::type type;
135     typedef typename internal::VecTraits<type>::vec128 vec128;
136     typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
137 
138     if (src0Stride == src1Stride && src0Stride == dstStride &&
139         src0Stride == (ptrdiff_t)(size.width * sizeof(type)))
140     {
141         size.width *= size.height;
142         size.height = 1;
143     }
144 
145     const u32 step_base = 32 / sizeof(type);
146     size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
147 
148     for (size_t y = 0; y < size.height; ++y)
149     {
150         const type * src0 = internal::getRowPtr(src0Base, src0Stride, y);
151         const type * src1 = internal::getRowPtr(src1Base, src1Stride, y);
152         u8 * dst = internal::getRowPtr(dstBase, dstStride, y);
153         size_t x = 0;
154 
155         for( ; x < roiw_base; x += step_base )
156         {
157             internal::prefetch(src0 + x);
158             internal::prefetch(src1 + x);
159 
160             vec128 v_src00 = internal::vld1q(src0 + x), v_src01 = internal::vld1q(src0 + x + 16 / sizeof(type));
161             vec128 v_src10 = internal::vld1q(src1 + x), v_src11 = internal::vld1q(src1 + x + 16 / sizeof(type));
162             uvec128 v_dst0;
163             uvec128 v_dst1;
164 
165             op(v_src00, v_src10, v_dst0);
166             op(v_src01, v_src11, v_dst1);
167 
168             vnst(dst + x, v_dst0, v_dst1);
169         }
170 
171         vtail<Op, sizeof(type)>::compare(src0, src1, dst, op, x, size.width);
172 
173         for (; x < size.width; ++x)
174         {
175             op(src0 + x, src1 + x, dst + x);
176         }
177     }
178 }
179 
180 template<typename T>
181 struct OpCmpEQ
182 {
183     typedef T type;
184 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpEQ185     void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
186               typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
187     {
188         v_dst = internal::vceqq(v_src0, v_src1);
189     }
190 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpEQ191     void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
192               typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
193     {
194         v_dst = internal::vceq(v_src0, v_src1);
195     }
196 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpEQ197     void operator() (const T * src0, const T * src1, u8 * dst) const
198     {
199         dst[0] = src0[0] == src1[0] ? 255 : 0;
200     }
201 };
202 
203 template<typename T>
204 struct OpCmpNE
205 {
206     typedef T type;
207 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpNE208     void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
209               typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
210     {
211         v_dst = internal::vmvnq(internal::vceqq(v_src0, v_src1));
212     }
213 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpNE214     void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
215               typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
216     {
217         v_dst = internal::vmvn(internal::vceq(v_src0, v_src1));
218     }
219 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpNE220     void operator() (const T * src0, const T * src1, u8 * dst) const
221     {
222         dst[0] = src0[0] == src1[0] ? 0 : 255;
223     }
224 };
225 
226 template<typename T>
227 struct OpCmpGT
228 {
229     typedef T type;
230 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGT231     void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
232               typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
233     {
234         v_dst = internal::vcgtq(v_src0, v_src1);
235     }
236 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGT237     void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
238               typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
239     {
240         v_dst = internal::vcgt(v_src0, v_src1);
241     }
242 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGT243     void operator() (const T * src0, const T * src1, u8 * dst) const
244     {
245         dst[0] = src0[0] > src1[0] ? 255 : 0;
246     }
247 };
248 
249 template<typename T>
250 struct OpCmpGE
251 {
252     typedef T type;
253 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGE254     void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
255               typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
256     {
257         v_dst = internal::vcgeq(v_src0, v_src1);
258     }
259 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGE260     void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
261               typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
262     {
263         v_dst = internal::vcge(v_src0, v_src1);
264     }
265 
operator ()CAROTENE_NS::__anon819574a80111::OpCmpGE266     void operator() (const T * src0, const T * src1, u8 * dst) const
267     {
268         dst[0] = src0[0] >= src1[0] ? 255 : 0;
269     }
270 };
271 
272 }
273 
// NEON build: defines the public function cmp<op>() for element type `type`.
// The generated function checks the configuration and then forwards all arguments to
// vcompare() together with a default-constructed OpCmp<op><type> functor.
#define IMPL_CMPOP(op, type)                                              \
void cmp##op(const Size2D &size,                                          \
             const type * src0Base, ptrdiff_t src0Stride,                 \
             const type * src1Base, ptrdiff_t src1Stride,                 \
             u8 *dstBase, ptrdiff_t dstStride)                            \
{                                                                         \
    internal::assertSupportedConfiguration();                             \
    const OpCmp##op<type> compareOp = OpCmp##op<type>();                  \
    vcompare(size, src0Base, src0Stride, src1Base, src1Stride,            \
             dstBase, dstStride, compareOp);                              \
}
287 
288 #else
289 
// Build without CAROTENE_NEON: defines cmp<op>() with the same signature as the NEON
// variant. The body only calls assertSupportedConfiguration() and otherwise ignores
// its arguments (the casts to void silence unused-parameter warnings).
#define IMPL_CMPOP(op, type)                                  \
void cmp##op(const Size2D &size,                              \
             const type * src0Base, ptrdiff_t src0Stride,     \
             const type * src1Base, ptrdiff_t src1Stride,     \
             u8 *dstBase, ptrdiff_t dstStride)                \
{                                                             \
    internal::assertSupportedConfiguration();                 \
    (void)size;                                               \
    (void)src0Base; (void)src0Stride;                         \
    (void)src1Base; (void)src1Stride;                         \
    (void)dstBase;  (void)dstStride;                          \
}
305 
306 #endif
307 
308 IMPL_CMPOP(EQ, u8)
309 IMPL_CMPOP(EQ, s8)
310 IMPL_CMPOP(EQ, u16)
311 IMPL_CMPOP(EQ, s16)
312 IMPL_CMPOP(EQ, u32)
313 IMPL_CMPOP(EQ, s32)
314 IMPL_CMPOP(EQ, f32)
315 
316 IMPL_CMPOP(NE, u8)
317 IMPL_CMPOP(NE, s8)
318 IMPL_CMPOP(NE, u16)
319 IMPL_CMPOP(NE, s16)
320 IMPL_CMPOP(NE, u32)
321 IMPL_CMPOP(NE, s32)
322 IMPL_CMPOP(NE, f32)
323 
324 IMPL_CMPOP(GT, u8)
325 IMPL_CMPOP(GT, s8)
326 IMPL_CMPOP(GT, u16)
327 IMPL_CMPOP(GT, s16)
328 IMPL_CMPOP(GT, u32)
329 IMPL_CMPOP(GT, s32)
330 IMPL_CMPOP(GT, f32)
331 
332 IMPL_CMPOP(GE, u8)
333 IMPL_CMPOP(GE, s8)
334 IMPL_CMPOP(GE, u16)
335 IMPL_CMPOP(GE, s16)
336 IMPL_CMPOP(GE, u32)
337 IMPL_CMPOP(GE, s32)
338 IMPL_CMPOP(GE, f32)
339 
340 } // namespace CAROTENE_NS
341