• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 
42 #include "vtransform.hpp"
43 
44 namespace CAROTENE_NS {
45 
46 #ifdef CAROTENE_NEON
47 
48 namespace {
49 
vnst(u8 * dst,uint8x16_t v1,uint8x16_t v2)50 inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
vnst(u8 * dst,uint16x8_t v1,uint16x8_t v2)51 inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
vnst(u8 * dst,uint32x4_t v1,uint32x4_t v2)52 inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
53 
54 template <typename T, int elsize> struct vtail
55 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail56     static inline void inRange(const T *, const T *, const T *,
57                                u8 *, size_t &, size_t)
58     {
59         //do nothing since there couldn't be enough data
60     }
61 };
62 template <typename T> struct vtail<T, 2>
63 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail64     static inline void inRange(const T * src, const T * rng1, const T * rng2,
65                                u8 * dst, size_t &x, size_t width)
66     {
67         typedef typename internal::VecTraits<T>::vec128 vec128;
68         typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
69         //There no more than 15 elements in the tail, so we could handle 8 element vector only once
70         if( x + 8 < width)
71         {
72              vec128  vs = internal::vld1q( src + x);
73              vec128 vr1 = internal::vld1q(rng1 + x);
74              vec128 vr2 = internal::vld1q(rng2 + x);
75             uvec128  vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
76             internal::vst1(dst + x, internal::vmovn(vd));
77             x+=8;
78         }
79     }
80 };
81 template <typename T> struct vtail<T, 1>
82 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail83     static inline void inRange(const T * src, const T * rng1, const T * rng2,
84                                u8 * dst, size_t &x, size_t width)
85     {
86         typedef typename internal::VecTraits<T>::vec128 vec128;
87         typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
88         typedef typename internal::VecTraits<T>::vec64 vec64;
89         typedef typename internal::VecTraits<T>::unsign::vec64 uvec64;
90         //There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
91         if( x + 16 < width)
92         {
93              vec128  vs = internal::vld1q( src + x);
94              vec128 vr1 = internal::vld1q(rng1 + x);
95              vec128 vr2 = internal::vld1q(rng2 + x);
96             uvec128  vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
97             internal::vst1q(dst + x, vd);
98             x+=16;
99         }
100         if( x + 8 < width)
101         {
102              vec64  vs = internal::vld1( src + x);
103              vec64 vr1 = internal::vld1(rng1 + x);
104              vec64 vr2 = internal::vld1(rng2 + x);
105             uvec64  vd = internal::vand(internal::vcge(vs, vr1), internal::vcge(vr2, vs));
106             internal::vst1(dst + x, vd);
107             x+=8;
108         }
109     }
110 };
111 
112 template <typename T>
inRangeCheck(const Size2D & _size,const T * srcBase,ptrdiff_t srcStride,const T * rng1Base,ptrdiff_t rng1Stride,const T * rng2Base,ptrdiff_t rng2Stride,u8 * dstBase,ptrdiff_t dstStride)113 inline void inRangeCheck(const Size2D &_size,
114                          const T * srcBase, ptrdiff_t srcStride,
115                          const T * rng1Base, ptrdiff_t rng1Stride,
116                          const T * rng2Base, ptrdiff_t rng2Stride,
117                          u8 * dstBase, ptrdiff_t dstStride)
118 {
119     typedef typename internal::VecTraits<T>::vec128 vec128;
120     typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
121 
122     Size2D size(_size);
123     if (srcStride == dstStride &&
124         srcStride == rng1Stride &&
125         srcStride == rng2Stride &&
126         srcStride == (ptrdiff_t)(size.width))
127     {
128         size.width *= size.height;
129         size.height = 1;
130     }
131     const size_t width = size.width & ~( 32/sizeof(T) - 1 );
132 
133     for(size_t j = 0; j < size.height; ++j)
134     {
135         const T *  src = internal::getRowPtr( srcBase,  srcStride, j);
136         const T * rng1 = internal::getRowPtr(rng1Base, rng1Stride, j);
137         const T * rng2 = internal::getRowPtr(rng2Base, rng2Stride, j);
138              u8 *  dst = internal::getRowPtr( dstBase,  dstStride, j);
139         size_t i = 0;
140         for( ; i < width; i += 32/sizeof(T) )
141         {
142             internal::prefetch(src + i);
143             internal::prefetch(rng1 + i);
144             internal::prefetch(rng2 + i);
145 
146              vec128  vs = internal::vld1q( src + i);
147              vec128 vr1 = internal::vld1q(rng1 + i);
148              vec128 vr2 = internal::vld1q(rng2 + i);
149             uvec128 vd1 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
150                      vs = internal::vld1q( src + i + 16/sizeof(T));
151                     vr1 = internal::vld1q(rng1 + i + 16/sizeof(T));
152                     vr2 = internal::vld1q(rng2 + i + 16/sizeof(T));
153             uvec128 vd2 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
154             vnst(dst + i, vd1, vd2);
155         }
156         vtail<T, sizeof(T)>::inRange(src, rng1, rng2, dst, i, size.width);
157         for( ; i < size.width; i++ )
158             dst[i] = (u8)(-(rng1[i] <= src[i] && src[i] <= rng2[i]));
159     }
160 }
161 
162 }
163 
// Defines the inRange() overload for element type T (CAROTENE_NEON build):
// it first invokes the library's configuration assertion and then forwards
// every argument unchanged to inRangeCheck<T>.
#define INRANGEFUNC(T)                                              \
void inRange(const Size2D &_size,                                   \
             const T * srcBase, ptrdiff_t srcStride,                \
             const T * rng1Base, ptrdiff_t rng1Stride,              \
             const T * rng2Base, ptrdiff_t rng2Stride,              \
             u8 * dstBase, ptrdiff_t dstStride)                     \
{                                                                   \
    internal::assertSupportedConfiguration();                       \
    inRangeCheck<T>(_size,                                          \
                    srcBase,  srcStride,                            \
                    rng1Base, rng1Stride,                           \
                    rng2Base, rng2Stride,                           \
                    dstBase,  dstStride);                           \
}
176 #else
// Defines the inRange() overload for element type T when CAROTENE_NEON is not
// defined: all arguments are ignored and only the library's configuration
// assertion is invoked.
#define INRANGEFUNC(T)                                              \
void inRange(const Size2D &,                                        \
             const T *, ptrdiff_t,   /* src  */                     \
             const T *, ptrdiff_t,   /* rng1 */                     \
             const T *, ptrdiff_t,   /* rng2 */                     \
             u8 *, ptrdiff_t)        /* dst  */                     \
{                                                                   \
    internal::assertSupportedConfiguration();                       \
}
186 #endif
187 
188 INRANGEFUNC(u8)
189 INRANGEFUNC(s8)
190 INRANGEFUNC(u16)
191 INRANGEFUNC(s16)
192 INRANGEFUNC(s32)
193 INRANGEFUNC(f32)
194 
195 } // namespace CAROTENE_NS
196