1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #include "common.hpp"
41
42 #include "vtransform.hpp"
43
44 namespace CAROTENE_NS {
45
46 #ifdef CAROTENE_NEON
47
48 namespace {
49
vnst(u8 * dst,uint8x16_t v1,uint8x16_t v2)50 inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
vnst(u8 * dst,uint16x8_t v1,uint16x8_t v2)51 inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
vnst(u8 * dst,uint32x4_t v1,uint32x4_t v2)52 inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
53
54 template <typename T, int elsize> struct vtail
55 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail56 static inline void inRange(const T *, const T *, const T *,
57 u8 *, size_t &, size_t)
58 {
59 //do nothing since there couldn't be enough data
60 }
61 };
62 template <typename T> struct vtail<T, 2>
63 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail64 static inline void inRange(const T * src, const T * rng1, const T * rng2,
65 u8 * dst, size_t &x, size_t width)
66 {
67 typedef typename internal::VecTraits<T>::vec128 vec128;
68 typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
69 //There no more than 15 elements in the tail, so we could handle 8 element vector only once
70 if( x + 8 < width)
71 {
72 vec128 vs = internal::vld1q( src + x);
73 vec128 vr1 = internal::vld1q(rng1 + x);
74 vec128 vr2 = internal::vld1q(rng2 + x);
75 uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
76 internal::vst1(dst + x, internal::vmovn(vd));
77 x+=8;
78 }
79 }
80 };
81 template <typename T> struct vtail<T, 1>
82 {
inRangeCAROTENE_NS::__anonbc33ebab0111::vtail83 static inline void inRange(const T * src, const T * rng1, const T * rng2,
84 u8 * dst, size_t &x, size_t width)
85 {
86 typedef typename internal::VecTraits<T>::vec128 vec128;
87 typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
88 typedef typename internal::VecTraits<T>::vec64 vec64;
89 typedef typename internal::VecTraits<T>::unsign::vec64 uvec64;
90 //There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
91 if( x + 16 < width)
92 {
93 vec128 vs = internal::vld1q( src + x);
94 vec128 vr1 = internal::vld1q(rng1 + x);
95 vec128 vr2 = internal::vld1q(rng2 + x);
96 uvec128 vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
97 internal::vst1q(dst + x, vd);
98 x+=16;
99 }
100 if( x + 8 < width)
101 {
102 vec64 vs = internal::vld1( src + x);
103 vec64 vr1 = internal::vld1(rng1 + x);
104 vec64 vr2 = internal::vld1(rng2 + x);
105 uvec64 vd = internal::vand(internal::vcge(vs, vr1), internal::vcge(vr2, vs));
106 internal::vst1(dst + x, vd);
107 x+=8;
108 }
109 }
110 };
111
112 template <typename T>
inRangeCheck(const Size2D & _size,const T * srcBase,ptrdiff_t srcStride,const T * rng1Base,ptrdiff_t rng1Stride,const T * rng2Base,ptrdiff_t rng2Stride,u8 * dstBase,ptrdiff_t dstStride)113 inline void inRangeCheck(const Size2D &_size,
114 const T * srcBase, ptrdiff_t srcStride,
115 const T * rng1Base, ptrdiff_t rng1Stride,
116 const T * rng2Base, ptrdiff_t rng2Stride,
117 u8 * dstBase, ptrdiff_t dstStride)
118 {
119 typedef typename internal::VecTraits<T>::vec128 vec128;
120 typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
121
122 Size2D size(_size);
123 if (srcStride == dstStride &&
124 srcStride == rng1Stride &&
125 srcStride == rng2Stride &&
126 srcStride == (ptrdiff_t)(size.width))
127 {
128 size.width *= size.height;
129 size.height = 1;
130 }
131 const size_t width = size.width & ~( 32/sizeof(T) - 1 );
132
133 for(size_t j = 0; j < size.height; ++j)
134 {
135 const T * src = internal::getRowPtr( srcBase, srcStride, j);
136 const T * rng1 = internal::getRowPtr(rng1Base, rng1Stride, j);
137 const T * rng2 = internal::getRowPtr(rng2Base, rng2Stride, j);
138 u8 * dst = internal::getRowPtr( dstBase, dstStride, j);
139 size_t i = 0;
140 for( ; i < width; i += 32/sizeof(T) )
141 {
142 internal::prefetch(src + i);
143 internal::prefetch(rng1 + i);
144 internal::prefetch(rng2 + i);
145
146 vec128 vs = internal::vld1q( src + i);
147 vec128 vr1 = internal::vld1q(rng1 + i);
148 vec128 vr2 = internal::vld1q(rng2 + i);
149 uvec128 vd1 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
150 vs = internal::vld1q( src + i + 16/sizeof(T));
151 vr1 = internal::vld1q(rng1 + i + 16/sizeof(T));
152 vr2 = internal::vld1q(rng2 + i + 16/sizeof(T));
153 uvec128 vd2 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
154 vnst(dst + i, vd1, vd2);
155 }
156 vtail<T, sizeof(T)>::inRange(src, rng1, rng2, dst, i, size.width);
157 for( ; i < size.width; i++ )
158 dst[i] = (u8)(-(rng1[i] <= src[i] && src[i] <= rng2[i]));
159 }
160 }
161
162 }
163
// NEON build: generates the inRange() overload for element type T.
// The generated function calls internal::assertSupportedConfiguration() and then
// forwards all of its arguments unchanged to inRangeCheck().
#define INRANGEFUNC(T)                                          \
void inRange(const Size2D &_size,                               \
             const T * srcBase, ptrdiff_t srcStride,            \
             const T * rng1Base, ptrdiff_t rng1Stride,          \
             const T * rng2Base, ptrdiff_t rng2Stride,          \
             u8 * dstBase, ptrdiff_t dstStride)                 \
{                                                               \
    internal::assertSupportedConfiguration();                   \
    inRangeCheck(_size,                                         \
                 srcBase, srcStride,                            \
                 rng1Base, rng1Stride,                          \
                 rng2Base, rng2Stride,                          \
                 dstBase, dstStride);                           \
}
176 #else
// Build without CAROTENE_NEON: generates an inRange() overload for element type T that
// ignores every argument and only calls internal::assertSupportedConfiguration().
#define INRANGEFUNC(T)                                          \
void inRange(const Size2D &,                                    \
             const T *, ptrdiff_t,                              \
             const T *, ptrdiff_t,                              \
             const T *, ptrdiff_t,                              \
             u8 *, ptrdiff_t)                                   \
{                                                               \
    internal::assertSupportedConfiguration();                   \
}
186 #endif
187
188 INRANGEFUNC(u8)
189 INRANGEFUNC(s8)
190 INRANGEFUNC(u16)
191 INRANGEFUNC(s16)
192 INRANGEFUNC(s32)
193 INRANGEFUNC(f32)
194
195 } // namespace CAROTENE_NS
196