1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40
41 /* This is FAST corner detector, contributed to OpenCV by the author, Edward Rosten.
42 Below is the original copyright and the references */
43
44 /*
45 Copyright (c) 2006, 2008 Edward Rosten
46 All rights reserved.
47
48 Redistribution and use in source and binary forms, with or without
49 modification, are permitted provided that the following conditions
50 are met:
51
52 *Redistributions of source code must retain the above copyright
53 notice, this list of conditions and the following disclaimer.
54
55 *Redistributions in binary form must reproduce the above copyright
56 notice, this list of conditions and the following disclaimer in the
57 documentation and/or other materials provided with the distribution.
58
59 *Neither the name of the University of Cambridge nor the names of
60 its contributors may be used to endorse or promote products derived
61 from this software without specific prior written permission.
62
63 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
64 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
65 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
66 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
67 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
68 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
69 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
70 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
71 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
72 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
73 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74 */
75
76 /*
77 The references are:
78 * Machine learning for high-speed corner detection,
79 E. Rosten and T. Drummond, ECCV 2006
80 * Faster and better: A machine learning approach to corner detection
81 E. Rosten, R. Porter and T. Drummond, PAMI, 2009
82 */
83
84 #include "common.hpp"
85
86 #include <vector>
87 #include <cstring>
88
89 namespace CAROTENE_NS {
90
91 #ifdef CAROTENE_NEON
92 namespace
93 {
94
makeOffsets(ptrdiff_t pixel[],ptrdiff_t row_stride)95 void makeOffsets(ptrdiff_t pixel[], ptrdiff_t row_stride)
96 {
97 pixel[0] = 0 + row_stride * 3;
98 pixel[1] = 1 + row_stride * 3;
99 pixel[2] = 2 + row_stride * 2;
100 pixel[3] = 3 + row_stride * 1;
101 pixel[4] = 3 + row_stride * 0;
102 pixel[5] = 3 + row_stride * -1;
103 pixel[6] = 2 + row_stride * -2;
104 pixel[7] = 1 + row_stride * -3;
105 pixel[8] = 0 + row_stride * -3;
106 pixel[9] = -1 + row_stride * -3;
107 pixel[10] = -2 + row_stride * -2;
108 pixel[11] = -3 + row_stride * -1;
109 pixel[12] = -3 + row_stride * 0;
110 pixel[13] = -3 + row_stride * 1;
111 pixel[14] = -2 + row_stride * 2;
112 pixel[15] = -1 + row_stride * 3;
113 }
114
cornerScore(const u8 * ptr,const ptrdiff_t pixel[])115 u8 cornerScore(const u8* ptr, const ptrdiff_t pixel[])
116 {
117 const s32 K = 8, N = 16 + K + 1;
118 s32 k, v = ptr[0];
119 s16 d[(N + 7) & ~7];
120 for( k = 0; k < N; k++ )
121 d[k] = (s16)(v - ptr[pixel[k]]);
122
123 int16x8_t q0 = vdupq_n_s16((s16)(-1000));
124 int16x8_t q1 = vdupq_n_s16((s16)(1000));
125
126 int16x8_t d0_7 = vld1q_s16(d + 0);
127 int16x8_t d8_15 = vld1q_s16(d + 8);
128 int16x8_t d16_23 = vld1q_s16(d + 16);
129 int16x8_t d24 = vld1q_s16(d + 24);
130
131 //k == 0
132 int16x8_t v0k0 = vextq_s16(d0_7, d8_15, 1);
133 int16x8_t v1k0 = vextq_s16(d0_7, d8_15, 2);
134 int16x8_t ak0 = vminq_s16(v0k0, v1k0);
135 int16x8_t bk0 = vmaxq_s16(v0k0, v1k0);
136
137 v0k0 = vextq_s16(d0_7, d8_15, 3);
138 ak0 = vminq_s16(ak0, v0k0);
139 bk0 = vmaxq_s16(bk0, v0k0);
140
141 v1k0 = vextq_s16(d0_7, d8_15, 4);
142 ak0 = vminq_s16(ak0, v1k0);
143 bk0 = vmaxq_s16(bk0, v1k0);
144
145 v0k0 = vextq_s16(d0_7, d8_15, 5);
146 ak0 = vminq_s16(ak0, v0k0);
147 bk0 = vmaxq_s16(bk0, v0k0);
148
149 v1k0 = vextq_s16(d0_7, d8_15, 6);
150 ak0 = vminq_s16(ak0, v1k0);
151 bk0 = vmaxq_s16(bk0, v1k0);
152
153 v0k0 = vextq_s16(d0_7, d8_15, 7);
154 ak0 = vminq_s16(ak0, v0k0);
155 bk0 = vmaxq_s16(bk0, v0k0);
156
157 ak0 = vminq_s16(ak0, d8_15);
158 bk0 = vmaxq_s16(bk0, d8_15);
159
160 q0 = vmaxq_s16(q0, vminq_s16(ak0, d0_7));
161 q1 = vminq_s16(q1, vmaxq_s16(bk0, d0_7));
162
163 v1k0 = vextq_s16(d8_15, d16_23, 1);
164 q0 = vmaxq_s16(q0, vminq_s16(ak0, v1k0));
165 q1 = vminq_s16(q1, vmaxq_s16(bk0, v1k0));
166
167 //k == 8
168 int16x8_t v0k8 = v1k0;
169 int16x8_t v1k8 = vextq_s16(d8_15, d16_23, 2);
170 int16x8_t ak8 = vminq_s16(v0k8, v1k8);
171 int16x8_t bk8 = vmaxq_s16(v0k8, v1k8);
172
173 v0k8 = vextq_s16(d8_15, d16_23, 3);
174 ak8 = vminq_s16(ak8, v0k8);
175 bk8 = vmaxq_s16(bk8, v0k8);
176
177 v1k8 = vextq_s16(d8_15, d16_23, 4);
178 ak8 = vminq_s16(ak8, v1k8);
179 bk8 = vmaxq_s16(bk8, v1k8);
180
181 v0k8 = vextq_s16(d8_15, d16_23, 5);
182 ak8 = vminq_s16(ak8, v0k8);
183 bk8 = vmaxq_s16(bk8, v0k8);
184
185 v1k8 = vextq_s16(d8_15, d16_23, 6);
186 ak8 = vminq_s16(ak8, v1k8);
187 bk8 = vmaxq_s16(bk8, v1k8);
188
189 v0k8 = vextq_s16(d8_15, d16_23, 7);
190 ak8 = vminq_s16(ak8, v0k8);
191 bk8 = vmaxq_s16(bk8, v0k8);
192
193 ak8 = vminq_s16(ak8, d16_23);
194 bk8 = vmaxq_s16(bk8, d16_23);
195
196 q0 = vmaxq_s16(q0, vminq_s16(ak8, d8_15));
197 q1 = vminq_s16(q1, vmaxq_s16(bk8, d8_15));
198
199 v1k8 = vextq_s16(d16_23, d24, 1);
200 q0 = vmaxq_s16(q0, vminq_s16(ak8, v1k8));
201 q1 = vminq_s16(q1, vmaxq_s16(bk8, v1k8));
202
203 //fin
204 int16x8_t q = vmaxq_s16(q0, vsubq_s16(vmovq_n_s16(0), q1));
205 int16x4_t q2 = vmax_s16(vget_low_s16(q), vget_high_s16(q));
206 int32x4_t q2w = vmovl_s16(q2);
207 int32x2_t q4 = vmax_s32(vget_low_s32(q2w), vget_high_s32(q2w));
208 int32x2_t q8 = vmax_s32(q4, vreinterpret_s32_s64(vshr_n_s64(vreinterpret_s64_s32(q4), 32)));
209
210 return (u8)(vget_lane_s32(q8, 0) - 1);
211 }
212
213 } //namespace
214 #endif
215
FAST(const Size2D & size,u8 * srcBase,ptrdiff_t srcStride,KeypointStore * keypoints,u8 threshold,bool nonmax_suppression)216 void FAST(const Size2D &size,
217 u8 *srcBase, ptrdiff_t srcStride,
218 KeypointStore *keypoints,
219 u8 threshold, bool nonmax_suppression)
220 {
221 internal::assertSupportedConfiguration();
222 #ifdef CAROTENE_NEON
223 //keypoints.clear();
224
225 const s32 K = 8, N = 16 + K + 1;
226 ptrdiff_t i, j, k, pixel[N];
227 makeOffsets(pixel, srcStride);
228 for(k = 16; k < N; k++)
229 pixel[k] = pixel[k - 16];
230
231 uint8x16_t delta = vdupq_n_u8(128);
232 uint8x16_t t = vdupq_n_u8(threshold);
233 uint8x16_t K16 = vdupq_n_u8((u8)K);
234
235 u8 threshold_tab[512];
236 for( i = -255; i <= 255; i++ )
237 threshold_tab[i+255] = (u8)(i < -threshold ? 1 : i > threshold ? 2 : 0);
238
239 std::vector<u8> _buf((size.width+16)*3*(sizeof(ptrdiff_t) + sizeof(u8)) + 128);
240 u8* buf[3];
241 buf[0] = &_buf[0]; buf[1] = buf[0] + size.width; buf[2] = buf[1] + size.width;
242 ptrdiff_t* cpbuf[3];
243 cpbuf[0] = (ptrdiff_t*)internal::alignPtr(buf[2] + size.width, sizeof(ptrdiff_t)) + 1;
244 cpbuf[1] = cpbuf[0] + size.width + 1;
245 cpbuf[2] = cpbuf[1] + size.width + 1;
246 memset(buf[0], 0, size.width*3);
247
248 for(i = 3; i < (ptrdiff_t)size.height-2; i++)
249 {
250 const u8* ptr = internal::getRowPtr(srcBase, srcStride, i) + 3;
251 u8* curr = buf[(i - 3)%3];
252 ptrdiff_t* cornerpos = cpbuf[(i - 3)%3];
253 memset(curr, 0, size.width);
254 ptrdiff_t ncorners = 0;
255
256 if( i < (ptrdiff_t)size.height - 3 )
257 {
258 j = 3;
259
260 for(; j < (ptrdiff_t)size.width - 16 - 3; j += 16, ptr += 16)
261 {
262 internal::prefetch(ptr);
263 internal::prefetch(ptr + pixel[0]);
264 internal::prefetch(ptr + pixel[2]);
265
266 uint8x16_t v0 = vld1q_u8(ptr);
267 int8x16_t v1 = vreinterpretq_s8_u8(veorq_u8(vqsubq_u8(v0, t), delta));
268 int8x16_t v2 = vreinterpretq_s8_u8(veorq_u8(vqaddq_u8(v0, t), delta));
269
270 int8x16_t x0 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[0]), delta));
271 int8x16_t x1 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[4]), delta));
272 int8x16_t x2 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[8]), delta));
273 int8x16_t x3 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[12]), delta));
274
275 uint8x16_t m0 = vandq_u8(vcgtq_s8(x0, v2), vcgtq_s8(x1, v2));
276 uint8x16_t m1 = vandq_u8(vcgtq_s8(v1, x0), vcgtq_s8(v1, x1));
277 m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x1, v2), vcgtq_s8(x2, v2)));
278 m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x1), vcgtq_s8(v1, x2)));
279 m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x2, v2), vcgtq_s8(x3, v2)));
280 m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x2), vcgtq_s8(v1, x3)));
281 m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x3, v2), vcgtq_s8(x0, v2)));
282 m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x3), vcgtq_s8(v1, x0)));
283 m0 = vorrq_u8(m0, m1);
284
285 u64 mask[2];
286 vst1q_u64(mask, vreinterpretq_u64_u8(m0));
287
288 if( mask[0] == 0 )
289 {
290 if (mask[1] != 0)
291 {
292 j -= 8;
293 ptr -= 8;
294 }
295 continue;
296 }
297
298 uint8x16_t c0 = vmovq_n_u8(0);
299 uint8x16_t c1 = vmovq_n_u8(0);
300 uint8x16_t max0 = vmovq_n_u8(0);
301 uint8x16_t max1 = vmovq_n_u8(0);
302 for( k = 0; k < N; k++ )
303 {
304 int8x16_t x = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(ptr + pixel[k]), delta));
305 m0 = vcgtq_s8(x, v2);
306 m1 = vcgtq_s8(v1, x);
307
308 c0 = vandq_u8(vsubq_u8(c0, m0), m0);
309 c1 = vandq_u8(vsubq_u8(c1, m1), m1);
310
311 max0 = vmaxq_u8(max0, c0);
312 max1 = vmaxq_u8(max1, c1);
313 }
314
315 max0 = vmaxq_u8(max0, max1);
316 u8 m[16];
317 vst1q_u8(m, vcgtq_u8(max0, K16));
318
319 for( k = 0; k < 16; ++k )
320 if(m[k])
321 {
322 cornerpos[ncorners++] = j+k;
323 if(nonmax_suppression)
324 curr[j+k] = cornerScore(ptr+k, pixel);
325 }
326 }
327
328 for( ; j < (s32)size.width - 3; j++, ptr++ )
329 {
330 s32 v = ptr[0];
331 const u8* tab = &threshold_tab[0] - v + 255;
332 s32 d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];
333
334 if( d == 0 )
335 continue;
336
337 d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
338 d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
339 d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];
340
341 if( d == 0 )
342 continue;
343
344 d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
345 d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
346 d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
347 d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];
348
349 if( d & 1 )
350 {
351 s32 vt = v - threshold, count = 0;
352
353 for( k = 0; k < N; k++ )
354 {
355 s32 x = ptr[pixel[k]];
356 if(x < vt)
357 {
358 if( ++count > K )
359 {
360 cornerpos[ncorners++] = j;
361 if(nonmax_suppression)
362 curr[j] = cornerScore(ptr, pixel);
363 break;
364 }
365 }
366 else
367 count = 0;
368 }
369 }
370
371 if( d & 2 )
372 {
373 s32 vt = v + threshold, count = 0;
374
375 for( k = 0; k < N; k++ )
376 {
377 s32 x = ptr[pixel[k]];
378 if(x > vt)
379 {
380 if( ++count > K )
381 {
382 cornerpos[ncorners++] = j;
383 if(nonmax_suppression)
384 curr[j] = cornerScore(ptr, pixel);
385 break;
386 }
387 }
388 else
389 count = 0;
390 }
391 }
392 }
393 }
394
395 cornerpos[-1] = ncorners;
396
397 if( i == 3 )
398 continue;
399
400 const u8* prev = buf[(i - 4 + 3)%3];
401 const u8* pprev = buf[(i - 5 + 3)%3];
402 cornerpos = cpbuf[(i - 4 + 3)%3];
403 ncorners = cornerpos[-1];
404
405 for( k = 0; k < ncorners; k++ )
406 {
407 j = cornerpos[k];
408 s32 score = prev[j];
409 if( !nonmax_suppression ||
410 (score > prev[j+1] && score > prev[j-1] &&
411 score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
412 score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
413 {
414 keypoints->push((f32)j, (f32)(i-1), 7.f, -1, (f32)score);
415 }
416 }
417 }
418 #else
419 (void)size;
420 (void)srcBase;
421 (void)srcStride;
422 (void)keypoints;
423 (void)threshold;
424 (void)nonmax_suppression;
425 #endif
426 }
427
428 } // namespace CAROTENE_NS
429