• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Copyright (C) 2000, Intel Corporation, all rights reserved.
19 * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
20 * Third party copyrights are property of their respective owners.
21 *
22 * Redistribution and use in source and binary forms, with or without modification,
23 * are permitted provided that the following conditions are met:
24 *
25 *   * Redistribution's of source code must retain the above copyright notice,
26 *     this list of conditions and the following disclaimer.
27 *
28 *   * Redistribution's in binary form must reproduce the above copyright notice,
29 *     this list of conditions and the following disclaimer in the documentation
30 *     and/or other materials provided with the distribution.
31 *
32 *   * The name of the copyright holders may not be used to endorse or promote products
33 *     derived from this software without specific prior written permission.
34 *
35 * This software is provided by the copyright holders and contributors "as is" and
36 * any express or implied warranties, including, but not limited to, the implied
37 * warranties of merchantability and fitness for a particular purpose are disclaimed.
38 * In no event shall the Intel Corporation or contributors be liable for any direct,
39 * indirect, incidental, special, exemplary, or consequential damages
40 * (including, but not limited to, procurement of substitute goods or services;
41 * loss of use, data, or profits; or business interruption) however caused
42 * and on any theory of liability, whether in contract, strict liability,
43 * or tort (including negligence or otherwise) arising in any way out of
44 * the use of this software, even if advised of the possibility of such damage.
45 */
46
47#define HARRIS_THRESHOLD 3.0f
48// Block size over which to compute harris response
49//
50// Note that changing this will require fiddling with the local array sizes in
51// harris_response
52#define HARRIS_RADIUS 2
53#define DISTANCE_THRESHOLD 80
54
55// Sub-pixel refinement window for feature points
56#define REFINE_WIN_HALF_W 5
57#define REFINE_WIN_HALF_H 5
58#define REFINE_WIN_W 11 // REFINE_WIN_HALF_W * 2 + 1
59#define REFINE_WIN_H 11
60
61// Non-maximum suppression window size
62#define NONMAX_WIN 30
63#define NONMAX_WIN_HALF 15 // NONMAX_WIN / 2
64
65typedef struct PointPair {
66    // Previous frame
67    float2 p1;
68    // Current frame
69    float2 p2;
70} PointPair;
71
72typedef struct SmoothedPointPair {
73    // Non-smoothed point in current frame
74    int2 p1;
75    // Smoothed point in current frame
76    float2 p2;
77} SmoothedPointPair;
78
79typedef struct MotionVector {
80    PointPair p;
81    // Used to mark vectors as potential outliers
82    int should_consider;
83} MotionVector;
84
85const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
86                          CLK_ADDRESS_CLAMP_TO_EDGE |
87                          CLK_FILTER_NEAREST;
88
89const sampler_t sampler_linear = CLK_NORMALIZED_COORDS_FALSE |
90                          CLK_ADDRESS_CLAMP_TO_EDGE |
91                          CLK_FILTER_LINEAR;
92
93const sampler_t sampler_linear_mirror = CLK_NORMALIZED_COORDS_TRUE |
94                          CLK_ADDRESS_MIRRORED_REPEAT |
95                          CLK_FILTER_LINEAR;
96
97// Writes to a 1D array at loc, treating it as a 2D array with the same
98// dimensions as the global work size.
99static void write_to_1d_arrf(__global float *buf, int2 loc, float val) {
100    buf[loc.x + loc.y * get_global_size(0)] = val;
101}
102
103static void write_to_1d_arrul8(__global ulong8 *buf, int2 loc, ulong8 val) {
104    buf[loc.x + loc.y * get_global_size(0)] = val;
105}
106
107static void write_to_1d_arrvec(__global MotionVector *buf, int2 loc, MotionVector val) {
108    buf[loc.x + loc.y * get_global_size(0)] = val;
109}
110
111static void write_to_1d_arrf2(__global float2 *buf, int2 loc, float2 val) {
112    buf[loc.x + loc.y * get_global_size(0)] = val;
113}
114
115static ulong8 read_from_1d_arrul8(__global const ulong8 *buf, int2 loc) {
116    return buf[loc.x + loc.y * get_global_size(0)];
117}
118
119static float2 read_from_1d_arrf2(__global const float2 *buf, int2 loc) {
120    return buf[loc.x + loc.y * get_global_size(0)];
121}
122
123// Returns the grayscale value at the given point.
124static float pixel_grayscale(__read_only image2d_t src, int2 loc) {
125    float4 pixel = read_imagef(src, sampler, loc);
126    return (pixel.x + pixel.y + pixel.z) / 3.0f;
127}
128
129static float convolve(
130    __local const float *grayscale,
131    int local_idx_x,
132    int local_idx_y,
133    float mask[3][3]
134) {
135    float ret = 0;
136
137    // These loops touch each pixel surrounding loc as well as loc itself
138    for (int i = 1, i2 = 0; i >= -1; --i, ++i2) {
139        for (int j = -1, j2 = 0; j <= 1; ++j, ++j2) {
140            ret += mask[i2][j2] * grayscale[(local_idx_x + 3 + j) + (local_idx_y + 3 + i) * 14];
141        }
142    }
143
144    return ret;
145}
146
147// Sums dx * dy for all pixels within radius of loc
148static float sum_deriv_prod(
149    __local const float *grayscale,
150    float mask_x[3][3],
151    float mask_y[3][3]
152) {
153    float ret = 0;
154
155    for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {
156        for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {
157            ret += convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_x) *
158                   convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_y);
159        }
160    }
161
162    return ret;
163}
164
165// Sums d<>^2 (determined by mask) for all pixels within radius of loc
166static float sum_deriv_pow(__local const float *grayscale, float mask[3][3])
167{
168    float ret = 0;
169
170    for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {
171        for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {
172            float deriv = convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask);
173            ret += deriv * deriv;
174        }
175    }
176
177    return ret;
178}
179
180// Fills a box with the given radius and pixel around loc
181static void draw_box(__write_only image2d_t dst, int2 loc, float4 pixel, int radius)
182{
183    for (int i = -radius; i <= radius; ++i) {
184        for (int j = -radius; j <= radius; ++j) {
185            write_imagef(
186                dst,
187                (int2)(
188                    // Clamp to avoid writing outside image bounds
189                    clamp(loc.x + i, 0, get_image_dim(dst).x - 1),
190                    clamp(loc.y + j, 0, get_image_dim(dst).y - 1)
191                ),
192                pixel
193            );
194        }
195    }
196}
197
198// Converts the src image to grayscale
199__kernel void grayscale(
200    __read_only image2d_t src,
201    __write_only image2d_t grayscale
202) {
203    int2 loc = (int2)(get_global_id(0), get_global_id(1));
204    write_imagef(grayscale, loc, (float4)(pixel_grayscale(src, loc), 0.0f, 0.0f, 1.0f));
205}
206
207// This kernel computes the harris response for the given grayscale src image
208// within the given radius and writes it to harris_buf
209__kernel void harris_response(
210    __read_only image2d_t grayscale,
211    __global float *harris_buf
212) {
213    int2 loc = (int2)(get_global_id(0), get_global_id(1));
214
215    if (loc.x > get_image_width(grayscale) - 1 || loc.y > get_image_height(grayscale) - 1) {
216        write_to_1d_arrf(harris_buf, loc, 0);
217        return;
218    }
219
220    float scale = 1.0f / ((1 << 2) * HARRIS_RADIUS * 255.0f);
221
222    float sobel_mask_x[3][3] = {
223        {-1, 0, 1},
224        {-2, 0, 2},
225        {-1, 0, 1}
226    };
227
228    float sobel_mask_y[3][3] = {
229        { 1,  2,  1},
230        { 0,  0,  0},
231        {-1, -2, -1}
232    };
233
234    // 8 x 8 local work + 3 pixels around each side (needed to accomodate for the
235    // block size radius of 2)
236    __local float grayscale_data[196];
237
238    int idx = get_group_id(0) * get_local_size(0);
239    int idy = get_group_id(1) * get_local_size(1);
240
241    for (int i = idy - 3, it = 0; i < idy + (int)get_local_size(1) + 3; i++, it++) {
242        for (int j = idx - 3, jt = 0; j < idx + (int)get_local_size(0) + 3; j++, jt++) {
243            grayscale_data[jt + it * 14] = read_imagef(grayscale, sampler, (int2)(j, i)).x;
244        }
245    }
246
247    barrier(CLK_LOCAL_MEM_FENCE);
248
249    float sumdxdy = sum_deriv_prod(grayscale_data, sobel_mask_x, sobel_mask_y);
250    float sumdx2 = sum_deriv_pow(grayscale_data, sobel_mask_x);
251    float sumdy2 = sum_deriv_pow(grayscale_data, sobel_mask_y);
252
253    float trace = sumdx2 + sumdy2;
254    // r = det(M) - k(trace(M))^2
255    // k usually between 0.04 to 0.06
256    float r = (sumdx2 * sumdy2 - sumdxdy * sumdxdy) - 0.04f * (trace * trace) * pown(scale, 4);
257
258    // Threshold the r value
259    harris_buf[loc.x + loc.y * get_image_width(grayscale)] = r * step(HARRIS_THRESHOLD, r);
260}
261
262// Gets a patch centered around a float coordinate from a grayscale image using
263// bilinear interpolation
264static void get_rect_sub_pix(
265    __read_only image2d_t grayscale,
266    float *buffer,
267    int size_x,
268    int size_y,
269    float2 center
270) {
271    float2 offset = ((float2)(size_x, size_y) - 1.0f) * 0.5f;
272
273    for (int i = 0; i < size_y; i++) {
274        for (int j = 0; j < size_x; j++) {
275            buffer[i * size_x + j] = read_imagef(
276                grayscale,
277                sampler_linear,
278                (float2)(j, i) + center - offset
279            ).x * 255.0f;
280        }
281    }
282}
283
284// Refines detected features at a sub-pixel level
285//
286// This function is ported from OpenCV
287static float2 corner_sub_pix(
288    __read_only image2d_t grayscale,
289    float2 feature,
290    float *mask
291) {
292    float2 init = feature;
293    int src_width = get_global_size(0);
294    int src_height = get_global_size(1);
295
296    const int max_iters = 40;
297    const float eps = 0.001f * 0.001f;
298    int i, j, k;
299
300    int iter = 0;
301    float err = 0;
302    float subpix[(REFINE_WIN_W + 2) * (REFINE_WIN_H + 2)];
303    const float flt_epsilon = 0x1.0p-23f;
304
305    do {
306        float2 feature_tmp;
307        float a = 0, b = 0, c = 0, bb1 = 0, bb2 = 0;
308
309        get_rect_sub_pix(grayscale, subpix, REFINE_WIN_W + 2, REFINE_WIN_H + 2, feature);
310        float *subpix_ptr = subpix;
311        subpix_ptr += REFINE_WIN_W + 2 + 1;
312
313        // process gradient
314        for (i = 0, k = 0; i < REFINE_WIN_H; i++, subpix_ptr += REFINE_WIN_W + 2) {
315            float py = i - REFINE_WIN_HALF_H;
316
317            for (j = 0; j < REFINE_WIN_W; j++, k++) {
318                float m = mask[k];
319                float tgx = subpix_ptr[j + 1] - subpix_ptr[j - 1];
320                float tgy = subpix_ptr[j + REFINE_WIN_W + 2] - subpix_ptr[j - REFINE_WIN_W - 2];
321                float gxx = tgx * tgx * m;
322                float gxy = tgx * tgy * m;
323                float gyy = tgy * tgy * m;
324                float px = j - REFINE_WIN_HALF_W;
325
326                a += gxx;
327                b += gxy;
328                c += gyy;
329
330                bb1 += gxx * px + gxy * py;
331                bb2 += gxy * px + gyy * py;
332            }
333        }
334
335        float det = a * c - b * b;
336        if (fabs(det) <= flt_epsilon * flt_epsilon) {
337            break;
338        }
339
340        // 2x2 matrix inversion
341        float scale = 1.0f / det;
342        feature_tmp.x = (float)(feature.x + (c * scale * bb1) - (b * scale * bb2));
343        feature_tmp.y = (float)(feature.y - (b * scale * bb1) + (a * scale * bb2));
344        err = dot(feature_tmp - feature, feature_tmp - feature);
345
346        feature = feature_tmp;
347        if (feature.x < 0 || feature.x >= src_width || feature.y < 0 || feature.y >= src_height) {
348            break;
349        }
350    } while (++iter < max_iters && err > eps);
351
352    // Make sure new point isn't too far from the initial point (indicates poor convergence)
353    if (fabs(feature.x - init.x) > REFINE_WIN_HALF_W || fabs(feature.y - init.y) > REFINE_WIN_HALF_H) {
354        feature = init;
355    }
356
357    return feature;
358}
359
360// Performs non-maximum suppression on the harris response and writes the resulting
361// feature locations to refined_features.
362//
363// Assumes that refined_features and the global work sizes are set up such that the image
364// is split up into a grid of 32x32 blocks where each block has a single slot in the
365// refined_features buffer. This kernel finds the best corner in each block (if the
366// block has any) and writes it to the corresponding slot in the buffer.
367//
368// If subpixel_refine is true, the features are additionally refined at a sub-pixel
369// level for increased precision.
370__kernel void refine_features(
371    __read_only image2d_t grayscale,
372    __global const float *harris_buf,
373    __global float2 *refined_features,
374    int subpixel_refine
375) {
376    int2 loc = (int2)(get_global_id(0), get_global_id(1));
377    // The location in the grayscale buffer rather than the compacted grid
378    int2 loc_i = (int2)(loc.x * 32, loc.y * 32);
379
380    float new_val;
381    float max_val = 0;
382    float2 loc_max = (float2)(-1, -1);
383
384    int end_x = min(loc_i.x + 32, (int)get_image_dim(grayscale).x - 1);
385    int end_y = min(loc_i.y + 32, (int)get_image_dim(grayscale).y - 1);
386
387    for (int i = loc_i.x; i < end_x; ++i) {
388        for (int j = loc_i.y; j < end_y; ++j) {
389            new_val = harris_buf[i + j * get_image_dim(grayscale).x];
390
391            if (new_val > max_val) {
392                max_val = new_val;
393                loc_max = (float2)(i, j);
394            }
395        }
396    }
397
398    if (max_val == 0) {
399        // There are no features in this part of the frame
400        write_to_1d_arrf2(refined_features, loc, loc_max);
401        return;
402    }
403
404    if (subpixel_refine) {
405        float mask[REFINE_WIN_H * REFINE_WIN_W];
406        for (int i = 0; i < REFINE_WIN_H; i++) {
407            float y = (float)(i - REFINE_WIN_HALF_H) / REFINE_WIN_HALF_H;
408            float vy = exp(-y * y);
409
410            for (int j = 0; j < REFINE_WIN_W; j++) {
411                float x = (float)(j - REFINE_WIN_HALF_W) / REFINE_WIN_HALF_W;
412                mask[i * REFINE_WIN_W + j] = (float)(vy * exp(-x * x));
413            }
414        }
415
416        loc_max = corner_sub_pix(grayscale, loc_max, mask);
417    }
418
419    write_to_1d_arrf2(refined_features, loc, loc_max);
420}
421
422// Extracts BRIEF descriptors from the grayscale src image for the given features
423// using the provided sampler.
424__kernel void brief_descriptors(
425    __read_only image2d_t grayscale,
426    __global const float2 *refined_features,
427    // for 512 bit descriptors
428    __global ulong8 *desc_buf,
429    __global const PointPair *brief_pattern
430) {
431    int2 loc = (int2)(get_global_id(0), get_global_id(1));
432    float2 feature = read_from_1d_arrf2(refined_features, loc);
433
434    // There was no feature in this part of the frame
435    if (feature.x == -1) {
436        write_to_1d_arrul8(desc_buf, loc, (ulong8)(0));
437        return;
438    }
439
440    ulong8 desc = 0;
441    ulong *p = &desc;
442
443    for (int i = 0; i < 8; ++i) {
444        for (int j = 0; j < 64; ++j) {
445            PointPair pair = brief_pattern[j * (i + 1)];
446            float l1 = read_imagef(grayscale, sampler_linear, feature + pair.p1).x;
447            float l2 = read_imagef(grayscale, sampler_linear, feature + pair.p2).x;
448
449            if (l1 < l2) {
450                p[i] |= 1UL << j;
451            }
452        }
453    }
454
455    write_to_1d_arrul8(desc_buf, loc, desc);
456}
457
458// Given buffers with descriptors for the current and previous frame, determines
459// which ones match, writing correspondences to matches_buf.
460//
461// Feature and descriptor buffers are assumed to be compacted (each element sourced
462// from a 32x32 block in the frame being processed).
463__kernel void match_descriptors(
464    __global const float2 *prev_refined_features,
465    __global const float2 *refined_features,
466    __global const ulong8 *desc_buf,
467    __global const ulong8 *prev_desc_buf,
468    __global MotionVector *matches_buf
469) {
470    int2 loc = (int2)(get_global_id(0), get_global_id(1));
471    ulong8 desc = read_from_1d_arrul8(desc_buf, loc);
472    const int search_radius = 3;
473
474    MotionVector invalid_vector = (MotionVector) {
475        (PointPair) {
476            (float2)(-1, -1),
477            (float2)(-1, -1)
478        },
479        0
480    };
481
482    if (desc.s0 == 0 && desc.s1 == 0) {
483        // There was no feature in this part of the frame
484        write_to_1d_arrvec(
485            matches_buf,
486            loc,
487            invalid_vector
488        );
489        return;
490    }
491
492    int2 start = max(loc - search_radius, 0);
493    int2 end = min(loc + search_radius, (int2)(get_global_size(0) - 1, get_global_size(1) - 1));
494
495    for (int i = start.x; i < end.x; ++i) {
496        for (int j = start.y; j < end.y; ++j) {
497            int2 prev_point = (int2)(i, j);
498            int total_dist = 0;
499
500            ulong8 prev_desc = read_from_1d_arrul8(prev_desc_buf, prev_point);
501
502            if (prev_desc.s0 == 0 && prev_desc.s1 == 0) {
503                continue;
504            }
505
506            ulong *prev_desc_p = &prev_desc;
507            ulong *desc_p = &desc;
508
509            for (int i = 0; i < 8; i++) {
510                total_dist += popcount(desc_p[i] ^ prev_desc_p[i]);
511            }
512
513            if (total_dist < DISTANCE_THRESHOLD) {
514                write_to_1d_arrvec(
515                    matches_buf,
516                    loc,
517                    (MotionVector) {
518                        (PointPair) {
519                            read_from_1d_arrf2(prev_refined_features, prev_point),
520                            read_from_1d_arrf2(refined_features, loc)
521                        },
522                        1
523                    }
524                );
525
526                return;
527            }
528        }
529    }
530
531    // There is no found match for this point
532    write_to_1d_arrvec(
533        matches_buf,
534        loc,
535        invalid_vector
536    );
537}
538
539// Returns the position of the given point after the transform is applied
540static float2 transformed_point(float2 p, __global const float *transform) {
541    float2 ret;
542
543    ret.x = p.x * transform[0] + p.y * transform[1] + transform[2];
544    ret.y = p.x * transform[3] + p.y * transform[4] + transform[5];
545
546    return ret;
547}
548
549
550// Performs the given transform on the src image
551__kernel void transform(
552    __read_only image2d_t src,
553    __write_only image2d_t dst,
554    __global const float *transform
555) {
556    int2 loc = (int2)(get_global_id(0), get_global_id(1));
557    float2 norm = convert_float2(get_image_dim(src));
558
559    write_imagef(
560        dst,
561        loc,
562        read_imagef(
563            src,
564            sampler_linear_mirror,
565            transformed_point((float2)(loc.x, loc.y), transform) / norm
566        )
567    );
568}
569
570// Returns the new location of the given point using the given crop bounding box
571// and the width and height of the original frame.
572static float2 cropped_point(
573    float2 p,
574    float2 top_left,
575    float2 bottom_right,
576    int2 orig_dim
577) {
578    float2 ret;
579
580    float crop_width  = bottom_right.x - top_left.x;
581    float crop_height = bottom_right.y - top_left.y;
582
583    float width_norm = p.x / (float)orig_dim.x;
584    float height_norm = p.y / (float)orig_dim.y;
585
586    ret.x = (width_norm * crop_width) + top_left.x;
587    ret.y = (height_norm * crop_height) + ((float)orig_dim.y - bottom_right.y);
588
589    return ret;
590}
591
592// Upscales the given cropped region to the size of the original frame
593__kernel void crop_upscale(
594    __read_only image2d_t src,
595    __write_only image2d_t dst,
596    float2 top_left,
597    float2 bottom_right
598) {
599    int2 loc = (int2)(get_global_id(0), get_global_id(1));
600
601    write_imagef(
602        dst,
603        loc,
604        read_imagef(
605            src,
606            sampler_linear,
607            cropped_point((float2)(loc.x, loc.y), top_left, bottom_right, get_image_dim(dst))
608        )
609    );
610}
611
612// Draws boxes to represent the given point matches and uses the given transform
613// and crop info to make sure their positions are accurate on the transformed frame.
614//
615// model_matches is an array of three points that were used by the RANSAC process
616// to generate the given transform
617__kernel void draw_debug_info(
618    __write_only image2d_t dst,
619    __global const MotionVector *matches,
620    __global const MotionVector *model_matches,
621    int num_model_matches,
622    __global const float *transform
623) {
624    int loc = get_global_id(0);
625    MotionVector vec = matches[loc];
626    // Black box: matched point that RANSAC considered an outlier
627    float4 big_rect_color = (float4)(0.1f, 0.1f, 0.1f, 1.0f);
628
629    if (vec.should_consider) {
630        // Green box: matched point that RANSAC considered an inlier
631        big_rect_color = (float4)(0.0f, 1.0f, 0.0f, 1.0f);
632    }
633
634    for (int i = 0; i < num_model_matches; i++) {
635        if (vec.p.p2.x == model_matches[i].p.p2.x && vec.p.p2.y == model_matches[i].p.p2.y) {
636            // Orange box: point used to calculate model
637            big_rect_color = (float4)(1.0f, 0.5f, 0.0f, 1.0f);
638        }
639    }
640
641    float2 transformed_p1 = transformed_point(vec.p.p1, transform);
642    float2 transformed_p2 = transformed_point(vec.p.p2, transform);
643
644    draw_box(dst, (int2)(transformed_p2.x, transformed_p2.y), big_rect_color, 5);
645    // Small light blue box: the point in the previous frame
646    draw_box(dst, (int2)(transformed_p1.x, transformed_p1.y), (float4)(0.0f, 0.3f, 0.7f, 1.0f), 3);
647}
648