/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistribution's of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistribution's in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * The name of the copyright holders may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall the Intel Corporation or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */

#include <float.h>
#include <libavutil/lfg.h>
#include "libavutil/opt.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/fifo.h"
#include "libavutil/common.h"
#include "libavutil/avassert.h"
#include "libavutil/pixfmt.h"
#include "avfilter.h"
#include "framequeue.h"
#include "filters.h"
#include "transform.h"
#include "formats.h"
#include "internal.h"
#include "opencl.h"
#include "opencl_source.h"
#include "video.h"

/*
This filter matches feature points between frames (dealing with outliers) and then
uses the matches to estimate an affine transform between frames. This transform is
decomposed into various values (translation, scale, rotation) and the values are
summed relative to the start of the video to obtain an absolute camera position
for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
in a new path that is turned back into an affine transform and applied to each
frame to render it.

High-level overview:

All of the work to extract motion data from frames occurs in queue_frame. Motion data
is buffered in a smoothing window, so queue_frame simply computes the absolute camera
positions and places them in ringbuffers.

filter_frame is responsible for looking at the absolute camera positions currently
in the ringbuffers, applying the gaussian filter, and then transforming the frames.
*/
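
/*
Example usage (an illustrative sketch, not part of this file): as with other
OpenCL filters, an OpenCL device has to be initialized and frames uploaded to
it before this filter can run. The device string and software pixel format
below are assumptions that depend on the system:

    ffmpeg -init_hw_device opencl=gpu -filter_hw_device gpu -i in.mp4 \
        -vf 'format=nv12,hwupload,deshake_opencl,hwdownload,format=nv12' out.mp4
*/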

// Number of bits for BRIEF descriptors
#define BREIFN 512
// Size of the patch from which a BRIEF descriptor is extracted
// This is the size used in OpenCV
#define BRIEF_PATCH_SIZE 31
#define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)

#define MATCHES_CONTIG_SIZE 2000

#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))

typedef struct PointPair {
    // Previous frame
    cl_float2 p1;
    // Current frame
    cl_float2 p2;
} PointPair;

typedef struct MotionVector {
    PointPair p;
    // Used to mark vectors as potential outliers
    cl_int should_consider;
} MotionVector;

// Denotes the indices for the different types of motion in the ringbuffers array
enum RingbufferIndices {
    RingbufX,
    RingbufY,
    RingbufRot,
    RingbufScaleX,
    RingbufScaleY,

    // Should always be last
    RingbufCount
};

// Struct that holds data for drawing point match debug data
typedef struct DebugMatches {
    MotionVector *matches;
    // The points used to calculate the affine transform for a frame
    MotionVector model_matches[3];

    int num_matches;
    // For cases where we couldn't calculate a model
    int num_model_matches;
} DebugMatches;

// Groups together the ringbuffers that store absolute distortion / position values
// for each frame
typedef struct AbsoluteFrameMotion {
    // Array with the various ringbuffers, indexed via the RingbufferIndices enum
    AVFifo *ringbuffers[RingbufCount];

    // Offset to get to the current frame being processed
    // (not in bytes)
    int curr_frame_offset;
    // Keeps track of where the start and end of contiguous motion data is (to
    // deal with cases where no motion data is found between two frames)
    int data_start_offset;
    int data_end_offset;

    AVFifo *debug_matches;
} AbsoluteFrameMotion;

// Takes care of freeing the arrays within the DebugMatches inside of the
// debug_matches ringbuffer and then freeing the buffer itself.
static void free_debug_matches(AbsoluteFrameMotion *afm) {
    DebugMatches dm;

    if (!afm->debug_matches) {
        return;
    }

    while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)
        av_freep(&dm.matches);

    av_fifo_freep2(&afm->debug_matches);
}

// Stores the translation, scale, rotation, and skew deltas between two frames
typedef struct FrameDelta {
    cl_float2 translation;
    float rotation;
    cl_float2 scale;
    cl_float2 skew;
} FrameDelta;

typedef struct SimilarityMatrix {
    // The 2x3 similarity matrix
    double matrix[6];
} SimilarityMatrix;

typedef struct CropInfo {
    // The top left corner of the bounding box for the crop
    cl_float2 top_left;
    // The bottom right corner of the bounding box for the crop
    cl_float2 bottom_right;
} CropInfo;

// Returned from function that determines start and end values for iteration
// around the current frame in a ringbuffer
typedef struct IterIndices {
    int start;
    int end;
} IterIndices;

typedef struct DeshakeOpenCLContext {
    OpenCLFilterContext ocf;
    // Whether or not the above `OpenCLFilterContext` has been initialized
    int initialized;

    // These variables are used in the activate callback
    int64_t duration;
    int eof;

    // State for random number generation
    AVLFG alfg;

    // FIFO frame queue used to buffer future frames for processing
    FFFrameQueue fq;
    // Ringbuffers for frame positions
    AbsoluteFrameMotion abs_motion;

    // The number of frames' motion to consider before and after the frame we are
    // smoothing
    int smooth_window;
    // The number of the frame we are currently processing
    int curr_frame;

    // Stores a 1d array of normalised gaussian kernel values for convolution
    float *gauss_kernel;

    // Buffer for error values used in RANSAC code
    float *ransac_err;

    // Information regarding how to crop the smoothed luminance (or RGB) planes
    CropInfo crop_y;
    // Information regarding how to crop the smoothed chroma planes
    CropInfo crop_uv;

    // Whether or not we are processing YUV input (as opposed to RGB)
    int is_yuv;
    // The underlying format of the hardware surfaces
    int sw_format;

    // Buffer to copy `matches` into for the CPU to work with
    MotionVector *matches_host;
    MotionVector *matches_contig_host;

    MotionVector *inliers;

    cl_command_queue command_queue;
    cl_kernel kernel_grayscale;
    cl_kernel kernel_harris_response;
    cl_kernel kernel_refine_features;
    cl_kernel kernel_brief_descriptors;
    cl_kernel kernel_match_descriptors;
    cl_kernel kernel_transform;
    cl_kernel kernel_crop_upscale;

    // Stores a frame converted to grayscale
    cl_mem grayscale;
    // Stores the harris response for a frame (measure of "cornerness" for each pixel)
    cl_mem harris_buf;

    // Detected features after non-maximum suppression and sub-pixel refinement
    cl_mem refined_features;
    // Saved from the previous frame
    cl_mem prev_refined_features;

    // BRIEF sampling pattern that is randomly initialized
    cl_mem brief_pattern;
    // Feature point descriptors for the current frame
    cl_mem descriptors;
    // Feature point descriptors for the previous frame
    cl_mem prev_descriptors;
    // Vectors between points in current and previous frame
    cl_mem matches;
    cl_mem matches_contig;
    // Holds the matrix to transform luminance (or RGB) with
    cl_mem transform_y;
    // Holds the matrix to transform chroma with
    cl_mem transform_uv;

    // Configurable options

    int tripod_mode;
    int debug_on;
    int should_crop;

    // Whether or not feature points should be refined at a sub-pixel level
    cl_int refine_features;
    // If the user sets a value other than the default, 0, this percentage is
    // translated into a sigma value ranging from 0.5 to 40.0
    float smooth_percent;
    // This number is multiplied by the video frame rate to determine the size
    // of the smooth window
    float smooth_window_multiplier;

    // Debug stuff

    cl_kernel kernel_draw_debug_info;
    cl_mem debug_matches;
    cl_mem debug_model_matches;

    // These store the total time spent executing the different kernels in nanoseconds
    unsigned long long grayscale_time;
    unsigned long long harris_response_time;
    unsigned long long refine_features_time;
    unsigned long long brief_descriptors_time;
    unsigned long long match_descriptors_time;
    unsigned long long transform_time;
    unsigned long long crop_upscale_time;

    // Time spent copying matched features from the device to the host
    unsigned long long read_buf_time;
} DeshakeOpenCLContext;

// Returns a random uniformly-distributed number in [low, high)
static int rand_in(int low, int high, AVLFG *alfg) {
    return (av_lfg_get(alfg) % (high - low)) + low;
}

// Returns the average execution time for an event given the total time and the
// number of frames processed.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    return (double)total_time / (double)num_frames / 1000000.0;
}

// The following code is loosely ported from OpenCV

// Estimates affine transform from 3 point pairs
// model is a 2x3 matrix:
//      a b c
//      d e f
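//
// A note on the math (added for clarity): the three correspondences
// (x_i, y_i) -> (X_i, Y_i) give the exact linear system
//      X_i = a*x_i + b*y_i + c
//      Y_i = d*x_i + e*y_i + f
// which is solved below via Cramer's rule; `d` in the code is the reciprocal
// of the system determinant, which equals twice the signed area of the source
// triangle and is therefore nonzero whenever the source points are not
// collinear (the subset selection code checks for this).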
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
{
    // src points
    double x1 = point_pairs[0].p.p1.s[0];
    double y1 = point_pairs[0].p.p1.s[1];
    double x2 = point_pairs[1].p.p1.s[0];
    double y2 = point_pairs[1].p.p1.s[1];
    double x3 = point_pairs[2].p.p1.s[0];
    double y3 = point_pairs[2].p.p1.s[1];

    // dest points
    double X1 = point_pairs[0].p.p2.s[0];
    double Y1 = point_pairs[0].p.p2.s[1];
    double X2 = point_pairs[1].p.p2.s[0];
    double Y2 = point_pairs[1].p.p2.s[1];
    double X3 = point_pairs[2].p.p2.s[0];
    double Y3 = point_pairs[2].p.p2.s[1];

    double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );

    model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
    model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
    model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );

    model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
    model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
    model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
}

// Checks that the 3 points in the given array are not collinear
static int points_not_collinear(const cl_float2 **points)
{
    int j, k, i = 2;

    for (j = 0; j < i; j++) {
        double dx1 = points[j]->s[0] - points[i]->s[0];
        double dy1 = points[j]->s[1] - points[i]->s[1];

        for (k = 0; k < j; k++) {
            double dx2 = points[k]->s[0] - points[i]->s[0];
            double dy2 = points[k]->s[1] - points[i]->s[1];

            // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
            // (3839, 2159), this prevents a third point from being within roughly
            // 0.5 of a pixel of the line connecting the two on both axes
            if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
                return 0;
            }
        }
    }

    return 1;
}

// Checks a subset of 3 point pairs to make sure that the points are not collinear
// and not too close to each other
static int check_subset(const MotionVector *pairs_subset)
{
    const cl_float2 *prev_points[] = {
        &pairs_subset[0].p.p1,
        &pairs_subset[1].p.p1,
        &pairs_subset[2].p.p1
    };

    const cl_float2 *curr_points[] = {
        &pairs_subset[0].p.p2,
        &pairs_subset[1].p.p2,
        &pairs_subset[2].p.p2
    };

    return points_not_collinear(prev_points) && points_not_collinear(curr_points);
}

// Selects a random subset of 3 points from point_pairs and places them in pairs_subset
static int get_subset(
    AVLFG *alfg,
    const MotionVector *point_pairs,
    const int num_point_pairs,
    MotionVector *pairs_subset,
    int max_attempts
) {
    int idx[3];
    int i = 0, j, iters = 0;

    for (; iters < max_attempts; iters++) {
        for (i = 0; i < 3 && iters < max_attempts;) {
            int idx_i = 0;

            for (;;) {
                idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);

                for (j = 0; j < i; j++) {
                    if (idx_i == idx[j]) {
                        break;
                    }
                }

                if (j == i) {
                    break;
                }
            }

            pairs_subset[i] = point_pairs[idx[i]];
            i++;
        }

        if (i == 3 && !check_subset(pairs_subset)) {
            continue;
        }
        break;
    }

    return i == 3 && iters < max_attempts;
}

// Computes the error for each of the given points based on the given model.
static void compute_error(
    const MotionVector *point_pairs,
    const int num_point_pairs,
    const double *model,
    float *err
) {
    double F0 = model[0], F1 = model[1], F2 = model[2];
    double F3 = model[3], F4 = model[4], F5 = model[5];

    for (int i = 0; i < num_point_pairs; i++) {
        const cl_float2 *f = &point_pairs[i].p.p1;
        const cl_float2 *t = &point_pairs[i].p.p2;

        double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
        double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];

        err[i] = a*a + b*b;
    }
}

// Determines which of the given point matches are inliers for the given model
// based on the specified threshold.
//
// err must be an array of num_point_pairs length
static int find_inliers(
    MotionVector *point_pairs,
    const int num_point_pairs,
    const double *model,
    float *err,
    double thresh
) {
    float t = (float)(thresh * thresh);
    int i, n = num_point_pairs, num_inliers = 0;

    compute_error(point_pairs, num_point_pairs, model, err);

    for (i = 0; i < n; i++) {
        if (err[i] <= t) {
            // This is an inlier
            point_pairs[i].should_consider = 1;
            num_inliers += 1;
        } else {
            point_pairs[i].should_consider = 0;
        }
    }

    return num_inliers;
}

// Determines the number of iterations required to achieve the desired confidence level.
//
// The equation used to determine the number of iterations to do is:
// 1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// Solving for num_iters:
//
// num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
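//
// Worked example (illustrative numbers): with confidence = 0.99 and an
// estimated outlier fraction of 0.5, the probability that a random 3-point
// sample is all inliers is 0.5^3 = 0.125, so
//      num_iters = log(0.01) / log(1 - 0.125) ~= -4.61 / -0.134 ~= 34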
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    double num, denom;

    confidence = av_clipd(confidence, 0.0, 1.0);
    num_outliers = av_clipd(num_outliers, 0.0, 1.0);

    // avoid inf's & nan's
    num = FFMAX(1.0 - confidence, DBL_MIN);
    denom = 1.0 - pow(1.0 - num_outliers, 3);
    if (denom < DBL_MIN) {
        return 0;
    }

    num = log(num);
    denom = log(denom);

    return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
}

// Estimates an affine transform between the given pairs of points using RANdom
// SAmple Consensus
static int estimate_affine_2d(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *point_pairs,
    DebugMatches *debug_matches,
    const int num_point_pairs,
    double *model_out,
    const double threshold,
    const int max_iters,
    const double confidence
) {
    int result = 0;
    double best_model[6], model[6];
    MotionVector pairs_subset[3], best_pairs[3];

    int iter, niters = FFMAX(max_iters, 1);
    int good_count, max_good_count = 0;

    // We need at least 3 points to build a model from
    if (num_point_pairs < 3) {
        return 0;
    } else if (num_point_pairs == 3) {
        // There are only 3 points, so RANSAC doesn't apply here
        run_estimate_kernel(point_pairs, model_out);

        for (int i = 0; i < 3; ++i) {
            point_pairs[i].should_consider = 1;
        }

        return 1;
    }

    for (iter = 0; iter < niters; ++iter) {
        int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);

        if (!found) {
            if (iter == 0) {
                return 0;
            }

            break;
        }

        run_estimate_kernel(pairs_subset, model);
        good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);

        if (good_count > FFMAX(max_good_count, 2)) {
            for (int mi = 0; mi < 6; ++mi) {
                best_model[mi] = model[mi];
            }

            for (int pi = 0; pi < 3; pi++) {
                best_pairs[pi] = pairs_subset[pi];
            }

            max_good_count = good_count;
            niters = ransac_update_num_iters(
                confidence,
                (double)(num_point_pairs - good_count) / num_point_pairs,
                niters
            );
        }
    }

    if (max_good_count > 0) {
        for (int mi = 0; mi < 6; ++mi) {
            model_out[mi] = best_model[mi];
        }

        for (int pi = 0; pi < 3; ++pi) {
            debug_matches->model_matches[pi] = best_pairs[pi];
        }
        debug_matches->num_model_matches = 3;

        // Find the inliers again for the best model for debugging
        find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
        result = 1;
    }

    return result;
}

// "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
// total error
static void optimize_model(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *best_pairs,
    MotionVector *inliers,
    const int num_inliers,
    float best_err,
    double *model_out
) {
    float move_x_val = 0.01;
    float move_y_val = 0.01;
    int move_x = 1;
    float old_move_x_val = 0;
    double model[6];
    int last_changed = 0;

    for (int iters = 0; iters < 200; iters++) {
        float total_err = 0;

        if (move_x) {
            best_pairs[0].p.p2.s[0] += move_x_val;
        } else {
            best_pairs[0].p.p2.s[0] += move_y_val;
        }

        run_estimate_kernel(best_pairs, model);
        compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

        for (int j = 0; j < num_inliers; j++) {
            total_err += deshake_ctx->ransac_err[j];
        }

        if (total_err < best_err) {
            for (int mi = 0; mi < 6; ++mi) {
                model_out[mi] = model[mi];
            }

            best_err = total_err;
            last_changed = iters;
        } else {
            // Undo the change
            if (move_x) {
                best_pairs[0].p.p2.s[0] -= move_x_val;
            } else {
                best_pairs[0].p.p2.s[0] -= move_y_val;
            }

            if (iters - last_changed > 4) {
                // We've already improved the model as much as we can
                break;
            }

            old_move_x_val = move_x_val;

            if (move_x) {
                move_x_val *= -1;
            } else {
                move_y_val *= -1;
            }

            if (old_move_x_val < 0) {
                move_x = 0;
            } else {
                move_x = 1;
            }
        }
    }
}

// Uses a process similar to that of RANSAC to find a transform that minimizes
// the total error for a set of point matches determined to be inliers
//
// (Pick random subsets, compute model, find total error, iterate until error
// is minimized.)
static int minimize_error(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *inliers,
    DebugMatches *debug_matches,
    const int num_inliers,
    double *model_out,
    const int max_iters
) {
    int result = 0;
    float best_err = FLT_MAX;
    double best_model[6], model[6];
    MotionVector pairs_subset[3], best_pairs[3];

    for (int i = 0; i < max_iters; i++) {
        float total_err = 0;
        int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);

        if (!found) {
            if (i == 0) {
                return 0;
            }

            break;
        }

        run_estimate_kernel(pairs_subset, model);
        compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

        for (int j = 0; j < num_inliers; j++) {
            total_err += deshake_ctx->ransac_err[j];
        }

        if (total_err < best_err) {
            for (int mi = 0; mi < 6; ++mi) {
                best_model[mi] = model[mi];
            }

            for (int pi = 0; pi < 3; pi++) {
                best_pairs[pi] = pairs_subset[pi];
            }

            best_err = total_err;
        }
    }

    for (int mi = 0; mi < 6; ++mi) {
        model_out[mi] = best_model[mi];
    }

    for (int pi = 0; pi < 3; ++pi) {
        debug_matches->model_matches[pi] = best_pairs[pi];
    }
    debug_matches->num_model_matches = 3;
    result = 1;

    optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
    return result;
}

// End code from OpenCV

// Decomposes a similarity matrix into translation, rotation, scale, and skew
//
// See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
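//
// For example (illustrative): a transform that rotates by an angle t and
// scales both axes by s has a = d = s*cos(t), b = s*sin(t), c = -s*sin(t).
// Then r = hypot(a, b) = s, rotation = FFSIGN(b) * acos(a / r) = t,
// delta = a*d - b*c = s*s so scale = (r, delta / r) = (s, s), and the skew
// vanishes because a*c + b*d = 0.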
static FrameDelta decompose_transform(double *model)
{
    FrameDelta ret;

    double a = model[0];
    double c = model[1];
    double e = model[2];
    double b = model[3];
    double d = model[4];
    double f = model[5];
    double delta = a * d - b * c;

    memset(&ret, 0, sizeof(ret));

    ret.translation.s[0] = e;
    ret.translation.s[1] = f;

    // This is the QR method
    if (a != 0 || b != 0) {
        double r = hypot(a, b);

        ret.rotation = FFSIGN(b) * acos(a / r);
        ret.scale.s[0] = r;
        ret.scale.s[1] = delta / r;
        ret.skew.s[0] = atan((a * c + b * d) / (r * r));
        ret.skew.s[1] = 0;
    } else if (c != 0 || d != 0) {
        double s = sqrt(c * c + d * d);

        ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
        ret.scale.s[0] = delta / s;
        ret.scale.s[1] = s;
        ret.skew.s[0] = 0;
        ret.skew.s[1] = atan((a * c + b * d) / (s * s));
    } // otherwise there is only translation

    return ret;
}

// Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
static int make_vectors_contig(
    DeshakeOpenCLContext *deshake_ctx,
    int size_y,
    int size_x
) {
    int num_vectors = 0;

    for (int i = 0; i < size_y; ++i) {
        for (int j = 0; j < size_x; ++j) {
            MotionVector v = deshake_ctx->matches_host[j + i * size_x];

            if (v.should_consider) {
                deshake_ctx->matches_contig_host[num_vectors] = v;
                ++num_vectors;
            }

            // Make sure we do not exceed the amount of space we allocated for these vectors
            if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
                return num_vectors;
            }
        }
    }
    return num_vectors;
}

// Returns the gaussian kernel value for the given x coordinate and sigma value
static float gaussian_for(int x, float sigma) {
    return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
}

// Makes a normalized gaussian kernel of the given length for the given sigma
// and places it in gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    float gauss_sum = 0;
    int window_half = length / 2;

    for (int i = 0; i < length; ++i) {
        float val = gaussian_for(i - window_half, sigma);

        gauss_sum += val;
        gauss_kernel[i] = val;
    }

    // Normalize the gaussian values
    for (int i = 0; i < length; ++i) {
        gauss_kernel[i] /= gauss_sum;
    }
}

// Returns indices to start and end iteration at in order to iterate over a window
// of the given length centered at the current frame in a ringbuffer
//
// Always returns numbers that result in a window of the given length, even if that
// means specifying negative indices or indices past the end of the values in the
// ringbuffers. Make sure you clip indices appropriately within your loop.
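//
// For example (illustrative): with curr_frame_offset == 5 and length == 4,
// this returns start = 3 and end = 7, so a loop `for (i = start; i < end; ++i)`
// visits offsets 3, 4, 5 and 6.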
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
    IterIndices indices;

    indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
    indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);

    return indices;
}

// Sets val to the value in the given ringbuffer at the given offset, taking care of
// clipping the offset into the appropriate range
static void ringbuf_float_at(
    DeshakeOpenCLContext *deshake_ctx,
    AVFifo *values,
    float *val,
    int offset
) {
    int clip_start, clip_end, offset_clipped;
    if (deshake_ctx->abs_motion.data_end_offset != -1) {
        clip_end = deshake_ctx->abs_motion.data_end_offset;
    } else {
        // This expression represents the last valid index in the buffer,
        // which we use repeatedly at the end of the video.
        clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;
    }

    if (deshake_ctx->abs_motion.data_start_offset != -1) {
        clip_start = deshake_ctx->abs_motion.data_start_offset;
    } else {
        // Negative indices will occur at the start of the video, and we want
        // them to be clipped to 0 in order to repeatedly use the position of
        // the first frame.
        clip_start = 0;
    }

    offset_clipped = av_clip(
        offset,
        clip_start,
        clip_end
    );

    av_fifo_peek(values, val, 1, offset_clipped);
}

// Returns the smoothed current frame value of the given buffer of floats based on the
// given Gaussian kernel and its length (also the window length, centered around the
// current frame) and the "maximum value" of the motion.
//
// This "maximum value" should be the width / height of the image in the case of
// translation and an empirically chosen constant for rotation / scale.
//
// The sigma used to generate the final gaussian kernel with which the camera path
// is smoothed is either fixed (set by the user via deshake_ctx->smooth_percent) or
// adaptively chosen.
static float smooth(
    DeshakeOpenCLContext *deshake_ctx,
    float *gauss_kernel,
    int length,
    float max_val,
    AVFifo *values
) {
    float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
          percent_of_max, inverted_percent;
    IterIndices indices = start_end_for(deshake_ctx, length);
    float large_sigma = 40.0f;
    float small_sigma = 2.0f;
    float best_sigma;

    if (deshake_ctx->smooth_percent) {
        best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
    } else {
        // Strategy to adaptively smooth trajectory:
        //
        // 1. Smooth path with large and small sigma values
        // 2. Take the absolute value of the difference between them
        // 3. Get a percentage by putting the difference over the "max value"
        // 4. Invert the percentage
        // 5. Calculate a new sigma value weighted towards the larger sigma value
        // 6. Determine final smoothed trajectory value using that sigma
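        //
        // For example (illustrative numbers): if the two smoothed paths differ
        // by 10% of max_val, best_sigma = 40 * 0.9^40 ~= 0.59 (barely any
        // smoothing); if they differ by only 1%, best_sigma = 40 * 0.99^40
        // ~= 26.8 (heavy smoothing).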

        make_gauss_kernel(gauss_kernel, length, large_sigma);
        for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
            ringbuf_float_at(deshake_ctx, values, &old, i);
            new_large_s += old * gauss_kernel[j];
        }

        make_gauss_kernel(gauss_kernel, length, small_sigma);
        for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
            ringbuf_float_at(deshake_ctx, values, &old, i);
            new_small_s += old * gauss_kernel[j];
        }

        diff_between = fabsf(new_large_s - new_small_s);
        percent_of_max = diff_between / max_val;
        inverted_percent = 1 - percent_of_max;
        best_sigma = large_sigma * powf(inverted_percent, 40);
    }

    make_gauss_kernel(gauss_kernel, length, best_sigma);
    for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
        ringbuf_float_at(deshake_ctx, values, &old, i);
        new_best += old * gauss_kernel[j];
    }

    return new_best;
}

// Returns the position of the given point after the transform is applied
static cl_float2 transformed_point(float x, float y, float *transform) {
    cl_float2 ret;

    ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
    ret.s[1] = x * transform[3] + y * transform[4] + transform[5];

    return ret;
}

// Creates an affine transform that scales from the center of a frame
static void transform_center_scale(
    float x_shift,
    float y_shift,
    float angle,
    float scale_x,
    float scale_y,
    float center_w,
    float center_h,
    float *matrix
) {
    cl_float2 center_s;
    float center_s_w, center_s_h;

    ff_get_matrix(
        0,
        0,
        0,
        scale_x,
        scale_y,
        matrix
    );

    center_s = transformed_point(center_w, center_h, matrix);
    center_s_w = center_w - center_s.s[0];
    center_s_h = center_h - center_s.s[1];

    ff_get_matrix(
        x_shift + center_s_w,
        y_shift + center_s_h,
        angle,
        scale_x,
        scale_y,
        matrix
    );
}

// Determines the crop necessary to eliminate black borders from a smoothed frame
// and updates target crop accordingly
static void update_needed_crop(
    CropInfo *crop,
    float *transform,
    float frame_width,
    float frame_height
) {
    float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;

    cl_float2 top_left = transformed_point(0, 0, transform);
    cl_float2 top_right = transformed_point(frame_width, 0, transform);
    cl_float2 bottom_left = transformed_point(0, frame_height, transform);
    cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
    float ar_h = frame_height / frame_width;
    float ar_w = frame_width / frame_height;

    if (crop->bottom_right.s[0] == 0) {
        // The crop hasn't been set to the original size of the plane
        crop->bottom_right.s[0] = frame_width;
        crop->bottom_right.s[1] = frame_height;
    }

    crop->top_left.s[0] = FFMAX3(
        crop->top_left.s[0],
        top_left.s[0],
        bottom_left.s[0]
    );

    crop->top_left.s[1] = FFMAX3(
        crop->top_left.s[1],
        top_left.s[1],
        top_right.s[1]
    );

    crop->bottom_right.s[0] = FFMIN3(
        crop->bottom_right.s[0],
        bottom_right.s[0],
        top_right.s[0]
    );

    crop->bottom_right.s[1] = FFMIN3(
        crop->bottom_right.s[1],
        bottom_right.s[1],
        bottom_left.s[1]
    );

    // Make sure our potentially new bounding box has the same aspect ratio
    new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
    new_width = crop->bottom_right.s[0] - crop->top_left.s[0];

    adjusted_width = new_height * ar_w;
    adjusted_x = crop->bottom_right.s[0] - adjusted_width;

    if (adjusted_x >= crop->top_left.s[0]) {
        crop->top_left.s[0] = adjusted_x;
    } else {
        adjusted_height = new_width * ar_h;
        adjusted_y = crop->bottom_right.s[1] - adjusted_height;
        crop->top_left.s[1] = adjusted_y;
    }
}

static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
{
    DeshakeOpenCLContext *ctx = avctx->priv;
    cl_int cle;

    for (int i = 0; i < RingbufCount; i++)
        av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);

    if (ctx->debug_on)
        free_debug_matches(&ctx->abs_motion);

    if (ctx->gauss_kernel)
        av_freep(&ctx->gauss_kernel);

    if (ctx->ransac_err)
        av_freep(&ctx->ransac_err);

    if (ctx->matches_host)
        av_freep(&ctx->matches_host);

    if (ctx->matches_contig_host)
        av_freep(&ctx->matches_contig_host);

    if (ctx->inliers)
        av_freep(&ctx->inliers);

    ff_framequeue_free(&ctx->fq);

    CL_RELEASE_KERNEL(ctx->kernel_grayscale);
    CL_RELEASE_KERNEL(ctx->kernel_harris_response);
    CL_RELEASE_KERNEL(ctx->kernel_refine_features);
    CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
    CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
    CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
    if (ctx->debug_on)
        CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);

    CL_RELEASE_QUEUE(ctx->command_queue);

    if (!ctx->is_yuv)
        CL_RELEASE_MEMORY(ctx->grayscale);
    CL_RELEASE_MEMORY(ctx->harris_buf);
    CL_RELEASE_MEMORY(ctx->refined_features);
    CL_RELEASE_MEMORY(ctx->prev_refined_features);
    CL_RELEASE_MEMORY(ctx->brief_pattern);
    CL_RELEASE_MEMORY(ctx->descriptors);
    CL_RELEASE_MEMORY(ctx->prev_descriptors);
    CL_RELEASE_MEMORY(ctx->matches);
    CL_RELEASE_MEMORY(ctx->matches_contig);
    CL_RELEASE_MEMORY(ctx->transform_y);
    CL_RELEASE_MEMORY(ctx->transform_uv);
    if (ctx->debug_on) {
        CL_RELEASE_MEMORY(ctx->debug_matches);
        CL_RELEASE_MEMORY(ctx->debug_model_matches);
    }

    ff_opencl_filter_uninit(avctx);
}

static int deshake_opencl_init(AVFilterContext *avctx)
{
    DeshakeOpenCLContext *ctx = avctx->priv;
    AVFilterLink *outlink = avctx->outputs[0];
    AVFilterLink *inlink = avctx->inputs[0];
    // Pointer to the host-side pattern buffer to be initialized and then copied
    // to the GPU
    PointPair *pattern_host = NULL;
    cl_int cle;
    int err;
    cl_ulong8 zeroed_ulong8;
    FFFrameQueueGlobal fqg;
    cl_image_format grayscale_format;
    cl_image_desc grayscale_desc;
    cl_command_queue_properties queue_props;

    const enum AVPixelFormat disallowed_formats[14] = {
        AV_PIX_FMT_GBRP,
        AV_PIX_FMT_GBRP9BE,
        AV_PIX_FMT_GBRP9LE,
        AV_PIX_FMT_GBRP10BE,
        AV_PIX_FMT_GBRP10LE,
        AV_PIX_FMT_GBRP16BE,
        AV_PIX_FMT_GBRP16LE,
        AV_PIX_FMT_GBRAP,
        AV_PIX_FMT_GBRAP16BE,
        AV_PIX_FMT_GBRAP16LE,
        AV_PIX_FMT_GBRAP12BE,
        AV_PIX_FMT_GBRAP12LE,
        AV_PIX_FMT_GBRAP10BE,
        AV_PIX_FMT_GBRAP10LE
    };

    // Number of 32x32 blocks in the frame; used to size the per-block feature arrays
    const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);

    const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
    const int features_buf_size = image_grid_32 * sizeof(cl_float2);

    const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);

    av_assert0(hw_frames_ctx);
    av_assert0(desc);

    ff_framequeue_global_init(&fqg);
    ff_framequeue_init(&ctx->fq, &fqg);
    ctx->eof = 0;
    ctx->smooth_window = (int)(av_q2d(avctx->inputs[0]->frame_rate) * ctx->smooth_window_multiplier);
    ctx->curr_frame = 0;

    memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));

    ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
    if (!ctx->gauss_kernel) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
    if (!ctx->ransac_err) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    for (int i = 0; i < RingbufCount; i++) {
        ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,
                                                        sizeof(float), 0);

        if (!ctx->abs_motion.ringbuffers[i]) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (ctx->debug_on) {
        ctx->abs_motion.debug_matches = av_fifo_alloc2(
            ctx->smooth_window / 2,
            sizeof(DebugMatches), 0
        );

        if (!ctx->abs_motion.debug_matches) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    ctx->abs_motion.curr_frame_offset = 0;
    ctx->abs_motion.data_start_offset = -1;
    ctx->abs_motion.data_end_offset = -1;

    pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
    if (!pattern_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
    if (!ctx->matches_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
    if (!ctx->matches_contig_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
    if (!ctx->inliers) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    // Initialize the random patch pattern used for building BRIEF descriptors
    av_lfg_init(&ctx->alfg, 234342424);
    for (int i = 0; i < BREIFN; ++i) {
        PointPair pair;

        for (int j = 0; j < 2; ++j) {
            pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
            pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
        }

        pattern_host[i] = pair;
    }

    ctx->sw_format = hw_frames_ctx->sw_format;

    for (int i = 0; i < 14; i++) {
        if (ctx->sw_format == disallowed_formats[i]) {
            av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
            err = AVERROR(ENOSYS);
            goto fail;
        }
    }

    if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
        ctx->is_yuv = 0;
    } else {
        ctx->is_yuv = 1;
    }

    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_deshake, 1);
    if (err < 0)
        goto fail;

    if (ctx->debug_on) {
        queue_props = CL_QUEUE_PROFILING_ENABLE;
    } else {
        queue_props = 0;
    }
    ctx->command_queue = clCreateCommandQueue(
        ctx->ocf.hwctx->context,
        ctx->ocf.hwctx->device_id,
        queue_props,
        &cle
    );
    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);

    CL_CREATE_KERNEL(ctx, grayscale);
    CL_CREATE_KERNEL(ctx, harris_response);
    CL_CREATE_KERNEL(ctx, refine_features);
    CL_CREATE_KERNEL(ctx, brief_descriptors);
    CL_CREATE_KERNEL(ctx, match_descriptors);
    CL_CREATE_KERNEL(ctx, transform);
    CL_CREATE_KERNEL(ctx, crop_upscale);
    if (ctx->debug_on)
        CL_CREATE_KERNEL(ctx, draw_debug_info);

    if (!ctx->is_yuv) {
        grayscale_format.image_channel_order = CL_R;
        grayscale_format.image_channel_data_type = CL_FLOAT;

        grayscale_desc = (cl_image_desc) {
            .image_type = CL_MEM_OBJECT_IMAGE2D,
            .image_width = outlink->w,
            .image_height = outlink->h,
            .image_depth = 0,
            .image_array_size = 0,
            .image_row_pitch = 0,
            .image_slice_pitch = 0,
            .num_mip_levels = 0,
            .num_samples = 0,
            .buffer = NULL,
        };

        ctx->grayscale = clCreateImage(
            ctx->ocf.hwctx->context,
            0,
            &grayscale_format,
            &grayscale_desc,
            NULL,
            &cle
        );
        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
    }

    CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
    CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
    CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
    CL_CREATE_BUFFER_FLAGS(
        ctx,
        brief_pattern,
        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        BREIFN * sizeof(PointPair),
        pattern_host
    );
    CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
    CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
    CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
    CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
    CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
    CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
    if (ctx->debug_on) {
        CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
        CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
    }

    ctx->initialized = 1;
    av_freep(&pattern_host);

    return 0;

fail:
    av_freep(&pattern_host);
    return err;
}

// Logs debug information about the transform data
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
    av_log(avctx, AV_LOG_VERBOSE,
        "Frame %d:\n"
        "\tframe moved from: %f x, %f y\n"
        "\t              to: %f x, %f y\n"
        "\t    rotated from: %f degrees\n"
        "\t              to: %f degrees\n"
        "\t     scaled from: %f x, %f y\n"
        "\t              to: %f x, %f y\n"
        "\n"
        "\tframe moved by: %f x, %f y\n"
        "\t    rotated by: %f degrees\n"
        "\t     scaled by: %f x, %f y\n",
        curr_frame,
        old_vals[RingbufX], old_vals[RingbufY],
        new_vals[RingbufX], new_vals[RingbufY],
        old_vals[RingbufRot] * (180.0 / M_PI),
        new_vals[RingbufRot] * (180.0 / M_PI),
        old_vals[RingbufScaleX], old_vals[RingbufScaleY],
        new_vals[RingbufScaleX], new_vals[RingbufScaleY],
        old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
        old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
    );
}

// Uses the buffered motion information to determine a transform that smooths the
// given frame and applies it
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
{
    AVFilterContext *avctx = link->dst;
    AVFilterLink *outlink = avctx->outputs[0];
    DeshakeOpenCLContext *deshake_ctx = avctx->priv;
    AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
    int err;
    cl_int cle;
    float new_vals[RingbufCount];
    float old_vals[RingbufCount];
    // Luma (in the case of YUV) transform, or just the transform in the case of RGB
    float transform_y[9];
    // Chroma transform
    float transform_uv[9];
    // Luma crop transform (or RGB)
    float transform_crop_y[9];
    // Chroma crop transform
    float transform_crop_uv[9];
    float transform_debug_rgb[9];
    size_t global_work[2];
    int64_t duration;
    cl_mem src, transformed, dst;
    cl_mem transforms[3];
    CropInfo crops[3];
    cl_event transform_event, crop_upscale_event;
    DebugMatches debug_matches;
    cl_int num_model_matches;

    const float center_w = (float)input_frame->width / 2;
    const float center_h = (float)input_frame->height / 2;

    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
    const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
    const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);

    const float center_w_chroma = (float)chroma_width / 2;
    const float center_h_chroma = (float)chroma_height / 2;

    const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
    const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);

    if (deshake_ctx->debug_on) {
        av_fifo_read(
            deshake_ctx->abs_motion.debug_matches,
            &debug_matches, 1);
    }

    if (input_frame->pkt_duration) {
        duration = input_frame->pkt_duration;
    } else {
        duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
    }
    deshake_ctx->duration = input_frame->pts + duration;

    // Get the absolute transform data for this frame
    for (int i = 0; i < RingbufCount; i++) {
        av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],
                     &old_vals[i], 1,
                     deshake_ctx->abs_motion.curr_frame_offset);
    }

    if (deshake_ctx->tripod_mode) {
        // If tripod mode is turned on we simply undo all motion relative to the
        // first frame

        new_vals[RingbufX] = 0.0f;
        new_vals[RingbufY] = 0.0f;
        new_vals[RingbufRot] = 0.0f;
        new_vals[RingbufScaleX] = 1.0f;
        new_vals[RingbufScaleY] = 1.0f;
    } else {
        // Tripod mode is off and we need to smooth a moving camera

        new_vals[RingbufX] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            input_frame->width,
            deshake_ctx->abs_motion.ringbuffers[RingbufX]
        );
        new_vals[RingbufY] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            input_frame->height,
            deshake_ctx->abs_motion.ringbuffers[RingbufY]
        );
        new_vals[RingbufRot] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            M_PI / 4,
            deshake_ctx->abs_motion.ringbuffers[RingbufRot]
        );
        new_vals[RingbufScaleX] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            2.0f,
            deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
        );
        new_vals[RingbufScaleY] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            2.0f,
            deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
        );
    }

    transform_center_scale(
        old_vals[RingbufX] - new_vals[RingbufX],
        old_vals[RingbufY] - new_vals[RingbufY],
        old_vals[RingbufRot] - new_vals[RingbufRot],
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
        new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
        center_w,
        center_h,
        transform_y
    );

    transform_center_scale(
        (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
        (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
        old_vals[RingbufRot] - new_vals[RingbufRot],
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
        new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
        center_w_chroma,
        center_h_chroma,
        transform_uv
    );

    CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
    CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);

    if (deshake_ctx->debug_on)
        transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);

    cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!cropped_frame) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!transformed_frame) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    transforms[0] = deshake_ctx->transform_y;
    transforms[1] = transforms[2] = deshake_ctx->transform_uv;

    for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {
        // Transform all of the planes appropriately
        src = (cl_mem)input_frame->data[p];
        transformed = (cl_mem)transformed_frame->data[p];

        if (!transformed)
            break;

        err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
        if (err < 0)
            goto fail;

        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_transform,
            global_work,
            NULL,
            &transform_event,
            { sizeof(cl_mem), &src },
            { sizeof(cl_mem), &transformed },
            { sizeof(cl_mem), &transforms[p] },
        );
    }

    if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
        CL_BLOCKING_WRITE_BUFFER(
            deshake_ctx->command_queue,
            deshake_ctx->debug_matches,
            debug_matches.num_matches * sizeof(MotionVector),
            debug_matches.matches,
            NULL
        );

        CL_BLOCKING_WRITE_BUFFER(
            deshake_ctx->command_queue,
            deshake_ctx->debug_model_matches,
            debug_matches.num_model_matches * sizeof(MotionVector),
            debug_matches.model_matches,
            NULL
        );

        num_model_matches = debug_matches.num_model_matches;

        // Invert the transform
        transform_center_scale(
            new_vals[RingbufX] - old_vals[RingbufX],
            new_vals[RingbufY] - old_vals[RingbufY],
            new_vals[RingbufRot] - old_vals[RingbufRot],
            old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
            old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
            center_w,
            center_h,
            transform_debug_rgb
        );

        CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);

        transformed = (cl_mem)transformed_frame->data[0];
        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_draw_debug_info,
            (size_t[]){ debug_matches.num_matches },
            NULL,
            NULL,
            { sizeof(cl_mem), &transformed },
            { sizeof(cl_mem), &deshake_ctx->debug_matches },
            { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
            { sizeof(cl_int), &num_model_matches },
            { sizeof(cl_mem), &deshake_ctx->transform_y }
        );
    }

    if (deshake_ctx->should_crop) {
        // Generate transforms for cropping
        transform_center_scale(
            (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
            (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
            (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
            new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
            new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
            center_w,
            center_h,
            transform_crop_y
        );
        update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);

        transform_center_scale(
            (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
            (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
            (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
            new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
            new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
            center_w_chroma,
            center_h_chroma,
            transform_crop_uv
        );
        update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);

        crops[0] = deshake_ctx->crop_y;
        crops[1] = crops[2] = deshake_ctx->crop_uv;

        for (int p = 0; p < FF_ARRAY_ELEMS(cropped_frame->data); p++) {
            // Crop all of the planes appropriately
            dst = (cl_mem)cropped_frame->data[p];
            transformed = (cl_mem)transformed_frame->data[p];

            if (!dst)
                break;

            err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
            if (err < 0)
                goto fail;

            CL_RUN_KERNEL_WITH_ARGS(
                deshake_ctx->command_queue,
                deshake_ctx->kernel_crop_upscale,
                global_work,
                NULL,
                &crop_upscale_event,
                { sizeof(cl_mem), &transformed },
                { sizeof(cl_mem), &dst },
                { sizeof(cl_float2), &crops[p].top_left },
                { sizeof(cl_float2), &crops[p].bottom_right },
            );
        }
    }

    if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
        // This means we are somewhere at the start of the video. We need to
        // increment the current frame offset until it reaches the center of
        // the ringbuffers (as the current frame will be located there for
        // the rest of the video).
        //
        // The end of the video is taken care of by draining motion data
        // one-by-one out of the buffer, causing the (at that point fixed)
        // offset to move towards later frames' data.
        ++deshake_ctx->abs_motion.curr_frame_offset;
    }

    if (deshake_ctx->abs_motion.data_end_offset != -1) {
        // Keep the end offset in sync with the frame it's supposed to be
        // positioned at
        --deshake_ctx->abs_motion.data_end_offset;

        if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
            // The end offset would be the start of the new video sequence; flip to
            // start offset
            deshake_ctx->abs_motion.data_end_offset = -1;
            deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
        }
    } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
        // Keep the start offset in sync with the frame it's supposed to be
        // positioned at
        --deshake_ctx->abs_motion.data_start_offset;
    }

    if (deshake_ctx->debug_on) {
        deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
        if (deshake_ctx->should_crop) {
            deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
        }
    }

    ++deshake_ctx->curr_frame;

    if (deshake_ctx->debug_on)
        av_freep(&debug_matches.matches);

    if (deshake_ctx->should_crop) {
        err = av_frame_copy_props(cropped_frame, input_frame);
        if (err < 0)
            goto fail;

        av_frame_free(&transformed_frame);
        av_frame_free(&input_frame);
        return ff_filter_frame(outlink, cropped_frame);

    } else {
        err = av_frame_copy_props(transformed_frame, input_frame);
        if (err < 0)
            goto fail;

        av_frame_free(&cropped_frame);
        av_frame_free(&input_frame);
        return ff_filter_frame(outlink, transformed_frame);
    }

fail:
    clFinish(deshake_ctx->command_queue);

    if (deshake_ctx->debug_on)
        if (debug_matches.matches)
            av_freep(&debug_matches.matches);

    av_frame_free(&input_frame);
    av_frame_free(&transformed_frame);
    av_frame_free(&cropped_frame);
    return err;
}
1721
1722 // Add the given frame to the frame queue to eventually be processed.
1723 //
1724 // Also determines the motion from the previous frame and updates the stored
1725 // motion information accordingly.
queue_frame(AVFilterLink * link,AVFrame * input_frame)1726 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
{
    AVFilterContext *avctx = link->dst;
    DeshakeOpenCLContext *deshake_ctx = avctx->priv;
    int err;
    int num_vectors;
    int num_inliers = 0;
    cl_int cle;
    FrameDelta relative;
    SimilarityMatrix model;
    size_t global_work[2];
    size_t harris_global_work[2];
    size_t grid_32_global_work[2];
    int grid_32_h, grid_32_w;
    size_t local_work[2];
    cl_mem src, temp;
    float prev_vals[5];
    float new_vals[5];
    cl_event grayscale_event, harris_response_event, refine_features_event,
             brief_event, match_descriptors_event, read_buf_event;
    DebugMatches debug_matches;

    num_vectors = 0;

    local_work[0] = 8;
    local_work[1] = 8;

    err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
    if (err < 0)
        goto fail;

    err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
    if (err < 0)
        goto fail;

    err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
    if (err < 0)
        goto fail;

    // We want a single work-item for each 32x32 block of pixels in the input frame
    grid_32_global_work[0] /= 32;
    grid_32_global_work[1] /= 32;

    grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
    grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
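    // e.g. a 1920x1080 frame yields a 60x34 grid of blocks (1920 / 32 = 60,
    // 1080 / 32 rounded up = 34), i.e. up to 2040 candidate feature points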

    if (deshake_ctx->is_yuv) {
        deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
    } else {
        src = (cl_mem)input_frame->data[0];

        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_grayscale,
            global_work,
            NULL,
            &grayscale_event,
            { sizeof(cl_mem), &src },
            { sizeof(cl_mem), &deshake_ctx->grayscale }
        );
    }

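    // Compute a Harris corner response for each pixel of the (now
    // single-channel) grayscale image; feature points are selected from
    // these responses below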
    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_harris_response,
        harris_global_work,
        local_work,
        &harris_response_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->harris_buf }
    );

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_refine_features,
        grid_32_global_work,
        NULL,
        &refine_features_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->harris_buf },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_int), &deshake_ctx->refine_features }
    );

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_brief_descriptors,
        grid_32_global_work,
        NULL,
        &brief_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_mem), &deshake_ctx->descriptors },
        { sizeof(cl_mem), &deshake_ctx->brief_pattern }
    );

    if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
        // This is the first frame we've been given to queue, meaning there is
        // no previous frame to match descriptors to
        goto no_motion_data;
    }

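    // Match this frame's BRIEF descriptors against the previous frame's,
    // yielding up to one candidate motion vector per 32x32 grid block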
    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_match_descriptors,
        grid_32_global_work,
        NULL,
        &match_descriptors_event,
        { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_mem), &deshake_ctx->descriptors },
        { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
        { sizeof(cl_mem), &deshake_ctx->matches }
    );

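    // Blocking read (CL_TRUE): the matched vectors are available in host
    // memory once this call returns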
    cle = clEnqueueReadBuffer(
        deshake_ctx->command_queue,
        deshake_ctx->matches,
        CL_TRUE,
        0,
        grid_32_h * grid_32_w * sizeof(MotionVector),
        deshake_ctx->matches_host,
        0,
        NULL,
        &read_buf_event
    );
    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);

    num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);

    if (num_vectors < 10) {
        // Not enough matches to get reliable motion data for this frame
        //
        // From this point on all data is relative to this frame rather than the
        // original frame. We have to make sure that we don't mix values that were
        // relative to the original frame with the new values relative to this
        // frame when doing the gaussian smoothing. We keep track of where the old
        // values end using this data_end_offset field in order to accomplish
        // that goal.
        //
        // If no motion data is present for multiple frames in a short window of
        // time, we leave the end where it was to avoid mixing 0s in with the
        // old data (and just treat them all as part of the new values)
        if (deshake_ctx->abs_motion.data_end_offset == -1) {
            deshake_ctx->abs_motion.data_end_offset =
                av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;
        }

        goto no_motion_data;
    }

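    // Fit an affine model to the candidate vectors; the final three arguments
    // are the inlier distance threshold (in pixels), the iteration cap, and
    // the target confidence of the estimate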
    if (!estimate_affine_2d(
        deshake_ctx,
        deshake_ctx->matches_contig_host,
        &debug_matches,
        num_vectors,
        model.matrix,
        10.0,
        3000,
        0.999999999999
    )) {
        goto no_motion_data;
    }

    for (int i = 0; i < num_vectors; i++) {
        if (deshake_ctx->matches_contig_host[i].should_consider) {
            deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
            num_inliers++;
        }
    }

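    // Refine the model using only the inliers collected above, running at
    // most 400 refinement iterations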
    if (!minimize_error(
        deshake_ctx,
        deshake_ctx->inliers,
        &debug_matches,
        num_inliers,
        model.matrix,
        400
    )) {
        goto no_motion_data;
    }

    relative = decompose_transform(model.matrix);

    // Get the absolute transform data for the previous frame
    for (int i = 0; i < RingbufCount; i++) {
        av_fifo_peek(
            deshake_ctx->abs_motion.ringbuffers[i],
            &prev_vals[i], 1,
            av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);
    }

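    // Accumulate the relative frame-to-frame motion into absolute values:
    // translation and rotation are summed, while scale accumulates as a
    // running product (written here as division by the relative factor)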
    new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
    new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
    new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
    new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
    new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];

    if (deshake_ctx->debug_on) {
        if (!deshake_ctx->is_yuv) {
            deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
        }
        deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
        deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
        deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
        deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
        deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
    }

    goto end;

no_motion_data:
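    // No usable model for this frame: fall back to an identity transform
    // (zero translation and rotation, unit scale)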
    new_vals[RingbufX] = 0.0f;
    new_vals[RingbufY] = 0.0f;
    new_vals[RingbufRot] = 0.0f;
    new_vals[RingbufScaleX] = 1.0f;
    new_vals[RingbufScaleY] = 1.0f;

    for (int i = 0; i < num_vectors; i++) {
        deshake_ctx->matches_contig_host[i].should_consider = 0;
    }
    debug_matches.num_model_matches = 0;

    if (deshake_ctx->debug_on) {
        av_log(avctx, AV_LOG_VERBOSE,
            "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
        );
    }

end:
    // Swap the descriptor buffers (we don't need the previous frame's descriptors
    // again so we will use that space for the next frame's descriptors)
    temp = deshake_ctx->prev_descriptors;
    deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
    deshake_ctx->descriptors = temp;

    // Same for the refined features
    temp = deshake_ctx->prev_refined_features;
    deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
    deshake_ctx->refined_features = temp;

    if (deshake_ctx->debug_on) {
        if (num_vectors == 0) {
            debug_matches.matches = NULL;
        } else {
            debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));

            if (!debug_matches.matches) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }

        for (int i = 0; i < num_vectors; i++) {
            debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
        }
        debug_matches.num_matches = num_vectors;

        av_fifo_write(
            deshake_ctx->abs_motion.debug_matches,
            &debug_matches, 1);
    }

    for (int i = 0; i < RingbufCount; i++) {
        av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);
    }

    return ff_framequeue_add(&deshake_ctx->fq, input_frame);

fail:
    clFinish(deshake_ctx->command_queue);
    av_frame_free(&input_frame);
    return err;
}

static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    DeshakeOpenCLContext *deshake_ctx = ctx->priv;
    AVFrame *frame = NULL;
    int ret, status;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    if (!deshake_ctx->eof) {
        ret = ff_inlink_consume_frame(inlink, &frame);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            if (!frame->hw_frames_ctx)
                return AVERROR(EINVAL);

            if (!deshake_ctx->initialized) {
                ret = deshake_opencl_init(ctx);
                if (ret < 0)
                    return ret;
            }

            // If there is no more space in the ringbuffers, remove the oldest
            // values to make room for the new ones
            if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
                for (int i = 0; i < RingbufCount; i++) {
                    av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
                }
            }
            ret = queue_frame(inlink, frame);
            if (ret < 0)
                return ret;
            if (ret >= 0) {
                // See if we have enough buffered frames to process one
                //
                // "enough" means at least half the smooth window of frames
                // queued into the future
                if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
                    return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
                }
            }
        }
    }

    if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF) {
            deshake_ctx->eof = 1;
        }
    }

    if (deshake_ctx->eof) {
        // Finish processing the rest of the frames in the queue.
        while (ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
            for (int i = 0; i < RingbufCount; i++) {
                av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
            }

            ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
            if (ret < 0) {
                return ret;
            }
        }

        if (deshake_ctx->debug_on) {
            av_log(ctx, AV_LOG_VERBOSE,
                "Average kernel execution times:\n"
                "\t        grayscale: %0.3f ms\n"
                "\t  harris_response: %0.3f ms\n"
                "\t  refine_features: %0.3f ms\n"
                "\tbrief_descriptors: %0.3f ms\n"
                "\tmatch_descriptors: %0.3f ms\n"
                "\t        transform: %0.3f ms\n"
                "\t     crop_upscale: %0.3f ms\n"
                "Average buffer read times:\n"
                "\t     features buf: %0.3f ms\n",
                averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
                averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
            );
        }

        ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
        return 0;
    }

    if (!deshake_ctx->eof) {
        FF_FILTER_FORWARD_WANTED(outlink, inlink);
    }

    return FFERROR_NOT_READY;
}

static const AVFilterPad deshake_opencl_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_opencl_filter_config_input,
    },
};

static const AVFilterPad deshake_opencl_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_opencl_filter_config_output,
    },
};

#define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption deshake_opencl_options[] = {
    {
        "tripod", "simulates a tripod by preventing any camera movement whatsoever "
        "relative to the original frame",
        OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
    },
    {
        "debug", "turn on additional debugging information",
        OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
    },
    {
        "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
        OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
    },
    {
        "refine_features", "refine feature point locations at a sub-pixel level",
        OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
    },
    {
        "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
        OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
    },
    {
        "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
        OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
    },
    { NULL }
};

AVFILTER_DEFINE_CLASS(deshake_opencl);

const AVFilter ff_vf_deshake_opencl = {
    .name           = "deshake_opencl",
    .description    = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
    .priv_size      = sizeof(DeshakeOpenCLContext),
    .priv_class     = &deshake_opencl_class,
    .init           = &ff_opencl_filter_init,
    .uninit         = &deshake_opencl_uninit,
    .activate       = activate,
    FILTER_INPUTS(deshake_opencl_inputs),
    FILTER_OUTPUTS(deshake_opencl_outputs),
    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};