1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "libavutil/hwcontext.h"
24 #include "libavutil/hwcontext_cuda_internal.h"
25 #include "libavutil/cuda_check.h"
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
28
29 #include "avfilter.h"
30 #include "internal.h"
31
/*
 * Pass a CUDA driver call through FF_CUDA_CHECK_DL. The expansion refers to
 * `ctx` and to `s->hwctx`, so both names must be in scope wherever the macro
 * is used.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)

/* Number of histogram bins per frame: three runs of 256 bins stored back to back. */
#define HIST_SIZE (3*256)
/* Integer division of a by b, rounded up. */
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Thread-block width and height used when launching the histogram kernels. */
#define BLOCKX 32
#define BLOCKY 16
38
/* Software pixel formats accepted by format_is_supported(). */
static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_P016,
    AV_PIX_FMT_YUV444P16,
};
47
/* One cached input frame together with the histogram computed for it. */
struct thumb_frame {
    AVFrame *buf;               ///< cached frame
    int histogram[HIST_SIZE];   ///< per-plane sample histogram of the frame (3 x 256 bins)
};
52
/* Private state of one thumbnail_cuda filter instance. */
typedef struct ThumbnailCudaContext {
    const AVClass *class;
    int n;                      ///< number of frames currently cached; also the index of the next free slot
    int n_frames;               ///< number of frames for analysis
    struct thumb_frame *frames; ///< the n_frames frames
    AVRational tb;              ///< copy of the input timebase to ease access

    AVBufferRef *hw_frames_ctx; ///< input link's frames context; copied from the link without taking a new reference
    AVCUDADeviceContext *hwctx; ///< CUDA device context taken from the input frames context

    CUmodule cu_module;         ///< module loaded from the embedded PTX image

    /* Kernel entry points looked up in cu_module ("Thumbnail_<type>"). */
    CUfunction cu_func_uchar;
    CUfunction cu_func_uchar2;
    CUfunction cu_func_ushort;
    CUfunction cu_func_ushort2;
    CUstream cu_stream;         ///< stream taken from hwctx, used for all launches and copies

    CUdeviceptr data;           ///< device buffer of HIST_SIZE ints receiving the histogram

} ThumbnailCudaContext;
74
/* Helpers for the option table: field offset inside the private context and common flags. */
#define OFFSET(x) offsetof(ThumbnailCudaContext, x)
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/* User options: "n" is the batch size, default 100, allowed range [2, INT_MAX]. */
static const AVOption thumbnail_cuda_options[] = {
    { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(thumbnail_cuda);
84
init(AVFilterContext * ctx)85 static av_cold int init(AVFilterContext *ctx)
86 {
87 ThumbnailCudaContext *s = ctx->priv;
88
89 s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
90 if (!s->frames) {
91 av_log(ctx, AV_LOG_ERROR,
92 "Allocation failure, try to lower the number of frames\n");
93 return AVERROR(ENOMEM);
94 }
95 av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
96 return 0;
97 }
98
99 /**
100 * @brief Compute Sum-square deviation to estimate "closeness".
101 * @param hist color distribution histogram
102 * @param median average color distribution histogram
103 * @return sum of squared errors
104 */
frame_sum_square_err(const int * hist,const double * median)105 static double frame_sum_square_err(const int *hist, const double *median)
106 {
107 int i;
108 double err, sum_sq_err = 0;
109
110 for (i = 0; i < HIST_SIZE; i++) {
111 err = median[i] - (double)hist[i];
112 sum_sq_err += err*err;
113 }
114 return sum_sq_err;
115 }
116
get_best_frame(AVFilterContext * ctx)117 static AVFrame *get_best_frame(AVFilterContext *ctx)
118 {
119 AVFrame *picref;
120 ThumbnailCudaContext *s = ctx->priv;
121 int i, j, best_frame_idx = 0;
122 int nb_frames = s->n;
123 double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
124
125 // average histogram of the N frames
126 for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
127 for (i = 0; i < nb_frames; i++)
128 avg_hist[j] += (double)s->frames[i].histogram[j];
129 avg_hist[j] /= nb_frames;
130 }
131
132 // find the frame closer to the average using the sum of squared errors
133 for (i = 0; i < nb_frames; i++) {
134 sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
135 if (i == 0 || sq_err < min_sq_err)
136 best_frame_idx = i, min_sq_err = sq_err;
137 }
138
139 // free and reset everything (except the best frame buffer)
140 for (i = 0; i < nb_frames; i++) {
141 memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
142 if (i != best_frame_idx)
143 av_frame_free(&s->frames[i].buf);
144 }
145 s->n = 0;
146
147 // raise the chosen one
148 picref = s->frames[best_frame_idx].buf;
149 av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
150 "from a set of %d images\n", best_frame_idx,
151 picref->pts * av_q2d(s->tb), nb_frames);
152 s->frames[best_frame_idx].buf = NULL;
153
154 return picref;
155 }
156
/**
 * Wrap one image plane in a texture object and launch a histogram kernel
 * over it on the filter's stream.
 *
 * @param ctx        filter instance (also used for logging by CHECK_CU)
 * @param func       kernel to launch
 * @param channels   number of channels per texel of the plane
 * @param histogram  device pointer handed to the kernel as its output
 * @param src_dptr   device pointer to the start of the plane
 * @param src_width  plane width in texels
 * @param src_height plane height in rows
 * @param src_pitch  plane pitch in bytes
 * @param pixel_size 1 selects 8-bit texels, any other value selects 16-bit
 * @return result of the last checked CUDA call; negative on failure
 */
static int thumbnail_kernel(AVFilterContext *ctx, CUfunction func, int channels,
    int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
{
    ThumbnailCudaContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUDA_TEXTURE_DESC tex_desc = { 0 };
    CUDA_RESOURCE_DESC res_desc = { 0 };
    CUtexObject tex = 0;
    // kernel parameters, in the order they are passed to cuLaunchKernel
    void *args[] = { &tex, &histogram, &src_width, &src_height };
    int ret;

    tex_desc.filterMode = CU_TR_FILTER_MODE_LINEAR;
    tex_desc.flags      = CU_TRSF_READ_AS_INTEGER;

    res_desc.resType                  = CU_RESOURCE_TYPE_PITCH2D;
    res_desc.res.pitch2D.format       = pixel_size != 1 ?
                                        CU_AD_FORMAT_UNSIGNED_INT16 :
                                        CU_AD_FORMAT_UNSIGNED_INT8;
    res_desc.res.pitch2D.numChannels  = channels;
    res_desc.res.pitch2D.width        = src_width;
    res_desc.res.pitch2D.height       = src_height;
    res_desc.res.pitch2D.pitchInBytes = src_pitch;
    res_desc.res.pitch2D.devPtr       = (CUdeviceptr)src_dptr;

    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
    if (ret >= 0) {
        // one thread per texel, rounded up to whole BLOCKX x BLOCKY blocks
        ret = CHECK_CU(cu->cuLaunchKernel(func,
                                          DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1,
                                          BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
    }

    // the texture object is released whether or not the launch succeeded
    if (tex)
        CHECK_CU(cu->cuTexObjectDestroy(tex));

    return ret;
}
196
/**
 * Queue the histogram kernels for every plane of a frame.
 *
 * The luma plane is accumulated at histogram[0], the chroma data at
 * histogram[256] and histogram[512]. For the semi-planar formats a single
 * two-channel launch at histogram + 256 covers the interleaved chroma plane
 * (presumably the kernel writes the second component 256 bins further on;
 * the kernel source is not part of this file).
 *
 * @param ctx       filter instance
 * @param histogram device pointer to HIST_SIZE ints
 * @param in        CUDA frame to analyse; its hw_frames_ctx must be set
 * @return 0 on success, the first negative error reported by a kernel launch,
 *         or AVERROR_BUG for a software format without a case here
 */
static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
{
    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
    ThumbnailCudaContext *s = ctx->priv;
    int ret;

    switch (in_frames_ctx->sw_format) {
    case AV_PIX_FMT_NV12:
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_uchar2, 2,
            histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_YUV420P:
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram + 512, in->data[2], in->width / 2, in->height / 2, in->linesize[2], 1);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_YUV444P:
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 1);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_uchar, 1,
            histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 1);
        if (ret < 0)
            return ret;
        break;
    // same native-endian names as in supported_formats, so that every format
    // accepted by format_is_supported() has a case here
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P016:
        ret = thumbnail_kernel(ctx, s->cu_func_ushort, 1,
            histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_ushort2, 2,
            histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 2);
        if (ret < 0)
            return ret;
        break;
    case AV_PIX_FMT_YUV444P16:
        // Each plane is a single-channel 16-bit texture, so it needs the
        // single-channel kernel, exactly as the 8-bit planar cases do; the
        // two-channel kernel does not match a one-channel texture.
        ret = thumbnail_kernel(ctx, s->cu_func_ushort, 1,
            histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_ushort, 1,
            histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 2);
        if (ret < 0)
            return ret;
        ret = thumbnail_kernel(ctx, s->cu_func_ushort, 1,
            histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 2);
        if (ret < 0)
            return ret;
        break;
    default:
        return AVERROR_BUG;
    }

    return 0;
}
246
/**
 * Cache an incoming frame, compute its histogram on the GPU and, once a full
 * batch has been collected, forward the most representative frame.
 *
 * The frame is owned by this filter from the moment it is passed in. On any
 * failure it is released and its cache slot is cleared, so a later call
 * cannot overwrite (and leak) it.
 *
 * @param inlink input link the frame arrived on
 * @param frame  CUDA frame to analyse
 * @return 0 while the batch is still filling, the result of ff_filter_frame()
 *         when a frame is emitted, or a negative error code
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
    AVFilterContext *ctx = inlink->dst;
    ThumbnailCudaContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    AVFilterLink *outlink = ctx->outputs[0];
    int *hist = s->frames[s->n].histogram;
    AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
    CUcontext dummy;
    CUDA_MEMCPY2D cpy = { 0 };
    int ret;

    // keep a reference of each frame
    s->frames[s->n].buf = frame;

    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
    if (ret < 0)
        goto fail;

    // clear the device histogram, accumulate into it, then copy it to the host
    ret = CHECK_CU(cu->cuMemsetD8Async(s->data, 0, HIST_SIZE * sizeof(int), s->cu_stream));
    if (ret >= 0)
        ret = thumbnail(ctx, (int*)s->data, frame);
    if (ret >= 0) {
        cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
        cpy.srcDevice     = s->data;
        cpy.dstHost       = hist;
        cpy.srcPitch      = HIST_SIZE * sizeof(int);
        cpy.dstPitch      = HIST_SIZE * sizeof(int);
        cpy.WidthInBytes  = HIST_SIZE * sizeof(int);
        cpy.Height        = 1;

        // NOTE(review): hist is read on the host right below without an
        // explicit stream synchronisation; this relies on the copy into this
        // host buffer having completed when the call returns - confirm.
        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, s->cu_stream));
    }

    // the context pushed above is popped on success and on failure alike
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto fail;

    // For these formats thumbnail() samples chroma at width/2 x height/2,
    // i.e. a quarter of the luma sample count; scale the chroma bins by 4
    // to bring them to the same magnitude as the luma bins.
    if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P ||
        hw_frames_ctx->sw_format == AV_PIX_FMT_P010 || hw_frames_ctx->sw_format == AV_PIX_FMT_P016)
    {
        int i;
        for (i = 256; i < HIST_SIZE; i++)
            hist[i] = 4 * hist[i];
    }

    // no selection until the buffer of N frames is filled up
    s->n++;
    if (s->n < s->n_frames)
        return 0;

    return ff_filter_frame(outlink, get_best_frame(ctx));

fail:
    // s->n was not advanced, so this is the slot filled at the top
    av_frame_free(&s->frames[s->n].buf);
    return ret;
}
302
uninit(AVFilterContext * ctx)303 static av_cold void uninit(AVFilterContext *ctx)
304 {
305 int i;
306 ThumbnailCudaContext *s = ctx->priv;
307 CudaFunctions *cu = s->hwctx->internal->cuda_dl;
308
309 if (s->data) {
310 CHECK_CU(cu->cuMemFree(s->data));
311 s->data = 0;
312 }
313
314 if (s->cu_module) {
315 CHECK_CU(cu->cuModuleUnload(s->cu_module));
316 s->cu_module = NULL;
317 }
318
319 for (i = 0; i < s->n_frames && s->frames[i].buf; i++)
320 av_frame_free(&s->frames[i].buf);
321 av_freep(&s->frames);
322 }
323
/**
 * Pull a frame from upstream; at end of stream, flush the best frame of the
 * partially filled batch before reporting EOF.
 *
 * @param link output link on which a frame was requested
 * @return 0 on success, AVERROR_EOF after the final flush, or another
 *         negative error code
 */
static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    ThumbnailCudaContext *s = ctx->priv;
    int status = ff_request_frame(ctx->inputs[0]);

    // ordinary case: nothing to flush, pass errors through and report success otherwise
    if (status != AVERROR_EOF || !s->n)
        return status < 0 ? status : 0;

    // upstream is finished but frames are still cached: emit the best one
    status = ff_filter_frame(link, get_best_frame(ctx));
    return status < 0 ? status : AVERROR_EOF;
}
340
format_is_supported(enum AVPixelFormat fmt)341 static int format_is_supported(enum AVPixelFormat fmt)
342 {
343 int i;
344
345 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
346 if (supported_formats[i] == fmt)
347 return 1;
348 return 0;
349 }
350
/**
 * Configure the filter for its input link: validate the input, load the PTX
 * module, look up the kernels, allocate the device histogram and propagate
 * the hardware frames context to the output.
 *
 * The device context is recorded in the private context before the first
 * point of failure that follows it, so uninit() can release whatever was
 * created when this function fails part-way.
 *
 * @param inlink input link being configured
 * @return 0 on success;
 *         AVERROR(EINVAL) if the link carries no hardware frames context,
 *         AVERROR(ENOSYS) for an unsupported software format,
 *         AVERROR(ENOMEM) if referencing the frames context fails,
 *         or the negative error of a failed CUDA call
 */
static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ThumbnailCudaContext *s = ctx->priv;
    AVHWFramesContext *hw_frames_ctx;
    AVCUDADeviceContext *device_hwctx;
    CudaFunctions *cu;
    CUcontext dummy;
    int ret;

    extern char vf_thumbnail_cuda_ptx[];

    if (!inlink->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
        return AVERROR(EINVAL);
    }

    hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    device_hwctx  = hw_frames_ctx->device_ctx->hwctx;
    cu            = device_hwctx->internal->cuda_dl;

    s->hwctx     = device_hwctx;
    s->cu_stream = s->hwctx->stream;

    // reject unsupported input before any CUDA resource is created
    if (!format_is_supported(hw_frames_ctx->sw_format)) {
        av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", av_get_pix_fmt_name(hw_frames_ctx->sw_format));
        return AVERROR(ENOSYS);
    }

    ret = CHECK_CU(cu->cuCtxPushCurrent(device_hwctx->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
    if (ret < 0)
        goto pop_ctx;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
    if (ret < 0)
        goto pop_ctx;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
    if (ret < 0)
        goto pop_ctx;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
    if (ret < 0)
        goto pop_ctx;

    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
    if (ret < 0)
        goto pop_ctx;

    ret = CHECK_CU(cu->cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));

pop_ctx:
    // the pushed context is popped on every path, success or failure
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        return ret;

    s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;

    ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->hw_frames_ctx);
    if (!ctx->outputs[0]->hw_frames_ctx)
        return AVERROR(ENOMEM);

    s->tb = inlink->time_base;

    return 0;
}
411
/**
 * Advertise AV_PIX_FMT_CUDA as the only pixel format on all links.
 *
 * @param ctx filter instance
 * @return result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 *         format list cannot be created
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_CUDA,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *formats = ff_make_format_list(pix_fmts);

    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
}
423
/* Single video input: configured by config_props(), frames handled by filter_frame(). */
static const AVFilterPad thumbnail_cuda_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_props,
        .filter_frame = filter_frame,
    },
    { NULL }
};
433
/* Single video output: request_frame() also flushes a partial batch at end of stream. */
static const AVFilterPad thumbnail_cuda_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .request_frame = request_frame,
    },
    { NULL }
};
442
/* Filter definition tying together the callbacks, pads and option class defined in this file. */
AVFilter ff_vf_thumbnail_cuda = {
    .name           = "thumbnail_cuda",
    .description    = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
    .priv_size      = sizeof(ThumbnailCudaContext),
    .init           = init,
    .uninit         = uninit,
    .query_formats  = query_formats,
    .inputs         = thumbnail_cuda_inputs,
    .outputs        = thumbnail_cuda_outputs,
    .priv_class     = &thumbnail_cuda_class,
    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};
455