1 /*
2 * HW decode acceleration through NVDEC
3 *
4 * Copyright (c) 2016 Anton Khirnov
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #include "config_components.h"
25
26 #include "libavutil/common.h"
27 #include "libavutil/error.h"
28 #include "libavutil/hwcontext.h"
29 #include "libavutil/hwcontext_cuda_internal.h"
30 #include "libavutil/cuda_check.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/pixfmt.h"
33
34 #include "avcodec.h"
35 #include "decode.h"
36 #include "nvdec.h"
37 #include "internal.h"
38
39 #if !NVDECAPI_CHECK_VERSION(9, 0)
40 #define cudaVideoSurfaceFormat_YUV444 2
41 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
42 #endif
43
44 typedef struct NVDECDecoder {
45 CUvideodecoder decoder;
46
47 AVBufferRef *hw_device_ref;
48 AVBufferRef *real_hw_frames_ref;
49 CUcontext cuda_ctx;
50 CUstream stream;
51
52 CudaFunctions *cudl;
53 CuvidFunctions *cvdl;
54 } NVDECDecoder;
55
/**
 * Bookkeeping for the pool of decoder surface indices; passed as the opaque
 * pointer to nvdec_decoder_frame_alloc().
 */
typedef struct NVDECFramePool {
    /* Upper bound on the number of indices that may be handed out. */
    unsigned int dpb_size;
    /* Number of indices handed out so far; also the next index value. */
    unsigned int nb_allocated;
} NVDECFramePool;
60
/*
 * Wrap a CUDA/cuvid call in FF_CUDA_CHECK_DL. The expansion refers to the
 * identifiers `logctx` and `decoder`, so both must be in scope (under exactly
 * those names) wherever this macro is used.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
62
map_avcodec_id(enum AVCodecID id)63 static int map_avcodec_id(enum AVCodecID id)
64 {
65 switch (id) {
66 #if CONFIG_AV1_NVDEC_HWACCEL
67 case AV_CODEC_ID_AV1: return cudaVideoCodec_AV1;
68 #endif
69 case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
70 case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
71 case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG;
72 case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
73 case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
74 case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
75 case AV_CODEC_ID_VC1: return cudaVideoCodec_VC1;
76 case AV_CODEC_ID_VP8: return cudaVideoCodec_VP8;
77 case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
78 case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1;
79 }
80 return -1;
81 }
82
map_chroma_format(enum AVPixelFormat pix_fmt)83 static int map_chroma_format(enum AVPixelFormat pix_fmt)
84 {
85 int shift_h = 0, shift_v = 0;
86
87 if (av_pix_fmt_count_planes(pix_fmt) == 1)
88 return cudaVideoChromaFormat_Monochrome;
89
90 av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
91
92 if (shift_h == 1 && shift_v == 1)
93 return cudaVideoChromaFormat_420;
94 else if (shift_h == 1 && shift_v == 0)
95 return cudaVideoChromaFormat_422;
96 else if (shift_h == 0 && shift_v == 0)
97 return cudaVideoChromaFormat_444;
98
99 return -1;
100 }
101
/**
 * Ask the driver whether it can decode a stream described by params.
 *
 * The codec type, chroma format and bit depth are queried through
 * cuvidGetDecoderCaps(); the coded dimensions and the macroblock count are
 * then compared against the reported limits.
 *
 * @param decoder decoder whose function tables are used for the query
 * @param params  creation parameters describing the stream
 * @param logctx  logging context
 * @return 0 if the stream is acceptable, or if the loaded library has no
 *         cuvidGetDecoderCaps entry point (the check is skipped with a
 *         warning); a negative error code otherwise
 */
static int nvdec_test_capabilities(NVDECDecoder *decoder,
                                   CUVIDDECODECREATEINFO *params, void *logctx)
{
    CUVIDDECODECAPS caps = { 0 };
    int err;

    /* Without the query entry point there is nothing to verify. */
    if (!decoder->cvdl->cuvidGetDecoderCaps) {
        av_log(logctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
        av_log(logctx, AV_LOG_WARNING, "The minimum required version is "
#if defined(_WIN32) || defined(__CYGWIN__)
            "378.66"
#else
            "378.13"
#endif
            ". Continuing blind.\n");
        return 0;
    }

    caps.eCodecType      = params->CodecType;
    caps.eChromaFormat   = params->ChromaFormat;
    caps.nBitDepthMinus8 = params->bitDepthMinus8;

    err = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
    if (err < 0)
        return err;

    av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
    av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
           caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
    av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
           caps.nMinWidth, caps.nMaxWidth);
    av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
           caps.nMinHeight, caps.nMaxHeight);

    if (!caps.bIsSupported) {
        av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
        return AVERROR(EINVAL);
    }

    if (!(params->ulWidth >= caps.nMinWidth && params->ulWidth <= caps.nMaxWidth)) {
        av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
               (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
        return AVERROR(EINVAL);
    }

    if (!(params->ulHeight >= caps.nMinHeight && params->ulHeight <= caps.nMaxHeight)) {
        av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
               (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
        return AVERROR(EINVAL);
    }

    /* One macroblock covers 16x16 = 256 pixels. */
    if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
        av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
               (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
        return AVERROR(EINVAL);
    }

    return 0;
}
161
nvdec_decoder_free(void * opaque,uint8_t * data)162 static void nvdec_decoder_free(void *opaque, uint8_t *data)
163 {
164 NVDECDecoder *decoder = (NVDECDecoder*)data;
165
166 if (decoder->decoder) {
167 void *logctx = decoder->hw_device_ref->data;
168 CUcontext dummy;
169 CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
170 CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
171 CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
172 }
173
174 av_buffer_unref(&decoder->real_hw_frames_ref);
175 av_buffer_unref(&decoder->hw_device_ref);
176
177 cuvid_free_functions(&decoder->cvdl);
178
179 av_freep(&decoder);
180 }
181
/**
 * Allocate an NVDECDecoder, wrap it in a reference-counted buffer and create
 * the underlying cuvid decoder.
 *
 * @param out           receives the new decoder buffer on success
 * @param hw_device_ref CUDA device context to create the decoder on
 * @param params        cuvid creation parameters
 * @param logctx        logging context
 * @return 0 on success, a negative error code on failure (nothing is stored
 *         in *out in that case)
 */
static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
                                CUVIDDECODECREATEINFO *params, void *logctx)
{
    AVHWDeviceContext   *device_ctx   = (AVHWDeviceContext*)hw_device_ref->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;

    NVDECDecoder *decoder;
    AVBufferRef  *ref;
    CUcontext     popped_ctx;
    int ret;

    decoder = av_mallocz(sizeof(*decoder));
    if (!decoder)
        return AVERROR(ENOMEM);

    ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
                           nvdec_decoder_free, NULL, AV_BUFFER_FLAG_READONLY);
    if (!ref) {
        av_freep(&decoder);
        return AVERROR(ENOMEM);
    }

    /* From here on, unreferencing `ref` cleans up through nvdec_decoder_free(). */
    decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
    if (!decoder->hw_device_ref) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    decoder->cuda_ctx = device_hwctx->cuda_ctx;
    decoder->cudl     = device_hwctx->internal->cuda_dl;
    decoder->stream   = device_hwctx->stream;

    ret = cuvid_load_functions(&decoder->cvdl, logctx);
    if (ret < 0) {
        av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
        goto fail;
    }

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        goto fail;

    /* Only attempt creation if the capability check passed. */
    ret = nvdec_test_capabilities(decoder, params, logctx);
    if (ret >= 0)
        ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));

    /* The context pushed above is popped on both outcomes. */
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&popped_ctx));

    if (ret < 0)
        goto fail;

    *out = ref;

    return 0;
fail:
    av_buffer_unref(&ref);
    return ret;
}
245
nvdec_decoder_frame_alloc(void * opaque,size_t size)246 static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, size_t size)
247 {
248 NVDECFramePool *pool = opaque;
249 AVBufferRef *ret;
250
251 if (pool->nb_allocated >= pool->dpb_size)
252 return NULL;
253
254 ret = av_buffer_alloc(sizeof(unsigned int));
255 if (!ret)
256 return NULL;
257
258 *(unsigned int*)ret->data = pool->nb_allocated++;
259
260 return ret;
261 }
262
/**
 * Release everything owned by the hwaccel private context: the bitstream
 * buffers, the slice offset table, the decoder reference and the surface
 * index pool.
 *
 * @return always 0
 */
int ff_nvdec_decode_uninit(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    /* Bitstream storage. */
    av_freep(&ctx->bitstream);
    av_freep(&ctx->bitstream_internal);
    ctx->bitstream_allocated = 0;
    ctx->bitstream_len       = 0;

    /* Slice offset table. */
    av_freep(&ctx->slice_offsets);
    ctx->slice_offsets_allocated = 0;
    ctx->nb_slices               = 0;

    /* Decoder and its surface index pool. */
    av_buffer_unref(&ctx->decoder_ref);
    av_buffer_pool_uninit(&ctx->decoder_pool);

    return 0;
}
281
nvdec_free_dummy(struct AVHWFramesContext * ctx)282 static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
283 {
284 av_buffer_pool_uninit(&ctx->pool);
285 }
286
nvdec_alloc_dummy(size_t size)287 static AVBufferRef *nvdec_alloc_dummy(size_t size)
288 {
289 return av_buffer_create(NULL, 0, NULL, NULL, 0);
290 }
291
/**
 * Create and initialise a hardware frames context for this decoder.
 *
 * @param avctx          codec context supplying the device and hwaccel format
 * @param out_frames_ref receives the new frames context; reset on failure
 * @param dummy          non-zero to build the placeholder context whose pool
 *                       hands out empty buffers; zero for the real context,
 *                       which gets no pre-sized pool
 * @return 0 on success, a negative error code on failure
 */
static int nvdec_init_hwframes(AVCodecContext *avctx, AVBufferRef **out_frames_ref, int dummy)
{
    AVHWFramesContext *hwframes;
    int err;

    err = avcodec_get_hw_frames_parameters(avctx,
                                           avctx->hw_device_ctx,
                                           avctx->hwaccel->pix_fmt,
                                           out_frames_ref);
    if (err < 0)
        return err;

    hwframes = (AVHWFramesContext*)(*out_frames_ref)->data;

    if (!dummy) {
        /* The real context is normally not used to allocate frames from,
         * so it does not need a pre-sized pool. */
        hwframes->initial_pool_size = 0;
    } else {
        /* Copied from ff_decode_get_hw_frames_ctx for compatibility */
        hwframes->initial_pool_size += 3;

        hwframes->free = nvdec_free_dummy;
        hwframes->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);

        if (!hwframes->pool) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    err = av_hwframe_ctx_init(*out_frames_ref);
    if (err < 0)
        goto fail;

    return 0;

fail:
    av_buffer_unref(out_frames_ref);
    return err;
}
330
/**
 * Initialise NVDEC hardware decoding for a codec context.
 *
 * Sets up the frames contexts (a dummy one on avctx plus the real one kept by
 * the decoder when the caller supplied none), creates the cuvid decoder and
 * builds the pool that hands out decoder surface indices.
 *
 * @return 0 on success, a negative error code on failure
 */
int ff_nvdec_decode_init(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    NVDECDecoder        *decoder;
    AVBufferRef         *real_hw_frames_ref;
    NVDECFramePool      *pool;
    AVHWFramesContext   *frames_ctx;
    const AVPixFmtDescriptor *sw_desc;

    CUVIDDECODECREATEINFO params = { 0 };

    cudaVideoSurfaceFormat output_format;
    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
    int ret = 0;

    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    if (!sw_desc)
        return AVERROR_BUG;

    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
    if (cuvid_codec_type < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
        return AVERROR(ENOSYS);
    }
    chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    if (!avctx->hw_frames_ctx) {
        /* No user-supplied frames context: create a dummy one for avctx and
         * a separate real one that is later attached to output frames. */
        ret = nvdec_init_hwframes(avctx, &avctx->hw_frames_ctx, 1);
        if (ret < 0)
            return ret;

        ret = nvdec_init_hwframes(avctx, &real_hw_frames_ref, 0);
        if (ret < 0)
            return ret;
    } else {
        real_hw_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
        if (!real_hw_frames_ref)
            return AVERROR(ENOMEM);
    }

    switch (sw_desc->comp[0].depth) {
    case 8:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
                                     cudaVideoSurfaceFormat_NV12;
        break;
    case 10:
    case 12:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
                                     cudaVideoSurfaceFormat_P016;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
        av_buffer_unref(&real_hw_frames_ref);
        return AVERROR(ENOSYS);
    }

    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;

    params.ulWidth             = avctx->coded_width;
    params.ulHeight            = avctx->coded_height;
    params.ulTargetWidth       = avctx->coded_width;
    params.ulTargetHeight      = avctx->coded_height;
    params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
    params.OutputFormat        = output_format;
    params.CodecType           = cuvid_codec_type;
    params.ChromaFormat        = cuvid_chroma_format;
    params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
    params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;

    ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
    if (ret < 0) {
        if (params.ulNumDecodeSurfaces > 32) {
            av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
                   (int)params.ulNumDecodeSurfaces);
            av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
                   avctx->thread_count);
        }
        av_buffer_unref(&real_hw_frames_ref);
        return ret;
    }

    /* The decoder takes over the real frames context reference; it is
     * released in nvdec_decoder_free(). */
    decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    decoder->real_hw_frames_ref = real_hw_frames_ref;
    real_hw_frames_ref = NULL;

    pool = av_mallocz(sizeof(*pool));
    if (!pool) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    pool->dpb_size = frames_ctx->initial_pool_size;

    ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
                                             nvdec_decoder_frame_alloc, av_free);
    if (!ctx->decoder_pool) {
        /* The buffer pool was never created, so its free callback will not
         * run for `pool`; release it here to avoid leaking it. */
        av_free(pool);
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    return 0;
fail:
    ff_nvdec_decode_uninit(avctx);
    return ret;
}
442
nvdec_fdd_priv_free(void * priv)443 static void nvdec_fdd_priv_free(void *priv)
444 {
445 NVDECFrame *cf = priv;
446
447 if (!cf)
448 return;
449
450 av_buffer_unref(&cf->idx_ref);
451 av_buffer_unref(&cf->decoder_ref);
452 av_buffer_unref(&cf->ref_idx_ref);
453
454 av_freep(&priv);
455 }
456
nvdec_unmap_mapped_frame(void * opaque,uint8_t * data)457 static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
458 {
459 NVDECFrame *unmap_data = (NVDECFrame*)data;
460 NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
461 void *logctx = decoder->hw_device_ref->data;
462 CUdeviceptr devptr = (CUdeviceptr)opaque;
463 int ret;
464 CUcontext dummy;
465
466 ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
467 if (ret < 0)
468 goto finish;
469
470 CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
471
472 CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
473
474 finish:
475 av_buffer_unref(&unmap_data->idx_ref);
476 av_buffer_unref(&unmap_data->decoder_ref);
477 av_buffer_unref(&unmap_data->ref_idx_ref);
478 av_free(unmap_data);
479 }
480
/**
 * Post-process callback: map the decoded surface and expose it through the
 * frame's data pointers.
 *
 * The mapping is owned by frame->buf[1]; when that buffer is released,
 * nvdec_unmap_mapped_frame() unmaps the surface and drops the surface index
 * and decoder references taken here. The frame's frames context is replaced
 * by the decoder's real one.
 *
 * @param logctx logging context
 * @param frame  frame whose FrameDecodeData carries the NVDECFrame to map
 * @return 0 on success, a negative error code on failure
 */
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
{
    FrameDecodeData  *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
    NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;

    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;

    CUVIDPROCPARAMS vpp = { 0 };
    NVDECFrame *unmap_data = NULL;
    AVBufferRef *real_frames_ref;

    CUcontext dummy;
    CUdeviceptr devptr;

    unsigned int pitch, i;
    unsigned int offset = 0;
    int shift_h = 0, shift_v = 0;
    int ret = 0;

    vpp.progressive_frame = 1;
    vpp.output_stream     = decoder->stream;

    /* Read the software format now: hwctx belongs to the frames context the
     * frame references on entry, and that reference is replaced below. */
    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
                                                     cf->idx, &devptr,
                                                     &pitch, &vpp));
    if (ret < 0)
        goto finish;

    unmap_data = av_mallocz(sizeof(*unmap_data));
    if (!unmap_data) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Populate unmap_data completely before handing it to the buffer:
     * nvdec_unmap_mapped_frame() dereferences decoder_ref unconditionally,
     * so it must never run on a partially initialised struct. */
    unmap_data->idx         = cf->idx;
    unmap_data->idx_ref     = av_buffer_ref(cf->idx_ref);
    unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
    if (!unmap_data->idx_ref || !unmap_data->decoder_ref) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
                                     nvdec_unmap_mapped_frame, (void*)devptr,
                                     AV_BUFFER_FLAG_READONLY);
    if (!frame->buf[1]) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Take the new reference first so that a failure leaves the frame's
     * existing frames context in place. */
    real_frames_ref = av_buffer_ref(decoder->real_hw_frames_ref);
    if (!real_frames_ref) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }
    av_buffer_unref(&frame->hw_frames_ctx);
    frame->hw_frames_ctx = real_frames_ref;

    /* Planes are laid out one after another in the mapped surface, all with
     * the same pitch; chroma planes have their height reduced by shift_v. */
    for (i = 0; frame->linesize[i]; i++) {
        frame->data[i] = (uint8_t*)(devptr + offset);
        frame->linesize[i] = pitch;
        offset += pitch * (frame->height >> (i ? shift_v : 0));
    }

    goto finish;

copy_fail:
    if (!frame->buf[1]) {
        /* No buffer owns the mapping yet: undo everything by hand. */
        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
        if (unmap_data) {
            av_buffer_unref(&unmap_data->idx_ref);
            av_buffer_unref(&unmap_data->decoder_ref);
        }
        av_freep(&unmap_data);
    } else {
        /* The buffer's free callback unmaps and releases unmap_data. */
        av_buffer_unref(&frame->buf[1]);
    }

finish:
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
    return ret;
}
559
/**
 * Begin decoding a new picture.
 *
 * Resets the per-picture bitstream state and, unless the frame already has
 * one, attaches an NVDECFrame holding a decoder reference and a free surface
 * index taken from the pool.
 *
 * @return 0 on success, a negative error code on failure
 */
int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
{
    NVDECContext    *ctx = avctx->internal->hwaccel_priv_data;
    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame *cf;

    ctx->bitstream_len = 0;
    ctx->nb_slices     = 0;

    /* The frame already carries its private data; nothing more to set up. */
    if (fdd->hwaccel_priv)
        return 0;

    cf = av_mallocz(sizeof(*cf));
    if (!cf)
        return AVERROR(ENOMEM);

    cf->decoder_ref = av_buffer_ref(ctx->decoder_ref);
    if (!cf->decoder_ref) {
        nvdec_fdd_priv_free(cf);
        return AVERROR(ENOMEM);
    }

    cf->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
    if (!cf->idx_ref) {
        av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
        nvdec_fdd_priv_free(cf);
        return AVERROR(ENOMEM);
    }

    /* Output and reference surface are the same until a separate one is
     * requested through ff_nvdec_start_frame_sep_ref(). */
    cf->idx     = *(unsigned int*)cf->idx_ref->data;
    cf->ref_idx = cf->idx;

    fdd->hwaccel_priv      = cf;
    fdd->hwaccel_priv_free = nvdec_fdd_priv_free;
    fdd->post_process      = nvdec_retrieve_data;

    return 0;
}
601
/**
 * Begin decoding a new picture, optionally using a separate surface as the
 * reference picture.
 *
 * @param avctx       codec context
 * @param frame       frame being decoded
 * @param has_sep_ref non-zero to take (or keep) a second surface index for
 *                    the reference; zero to reference the output surface
 * @return 0 on success, a negative error code on failure
 */
int ff_nvdec_start_frame_sep_ref(AVCodecContext *avctx, AVFrame *frame, int has_sep_ref)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame *cf;
    int ret;

    ret = ff_nvdec_start_frame(avctx, frame);
    if (ret < 0)
        return ret;

    cf = fdd->hwaccel_priv;

    if (has_sep_ref) {
        if (!cf->ref_idx_ref) {
            cf->ref_idx_ref = av_buffer_pool_get(ctx->decoder_pool);
            if (!cf->ref_idx_ref) {
                av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
                /*
                 * Do not free cf here: fdd->hwaccel_priv still points at it
                 * and fdd->hwaccel_priv_free is set to release it, so freeing
                 * it now would leave a dangling pointer and lead to a double
                 * free. cf stays valid and owned by fdd.
                 */
                return AVERROR(ENOMEM);
            }
        }
        cf->ref_idx = *(unsigned int*)cf->ref_idx_ref->data;
    } else {
        /* No separate reference: give back any extra surface. */
        av_buffer_unref(&cf->ref_idx_ref);
        cf->ref_idx = cf->idx;
    }

    return 0;
}
635
/**
 * Submit the accumulated bitstream and slice offsets of the current picture
 * to the decoder.
 *
 * @return 0 on success, a negative error code on failure
 */
int ff_nvdec_end_frame(AVCodecContext *avctx)
{
    NVDECContext   *ctx     = avctx->internal->hwaccel_priv_data;
    NVDECDecoder   *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    CUVIDPICPARAMS *pp      = &ctx->pic_params;
    void *logctx = avctx;
    CUcontext popped_ctx;
    int ret;

    /* Point the picture parameters at the data gathered for this picture. */
    pp->pSliceDataOffsets = ctx->slice_offsets;
    pp->nNumSlices        = ctx->nb_slices;
    pp->pBitstreamData    = ctx->bitstream;
    pp->nBitstreamDataLen = ctx->bitstream_len;

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, pp));

    /* The context is popped whether or not decoding succeeded. */
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&popped_ctx));

    return ret;
}
665
/**
 * End-of-frame handler for decoders that use ff_nvdec_simple_decode_slice().
 *
 * Submits the picture and then clears the bitstream pointer, which in that
 * mode refers to the caller's slice buffer rather than to owned memory.
 *
 * @return the result of ff_nvdec_end_frame()
 */
int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    const int status = ff_nvdec_end_frame(avctx);

    ctx->bitstream = NULL;

    return status;
}
673
/**
 * Record one slice of the current picture without copying it.
 *
 * The first slice's buffer becomes the bitstream base; each slice is stored
 * as its byte offset from that base and its size is added to the total
 * bitstream length.
 *
 * @param avctx  codec context
 * @param buffer start of the slice data
 * @param size   size of the slice data in bytes
 * @return 0 on success, AVERROR(ENOMEM) if the offset table cannot grow
 */
int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
                                 uint32_t size)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    void *grown;

    /* Make room for one more entry in the offset table. */
    grown = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
                            (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
    if (!grown)
        return AVERROR(ENOMEM);
    ctx->slice_offsets = grown;

    if (!ctx->bitstream)
        ctx->bitstream = (uint8_t*)buffer;

    ctx->slice_offsets[ctx->nb_slices++] = buffer - ctx->bitstream;
    ctx->bitstream_len += size;

    return 0;
}
695
/**
 * Fill in the parameters of a hardware frames context for NVDEC output.
 *
 * @param avctx         codec context describing the stream
 * @param hw_frames_ctx frames context to fill in
 * @param dpb_size      number of surfaces the decoder itself needs
 * @param supports_444  non-zero if the codec's hwaccel can output 4:4:4
 * @return 0 on success, a negative error code for an unsupported codec,
 *         chroma format or bit depth
 */
int ff_nvdec_frame_params(AVCodecContext *avctx,
                          AVBufferRef *hw_frames_ctx,
                          int dpb_size,
                          int supports_444)
{
    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
    const AVPixFmtDescriptor *sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    int cuvid_codec_type, cuvid_chroma_format, chroma_444;

    if (!sw_desc)
        return AVERROR_BUG;

    /* Only used to verify that the codec has an NVDEC mapping. */
    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
    if (cuvid_codec_type < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
        return AVERROR(EINVAL);
    }
    chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    frames_ctx->format = AV_PIX_FMT_CUDA;
    /* Coded dimensions rounded up to the next even value. */
    frames_ctx->width  = (avctx->coded_width  + 1) & ~1;
    frames_ctx->height = (avctx->coded_height + 1) & ~1;
    /*
     * We add two extra frames to the pool to account for deinterlacing filters
     * holding onto their frames.
     */
    frames_ctx->initial_pool_size = dpb_size + 2;

    switch (sw_desc->comp[0].depth) {
    case 8:
        if (chroma_444)
            frames_ctx->sw_format = AV_PIX_FMT_YUV444P;
        else
            frames_ctx->sw_format = AV_PIX_FMT_NV12;
        break;
    case 10:
        if (chroma_444)
            frames_ctx->sw_format = AV_PIX_FMT_YUV444P16;
        else
            frames_ctx->sw_format = AV_PIX_FMT_P010;
        break;
    case 12:
        if (chroma_444)
            frames_ctx->sw_format = AV_PIX_FMT_YUV444P16;
        else
            frames_ctx->sw_format = AV_PIX_FMT_P016;
        break;
    default:
        return AVERROR(EINVAL);
    }

    return 0;
}
747
/**
 * Look up the reference surface index associated with a frame.
 *
 * @param frame frame to query; may be NULL
 * @return the frame's ref_idx, or -1 if the frame is NULL, has no private
 *         reference or has no NVDEC private data attached
 */
int ff_nvdec_get_ref_idx(AVFrame *frame)
{
    if (frame && frame->private_ref) {
        FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
        NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;

        if (cf)
            return cf->ref_idx;
    }

    return -1;
}
763