• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * HW decode acceleration through NVDEC
3  *
4  * Copyright (c) 2016 Anton Khirnov
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #include "config_components.h"
25 
26 #include "libavutil/common.h"
27 #include "libavutil/error.h"
28 #include "libavutil/hwcontext.h"
29 #include "libavutil/hwcontext_cuda_internal.h"
30 #include "libavutil/cuda_check.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/pixfmt.h"
33 
34 #include "avcodec.h"
35 #include "decode.h"
36 #include "nvdec.h"
37 #include "internal.h"
38 
/*
 * Fallback values for the 4:4:4 surface formats, used when building against
 * an NVDEC API older than 9.0 (presumably those headers do not declare the
 * enumerators -- verify against the ffnvcodec headers in use).
 */
#if !NVDECAPI_CHECK_VERSION(9, 0)
#define cudaVideoSurfaceFormat_YUV444 2
#define cudaVideoSurfaceFormat_YUV444_16Bit 3
#endif
43 
/**
 * State of one CUVID decoder instance. It lives inside an AVBufferRef whose
 * free callback is nvdec_decoder_free().
 */
typedef struct NVDECDecoder {
    /* Handle filled in by cuvidCreateDecoder(), released by cuvidDestroyDecoder(). */
    CUvideodecoder decoder;

    /* Reference on the hardware device context, taken in nvdec_decoder_create(). */
    AVBufferRef *hw_device_ref;
    /* Frames context that nvdec_retrieve_data() attaches to output frames. */
    AVBufferRef *real_hw_frames_ref;
    /* CUDA context copied from the device context; pushed around driver calls. */
    CUcontext    cuda_ctx;
    /* Stream copied from the device context; used as the map output stream. */
    CUstream     stream;

    /* CUDA function table taken from the device context (not freed here). */
    CudaFunctions *cudl;
    /* CUVID function table from cuvid_load_functions(), freed in nvdec_decoder_free(). */
    CuvidFunctions *cvdl;
} NVDECDecoder;
55 
/**
 * Bookkeeping passed as the opaque pointer to nvdec_decoder_frame_alloc().
 */
typedef struct NVDECFramePool {
    /* Upper bound on the number of surface indices that may be handed out. */
    unsigned int dpb_size;
    /* Number of indices handed out so far; also the next index to assign. */
    unsigned int nb_allocated;
} NVDECFramePool;
60 
/*
 * Run a CUDA/CUVID call through FF_CUDA_CHECK_DL. The expansion refers to
 * variables named `logctx` and `decoder`, so both must exist in the scope
 * where the macro is used.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
62 
map_avcodec_id(enum AVCodecID id)63 static int map_avcodec_id(enum AVCodecID id)
64 {
65     switch (id) {
66 #if CONFIG_AV1_NVDEC_HWACCEL
67     case AV_CODEC_ID_AV1:        return cudaVideoCodec_AV1;
68 #endif
69     case AV_CODEC_ID_H264:       return cudaVideoCodec_H264;
70     case AV_CODEC_ID_HEVC:       return cudaVideoCodec_HEVC;
71     case AV_CODEC_ID_MJPEG:      return cudaVideoCodec_JPEG;
72     case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
73     case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
74     case AV_CODEC_ID_MPEG4:      return cudaVideoCodec_MPEG4;
75     case AV_CODEC_ID_VC1:        return cudaVideoCodec_VC1;
76     case AV_CODEC_ID_VP8:        return cudaVideoCodec_VP8;
77     case AV_CODEC_ID_VP9:        return cudaVideoCodec_VP9;
78     case AV_CODEC_ID_WMV3:       return cudaVideoCodec_VC1;
79     }
80     return -1;
81 }
82 
map_chroma_format(enum AVPixelFormat pix_fmt)83 static int map_chroma_format(enum AVPixelFormat pix_fmt)
84 {
85     int shift_h = 0, shift_v = 0;
86 
87     if (av_pix_fmt_count_planes(pix_fmt) == 1)
88         return cudaVideoChromaFormat_Monochrome;
89 
90     av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
91 
92     if (shift_h == 1 && shift_v == 1)
93         return cudaVideoChromaFormat_420;
94     else if (shift_h == 1 && shift_v == 0)
95         return cudaVideoChromaFormat_422;
96     else if (shift_h == 0 && shift_v == 0)
97         return cudaVideoChromaFormat_444;
98 
99     return -1;
100 }
101 
/**
 * Ask the driver whether it can decode the stream described by params.
 *
 * Must be called with the decoder's CUDA context current. If the loaded
 * driver does not provide cuvidGetDecoderCaps, a warning is logged and the
 * check is skipped.
 *
 * @param decoder decoder state providing the CUDA/CUVID function tables
 * @param params  decoder creation parameters to validate
 * @param logctx  logging context
 * @return 0 if the configuration is accepted or the check was skipped,
 *         a negative AVERROR code otherwise
 */
static int nvdec_test_capabilities(NVDECDecoder *decoder,
                                   CUVIDDECODECREATEINFO *params, void *logctx)
{
    CUVIDDECODECAPS caps = { 0 };
    int err;

    caps.nBitDepthMinus8 = params->bitDepthMinus8;
    caps.eChromaFormat   = params->ChromaFormat;
    caps.eCodecType      = params->CodecType;

    if (!decoder->cvdl->cuvidGetDecoderCaps) {
        av_log(logctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
        av_log(logctx, AV_LOG_WARNING, "The minimum required version is "
#if defined(_WIN32) || defined(__CYGWIN__)
            "378.66"
#else
            "378.13"
#endif
            ". Continuing blind.\n");
        return 0;
    }

    err = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
    if (err < 0)
        return err;

    av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
    av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
           caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
    av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
           caps.nMinWidth, caps.nMaxWidth);
    av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
           caps.nMinHeight, caps.nMaxHeight);

    if (!caps.bIsSupported) {
        av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
        return AVERROR(EINVAL);
    }

    /* Coded dimensions must lie within the advertised limits. */
    if (params->ulWidth < caps.nMinWidth || params->ulWidth > caps.nMaxWidth) {
        av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
               (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
        return AVERROR(EINVAL);
    }

    if (params->ulHeight < caps.nMinHeight || params->ulHeight > caps.nMaxHeight) {
        av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
               (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
        return AVERROR(EINVAL);
    }

    /* The macroblock count is taken as the area divided by 256. */
    if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
        av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
               (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
        return AVERROR(EINVAL);
    }

    return 0;
}
161 
nvdec_decoder_free(void * opaque,uint8_t * data)162 static void nvdec_decoder_free(void *opaque, uint8_t *data)
163 {
164     NVDECDecoder *decoder = (NVDECDecoder*)data;
165 
166     if (decoder->decoder) {
167         void *logctx = decoder->hw_device_ref->data;
168         CUcontext dummy;
169         CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
170         CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
171         CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
172     }
173 
174     av_buffer_unref(&decoder->real_hw_frames_ref);
175     av_buffer_unref(&decoder->hw_device_ref);
176 
177     cuvid_free_functions(&decoder->cvdl);
178 
179     av_freep(&decoder);
180 }
181 
/**
 * Allocate an NVDECDecoder, wrap it in a reference-counted buffer and create
 * the underlying CUVID decoder.
 *
 * @param out           receives the new decoder buffer on success
 * @param hw_device_ref CUDA hardware device context to decode on
 * @param params        CUVID decoder creation parameters
 * @param logctx        logging context
 * @return 0 on success, a negative AVERROR code on failure (*out untouched)
 */
static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
                                CUVIDDECODECREATEINFO *params, void *logctx)
{
    AVHWDeviceContext   *device_ctx   = (AVHWDeviceContext*)hw_device_ref->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;

    NVDECDecoder *decoder;
    AVBufferRef  *decoder_ref;
    CUcontext     popped_ctx;
    int ret;

    decoder = av_mallocz(sizeof(*decoder));
    if (!decoder)
        return AVERROR(ENOMEM);

    decoder_ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
                                   nvdec_decoder_free, NULL, AV_BUFFER_FLAG_READONLY);
    if (!decoder_ref) {
        av_free(decoder);
        return AVERROR(ENOMEM);
    }

    /* From here on, dropping decoder_ref releases everything acquired so far. */
    decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
    if (!decoder->hw_device_ref) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    decoder->stream   = device_hwctx->stream;
    decoder->cudl     = device_hwctx->internal->cuda_dl;
    decoder->cuda_ctx = device_hwctx->cuda_ctx;

    ret = cuvid_load_functions(&decoder->cvdl, logctx);
    if (ret < 0) {
        av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
        goto fail;
    }

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        goto fail;

    /* Only attempt creation when the capability check passes. */
    ret = nvdec_test_capabilities(decoder, params, logctx);
    if (ret >= 0)
        ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));

    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&popped_ctx));

    if (ret < 0)
        goto fail;

    *out = decoder_ref;
    return 0;

fail:
    av_buffer_unref(&decoder_ref);
    return ret;
}
245 
nvdec_decoder_frame_alloc(void * opaque,size_t size)246 static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, size_t size)
247 {
248     NVDECFramePool *pool = opaque;
249     AVBufferRef *ret;
250 
251     if (pool->nb_allocated >= pool->dpb_size)
252         return NULL;
253 
254     ret = av_buffer_alloc(sizeof(unsigned int));
255     if (!ret)
256         return NULL;
257 
258     *(unsigned int*)ret->data = pool->nb_allocated++;
259 
260     return ret;
261 }
262 
/**
 * Release everything held by the NVDEC hwaccel private context: bitstream
 * buffers, slice offset table, decoder reference and surface index pool.
 *
 * @param avctx codec context whose hwaccel private data is torn down
 * @return always 0
 */
int ff_nvdec_decode_uninit(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    /* Bitstream storage. */
    av_freep(&ctx->bitstream);
    av_freep(&ctx->bitstream_internal);
    ctx->bitstream_allocated = 0;
    ctx->bitstream_len       = 0;

    /* Slice offset table. */
    av_freep(&ctx->slice_offsets);
    ctx->slice_offsets_allocated = 0;
    ctx->nb_slices               = 0;

    /* Decoder and its surface index pool. */
    av_buffer_unref(&ctx->decoder_ref);
    av_buffer_pool_uninit(&ctx->decoder_pool);

    return 0;
}
281 
nvdec_free_dummy(struct AVHWFramesContext * ctx)282 static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
283 {
284     av_buffer_pool_uninit(&ctx->pool);
285 }
286 
nvdec_alloc_dummy(size_t size)287 static AVBufferRef *nvdec_alloc_dummy(size_t size)
288 {
289     return av_buffer_create(NULL, 0, NULL, NULL, 0);
290 }
291 
/**
 * Create and initialise a hardware frames context for this decoder.
 *
 * @param avctx          codec context supplying the device and hwaccel format
 * @param out_frames_ref receives the new frames context; reset to NULL on
 *                       failure after creation
 * @param dummy          non-zero to build the placeholder context (three
 *                       extra pool entries, placeholder pool, custom free
 *                       callback); zero to build one with an initial pool
 *                       size of 0
 * @return 0 on success, a negative AVERROR code on failure
 */
static int nvdec_init_hwframes(AVCodecContext *avctx, AVBufferRef **out_frames_ref, int dummy)
{
    AVHWFramesContext *frames_ctx;
    int ret;

    ret = avcodec_get_hw_frames_parameters(avctx,
                                           avctx->hw_device_ctx,
                                           avctx->hwaccel->pix_fmt,
                                           out_frames_ref);
    if (ret < 0)
        return ret;

    frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;

    if (!dummy) {
        /* Frames are normally not allocated from this context, so no
         * initial pool is requested. */
        frames_ctx->initial_pool_size = 0;
    } else {
        /* Same adjustment as ff_decode_get_hw_frames_ctx, kept for compatibility. */
        frames_ctx->initial_pool_size += 3;

        frames_ctx->free = nvdec_free_dummy;
        frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
        if (!frames_ctx->pool) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    ret = av_hwframe_ctx_init(*out_frames_ref);
    if (ret < 0)
        goto fail;

    return 0;

fail:
    av_buffer_unref(out_frames_ref);
    return ret;
}
330 
/**
 * Set up NVDEC decoding for a codec context.
 *
 * Maps the codec and software pixel format to CUVID parameters, makes sure
 * a frames context exists (creating a placeholder one on avctx plus a
 * separate one for output frames when the caller supplied none), creates
 * the CUVID decoder and the pool that hands out decoder surface indices.
 *
 * @param avctx codec context being initialised
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_nvdec_decode_init(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    NVDECDecoder        *decoder;
    AVBufferRef         *real_hw_frames_ref = NULL;
    NVDECFramePool      *pool;
    AVHWFramesContext   *frames_ctx;
    const AVPixFmtDescriptor *sw_desc;

    CUVIDDECODECREATEINFO params = { 0 };

    cudaVideoSurfaceFormat output_format;
    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
    int ret = 0;

    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    if (!sw_desc)
        return AVERROR_BUG;

    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
    if (cuvid_codec_type < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
        return AVERROR(ENOSYS);
    }
    /* 4:4:4 output is only used when the codec-specific hwaccel allows it. */
    chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    if (!avctx->hw_frames_ctx) {
        /* No caller-provided frames context: create the placeholder on
         * avctx and a second context for the frames actually output. */
        ret = nvdec_init_hwframes(avctx, &avctx->hw_frames_ctx, 1);
        if (ret < 0)
            return ret;

        ret = nvdec_init_hwframes(avctx, &real_hw_frames_ref, 0);
        if (ret < 0)
            return ret;
    } else {
        real_hw_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
        if (!real_hw_frames_ref)
            return AVERROR(ENOMEM);
    }

    switch (sw_desc->comp[0].depth) {
    case 8:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
                                     cudaVideoSurfaceFormat_NV12;
        break;
    case 10:
    case 12:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
                                     cudaVideoSurfaceFormat_P016;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
        av_buffer_unref(&real_hw_frames_ref);
        return AVERROR(ENOSYS);
    }

    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;

    params.ulWidth             = avctx->coded_width;
    params.ulHeight            = avctx->coded_height;
    params.ulTargetWidth       = avctx->coded_width;
    params.ulTargetHeight      = avctx->coded_height;
    params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
    params.OutputFormat        = output_format;
    params.CodecType           = cuvid_codec_type;
    params.ChromaFormat        = cuvid_chroma_format;
    params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
    params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;

    ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
    if (ret < 0) {
        if (params.ulNumDecodeSurfaces > 32) {
            av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
                   (int)params.ulNumDecodeSurfaces);
            av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
                   avctx->thread_count);
        }
        av_buffer_unref(&real_hw_frames_ref);
        return ret;
    }

    /* The decoder takes over ownership of the output frames context. */
    decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    decoder->real_hw_frames_ref = real_hw_frames_ref;
    real_hw_frames_ref = NULL;

    pool = av_mallocz(sizeof(*pool));
    if (!pool) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    pool->dpb_size = frames_ctx->initial_pool_size;

    ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
                                             nvdec_decoder_frame_alloc, av_free);
    if (!ctx->decoder_pool) {
        /* No pool was created, so its av_free callback will never run on
         * the bookkeeping struct; release it here to avoid leaking it. */
        av_freep(&pool);
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    return 0;
fail:
    ff_nvdec_decode_uninit(avctx);
    return ret;
}
442 
nvdec_fdd_priv_free(void * priv)443 static void nvdec_fdd_priv_free(void *priv)
444 {
445     NVDECFrame *cf = priv;
446 
447     if (!cf)
448         return;
449 
450     av_buffer_unref(&cf->idx_ref);
451     av_buffer_unref(&cf->decoder_ref);
452     av_buffer_unref(&cf->ref_idx_ref);
453 
454     av_freep(&priv);
455 }
456 
nvdec_unmap_mapped_frame(void * opaque,uint8_t * data)457 static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
458 {
459     NVDECFrame *unmap_data = (NVDECFrame*)data;
460     NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
461     void *logctx = decoder->hw_device_ref->data;
462     CUdeviceptr devptr = (CUdeviceptr)opaque;
463     int ret;
464     CUcontext dummy;
465 
466     ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
467     if (ret < 0)
468         goto finish;
469 
470     CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
471 
472     CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
473 
474 finish:
475     av_buffer_unref(&unmap_data->idx_ref);
476     av_buffer_unref(&unmap_data->decoder_ref);
477     av_buffer_unref(&unmap_data->ref_idx_ref);
478     av_free(unmap_data);
479 }
480 
/**
 * Post-process callback: map the decoded surface and expose it through the
 * frame's data pointers.
 *
 * The mapping is owned by frame->buf[1], whose free callback
 * (nvdec_unmap_mapped_frame) unmaps the surface and drops the references
 * keeping the surface index and the decoder alive. The frame's frames
 * context is replaced by the decoder's real_hw_frames_ref.
 *
 * @param logctx logging context
 * @param frame  frame whose FrameDecodeData carries the NVDECFrame to map;
 *               frame->buf[1] is assumed to be unset on entry
 * @return 0 on success, a negative AVERROR code on failure
 */
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
{
    FrameDecodeData  *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
    NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;

    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    /* Read the software format now: hwctx belongs to the frames context
     * that is unreferenced below and must not be touched afterwards. */
    const enum AVPixelFormat sw_format = hwctx->sw_format;
    AVBufferRef *real_frames_ref;

    CUVIDPROCPARAMS vpp = { 0 };
    NVDECFrame *unmap_data = NULL;

    CUcontext dummy;
    CUdeviceptr devptr;

    unsigned int pitch, i;
    unsigned int offset = 0;
    int shift_h = 0, shift_v = 0;
    int ret = 0;

    vpp.progressive_frame = 1;
    vpp.output_stream = decoder->stream;

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
                                                     cf->idx, &devptr,
                                                     &pitch, &vpp));
    if (ret < 0)
        goto finish;

    unmap_data = av_mallocz(sizeof(*unmap_data));
    if (!unmap_data) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Populate the unmap record completely before handing it to a buffer:
     * the free callback dereferences decoder_ref unconditionally. */
    unmap_data->idx         = cf->idx;
    unmap_data->idx_ref     = av_buffer_ref(cf->idx_ref);
    unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
    if (!unmap_data->idx_ref || !unmap_data->decoder_ref) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
                                     nvdec_unmap_mapped_frame, (void*)devptr,
                                     AV_BUFFER_FLAG_READONLY);
    if (!frame->buf[1]) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Acquire the new frames context before dropping the old one. */
    real_frames_ref = av_buffer_ref(decoder->real_hw_frames_ref);
    if (!real_frames_ref) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }
    av_buffer_unref(&frame->hw_frames_ctx);
    frame->hw_frames_ctx = real_frames_ref;

    /* Planes are laid out back to back in the mapped surface, all sharing
     * the same pitch; chroma planes use the vertically subsampled height. */
    av_pix_fmt_get_chroma_sub_sample(sw_format, &shift_h, &shift_v);
    for (i = 0; frame->linesize[i]; i++) {
        frame->data[i] = (uint8_t*)(devptr + offset);
        frame->linesize[i] = pitch;
        offset += pitch * (frame->height >> (i ? shift_v : 0));
    }

    goto finish;

copy_fail:
    if (!frame->buf[1]) {
        /* The buffer never took ownership: unmap and clean up by hand. */
        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
        if (unmap_data) {
            av_buffer_unref(&unmap_data->idx_ref);
            av_buffer_unref(&unmap_data->decoder_ref);
        }
        av_freep(&unmap_data);
    } else {
        /* The free callback unmaps and releases unmap_data. */
        av_buffer_unref(&frame->buf[1]);
    }

finish:
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
    return ret;
}
559 
/**
 * Begin decoding a frame: reset the per-frame bitstream/slice counters and,
 * if the frame has no NVDEC state yet, attach a new NVDECFrame holding a
 * decoder reference and a surface index from the pool.
 *
 * @param avctx codec context
 * @param frame frame about to be decoded
 * @return 0 on success, AVERROR(ENOMEM) if allocation fails or no decoder
 *         surface is available
 */
int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
{
    NVDECContext    *ctx = avctx->internal->hwaccel_priv_data;
    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame *cf;

    ctx->nb_slices     = 0;
    ctx->bitstream_len = 0;

    /* The frame already carries its NVDEC state. */
    if (fdd->hwaccel_priv)
        return 0;

    cf = av_mallocz(sizeof(*cf));
    if (!cf)
        return AVERROR(ENOMEM);

    cf->decoder_ref = av_buffer_ref(ctx->decoder_ref);
    if (!cf->decoder_ref) {
        nvdec_fdd_priv_free(cf);
        return AVERROR(ENOMEM);
    }

    cf->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
    if (!cf->idx_ref) {
        av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
        nvdec_fdd_priv_free(cf);
        return AVERROR(ENOMEM);
    }

    /* Without a separate reference surface both indices are the same. */
    cf->idx     = *(unsigned int*)cf->idx_ref->data;
    cf->ref_idx = cf->idx;

    fdd->post_process      = nvdec_retrieve_data;
    fdd->hwaccel_priv_free = nvdec_fdd_priv_free;
    fdd->hwaccel_priv      = cf;

    return 0;
}
601 
/**
 * Begin decoding a frame that may use a separate surface for reference.
 *
 * Performs ff_nvdec_start_frame(), then either acquires a second surface
 * index for use as the reference index (has_sep_ref != 0) or releases any
 * such surface and points the reference index back at the output surface.
 *
 * @param avctx       codec context
 * @param frame       frame about to be decoded
 * @param has_sep_ref non-zero if a separate reference surface is required
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_nvdec_start_frame_sep_ref(AVCodecContext *avctx, AVFrame *frame, int has_sep_ref)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame *cf;
    int ret;

    ret = ff_nvdec_start_frame(avctx, frame);
    if (ret < 0)
        return ret;

    cf = fdd->hwaccel_priv;

    if (has_sep_ref) {
        if (!cf->ref_idx_ref) {
            cf->ref_idx_ref = av_buffer_pool_get(ctx->decoder_pool);
            if (!cf->ref_idx_ref) {
                av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
                /*
                 * Do not free cf here: it is still referenced by
                 * fdd->hwaccel_priv and will be released through the
                 * registered hwaccel_priv_free callback. Freeing it now
                 * would leave a dangling pointer and a later double free.
                 */
                return AVERROR(ENOMEM);
            }
        }
        cf->ref_idx = *(unsigned int*)cf->ref_idx_ref->data;
    } else {
        av_buffer_unref(&cf->ref_idx_ref);
        cf->ref_idx = cf->idx;
    }

    return 0;
}
635 
/**
 * Submit the accumulated bitstream and slice table for the current picture
 * to the CUVID decoder.
 *
 * @param avctx codec context
 * @return 0 on success, a negative AVERROR code if pushing the CUDA context
 *         or decoding the picture fails
 */
int ff_nvdec_end_frame(AVCodecContext *avctx)
{
    NVDECContext     *ctx = avctx->internal->hwaccel_priv_data;
    NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    CUVIDPICPARAMS    *pp = &ctx->pic_params;
    void          *logctx = avctx;
    CUcontext popped_ctx;
    int ret;

    /* Fill in the bitstream description gathered while parsing slices. */
    pp->pSliceDataOffsets = ctx->slice_offsets;
    pp->nNumSlices        = ctx->nb_slices;
    pp->pBitstreamData    = ctx->bitstream;
    pp->nBitstreamDataLen = ctx->bitstream_len;

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, pp));

    /* The context is popped whether or not decoding succeeded. */
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&popped_ctx));

    return ret;
}
665 
/**
 * End-of-frame handler paired with ff_nvdec_simple_decode_slice(): submits
 * the picture, then clears the bitstream pointer without freeing it.
 *
 * @param avctx codec context
 * @return the result of ff_nvdec_end_frame()
 */
int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    const int status  = ff_nvdec_end_frame(avctx);

    /* Cleared so the next slice re-anchors the bitstream pointer. */
    ctx->bitstream = NULL;

    return status;
}
673 
/**
 * Record a slice without copying it.
 *
 * The first slice of a frame anchors ctx->bitstream at its buffer; every
 * slice is stored as an offset from that anchor and its size is added to
 * the total bitstream length.
 *
 * @param avctx  codec context
 * @param buffer start of the slice data
 * @param size   size of the slice data in bytes
 * @return 0 on success, AVERROR(ENOMEM) if the offset table cannot grow
 */
int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
                                 uint32_t size)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    void *grown;

    grown = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
                            (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
    if (!grown)
        return AVERROR(ENOMEM);
    ctx->slice_offsets = grown;

    if (!ctx->bitstream)
        ctx->bitstream = (uint8_t*)buffer;

    ctx->slice_offsets[ctx->nb_slices++] = buffer - ctx->bitstream;
    ctx->bitstream_len += size;

    return 0;
}
695 
/**
 * Fill in the parameters of a CUDA frames context for this stream.
 *
 * @param avctx         codec context describing the stream
 * @param hw_frames_ctx frames context to fill in
 * @param dpb_size      number of surfaces the codec needs; two more are
 *                      added to the pool size
 * @param supports_444  non-zero if 4:4:4 output may be selected
 * @return 0 on success, AVERROR_BUG for an unknown pixel format or codec,
 *         AVERROR(EINVAL) for an unsupported chroma format or bit depth
 */
int ff_nvdec_frame_params(AVCodecContext *avctx,
                          AVBufferRef *hw_frames_ctx,
                          int dpb_size,
                          int supports_444)
{
    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
    const AVPixFmtDescriptor *sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
    int depth;

    if (!sw_desc)
        return AVERROR_BUG;

    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
    if (cuvid_codec_type < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
        return AVERROR(EINVAL);
    }
    chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    /* Dimensions are rounded up to the next even value. */
    frames_ctx->height = (avctx->coded_height + 1) & ~1;
    frames_ctx->width  = (avctx->coded_width + 1) & ~1;
    frames_ctx->format = AV_PIX_FMT_CUDA;
    /*
     * Two frames beyond the DPB size are reserved so that deinterlacing
     * filters can hold onto their frames.
     */
    frames_ctx->initial_pool_size = dpb_size + 2;

    depth = sw_desc->comp[0].depth;
    if (depth == 8)
        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
    else if (depth == 10)
        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
    else if (depth == 12)
        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
    else
        return AVERROR(EINVAL);

    return 0;
}
747 
/**
 * Look up the decoder surface index to use when referencing a frame.
 *
 * @param frame frame to query; may be NULL
 * @return the frame's reference surface index, or -1 if the frame is NULL,
 *         has no private data, or carries no NVDEC state
 */
int ff_nvdec_get_ref_idx(AVFrame *frame)
{
    if (frame && frame->private_ref) {
        FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
        NVDECFrame       *cf = (NVDECFrame*)fdd->hwaccel_priv;

        if (cf)
            return cf->ref_idx;
    }

    return -1;
}
763