• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * HW decode acceleration through NVDEC
3  *
4  * Copyright (c) 2016 Anton Khirnov
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 
25 #include "libavutil/common.h"
26 #include "libavutil/error.h"
27 #include "libavutil/hwcontext.h"
28 #include "libavutil/hwcontext_cuda_internal.h"
29 #include "libavutil/cuda_check.h"
30 #include "libavutil/pixdesc.h"
31 #include "libavutil/pixfmt.h"
32 
33 #include "avcodec.h"
34 #include "decode.h"
35 #include "nvdec.h"
36 #include "internal.h"
37 
38 #if !NVDECAPI_CHECK_VERSION(9, 0)
39 #define cudaVideoSurfaceFormat_YUV444 2
40 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
41 #endif
42 
43 typedef struct NVDECDecoder {
44     CUvideodecoder decoder;
45 
46     AVBufferRef *hw_device_ref;
47     AVBufferRef *real_hw_frames_ref;
48     CUcontext    cuda_ctx;
49     CUstream     stream;
50 
51     CudaFunctions *cudl;
52     CuvidFunctions *cvdl;
53 } NVDECDecoder;
54 
/**
 * Bookkeeping for the pool of decoder surface indices
 * (see nvdec_decoder_frame_alloc()).
 */
typedef struct NVDECFramePool {
    /* upper bound on the number of surface indices handed out */
    unsigned int dpb_size;
    /* number of indices handed out so far; also the next index to assign */
    unsigned int nb_allocated;
} NVDECFramePool;
59 
/*
 * Wrap a CUDA/cuvid call with FF_CUDA_CHECK_DL. The expansion refers to the
 * identifiers `logctx` and `decoder`, so both must be in scope at every
 * point of use.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
61 
map_avcodec_id(enum AVCodecID id)62 static int map_avcodec_id(enum AVCodecID id)
63 {
64     switch (id) {
65     case AV_CODEC_ID_H264:       return cudaVideoCodec_H264;
66     case AV_CODEC_ID_HEVC:       return cudaVideoCodec_HEVC;
67     case AV_CODEC_ID_MJPEG:      return cudaVideoCodec_JPEG;
68     case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
69     case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
70     case AV_CODEC_ID_MPEG4:      return cudaVideoCodec_MPEG4;
71     case AV_CODEC_ID_VC1:        return cudaVideoCodec_VC1;
72     case AV_CODEC_ID_VP8:        return cudaVideoCodec_VP8;
73     case AV_CODEC_ID_VP9:        return cudaVideoCodec_VP9;
74     case AV_CODEC_ID_WMV3:       return cudaVideoCodec_VC1;
75     }
76     return -1;
77 }
78 
map_chroma_format(enum AVPixelFormat pix_fmt)79 static int map_chroma_format(enum AVPixelFormat pix_fmt)
80 {
81     int shift_h = 0, shift_v = 0;
82 
83     av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
84 
85     if (shift_h == 1 && shift_v == 1)
86         return cudaVideoChromaFormat_420;
87     else if (shift_h == 1 && shift_v == 0)
88         return cudaVideoChromaFormat_422;
89     else if (shift_h == 0 && shift_v == 0)
90         return cudaVideoChromaFormat_444;
91 
92     return -1;
93 }
94 
/**
 * Ask the driver whether it can decode a stream with the given parameters.
 *
 * The caller is expected to have made the decoder's CUDA context current
 * (nvdec_decoder_create() pushes it before calling this).
 *
 * @param decoder decoder whose function tables are used for the query
 * @param params  creation parameters to validate (codec, chroma format,
 *                bit depth, dimensions)
 * @param logctx  logging context
 * @return 0 if the parameters are acceptable or the driver cannot be
 *         queried, AVERROR(EINVAL) if the hardware rejects them, or the
 *         error returned by the capability query itself
 */
static int nvdec_test_capabilities(NVDECDecoder *decoder,
                                   CUVIDDECODECREATEINFO *params, void *logctx)
{
    CUVIDDECODECAPS caps = { 0 };
    int err;

    /* Without the query entry point there is nothing to check against. */
    if (!decoder->cvdl->cuvidGetDecoderCaps) {
        av_log(logctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
        av_log(logctx, AV_LOG_WARNING, "The minimum required version is "
#if defined(_WIN32) || defined(__CYGWIN__)
            "378.66"
#else
            "378.13"
#endif
            ". Continuing blind.\n");
        return 0;
    }

    caps.eCodecType      = params->CodecType;
    caps.eChromaFormat   = params->ChromaFormat;
    caps.nBitDepthMinus8 = params->bitDepthMinus8;

    err = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
    if (err < 0)
        return err;

    av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
    av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
           caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
    av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
           caps.nMinWidth, caps.nMaxWidth);
    av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
           caps.nMinHeight, caps.nMaxHeight);

    if (!caps.bIsSupported) {
        av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
        return AVERROR(EINVAL);
    }

    if (params->ulWidth < caps.nMinWidth || params->ulWidth > caps.nMaxWidth) {
        av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
               (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
        return AVERROR(EINVAL);
    }

    if (params->ulHeight < caps.nMinHeight || params->ulHeight > caps.nMaxHeight) {
        av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
               (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
        return AVERROR(EINVAL);
    }

    /* Pixel area divided by 256 is compared against the macroblock limit. */
    if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
        av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
               (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
        return AVERROR(EINVAL);
    }

    return 0;
}
154 
/**
 * AVBuffer free callback for an NVDECDecoder.
 *
 * Destroys the cuvid decoder (if one was created), drops the frames and
 * device references, releases the cuvid function table and frees the
 * structure itself.
 *
 * @param opaque unused
 * @param data   the NVDECDecoder to release
 */
static void nvdec_decoder_free(void *opaque, uint8_t *data)
{
    NVDECDecoder *decoder = (NVDECDecoder*)data;

    /* A partially constructed decoder has no cuvid handle to destroy. */
    if (decoder->decoder) {
        CUcontext dummy;
        void *logctx = decoder->hw_device_ref->data;

        CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
        CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
    }

    av_buffer_unref(&decoder->real_hw_frames_ref);
    av_buffer_unref(&decoder->hw_device_ref);

    cuvid_free_functions(&decoder->cvdl);

    av_freep(&decoder);
}
174 
/**
 * Create a refcounted NVDECDecoder wrapping a new cuvid decoder.
 *
 * @param out           receives the new buffer reference on success
 * @param hw_device_ref CUDA hardware device context to decode on
 * @param params        cuvid decoder creation parameters
 * @param logctx        logging context
 * @return 0 on success, a negative AVERROR code on failure (in which case
 *         *out is not written)
 */
static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
                                CUVIDDECODECREATEINFO *params, void *logctx)
{
    AVHWDeviceContext   *device_ctx   = (AVHWDeviceContext*)hw_device_ref->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;

    NVDECDecoder *decoder;
    AVBufferRef  *ref;
    CUcontext     dummy;
    int           ret;

    decoder = av_mallocz(sizeof(*decoder));
    if (!decoder)
        return AVERROR(ENOMEM);

    /* From here on the buffer owns the decoder; every failure path simply
     * drops the reference and lets nvdec_decoder_free() clean up. */
    ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
                           nvdec_decoder_free, NULL, AV_BUFFER_FLAG_READONLY);
    if (!ref) {
        av_freep(&decoder);
        return AVERROR(ENOMEM);
    }

    decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
    if (!decoder->hw_device_ref) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    decoder->cuda_ctx = device_hwctx->cuda_ctx;
    decoder->stream   = device_hwctx->stream;
    decoder->cudl     = device_hwctx->internal->cuda_dl;

    ret = cuvid_load_functions(&decoder->cvdl, logctx);
    if (ret < 0) {
        av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
        goto fail;
    }

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        goto fail;

    /* Only attempt creation when the capability check passes; the context
     * is popped exactly once in either case. */
    ret = nvdec_test_capabilities(decoder, params, logctx);
    if (ret >= 0)
        ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));

    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));

    if (ret < 0)
        goto fail;

    *out = ref;
    return 0;

fail:
    av_buffer_unref(&ref);
    return ret;
}
238 
nvdec_decoder_frame_alloc(void * opaque,int size)239 static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, int size)
240 {
241     NVDECFramePool *pool = opaque;
242     AVBufferRef *ret;
243 
244     if (pool->nb_allocated >= pool->dpb_size)
245         return NULL;
246 
247     ret = av_buffer_alloc(sizeof(unsigned int));
248     if (!ret)
249         return NULL;
250 
251     *(unsigned int*)ret->data = pool->nb_allocated++;
252 
253     return ret;
254 }
255 
/**
 * Release everything held by the hwaccel private context: the bitstream and
 * slice-offset buffers, the decoder reference and the surface index pool.
 *
 * @param avctx codec context whose hwaccel private data is cleaned up
 * @return always 0
 */
int ff_nvdec_decode_uninit(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    /* slice offset table */
    av_freep(&ctx->slice_offsets);
    ctx->slice_offsets_allocated = 0;
    ctx->nb_slices               = 0;

    /* accumulated bitstream */
    av_freep(&ctx->bitstream);
    ctx->bitstream_allocated = 0;
    ctx->bitstream_len       = 0;

    /* decoder first, then the pool of surface indices */
    av_buffer_unref(&ctx->decoder_ref);
    av_buffer_pool_uninit(&ctx->decoder_pool);

    return 0;
}
273 
nvdec_free_dummy(struct AVHWFramesContext * ctx)274 static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
275 {
276     av_buffer_pool_uninit(&ctx->pool);
277 }
278 
nvdec_alloc_dummy(int size)279 static AVBufferRef *nvdec_alloc_dummy(int size)
280 {
281     return av_buffer_create(NULL, 0, NULL, NULL, 0);
282 }
283 
/**
 * Create and initialise a hardware frames context for this decoder.
 *
 * @param avctx          codec context supplying the device and hwaccel format
 * @param out_frames_ref receives the new frames context reference; reset to
 *                       NULL again if initialisation fails
 * @param dummy          non-zero to build a context backed by the placeholder
 *                       pool (nvdec_alloc_dummy()), zero for a context with
 *                       no preallocated pool
 * @return 0 on success, a negative AVERROR code on failure
 */
static int nvdec_init_hwframes(AVCodecContext *avctx, AVBufferRef **out_frames_ref, int dummy)
{
    AVHWFramesContext *hwfc;
    int err;

    err = avcodec_get_hw_frames_parameters(avctx,
                                           avctx->hw_device_ctx,
                                           avctx->hwaccel->pix_fmt,
                                           out_frames_ref);
    if (err < 0)
        return err;

    hwfc = (AVHWFramesContext*)(*out_frames_ref)->data;

    if (!dummy) {
        /* This context is normally not used to allocate frames from, so no
         * initial pool is requested. */
        hwfc->initial_pool_size = 0;
    } else {
        /* Same adjustment as ff_decode_get_hw_frames_ctx, kept for
         * compatibility. */
        hwfc->initial_pool_size += 3;

        hwfc->free = nvdec_free_dummy;
        hwfc->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
        if (!hwfc->pool) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    err = av_hwframe_ctx_init(*out_frames_ref);
    if (err < 0)
        goto fail;

    return 0;

fail:
    av_buffer_unref(out_frames_ref);
    return err;
}
322 
/**
 * Initialise the NVDEC hwaccel for a codec context.
 *
 * Validates codec, chroma format and bit depth, sets up the frames contexts
 * (a dummy one on avctx plus a real one when the caller supplied none),
 * creates the cuvid decoder and the pool of decoder surface indices.
 *
 * @param avctx codec context; avctx->internal->hwaccel_priv_data must point
 *              to an NVDECContext
 * @return 0 on success; AVERROR_BUG for an unknown pixel format or codec,
 *         AVERROR(ENOSYS) for an unsupported chroma format or bit depth,
 *         AVERROR(ENOMEM) on allocation failure, or the error returned by
 *         frames-context or decoder creation
 */
int ff_nvdec_decode_init(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;

    NVDECDecoder        *decoder;
    AVBufferRef         *real_hw_frames_ref;
    NVDECFramePool      *pool;
    AVHWFramesContext   *frames_ctx;
    const AVPixFmtDescriptor *sw_desc;

    CUVIDDECODECREATEINFO params = { 0 };

    cudaVideoSurfaceFormat output_format;
    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
    int ret = 0;

    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    if (!sw_desc)
        return AVERROR_BUG;

    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
    if (cuvid_codec_type < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
        return AVERROR(ENOSYS);
    }
    /* 4:4:4 output is only selected when the codec-specific hwaccel set
     * supports_444 in the private context. */
    chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    if (!avctx->hw_frames_ctx) {
        /* No caller-supplied frames context: install a dummy one on avctx
         * and keep a separate real one for the output frames. */
        ret = nvdec_init_hwframes(avctx, &avctx->hw_frames_ctx, 1);
        if (ret < 0)
            return ret;

        ret = nvdec_init_hwframes(avctx, &real_hw_frames_ref, 0);
        if (ret < 0)
            return ret;
    } else {
        real_hw_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
        if (!real_hw_frames_ref)
            return AVERROR(ENOMEM);
    }

    switch (sw_desc->comp[0].depth) {
    case 8:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
                                     cudaVideoSurfaceFormat_NV12;
        break;
    case 10:
    case 12:
        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
                                     cudaVideoSurfaceFormat_P016;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
        av_buffer_unref(&real_hw_frames_ref);
        return AVERROR(ENOSYS);
    }

    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;

    params.ulWidth             = avctx->coded_width;
    params.ulHeight            = avctx->coded_height;
    params.ulTargetWidth       = avctx->coded_width;
    params.ulTargetHeight      = avctx->coded_height;
    params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
    params.OutputFormat        = output_format;
    params.CodecType           = cuvid_codec_type;
    params.ChromaFormat        = cuvid_chroma_format;
    params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
    params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;

    ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
    if (ret < 0) {
        if (params.ulNumDecodeSurfaces > 32) {
            av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
                   (int)params.ulNumDecodeSurfaces);
            av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
                   avctx->thread_count);
        }
        av_buffer_unref(&real_hw_frames_ref);
        return ret;
    }

    /* The decoder takes over the real frames context reference. */
    decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    decoder->real_hw_frames_ref = real_hw_frames_ref;
    real_hw_frames_ref = NULL;

    pool = av_mallocz(sizeof(*pool));
    if (!pool) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    pool->dpb_size = frames_ctx->initial_pool_size;

    ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
                                             nvdec_decoder_frame_alloc, av_free);
    if (!ctx->decoder_pool) {
        /* Ownership of `pool` only passes to the buffer pool on success, and
         * ff_nvdec_decode_uninit() has no handle on it, so free it here. */
        av_free(pool);
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    return 0;
fail:
    ff_nvdec_decode_uninit(avctx);
    return ret;
}
434 
nvdec_fdd_priv_free(void * priv)435 static void nvdec_fdd_priv_free(void *priv)
436 {
437     NVDECFrame *cf = priv;
438 
439     if (!cf)
440         return;
441 
442     av_buffer_unref(&cf->idx_ref);
443     av_buffer_unref(&cf->decoder_ref);
444 
445     av_freep(&priv);
446 }
447 
/**
 * AVBuffer free callback for a mapped output frame.
 *
 * Unmaps the device pointer from the decoder, then drops the surface index
 * and decoder references that were held for the mapping.
 *
 * @param opaque the mapped CUdeviceptr, passed by value in the pointer
 * @param data   NVDECFrame carrying the references to release
 */
static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
{
    NVDECFrame *unmap_data = (NVDECFrame*)data;
    NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
    void *logctx = decoder->hw_device_ref->data;
    CUdeviceptr devptr = (CUdeviceptr)opaque;
    CUcontext dummy;

    /* If the context cannot be made current the unmap is skipped, but the
     * references are still released below. */
    if (CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)) >= 0) {
        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));

        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
    }

    av_buffer_unref(&unmap_data->idx_ref);
    av_buffer_unref(&unmap_data->decoder_ref);
    av_free(unmap_data);
}
470 
/**
 * Post-process callback: map the decoded surface and expose it through the
 * frame's data pointers.
 *
 * On success frame->buf[1] owns the mapping (it is undone by
 * nvdec_unmap_mapped_frame() when the buffer is freed), frame->hw_frames_ctx
 * refers to the decoder's real frames context, and data[]/linesize[] describe
 * the mapped planes.
 *
 * @param logctx logging context
 * @param frame  frame whose private_ref carries the NVDECFrame to map
 * @return 0 on success, a negative AVERROR code on failure
 */
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
{
    FrameDecodeData  *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
    NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;

    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;

    CUVIDPROCPARAMS vpp = { 0 };
    NVDECFrame *unmap_data = NULL;

    CUcontext dummy;
    CUdeviceptr devptr;

    unsigned int pitch, i;
    unsigned int offset = 0;
    int shift_h = 0, shift_v = 0;
    int ret = 0;

    vpp.progressive_frame = 1;
    vpp.output_stream = decoder->stream;

    /* Read the chroma subsampling now: frame->hw_frames_ctx is replaced
     * further down and hwctx must not be touched after this frame's
     * reference to it has been dropped. */
    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
                                                     cf->idx, &devptr,
                                                     &pitch, &vpp));
    if (ret < 0)
        goto finish;

    unmap_data = av_mallocz(sizeof(*unmap_data));
    if (!unmap_data) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Populate unmap_data completely BEFORE it becomes the payload of
     * frame->buf[1]: nvdec_unmap_mapped_frame() dereferences decoder_ref
     * unconditionally, so the buffer must never exist without it. */
    unmap_data->idx         = cf->idx;
    unmap_data->idx_ref     = av_buffer_ref(cf->idx_ref);
    unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
    if (!unmap_data->idx_ref || !unmap_data->decoder_ref) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
                                     nvdec_unmap_mapped_frame, (void*)devptr,
                                     AV_BUFFER_FLAG_READONLY);
    if (!frame->buf[1]) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    av_buffer_unref(&frame->hw_frames_ctx);
    frame->hw_frames_ctx = av_buffer_ref(decoder->real_hw_frames_ref);
    if (!frame->hw_frames_ctx) {
        ret = AVERROR(ENOMEM);
        goto copy_fail;
    }

    /* Planes are laid out back to back in the mapping, all with the same
     * pitch; planes after the first use the chroma vertical shift. */
    for (i = 0; frame->linesize[i]; i++) {
        frame->data[i] = (uint8_t*)(devptr + offset);
        frame->linesize[i] = pitch;
        offset += pitch * (frame->height >> (i ? shift_v : 0));
    }

    goto finish;

copy_fail:
    if (!frame->buf[1]) {
        /* The buffer was never created, so undo everything by hand. */
        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
        if (unmap_data) {
            av_buffer_unref(&unmap_data->idx_ref);
            av_buffer_unref(&unmap_data->decoder_ref);
        }
        av_freep(&unmap_data);
    } else {
        /* The buffer's free callback unmaps and releases unmap_data. */
        av_buffer_unref(&frame->buf[1]);
    }

finish:
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
    return ret;
}
549 
/**
 * Begin decoding a new frame.
 *
 * Resets the accumulated bitstream/slice counters and, unless the frame
 * already has hwaccel private data, attaches a fresh NVDECFrame holding a
 * decoder reference and a surface index from the pool.
 *
 * @param avctx codec context
 * @param frame frame about to be decoded
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails or no
 *         decoder surface is available
 */
int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
{
    NVDECContext    *ctx = avctx->internal->hwaccel_priv_data;
    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
    NVDECFrame *nvframe;

    ctx->nb_slices     = 0;
    ctx->bitstream_len = 0;

    /* Private data is already attached to this frame; nothing more to do. */
    if (fdd->hwaccel_priv)
        return 0;

    nvframe = av_mallocz(sizeof(*nvframe));
    if (!nvframe)
        return AVERROR(ENOMEM);

    nvframe->decoder_ref = av_buffer_ref(ctx->decoder_ref);
    if (!nvframe->decoder_ref)
        goto fail;

    nvframe->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
    if (!nvframe->idx_ref) {
        av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
        goto fail;
    }
    nvframe->idx = *(unsigned int*)nvframe->idx_ref->data;

    fdd->post_process      = nvdec_retrieve_data;
    fdd->hwaccel_priv_free = nvdec_fdd_priv_free;
    fdd->hwaccel_priv      = nvframe;

    return 0;

fail:
    /* Both failure paths report out-of-memory. */
    nvdec_fdd_priv_free(nvframe);
    return AVERROR(ENOMEM);
}
591 
/**
 * Submit the accumulated picture to the hardware decoder.
 *
 * Fills the bitstream and slice fields of the picture parameters from the
 * private context and calls cuvidDecodePicture with the decoder's CUDA
 * context current.
 *
 * @param avctx codec context
 * @return 0 on success, a negative AVERROR code if pushing the context or
 *         decoding fails
 */
int ff_nvdec_end_frame(AVCodecContext *avctx)
{
    NVDECContext     *ctx = avctx->internal->hwaccel_priv_data;
    NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
    CUVIDPICPARAMS    *pp = &ctx->pic_params;
    void          *logctx = avctx;
    CUcontext dummy;
    int ret;

    pp->pBitstreamData    = ctx->bitstream;
    pp->nBitstreamDataLen = ctx->bitstream_len;
    pp->pSliceDataOffsets = ctx->slice_offsets;
    pp->nNumSlices        = ctx->nb_slices;

    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params));

    /* The context is popped whether or not decoding succeeded. */
    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));

    return ret;
}
621 
/**
 * End-of-frame handler for the "simple" slice path.
 *
 * Submits the picture, then clears the bitstream pointer without freeing
 * it: in this path it is set from the caller's buffer by
 * ff_nvdec_simple_decode_slice().
 *
 * @param avctx codec context
 * @return the result of ff_nvdec_end_frame()
 */
int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    const int status  = ff_nvdec_end_frame(avctx);

    ctx->bitstream = NULL;

    return status;
}
629 
/**
 * Record one slice without copying its data.
 *
 * The first slice of a frame sets the bitstream base pointer; every slice
 * is stored as an offset from that base, and the total length grows by
 * `size`.
 *
 * @param avctx  codec context
 * @param buffer start of the slice data
 * @param size   size of the slice in bytes
 * @return 0 on success, AVERROR(ENOMEM) if the offset table cannot grow
 */
int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
                                 uint32_t size)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    void *grown;

    /* Ensure room for one more slice offset. */
    grown = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
                            (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
    if (!grown)
        return AVERROR(ENOMEM);
    ctx->slice_offsets = grown;

    /* The first slice of the frame defines the base of the bitstream. */
    if (!ctx->bitstream)
        ctx->bitstream = (uint8_t*)buffer;

    ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
    ctx->nb_slices++;
    ctx->bitstream_len += size;

    return 0;
}
651 
/**
 * Fill in the parameters of a hardware frames context for NVDEC output.
 *
 * @param avctx         codec context providing codec, pixel format and
 *                      coded dimensions
 * @param hw_frames_ctx frames context to fill in
 * @param dpb_size      number of surfaces the codec needs; two more are
 *                      added to the pool size
 * @param supports_444  non-zero if 4:4:4 output may be selected
 * @return 0 on success, AVERROR_BUG for an unknown pixel format or codec,
 *         AVERROR(EINVAL) for an unsupported chroma format or bit depth
 */
int ff_nvdec_frame_params(AVCodecContext *avctx,
                          AVBufferRef *hw_frames_ctx,
                          int dpb_size,
                          int supports_444)
{
    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
    const AVPixFmtDescriptor *sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    int cuvid_chroma_format, use_444, depth;

    if (!sw_desc)
        return AVERROR_BUG;

    /* The codec mapping is looked up only to reject unsupported codecs. */
    if (map_avcodec_id(avctx->codec_id) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
        return AVERROR_BUG;
    }

    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
    if (cuvid_chroma_format < 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
        return AVERROR(EINVAL);
    }
    use_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;

    frames_ctx->format = AV_PIX_FMT_CUDA;
    /* Both dimensions are rounded up to the next even value. */
    frames_ctx->width  = (avctx->coded_width + 1) & ~1;
    frames_ctx->height = (avctx->coded_height + 1) & ~1;
    /*
     * Two extra frames are added to the pool to account for deinterlacing
     * filters holding onto their frames.
     */
    frames_ctx->initial_pool_size = dpb_size + 2;

    depth = sw_desc->comp[0].depth;
    if (depth == 8)
        frames_ctx->sw_format = use_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
    else if (depth == 10)
        frames_ctx->sw_format = use_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
    else if (depth == 12)
        frames_ctx->sw_format = use_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
    else
        return AVERROR(EINVAL);

    return 0;
}
703 
/**
 * Look up the decoder surface index assigned to a frame.
 *
 * @param frame frame to query; may be NULL
 * @return the surface index, or -1 if the frame is NULL, has no private
 *         reference, or carries no NVDEC private data
 */
int ff_nvdec_get_ref_idx(AVFrame *frame)
{
    if (frame && frame->private_ref) {
        FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
        NVDECFrame *cf       = (NVDECFrame*)fdd->hwaccel_priv;

        if (cf)
            return cf->idx;
    }

    return -1;
}
719