• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * H.264/HEVC hardware encoding using nvidia nvenc
3  * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config.h"
23 
24 #include "nvenc.h"
25 #include "hevc_sei.h"
26 
27 #include "libavutil/hwcontext_cuda.h"
28 #include "libavutil/hwcontext.h"
29 #include "libavutil/cuda_check.h"
30 #include "libavutil/imgutils.h"
31 #include "libavutil/avassert.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/pixdesc.h"
34 #include "atsc_a53.h"
35 #include "encode.h"
36 #include "internal.h"
37 #include "packet_internal.h"
38 
/* Wrap a CUDA driver call in FF_CUDA_CHECK_DL.
 * The expansion refers to locals named `avctx` and `dl_fn`, so both must be
 * in scope (with exactly those names) wherever this macro is used. */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)

/* Threshold compared against ((major << 4) | minor) of a device's compute
 * capability; devices below it are reported as not supporting NVENC. */
#define NVENC_CAP 0x30

/* Non-zero when rc is one of the three CBR rate-control modes.
 * The argument is parenthesized so that expressions such as `a ? b : c`
 * compare as a whole; note that rc may still be evaluated up to three times,
 * so it must not have side effects. */
#define IS_CBR(rc) ((rc) == NV_ENC_PARAMS_RC_CBR ||             \
                    (rc) == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
                    (rc) == NV_ENC_PARAMS_RC_CBR_HQ)
45 
/* Pixel format list, terminated by AV_PIX_FMT_NONE.
 * Contains software formats plus the CUDA hardware format; the D3D11
 * hardware format is listed only when CONFIG_D3D11VA is enabled.
 * NOTE(review): the ff_ prefix suggests this is shared with the codec
 * definitions in other files -- not visible from here. */
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P016,      // Truncated to 10bits
    AV_PIX_FMT_YUV444P16, // Truncated to 10bits
    AV_PIX_FMT_0RGB32,
    AV_PIX_FMT_0BGR32,
    AV_PIX_FMT_CUDA,
#if CONFIG_D3D11VA
    AV_PIX_FMT_D3D11,
#endif
    AV_PIX_FMT_NONE
};
61 
/* NULL-terminated list of hardware configurations: a frames-context and a
 * device-context entry for CUDA, and the same pair for D3D11VA when
 * CONFIG_D3D11VA is enabled. */
const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = {
    HW_CONFIG_ENCODER_FRAMES(CUDA,  CUDA),
    HW_CONFIG_ENCODER_DEVICE(NONE,  CUDA),
#if CONFIG_D3D11VA
    HW_CONFIG_ENCODER_FRAMES(D3D11, D3D11VA),
    HW_CONFIG_ENCODER_DEVICE(NONE,  D3D11VA),
#endif
    NULL,
};
71 
/* Non-zero for the pixel formats that are encoded as 10 bit (P010, and the
 * 16-bit formats P016 / YUV444P16).
 * The argument is parenthesized so compound expressions compare as a whole;
 * it may be evaluated more than once, so it must be free of side effects. */
#define IS_10BIT(pix_fmt)  ((pix_fmt) == AV_PIX_FMT_P010    || \
                            (pix_fmt) == AV_PIX_FMT_P016    || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)

/* Non-zero for the planar 4:4:4 formats (8 bit and 16 bit).
 * Same evaluation caveat as IS_10BIT. */
#define IS_YUV444(pix_fmt) ((pix_fmt) == AV_PIX_FMT_YUV444P || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)
78 
/* Lookup table pairing each NVENC status code with the AVERROR value and the
 * description string returned for it by nvenc_map_error(). Codes that are
 * not listed here are handled by that function's fallback. */
static const struct {
    NVENCSTATUS nverr;  /* status code as returned by the NVENC API */
    int         averr;  /* corresponding FFmpeg error code (0 for success) */
    const char *desc;   /* short description used in log messages */
} nvenc_errors[] = {
    { NV_ENC_SUCCESS,                      0,                "success"                  },
    { NV_ENC_ERR_NO_ENCODE_DEVICE,         AVERROR(ENOENT),  "no encode device"         },
    { NV_ENC_ERR_UNSUPPORTED_DEVICE,       AVERROR(ENOSYS),  "unsupported device"       },
    { NV_ENC_ERR_INVALID_ENCODERDEVICE,    AVERROR(EINVAL),  "invalid encoder device"   },
    { NV_ENC_ERR_INVALID_DEVICE,           AVERROR(EINVAL),  "invalid device"           },
    { NV_ENC_ERR_DEVICE_NOT_EXIST,         AVERROR(EIO),     "device does not exist"    },
    { NV_ENC_ERR_INVALID_PTR,              AVERROR(EFAULT),  "invalid ptr"              },
    { NV_ENC_ERR_INVALID_EVENT,            AVERROR(EINVAL),  "invalid event"            },
    { NV_ENC_ERR_INVALID_PARAM,            AVERROR(EINVAL),  "invalid param"            },
    { NV_ENC_ERR_INVALID_CALL,             AVERROR(EINVAL),  "invalid call"             },
    { NV_ENC_ERR_OUT_OF_MEMORY,            AVERROR(ENOMEM),  "out of memory"            },
    { NV_ENC_ERR_ENCODER_NOT_INITIALIZED,  AVERROR(EINVAL),  "encoder not initialized"  },
    { NV_ENC_ERR_UNSUPPORTED_PARAM,        AVERROR(ENOSYS),  "unsupported param"        },
    { NV_ENC_ERR_LOCK_BUSY,                AVERROR(EAGAIN),  "lock busy"                },
    { NV_ENC_ERR_NOT_ENOUGH_BUFFER,        AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
    { NV_ENC_ERR_INVALID_VERSION,          AVERROR(EINVAL),  "invalid version"          },
    { NV_ENC_ERR_MAP_FAILED,               AVERROR(EIO),     "map failed"               },
    { NV_ENC_ERR_NEED_MORE_INPUT,          AVERROR(EAGAIN),  "need more input"          },
    { NV_ENC_ERR_ENCODER_BUSY,             AVERROR(EAGAIN),  "encoder busy"             },
    { NV_ENC_ERR_EVENT_NOT_REGISTERD,      AVERROR(EBADF),   "event not registered"     },
    { NV_ENC_ERR_GENERIC,                  AVERROR_UNKNOWN,  "generic error"            },
    { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,  AVERROR(EINVAL),  "incompatible client key"  },
    { NV_ENC_ERR_UNIMPLEMENTED,            AVERROR(ENOSYS),  "unimplemented"            },
    { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO),     "resource register failed" },
    { NV_ENC_ERR_RESOURCE_NOT_REGISTERED,  AVERROR(EBADF),   "resource not registered"  },
    { NV_ENC_ERR_RESOURCE_NOT_MAPPED,      AVERROR(EBADF),   "resource not mapped"      },
};
111 
/**
 * Translate an NVENC status code into an FFmpeg error code.
 *
 * @param err   status code to look up in nvenc_errors
 * @param desc  if non-NULL, receives the matching description string, or
 *              "unknown error" when err is not in the table
 * @return the AVERROR value stored for err, or AVERROR_UNKNOWN when err is
 *         not in the table
 */
static int nvenc_map_error(NVENCSTATUS err, const char **desc)
{
    /* Start from the fallback and overwrite it on the first table hit. */
    const char *text = "unknown error";
    int code         = AVERROR_UNKNOWN;
    int idx;

    for (idx = 0; idx < FF_ARRAY_ELEMS(nvenc_errors); idx++) {
        if (nvenc_errors[idx].nverr != err)
            continue;

        text = nvenc_errors[idx].desc;
        code = nvenc_errors[idx].averr;
        break;
    }

    if (desc)
        *desc = text;

    return code;
}
126 
/**
 * Log an NVENC failure and return the matching FFmpeg error code.
 *
 * The message has the form "<error_string>: <description> (<code>): <details>".
 * When built with NVENC_HAVE_GETLASTERRORSTRING and an encoder session
 * exists, <details> comes from nvEncGetLastErrorString(); otherwise it is
 * "(no details)".
 *
 * @param avctx         codec context used for logging and to reach the
 *                      private NvencContext
 * @param err           NVENC status code being reported
 * @param error_string  caller-supplied prefix describing what failed
 * @return the value nvenc_map_error() yields for err
 */
static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err,
                             const char *error_string)
{
    const char *details = "(no details)";
    const char *desc;
    const int averr = nvenc_map_error(err, &desc);

#ifdef NVENC_HAVE_GETLASTERRORSTRING
    {
        NvencContext *ctx = avctx->priv_data;

        /* The function list is embedded in the context, so only the
         * session handle needs checking before asking for details. */
        if (ctx->nvencoder)
            details = ctx->nvenc_dload_funcs.nvenc_funcs.nvEncGetLastErrorString(ctx->nvencoder);
    }
#endif

    av_log(avctx, AV_LOG_ERROR, "%s: %s (%d): %s\n", error_string, desc, err, details);

    return averr;
}
146 
/* One entry of the preset table built in nvenc_map_preset(): a preset GUID
 * together with the NVENC_* flag bits copied into NvencContext.flags. */
typedef struct GUIDTuple {
    const GUID guid;  /* preset GUID handed to the encoder init params */
    int flags;        /* NVENC_* flags associated with the preset */
} GUIDTuple;
151 
/* Designated array initializer for slot PRESET_<alias>: stores
 * NV_ENC_PRESET_<name>_GUID followed by the optional flag arguments. */
#define PRESET_ALIAS(alias, name, ...) \
    [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }

/* Shorthand for a preset whose table slot and GUID share the same name. */
#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)
156 
/**
 * Resolve ctx->preset into a preset GUID and the associated flag bits.
 *
 * Writes ctx->init_encode_params.presetGUID and ctx->flags. With
 * NVENC_HAVE_NEW_PRESETS, a lossless tuning_info additionally sets
 * NVENC_LOSSLESS in ctx->flags.
 *
 * ctx->preset is used directly as the table index; no range check is done
 * here.
 */
static void nvenc_map_preset(NvencContext *ctx)
{
    /* Indexed by the PRESET_* values; built via the PRESET/PRESET_ALIAS
     * designated-initializer macros. */
    GUIDTuple presets[] = {
#ifdef NVENC_HAVE_NEW_PRESETS
        PRESET(P1),
        PRESET(P2),
        PRESET(P3),
        PRESET(P4),
        PRESET(P5),
        PRESET(P6),
        PRESET(P7),
        PRESET_ALIAS(SLOW,   P7, NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   P1, NVENC_ONE_PASS),
        // Compat aliases
        PRESET_ALIAS(DEFAULT,             P4, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(HP,                  P1, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(HQ,                  P7, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(BD,                  P5, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOW_LATENCY_HP,      P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOW_LATENCY_HQ,      P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOSSLESS_DEFAULT,    P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
        PRESET_ALIAS(LOSSLESS_HP,         P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
#else
        PRESET(DEFAULT),
        PRESET(HP),
        PRESET(HQ),
        PRESET(BD),
        PRESET_ALIAS(SLOW,   HQ,    NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, HQ,    NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   HP,    NVENC_ONE_PASS),
        PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HP,      NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HQ,      NVENC_LOWLATENCY),
        PRESET(LOSSLESS_DEFAULT,    NVENC_LOSSLESS),
        PRESET(LOSSLESS_HP,         NVENC_LOSSLESS),
#endif
    };
    const GUIDTuple *selected = presets + ctx->preset;
    int flags                 = selected->flags;

#ifdef NVENC_HAVE_NEW_PRESETS
    /* Lossless can also be requested through the tuning option. */
    if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS)
        flags |= NVENC_LOSSLESS;
#endif

    ctx->init_encode_params.presetGUID = selected->guid;
    ctx->flags                         = flags;
}
207 
208 #undef PRESET
209 #undef PRESET_ALIAS
210 
nvenc_print_driver_requirement(AVCodecContext * avctx,int level)211 static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
212 {
213 #if NVENCAPI_CHECK_VERSION(11, 1)
214     const char *minver = "(unknown)";
215 #elif NVENCAPI_CHECK_VERSION(11, 0)
216 # if defined(_WIN32) || defined(__CYGWIN__)
217     const char *minver = "456.71";
218 # else
219     const char *minver = "455.28";
220 # endif
221 #elif NVENCAPI_CHECK_VERSION(10, 0)
222 # if defined(_WIN32) || defined(__CYGWIN__)
223     const char *minver = "450.51";
224 # else
225     const char *minver = "445.87";
226 # endif
227 #elif NVENCAPI_CHECK_VERSION(9, 1)
228 # if defined(_WIN32) || defined(__CYGWIN__)
229     const char *minver = "436.15";
230 # else
231     const char *minver = "435.21";
232 # endif
233 #elif NVENCAPI_CHECK_VERSION(9, 0)
234 # if defined(_WIN32) || defined(__CYGWIN__)
235     const char *minver = "418.81";
236 # else
237     const char *minver = "418.30";
238 # endif
239 #elif NVENCAPI_CHECK_VERSION(8, 2)
240 # if defined(_WIN32) || defined(__CYGWIN__)
241     const char *minver = "397.93";
242 # else
243     const char *minver = "396.24";
244 #endif
245 #elif NVENCAPI_CHECK_VERSION(8, 1)
246 # if defined(_WIN32) || defined(__CYGWIN__)
247     const char *minver = "390.77";
248 # else
249     const char *minver = "390.25";
250 # endif
251 #else
252 # if defined(_WIN32) || defined(__CYGWIN__)
253     const char *minver = "378.66";
254 # else
255     const char *minver = "378.13";
256 # endif
257 #endif
258     av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
259 }
260 
nvenc_load_libraries(AVCodecContext * avctx)261 static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
262 {
263     NvencContext *ctx            = avctx->priv_data;
264     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
265     NVENCSTATUS err;
266     uint32_t nvenc_max_ver;
267     int ret;
268 
269     ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
270     if (ret < 0)
271         return ret;
272 
273     ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
274     if (ret < 0) {
275         nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
276         return ret;
277     }
278 
279     err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
280     if (err != NV_ENC_SUCCESS)
281         return nvenc_print_error(avctx, err, "Failed to query nvenc max version");
282 
283     av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
284 
285     if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
286         av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. "
287                "Required: %d.%d Found: %d.%d\n",
288                NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
289                nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
290         nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
291         return AVERROR(ENOSYS);
292     }
293 
294     dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
295 
296     err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
297     if (err != NV_ENC_SUCCESS)
298         return nvenc_print_error(avctx, err, "Failed to create nvenc instance");
299 
300     av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
301 
302     return 0;
303 }
304 
/**
 * Push ctx->cu_context as the current CUDA context.
 *
 * Does nothing and reports success when a D3D11 device is in use.
 *
 * @return 0 on success or when skipped, otherwise the CHECK_CU result
 */
static int nvenc_push_context(AVCodecContext *avctx)
{
    NvencContext *ctx            = avctx->priv_data;
    /* Must be called dl_fn: CHECK_CU expands to that name. */
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    if (!ctx->d3d11_device)
        return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));

    return 0;
}
315 
/**
 * Pop the current CUDA context; the popped handle is discarded.
 *
 * Does nothing and reports success when a D3D11 device is in use.
 *
 * @return 0 on success or when skipped, otherwise the CHECK_CU result
 */
static int nvenc_pop_context(AVCodecContext *avctx)
{
    NvencContext *ctx            = avctx->priv_data;
    /* Must be called dl_fn: CHECK_CU expands to that name. */
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    CUcontext popped;

    if (!ctx->d3d11_device)
        return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&popped));

    return 0;
}
327 
nvenc_open_session(AVCodecContext * avctx)328 static av_cold int nvenc_open_session(AVCodecContext *avctx)
329 {
330     NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
331     NvencContext *ctx = avctx->priv_data;
332     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
333     NVENCSTATUS ret;
334 
335     params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
336     params.apiVersion = NVENCAPI_VERSION;
337     if (ctx->d3d11_device) {
338         params.device     = ctx->d3d11_device;
339         params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
340     } else {
341         params.device     = ctx->cu_context;
342         params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
343     }
344 
345     ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
346     if (ret != NV_ENC_SUCCESS) {
347         ctx->nvencoder = NULL;
348         return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed");
349     }
350 
351     return 0;
352 }
353 
/**
 * Check whether the opened encoder offers the codec selected in
 * ctx->init_encode_params.encodeGUID.
 *
 * Queries the number of encode GUIDs, fetches them into a temporary array
 * and searches it for the configured GUID.
 *
 * @return 0 if the codec GUID is offered, AVERROR(ENOSYS) if it is not or
 *         if either query fails or reports no GUIDs, AVERROR(ENOMEM) if the
 *         temporary array cannot be allocated
 */
static int nvenc_check_codec_support(AVCodecContext *avctx)
{
    NvencContext *ctx                    = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    /* The API reports counts through uint32_t pointers, so use that type
     * rather than int to avoid passing an incompatible pointer. */
    uint32_t i, capacity, count = 0;
    NVENCSTATUS status;
    GUID *guids;
    int ret = AVERROR(ENOSYS);

    status = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);
    if (status != NV_ENC_SUCCESS || !count)
        return AVERROR(ENOSYS);

    /* av_malloc_array() rejects a count * size product that would overflow. */
    capacity = count;
    guids    = av_malloc_array(capacity, sizeof(*guids));
    if (!guids)
        return AVERROR(ENOMEM);

    status = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, capacity, &count);
    if (status != NV_ENC_SUCCESS)
        goto end;

    /* Never read past what was allocated, whatever count comes back. */
    if (count > capacity)
        count = capacity;

    for (i = 0; i < count; i++) {
        if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
            ret = 0;
            break;
        }
    }

end:
    av_free(guids);

    return ret;
}
389 
/**
 * Query a single encoder capability for the configured codec GUID.
 *
 * @param avctx  codec context holding the open NVENC session
 * @param cap    capability to query
 * @return the capability value reported by nvEncGetEncodeCaps, or 0 when
 *         the query fails
 */
static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *funcs = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_CAPS_PARAM query = {
        .version     = NV_ENC_CAPS_PARAM_VER,
        .capsToQuery = cap,
    };
    int value = 0;
    NVENCSTATUS status;

    status = funcs->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, &query, &value);

    /* A failed query is treated the same as "capability absent". */
    return status == NV_ENC_SUCCESS ? value : 0;
}
406 
/**
 * Verify that the opened encoder supports everything the current
 * configuration asks for.
 *
 * Checks, in order: codec GUID, YUV444, lossless, maximum width and height,
 * B-frame count, field encoding, 10 bit, lookahead, temporal AQ, weighted
 * prediction, CABAC, B-frames as references and multiple reference frames.
 * Each failed check logs a warning and stops the sequence. On success
 * ctx->support_dyn_bitrate is filled from the dynamic bitrate capability.
 *
 * @return 0 when all checks pass, the error from
 *         nvenc_check_codec_support() or AVERROR(ENOSYS) otherwise
 */
static int nvenc_check_capabilities(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int cap;
    int err = nvenc_check_codec_support(avctx);

    if (err < 0) {
        av_log(avctx, AV_LOG_WARNING, "Codec not supported\n");
        return err;
    }

    /* Every capability is queried unconditionally, in the same order,
     * whether or not the matching feature was requested. */
    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
    if (cap <= 0 && IS_YUV444(ctx->data_pix_fmt)) {
        av_log(avctx, AV_LOG_WARNING, "YUV444P not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if (cap <= 0 && (ctx->flags & NVENC_LOSSLESS)) {
        av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
    if (avctx->width > cap) {
        av_log(avctx, AV_LOG_WARNING, "Width %d exceeds %d\n",
               avctx->width, cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
    if (avctx->height > cap) {
        av_log(avctx, AV_LOG_WARNING, "Height %d exceeds %d\n",
               avctx->height, cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
    if (avctx->max_b_frames > cap) {
        av_log(avctx, AV_LOG_WARNING, "Max B-frames %d exceed %d\n",
               avctx->max_b_frames, cap);

        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
    if ((avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) && cap < 1) {
        av_log(avctx, AV_LOG_WARNING,
               "Interlaced encoding is not supported. Supported level: %d\n",
               cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
    if (cap <= 0 && IS_10BIT(ctx->data_pix_fmt)) {
        av_log(avctx, AV_LOG_WARNING, "10 bit encode not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
    if (cap <= 0 && ctx->rc_lookahead > 0) {
        av_log(avctx, AV_LOG_WARNING, "RC lookahead not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
    if (cap <= 0 && ctx->temporal_aq > 0) {
        av_log(avctx, AV_LOG_WARNING, "Temporal AQ not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
    if (cap <= 0 && ctx->weighted_pred > 0) {
        av_log (avctx, AV_LOG_WARNING, "Weighted Prediction not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CABAC);
    if (cap <= 0 && ctx->coder == NV_ENC_H264_ENTROPY_CODING_MODE_CABAC) {
        av_log(avctx, AV_LOG_WARNING, "CABAC entropy coding not supported\n");
        return AVERROR(ENOSYS);
    }

#ifdef NVENC_HAVE_BFRAME_REF_MODE
    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE);
    /* "Each" mode is accepted only for capability values 1 and 3; any
     * other enabled mode is accepted for every non-zero value. */
    if (ctx->b_ref_mode == NV_ENC_BFRAME_REF_MODE_EACH && !(cap == 1 || cap == 3)) {
        av_log(avctx, AV_LOG_WARNING, "Each B frame as reference is not supported\n");
        return AVERROR(ENOSYS);
    } else if (cap == 0 && ctx->b_ref_mode != NV_ENC_BFRAME_REF_MODE_DISABLED) {
        av_log(avctx, AV_LOG_WARNING, "B frames as references are not supported\n");
        return AVERROR(ENOSYS);
    }
#else
    if (ctx->b_ref_mode != 0) {
        av_log(avctx, AV_LOG_WARNING, "B frames as references need SDK 8.1 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES);
    if (cap <= 0 && avctx->refs != NV_ENC_NUM_REF_FRAMES_AUTOSELECT) {
        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames are not supported by the device\n");
        return AVERROR(ENOSYS);
    }
#else
    if (avctx->refs != 0) {
        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames need SDK 9.1 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

    ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);

    return 0;
}
523 
/**
 * Probe CUDA device number idx and, if it is the requested one, set it up
 * for encoding.
 *
 * Logs the device name and compute capability (at INFO level when listing
 * devices, VERBOSE otherwise). For a device that matches ctx->device (or
 * when any device is accepted) a CUDA context is created, an encode session
 * is opened and the capabilities are checked; on success the context and
 * session are left in ctx for the caller.
 *
 * @param avctx  codec context
 * @param idx    CUDA device ordinal to probe
 * @return 0 when the device was selected and is usable;
 *         -1 when the device is capable enough but is not the requested one;
 *         the CHECK_CU error when one of the initial device queries fails,
 *         or the push/pop error if that fails while tearing down the
 *         session; AVERROR(ENOSYS) for every other failure
 */
static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    /* Must be called dl_fn: CHECK_CU expands to that name. */
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    char name[128] = { 0};
    int major, minor, ret;
    CUdevice cu_device;
    int loglevel = AV_LOG_VERBOSE;

    /* Make the per-device lines visible by default when listing devices. */
    if (ctx->device == LIST_DEVICES)
        loglevel = AV_LOG_INFO;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
    if (ret < 0)
        return ret;

    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
    /* Compare the packed (major << 4) | minor value against the minimum. */
    if (((major << 4) | minor) < NVENC_CAP) {
        av_log(avctx, loglevel, "does not support NVENC\n");
        goto fail;
    }

    /* Not the requested device: stop before creating any context. This also
     * covers the listing mode, whose device value matches no index. */
    if (ctx->device != idx && ctx->device != ANY_DEVICE)
        return -1;

    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
    if (ret < 0)
        goto fail;

    ctx->cu_context = ctx->cu_context_internal;
    ctx->cu_stream = NULL;

    /* NOTE(review): presumably cuCtxCreate leaves the new context current,
     * hence the pop before opening the session -- confirm against the CUDA
     * driver API documentation. */
    if ((ret = nvenc_pop_context(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_open_session(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_check_capabilities(avctx)) < 0)
        goto fail3;

    av_log(avctx, loglevel, "supports NVENC\n");

    dl_fn->nvenc_device_count++;

    /* Keep the context and session: this is the device to encode on. */
    if (ctx->device == idx || ctx->device == ANY_DEVICE)
        return 0;

    /* Cleanup ladder: fail3 destroys the session (with the context pushed),
     * fail2 destroys the CUDA context, fail only reports the error. */
fail3:
    if ((ret = nvenc_push_context(avctx)) < 0)
        return ret;

    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    if ((ret = nvenc_pop_context(avctx)) < 0)
        return ret;

fail2:
    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
    ctx->cu_context_internal = NULL;

fail:
    return AVERROR(ENOSYS);
}
598 
/**
 * Select the codec GUID and preset, then bind the encoder to a device.
 *
 * Two paths exist:
 *  - a hardware pixel format or a user-supplied hw_frames_ctx/hw_device_ctx:
 *    the CUDA context (and stream) or the D3D11 device is taken from that
 *    context, a session is opened and the capabilities are checked;
 *  - otherwise CUDA is initialised here and the devices are probed one by
 *    one with nvenc_check_device().
 *
 * @return 0 on success; AVERROR_BUG for an unexpected codec id;
 *         AVERROR_EXIT after listing devices; another negative AVERROR code
 *         on failure
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx            = avctx->priv_data;
    /* Must be called dl_fn: CHECK_CU expands to that name. */
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
        break;
    case AV_CODEC_ID_HEVC:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
        break;
    default:
        return AVERROR_BUG;
    }

    nvenc_map_preset(ctx);

    if (ctx->flags & NVENC_DEPRECATED_PRESET)
        av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n");

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
        /* Path 1: the device comes from a caller-provided hardware context. */
        AVHWFramesContext   *frames_ctx;
        AVHWDeviceContext   *hwdev_ctx;
        AVCUDADeviceContext *cuda_device_hwctx = NULL;
#if CONFIG_D3D11VA
        AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
#endif
        int ret;

        /* A frames context takes precedence over a bare device context. */
        if (avctx->hw_frames_ctx) {
            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
            if (frames_ctx->format == AV_PIX_FMT_CUDA)
                cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
#if CONFIG_D3D11VA
            else if (frames_ctx->format == AV_PIX_FMT_D3D11)
                d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
#endif
            else
                return AVERROR(EINVAL);
        } else if (avctx->hw_device_ctx) {
            hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
            if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
                cuda_device_hwctx = hwdev_ctx->hwctx;
#if CONFIG_D3D11VA
            else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
                d3d11_device_hwctx = hwdev_ctx->hwctx;
#endif
            else
                return AVERROR(EINVAL);
        } else {
            /* Hardware pixel format requested without any hardware context. */
            return AVERROR(EINVAL);
        }

        if (cuda_device_hwctx) {
            ctx->cu_context = cuda_device_hwctx->cuda_ctx;
            ctx->cu_stream = cuda_device_hwctx->stream;
        }
#if CONFIG_D3D11VA
        else if (d3d11_device_hwctx) {
            /* Take our own reference on the borrowed D3D11 device. */
            ctx->d3d11_device = d3d11_device_hwctx->device;
            ID3D11Device_AddRef(ctx->d3d11_device);
        }
#endif

        ret = nvenc_open_session(avctx);
        if (ret < 0)
            return ret;

        ret = nvenc_check_capabilities(avctx);
        if (ret < 0) {
            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
            return ret;
        }
    } else {
        /* Path 2: enumerate the CUDA devices and pick one ourselves. */
        int i, nb_devices = 0;

        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
            return AVERROR_UNKNOWN;

        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
            return AVERROR_UNKNOWN;

        if (!nb_devices) {
            av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
                return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);

        dl_fn->nvenc_device_count = 0;
        /* Stop at the first usable device, except when only listing. */
        for (i = 0; i < nb_devices; ++i) {
            if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
                return 0;
        }

        if (ctx->device == LIST_DEVICES)
            return AVERROR_EXIT;

        if (!dl_fn->nvenc_device_count) {
            av_log(avctx, AV_LOG_FATAL, "No capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        /* NOTE(review): nvenc_check_device() appears to increment
         * nvenc_device_count only on the path where it then returns 0, which
         * ends the loop above -- so this message looks unreachable and an
         * out-of-range device index would be reported as "No capable devices
         * found" instead. Confirm and consider checking the index directly. */
        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
        return AVERROR(EINVAL);
    }

    return 0;
}
709 
set_constqp(AVCodecContext * avctx)710 static av_cold void set_constqp(AVCodecContext *avctx)
711 {
712     NvencContext *ctx = avctx->priv_data;
713     NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
714 
715     rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
716 
717     if (ctx->init_qp_p >= 0) {
718         rc->constQP.qpInterP = ctx->init_qp_p;
719         if (ctx->init_qp_i >= 0 && ctx->init_qp_b >= 0) {
720             rc->constQP.qpIntra = ctx->init_qp_i;
721             rc->constQP.qpInterB = ctx->init_qp_b;
722         } else if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
723             rc->constQP.qpIntra = av_clip(
724                 rc->constQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
725             rc->constQP.qpInterB = av_clip(
726                 rc->constQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
727         } else {
728             rc->constQP.qpIntra = rc->constQP.qpInterP;
729             rc->constQP.qpInterB = rc->constQP.qpInterP;
730         }
731     } else if (ctx->cqp >= 0) {
732         rc->constQP.qpInterP = rc->constQP.qpInterB = rc->constQP.qpIntra = ctx->cqp;
733         if (avctx->b_quant_factor != 0.0)
734             rc->constQP.qpInterB = av_clip(ctx->cqp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
735         if (avctx->i_quant_factor != 0.0)
736             rc->constQP.qpIntra = av_clip(ctx->cqp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
737     }
738 
739     avctx->qmin = -1;
740     avctx->qmax = -1;
741 }
742 
set_vbr(AVCodecContext * avctx)743 static av_cold void set_vbr(AVCodecContext *avctx)
744 {
745     NvencContext *ctx = avctx->priv_data;
746     NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
747     int qp_inter_p;
748 
749     if (avctx->qmin >= 0 && avctx->qmax >= 0) {
750         rc->enableMinQP = 1;
751         rc->enableMaxQP = 1;
752 
753         rc->minQP.qpInterB = avctx->qmin;
754         rc->minQP.qpInterP = avctx->qmin;
755         rc->minQP.qpIntra  = avctx->qmin;
756 
757         rc->maxQP.qpInterB = avctx->qmax;
758         rc->maxQP.qpInterP = avctx->qmax;
759         rc->maxQP.qpIntra = avctx->qmax;
760 
761         qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
762     } else if (avctx->qmin >= 0) {
763         rc->enableMinQP = 1;
764 
765         rc->minQP.qpInterB = avctx->qmin;
766         rc->minQP.qpInterP = avctx->qmin;
767         rc->minQP.qpIntra = avctx->qmin;
768 
769         qp_inter_p = avctx->qmin;
770     } else {
771         qp_inter_p = 26; // default to 26
772     }
773 
774     rc->enableInitialRCQP = 1;
775 
776     if (ctx->init_qp_p < 0) {
777         rc->initialRCQP.qpInterP  = qp_inter_p;
778     } else {
779         rc->initialRCQP.qpInterP = ctx->init_qp_p;
780     }
781 
782     if (ctx->init_qp_i < 0) {
783         if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
784             rc->initialRCQP.qpIntra = av_clip(
785                 rc->initialRCQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
786         } else {
787             rc->initialRCQP.qpIntra = rc->initialRCQP.qpInterP;
788         }
789     } else {
790         rc->initialRCQP.qpIntra = ctx->init_qp_i;
791     }
792 
793     if (ctx->init_qp_b < 0) {
794         if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
795             rc->initialRCQP.qpInterB = av_clip(
796                 rc->initialRCQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
797         } else {
798             rc->initialRCQP.qpInterB = rc->initialRCQP.qpInterP;
799         }
800     } else {
801         rc->initialRCQP.qpInterB = ctx->init_qp_b;
802     }
803 }
804 
set_lossless(AVCodecContext * avctx)805 static av_cold void set_lossless(AVCodecContext *avctx)
806 {
807     NvencContext *ctx = avctx->priv_data;
808     NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
809 
810     rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
811     rc->constQP.qpInterB = 0;
812     rc->constQP.qpInterP = 0;
813     rc->constQP.qpIntra  = 0;
814 
815     avctx->qmin = -1;
816     avctx->qmax = -1;
817 }
818 
/**
 * Apply the rate-control mode explicitly selected in ctx->rc.
 *
 * - CONSTQP: delegate to set_constqp() and return.
 * - VBR_MINQP without qmin: warn, apply set_vbr() and return without
 *   storing ctx->rc as the rate-control mode.
 * - VBR_MINQP (with qmin), VBR_HQ, VBR: apply set_vbr().
 * - CBR modes and any other value: no extra setup.
 *
 * Except for the two early returns, ctx->rc is finally stored in
 * rc->rateControlMode.
 */
static void nvenc_override_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx    = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    if (ctx->rc == NV_ENC_PARAMS_RC_CONSTQP) {
        set_constqp(avctx);
        return;
    }

    if (ctx->rc == NV_ENC_PARAMS_RC_VBR_MINQP && avctx->qmin < 0) {
        av_log(avctx, AV_LOG_WARNING,
               "The variable bitrate rate-control requires "
               "the 'qmin' option set.\n");
        set_vbr(avctx);
        return;
    }

    if (ctx->rc == NV_ENC_PARAMS_RC_VBR_MINQP ||
        ctx->rc == NV_ENC_PARAMS_RC_VBR_HQ    ||
        ctx->rc == NV_ENC_PARAMS_RC_VBR)
        set_vbr(avctx);

    rc->rateControlMode = ctx->rc;
}
849 
nvenc_recalc_surfaces(AVCodecContext * avctx)850 static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
851 {
852     NvencContext *ctx = avctx->priv_data;
853     // default minimum of 4 surfaces
854     // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
855     // another multiply by 2 to avoid blocking next PBB group
856     int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
857 
858     // lookahead enabled
859     if (ctx->rc_lookahead > 0) {
860         // +1 is to account for lkd_bound calculation later
861         // +4 is to allow sufficient pipelining with lookahead
862         nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
863         if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
864         {
865             av_log(avctx, AV_LOG_WARNING,
866                    "Defined rc_lookahead requires more surfaces, "
867                    "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
868         }
869         ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
870     } else {
871         if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
872         {
873             av_log(avctx, AV_LOG_WARNING,
874                    "Defined b-frame requires more surfaces, "
875                    "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
876             ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
877         }
878         else if (ctx->nb_surfaces <= 0)
879             ctx->nb_surfaces = nb_surfaces;
880         // otherwise use user specified value
881     }
882 
883     ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
884     ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
885 
886     return 0;
887 }
888 
/**
 * Translate the user's rate-control related options (bitrates, QP, multipass,
 * AQ, lookahead, constant-quality target, ...) into
 * ctx->encode_config.rcParams.
 *
 * Besides the encoder configuration, this may also modify ctx->cqp, ctx->rc,
 * ctx->twopass, avctx->rc_buffer_size and avctx->bit_rate.
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    if (avctx->global_quality > 0)
        av_log(avctx, AV_LOG_WARNING, "Using global_quality with nvenc is deprecated. Use qp instead.\n");

    /* global_quality only acts as a fallback when no constant QP was set. */
    if (ctx->cqp < 0 && avctx->global_quality > 0)
        ctx->cqp = avctx->global_quality;

    /* A user bitrate replaces the average bitrate already in the config;
     * without one, an average bitrate already present seeds maxBitRate. */
    if (avctx->bit_rate > 0) {
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
    }

    if (avctx->rc_max_rate > 0)
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;

#ifdef NVENC_HAVE_MULTIPASS
    /* Start from the multipass option; the one-pass/two-pass flags and the
     * twopass option override it, with two-pass winning if both apply. */
    ctx->encode_config.rcParams.multiPass = ctx->multipass;

    if (ctx->flags & NVENC_ONE_PASS)
        ctx->encode_config.rcParams.multiPass = NV_ENC_MULTI_PASS_DISABLED;
    if (ctx->flags & NVENC_TWO_PASSES || ctx->twopass > 0)
        ctx->encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;

    /* No explicit rc mode: derive one from the cbr / cqp / quality options. */
    if (ctx->rc < 0) {
        if (ctx->cbr) {
            ctx->rc = NV_ENC_PARAMS_RC_CBR;
        } else if (ctx->cqp >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->quality >= 0.0f) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR;
        }
    }
#else
    /* No explicit rc mode: derive one from the flags and the
     * twopass / cbr / cqp / qmin / qmax options. */
    if (ctx->rc < 0) {
        if (ctx->flags & NVENC_ONE_PASS)
            ctx->twopass = 0;
        if (ctx->flags & NVENC_TWO_PASSES)
            ctx->twopass = 1;

        /* twopass still unset: enable it only for the low-latency flag. */
        if (ctx->twopass < 0)
            ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;

        if (ctx->cbr) {
            if (ctx->twopass) {
                ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
            } else {
                ctx->rc = NV_ENC_PARAMS_RC_CBR;
            }
        } else if (ctx->cqp >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->twopass) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
        } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
        }
    }
#endif

    /* Warn about rc values carrying the deprecation marker bit, then strip
     * the bit so only the actual mode value remains. */
    if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
        av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
        av_log(avctx, AV_LOG_WARNING, "Use -rc constqp/cbr/vbr, -tune and -multipass instead.\n");

        ctx->rc &= ~RC_MODE_DEPRECATED;
    }

#ifdef NVENC_HAVE_LDKFS
    if (ctx->ldkfs)
         ctx->encode_config.rcParams.lowDelayKeyFrameScale = ctx->ldkfs;
#endif

    /* Priority: lossless flag, then an explicit/derived rc mode, and VBR as
     * the default when neither is available. */
    if (ctx->flags & NVENC_LOSSLESS) {
        set_lossless(avctx);
    } else if (ctx->rc >= 0) {
        nvenc_override_rate_control(avctx);
    } else {
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
        set_vbr(avctx);
    }

    /* VBV buffer size: the user value if given, otherwise twice the average
     * bitrate, which is also written back to avctx->rc_buffer_size. */
    if (avctx->rc_buffer_size > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        avctx->rc_buffer_size = ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
    }

    if (ctx->aq) {
        ctx->encode_config.rcParams.enableAQ   = 1;
        ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
        av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
    }

    if (ctx->temporal_aq) {
        ctx->encode_config.rcParams.enableTemporalAQ = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
    }

    if (ctx->rc_lookahead > 0) {
        /* Upper bound for the lookahead depth, derived from the number of
         * surfaces, the async depth and the P-frame interval. */
        int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
                        ctx->encode_config.frameIntervalP - 4;

        if (lkd_bound < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "Lookahead not enabled. Increase buffer delay (-delay).\n");
        } else {
            ctx->encode_config.rcParams.enableLookahead = 1;
            ctx->encode_config.rcParams.lookaheadDepth  = av_clip(ctx->rc_lookahead, 0, lkd_bound);
            ctx->encode_config.rcParams.disableIadapt   = ctx->no_scenecut;
            ctx->encode_config.rcParams.disableBadapt   = !ctx->b_adapt;
            av_log(avctx, AV_LOG_VERBOSE,
                   "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
                   ctx->encode_config.rcParams.lookaheadDepth,
                   ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
                   ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
        }
    }

    if (ctx->strict_gop) {
        ctx->encode_config.rcParams.strictGOPTarget = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
    }

    if (ctx->nonref_p)
        ctx->encode_config.rcParams.enableNonRefP = 1;

    if (ctx->zerolatency)
        ctx->encode_config.rcParams.zeroReorderDelay = 1;

    /* Any non-zero quality value takes this path. */
    if (ctx->quality) {
        //convert from float to fixed point 8.8
        int tmp_quality = (int)(ctx->quality * 256.0f);
        /* Integer part and fractional part are stored in separate fields. */
        ctx->encode_config.rcParams.targetQuality = (uint8_t)(tmp_quality >> 8);
        ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);

        av_log(avctx, AV_LOG_VERBOSE, "CQ(%d) mode enabled.\n", tmp_quality);

        //CQ mode shall discard avg bitrate & honor max bitrate;
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate = 0;
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
    }
}
1033 
nvenc_setup_h264_config(AVCodecContext * avctx)1034 static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
1035 {
1036     NvencContext *ctx                      = avctx->priv_data;
1037     NV_ENC_CONFIG *cc                      = &ctx->encode_config;
1038     NV_ENC_CONFIG_H264 *h264               = &cc->encodeCodecConfig.h264Config;
1039     NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
1040 
1041     vui->colourMatrix = avctx->colorspace;
1042     vui->colourPrimaries = avctx->color_primaries;
1043     vui->transferCharacteristics = avctx->color_trc;
1044     vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1045         || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1046 
1047     vui->colourDescriptionPresentFlag =
1048         (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
1049 
1050     vui->videoSignalTypePresentFlag =
1051         (vui->colourDescriptionPresentFlag
1052         || vui->videoFormat != 5
1053         || vui->videoFullRangeFlag != 0);
1054 
1055     h264->sliceMode = 3;
1056     h264->sliceModeData = 1;
1057 
1058     h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1059     h264->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1060     h264->outputAUD     = ctx->aud;
1061 
1062     if (ctx->dpb_size >= 0) {
1063         /* 0 means "let the hardware decide" */
1064         h264->maxNumRefFrames = ctx->dpb_size;
1065     }
1066     if (avctx->gop_size >= 0) {
1067         h264->idrPeriod = cc->gopLength;
1068     }
1069 
1070     if (IS_CBR(cc->rcParams.rateControlMode)) {
1071         h264->outputBufferingPeriodSEI = 1;
1072     }
1073 
1074     h264->outputPictureTimingSEI = 1;
1075 
1076     if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ||
1077         cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ ||
1078         cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) {
1079         h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
1080         h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
1081     }
1082 
1083     if (ctx->flags & NVENC_LOSSLESS) {
1084         h264->qpPrimeYZeroTransformBypassFlag = 1;
1085     } else {
1086         switch(ctx->profile) {
1087         case NV_ENC_H264_PROFILE_BASELINE:
1088             cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
1089             avctx->profile = FF_PROFILE_H264_BASELINE;
1090             break;
1091         case NV_ENC_H264_PROFILE_MAIN:
1092             cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
1093             avctx->profile = FF_PROFILE_H264_MAIN;
1094             break;
1095         case NV_ENC_H264_PROFILE_HIGH:
1096             cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
1097             avctx->profile = FF_PROFILE_H264_HIGH;
1098             break;
1099         case NV_ENC_H264_PROFILE_HIGH_444P:
1100             cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1101             avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1102             break;
1103         }
1104     }
1105 
1106     // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
1107     if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
1108         cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1109         avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1110     }
1111 
1112     h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
1113 
1114     h264->level = ctx->level;
1115 
1116     if (ctx->coder >= 0)
1117         h264->entropyCodingMode = ctx->coder;
1118 
1119 #ifdef NVENC_HAVE_BFRAME_REF_MODE
1120     h264->useBFramesAsRef = ctx->b_ref_mode;
1121 #endif
1122 
1123 #ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1124     h264->numRefL0 = avctx->refs;
1125     h264->numRefL1 = avctx->refs;
1126 #endif
1127 
1128     return 0;
1129 }
1130 
nvenc_setup_hevc_config(AVCodecContext * avctx)1131 static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
1132 {
1133     NvencContext *ctx                      = avctx->priv_data;
1134     NV_ENC_CONFIG *cc                      = &ctx->encode_config;
1135     NV_ENC_CONFIG_HEVC *hevc               = &cc->encodeCodecConfig.hevcConfig;
1136     NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
1137 
1138     vui->colourMatrix = avctx->colorspace;
1139     vui->colourPrimaries = avctx->color_primaries;
1140     vui->transferCharacteristics = avctx->color_trc;
1141     vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1142         || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1143 
1144     vui->colourDescriptionPresentFlag =
1145         (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
1146 
1147     vui->videoSignalTypePresentFlag =
1148         (vui->colourDescriptionPresentFlag
1149         || vui->videoFormat != 5
1150         || vui->videoFullRangeFlag != 0);
1151 
1152     hevc->sliceMode = 3;
1153     hevc->sliceModeData = 1;
1154 
1155     hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1156     hevc->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1157     hevc->outputAUD     = ctx->aud;
1158 
1159     if (ctx->dpb_size >= 0) {
1160         /* 0 means "let the hardware decide" */
1161         hevc->maxNumRefFramesInDPB = ctx->dpb_size;
1162     }
1163     if (avctx->gop_size >= 0) {
1164         hevc->idrPeriod = cc->gopLength;
1165     }
1166 
1167     if (IS_CBR(cc->rcParams.rateControlMode)) {
1168         hevc->outputBufferingPeriodSEI = 1;
1169     }
1170 
1171     hevc->outputPictureTimingSEI = 1;
1172 
1173     switch (ctx->profile) {
1174     case NV_ENC_HEVC_PROFILE_MAIN:
1175         cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
1176         avctx->profile  = FF_PROFILE_HEVC_MAIN;
1177         break;
1178     case NV_ENC_HEVC_PROFILE_MAIN_10:
1179         cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1180         avctx->profile  = FF_PROFILE_HEVC_MAIN_10;
1181         break;
1182     case NV_ENC_HEVC_PROFILE_REXT:
1183         cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1184         avctx->profile  = FF_PROFILE_HEVC_REXT;
1185         break;
1186     }
1187 
1188     // force setting profile as main10 if input is 10 bit
1189     if (IS_10BIT(ctx->data_pix_fmt)) {
1190         cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1191         avctx->profile = FF_PROFILE_HEVC_MAIN_10;
1192     }
1193 
1194     // force setting profile as rext if input is yuv444
1195     if (IS_YUV444(ctx->data_pix_fmt)) {
1196         cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1197         avctx->profile = FF_PROFILE_HEVC_REXT;
1198     }
1199 
1200     hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
1201 
1202     hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
1203 
1204     hevc->level = ctx->level;
1205 
1206     hevc->tier = ctx->tier;
1207 
1208 #ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
1209     hevc->useBFramesAsRef = ctx->b_ref_mode;
1210 #endif
1211 
1212 #ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1213     hevc->numRefL0 = avctx->refs;
1214     hevc->numRefL1 = avctx->refs;
1215 #endif
1216 
1217     return 0;
1218 }
1219 
nvenc_setup_codec_config(AVCodecContext * avctx)1220 static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx)
1221 {
1222     switch (avctx->codec->id) {
1223     case AV_CODEC_ID_H264:
1224         return nvenc_setup_h264_config(avctx);
1225     case AV_CODEC_ID_HEVC:
1226         return nvenc_setup_hevc_config(avctx);
1227     /* Earlier switch/case will return if unknown codec is passed. */
1228     }
1229 
1230     return 0;
1231 }
1232 
/**
 * Compute the display aspect ratio of the stream.
 *
 * The frame dimensions are scaled by the sample aspect ratio (when both of
 * its terms are positive) and the resulting fraction is reduced with
 * av_reduce(), limiting each term to 1024 * 1024.
 *
 * @param avctx codec context providing width, height and sample_aspect_ratio
 * @param dw    receives the reduced display aspect ratio numerator
 * @param dh    receives the reduced display aspect ratio denominator
 */
static void compute_dar(AVCodecContext *avctx, int *dw, int *dh) {
    /* 64-bit intermediates: the products below can exceed the range of int
     * for large dimensions or SAR terms, which would be signed overflow.
     * av_reduce() accepts 64-bit inputs. */
    int64_t sw = avctx->width;
    int64_t sh = avctx->height;

    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
        sw *= avctx->sample_aspect_ratio.num;
        sh *= avctx->sample_aspect_ratio.den;
    }

    av_reduce(dw, dh, sw, sh, 1024 * 1024);
}
1246 
/**
 * Build the encoder configuration and initialise the NVENC encoder session.
 *
 * Steps, in order: fetch the preset configuration and copy it into
 * ctx->encode_config, fill in geometry / aspect ratio / frame rate, apply
 * GOP and B-frame settings, size the surface pool, set up rate control and
 * the codec-specific config, call nvEncInitializeEncoder() with the context
 * pushed, and finally export bitrate information as CPB side data.
 *
 * @return 0 on success, a negative AVERROR code (or the value produced by
 *         nvenc_print_error()) on failure
 */
static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PRESET_CONFIG preset_config = { 0 };
    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
    AVCPBProperties *cpb_props;
    int res = 0;
    int dw, dh;

    ctx->encode_config.version = NV_ENC_CONFIG_VER;
    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;

    ctx->init_encode_params.encodeHeight = avctx->height;
    ctx->init_encode_params.encodeWidth = avctx->width;

    ctx->init_encode_params.encodeConfig = &ctx->encode_config;

    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;

#ifdef NVENC_HAVE_NEW_PRESETS
    /* Tuning info comes from the option, but the lossless / low-latency
     * flags take precedence (lossless first). */
    ctx->init_encode_params.tuningInfo = ctx->tuning_info;

    if (ctx->flags & NVENC_LOSSLESS)
        ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS;
    else if (ctx->flags & NVENC_LOWLATENCY)
        ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;

    nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder,
        ctx->init_encode_params.encodeGUID,
        ctx->init_encode_params.presetGUID,
        ctx->init_encode_params.tuningInfo,
        &preset_config);
#else
    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
        ctx->init_encode_params.encodeGUID,
        ctx->init_encode_params.presetGUID,
        &preset_config);
#endif
    if (nv_status != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");

    /* The preset becomes the baseline config; everything below adjusts it. */
    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));

    /* The copy above overwrote the version field, so set it again. */
    ctx->encode_config.version = NV_ENC_CONFIG_VER;

    compute_dar(avctx, &dw, &dh);
    ctx->init_encode_params.darHeight = dh;
    ctx->init_encode_params.darWidth = dw;

    /* Prefer an explicit frame rate; otherwise derive it from the inverse
     * of the time base scaled by ticks_per_frame. */
    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
        ctx->init_encode_params.frameRateNum = avctx->framerate.num;
        ctx->init_encode_params.frameRateDen = avctx->framerate.den;
    } else {
        ctx->init_encode_params.frameRateNum = avctx->time_base.den;
        ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
    }

    ctx->init_encode_params.enableEncodeAsync = 0;
    ctx->init_encode_params.enablePTD = 1;

#ifdef NVENC_HAVE_NEW_PRESETS
    /* If lookahead isn't set from CLI, use value from preset.
     * P6 & P7 presets may enable lookahead for better quality.
     * */
    if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead)
        ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth;
#endif

    if (ctx->weighted_pred == 1)
        ctx->init_encode_params.enableWeightedPrediction = 1;

    /* Blu-ray compatibility forces AUD output and caps the DPB size at 6
     * and the number of B-frames at 3; HEVC also gets a fixed level/tier. */
    if (ctx->bluray_compat) {
        ctx->aud = 1;
        ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6);
        avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            /* maximum level depends on used resolution */
            break;
        case AV_CODEC_ID_HEVC:
            ctx->level = NV_ENC_LEVEL_HEVC_51;
            ctx->tier = NV_ENC_TIER_HEVC_HIGH;
            break;
        }
    }

    /* gop_size > 0: regular GOP; gop_size == 0: intra-only; a negative
     * gop_size keeps the values taken from the preset. */
    if (avctx->gop_size > 0) {
        if (avctx->max_b_frames >= 0) {
            /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
        }

        ctx->encode_config.gopLength = avctx->gop_size;
    } else if (avctx->gop_size == 0) {
        ctx->encode_config.frameIntervalP = 0;
        ctx->encode_config.gopLength = 1;
    }

    /* Surface count and async depth are finalised first because
     * nvenc_setup_rate_control() reads both for its lookahead bound. */
    nvenc_recalc_surfaces(avctx);

    nvenc_setup_rate_control(avctx);

    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
    } else {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
    }

    res = nvenc_setup_codec_config(avctx);
    if (res)
        return res;

    /* The driver calls below are made with the context pushed; every exit
     * path after this point pops it again. */
    res = nvenc_push_context(avctx);
    if (res < 0)
        return res;

    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
    if (nv_status != NV_ENC_SUCCESS) {
        nvenc_pop_context(avctx);
        return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
    }

#ifdef NVENC_HAVE_CUSTREAM_PTR
    /* The same stream is passed for both input and output. */
    if (ctx->cu_context) {
        nv_status = p_nvenc->nvEncSetIOCudaStreams(ctx->nvencoder, &ctx->cu_stream, &ctx->cu_stream);
        if (nv_status != NV_ENC_SUCCESS) {
            nvenc_pop_context(avctx);
            return nvenc_print_error(avctx, nv_status, "SetIOCudaStreams failed");
        }
    }
#endif

    res = nvenc_pop_context(avctx);
    if (res < 0)
        return res;

    if (ctx->encode_config.frameIntervalP > 1)
        avctx->has_b_frames = 2;

    /* Report the effective average bitrate back to the codec context. */
    if (ctx->encode_config.rcParams.averageBitRate > 0)
        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;

    cpb_props = ff_add_cpb_side_data(avctx);
    if (!cpb_props)
        return AVERROR(ENOMEM);
    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
    cpb_props->avg_bitrate = avctx->bit_rate;
    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;

    return 0;
}
1402 
nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)1403 static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)
1404 {
1405     switch (pix_fmt) {
1406     case AV_PIX_FMT_YUV420P:
1407         return NV_ENC_BUFFER_FORMAT_YV12_PL;
1408     case AV_PIX_FMT_NV12:
1409         return NV_ENC_BUFFER_FORMAT_NV12_PL;
1410     case AV_PIX_FMT_P010:
1411     case AV_PIX_FMT_P016:
1412         return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
1413     case AV_PIX_FMT_YUV444P:
1414         return NV_ENC_BUFFER_FORMAT_YUV444_PL;
1415     case AV_PIX_FMT_YUV444P16:
1416         return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
1417     case AV_PIX_FMT_0RGB32:
1418         return NV_ENC_BUFFER_FORMAT_ARGB;
1419     case AV_PIX_FMT_0BGR32:
1420         return NV_ENC_BUFFER_FORMAT_ABGR;
1421     default:
1422         return NV_ENC_BUFFER_FORMAT_UNDEFINED;
1423     }
1424 }
1425 
nvenc_alloc_surface(AVCodecContext * avctx,int idx)1426 static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
1427 {
1428     NvencContext *ctx = avctx->priv_data;
1429     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1430     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1431     NvencSurface* tmp_surface = &ctx->surfaces[idx];
1432 
1433     NVENCSTATUS nv_status;
1434     NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
1435     allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
1436 
1437     if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1438         ctx->surfaces[idx].in_ref = av_frame_alloc();
1439         if (!ctx->surfaces[idx].in_ref)
1440             return AVERROR(ENOMEM);
1441     } else {
1442         NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
1443 
1444         ctx->surfaces[idx].format = nvenc_map_buffer_format(ctx->data_pix_fmt);
1445         if (ctx->surfaces[idx].format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
1446             av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
1447                    av_get_pix_fmt_name(ctx->data_pix_fmt));
1448             return AVERROR(EINVAL);
1449         }
1450 
1451         allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
1452         allocSurf.width = avctx->width;
1453         allocSurf.height = avctx->height;
1454         allocSurf.bufferFmt = ctx->surfaces[idx].format;
1455 
1456         nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
1457         if (nv_status != NV_ENC_SUCCESS) {
1458             return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
1459         }
1460 
1461         ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
1462         ctx->surfaces[idx].width = allocSurf.width;
1463         ctx->surfaces[idx].height = allocSurf.height;
1464     }
1465 
1466     nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
1467     if (nv_status != NV_ENC_SUCCESS) {
1468         int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
1469         if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
1470             p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
1471         av_frame_free(&ctx->surfaces[idx].in_ref);
1472         return err;
1473     }
1474 
1475     ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
1476 
1477     av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
1478 
1479     return 0;
1480 }
1481 
nvenc_setup_surfaces(AVCodecContext * avctx)1482 static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
1483 {
1484     NvencContext *ctx = avctx->priv_data;
1485     int i, res = 0, res2;
1486 
1487     ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
1488     if (!ctx->surfaces)
1489         return AVERROR(ENOMEM);
1490 
1491     ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
1492     if (!ctx->timestamp_list)
1493         return AVERROR(ENOMEM);
1494 
1495     ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
1496     if (!ctx->unused_surface_queue)
1497         return AVERROR(ENOMEM);
1498 
1499     ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
1500     if (!ctx->output_surface_queue)
1501         return AVERROR(ENOMEM);
1502     ctx->output_surface_ready_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
1503     if (!ctx->output_surface_ready_queue)
1504         return AVERROR(ENOMEM);
1505 
1506     res = nvenc_push_context(avctx);
1507     if (res < 0)
1508         return res;
1509 
1510     for (i = 0; i < ctx->nb_surfaces; i++) {
1511         if ((res = nvenc_alloc_surface(avctx, i)) < 0)
1512             goto fail;
1513     }
1514 
1515 fail:
1516     res2 = nvenc_pop_context(avctx);
1517     if (res2 < 0)
1518         return res2;
1519 
1520     return res;
1521 }
1522 
nvenc_setup_extradata(AVCodecContext * avctx)1523 static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
1524 {
1525     NvencContext *ctx = avctx->priv_data;
1526     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1527     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1528 
1529     NVENCSTATUS nv_status;
1530     uint32_t outSize = 0;
1531     char tmpHeader[256];
1532     NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
1533     payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1534 
1535     payload.spsppsBuffer = tmpHeader;
1536     payload.inBufferSize = sizeof(tmpHeader);
1537     payload.outSPSPPSPayloadSize = &outSize;
1538 
1539     nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
1540     if (nv_status != NV_ENC_SUCCESS) {
1541         return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
1542     }
1543 
1544     avctx->extradata_size = outSize;
1545     avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
1546 
1547     if (!avctx->extradata) {
1548         return AVERROR(ENOMEM);
1549     }
1550 
1551     memcpy(avctx->extradata, tmpHeader, outSize);
1552 
1553     return 0;
1554 }
1555 
/**
 * Tear down the encoder: flush it, free the FIFOs, unmap/unregister
 * hardware frames, destroy all surfaces, destroy the encoder session and
 * the internally created CUDA context, release the D3D11 device (if built
 * with D3D11VA support) and unload the dynamically loaded functions.
 *
 * Note that both early returns (failure to push or pop the context) skip
 * all cleanup steps that follow them.
 *
 * @return 0 on success, or the negative error from pushing/popping the
 *         context
 */
av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
{
    NvencContext *ctx               = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    int i, res;

    /* the encoder has to be flushed before it can be closed */
    if (ctx->nvencoder) {
        NV_ENC_PIC_PARAMS params        = { .version        = NV_ENC_PIC_PARAMS_VER,
                                            .encodePicFlags = NV_ENC_PIC_FLAG_EOS };

        /* The context stays pushed until the encoder is destroyed below. */
        res = nvenc_push_context(avctx);
        if (res < 0)
            return res;

        /* The result of the end-of-stream submission is not checked. */
        p_nvenc->nvEncEncodePicture(ctx->nvencoder, &params);
    }

    av_fifo_freep(&ctx->timestamp_list);
    av_fifo_freep(&ctx->output_surface_ready_queue);
    av_fifo_freep(&ctx->output_surface_queue);
    av_fifo_freep(&ctx->unused_surface_queue);

    /* Hardware input: unmap anything still mapped, then unregister. */
    if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) {
        for (i = 0; i < ctx->nb_registered_frames; i++) {
            if (ctx->registered_frames[i].mapped)
                p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[i].in_map.mappedResource);
            if (ctx->registered_frames[i].regptr)
                p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
        }
        ctx->nb_registered_frames = 0;
    }

    /* Every surface slot is visited; input buffers exist only for software
     * input, the frame reference and bitstream buffer are handled for all. */
    if (ctx->surfaces) {
        for (i = 0; i < ctx->nb_surfaces; ++i) {
            if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
                p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
            av_frame_free(&ctx->surfaces[i].in_ref);
            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
        }
    }
    av_freep(&ctx->surfaces);
    ctx->nb_surfaces = 0;

    av_frame_free(&ctx->frame);

    if (ctx->nvencoder) {
        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);

        /* Matches the push done before flushing at the top. */
        res = nvenc_pop_context(avctx);
        if (res < 0)
            return res;
    }
    ctx->nvencoder = NULL;

    /* Only the internally created context is destroyed here; the result of
     * the check macro is not used. */
    if (ctx->cu_context_internal)
        CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
    ctx->cu_context = ctx->cu_context_internal = NULL;

#if CONFIG_D3D11VA
    if (ctx->d3d11_device) {
        ID3D11Device_Release(ctx->d3d11_device);
        ctx->d3d11_device = NULL;
    }
#endif

    /* Unload the function tables last; nothing below calls into them. */
    nvenc_free_functions(&dl_fn->nvenc_dl);
    cuda_free_functions(&dl_fn->cuda_dl);

    dl_fn->nvenc_device_count = 0;

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");

    return 0;
}
1632 
ff_nvenc_encode_init(AVCodecContext * avctx)1633 av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
1634 {
1635     NvencContext *ctx = avctx->priv_data;
1636     int ret;
1637 
1638     if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1639         AVHWFramesContext *frames_ctx;
1640         if (!avctx->hw_frames_ctx) {
1641             av_log(avctx, AV_LOG_ERROR,
1642                    "hw_frames_ctx must be set when using GPU frames as input\n");
1643             return AVERROR(EINVAL);
1644         }
1645         frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1646         if (frames_ctx->format != avctx->pix_fmt) {
1647             av_log(avctx, AV_LOG_ERROR,
1648                    "hw_frames_ctx must match the GPU frame type\n");
1649             return AVERROR(EINVAL);
1650         }
1651         ctx->data_pix_fmt = frames_ctx->sw_format;
1652     } else {
1653         ctx->data_pix_fmt = avctx->pix_fmt;
1654     }
1655 
1656     ctx->frame = av_frame_alloc();
1657     if (!ctx->frame)
1658         return AVERROR(ENOMEM);
1659 
1660     if ((ret = nvenc_load_libraries(avctx)) < 0)
1661         return ret;
1662 
1663     if ((ret = nvenc_setup_device(avctx)) < 0)
1664         return ret;
1665 
1666     if ((ret = nvenc_setup_encoder(avctx)) < 0)
1667         return ret;
1668 
1669     if ((ret = nvenc_setup_surfaces(avctx)) < 0)
1670         return ret;
1671 
1672     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1673         if ((ret = nvenc_setup_extradata(avctx)) < 0)
1674             return ret;
1675     }
1676 
1677     return 0;
1678 }
1679 
get_free_frame(NvencContext * ctx)1680 static NvencSurface *get_free_frame(NvencContext *ctx)
1681 {
1682     NvencSurface *tmp_surf;
1683 
1684     if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
1685         // queue empty
1686         return NULL;
1687 
1688     av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
1689     return tmp_surf;
1690 }
1691 
/**
 * Copy the picture of a software frame into a locked NVENC input buffer.
 *
 * Every destination plane uses the pitch reported by the lock call; for
 * YUV420P the two chroma planes use half of it and their destination
 * pointers are exchanged after being computed (presumably because the
 * NVENC buffer stores the chroma planes in the opposite order -- confirm).
 *
 * @param avctx              codec context providing the copy dimensions
 * @param nv_surface         surface whose height is used to lay out planes
 * @param lock_buffer_params result of locking the input buffer
 * @param frame              source picture
 * @return 0 on success, a negative error code if the plane pointers could
 *         not be computed
 */
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
            NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
{
    const int planar_420 = frame->format == AV_PIX_FMT_YUV420P;
    const int pitch      = lock_buffer_params->pitch;
    int strides[4]       = { pitch, pitch, pitch, pitch };
    uint8_t *planes[4];
    int err;

    if (planar_420) {
        strides[2] >>= 1;
        strides[1]   = strides[2];
    }

    err = av_image_fill_pointers(planes, frame->format, nv_surface->height,
                                 lock_buffer_params->bufferDataPtr, strides);
    if (err < 0)
        return err;

    if (planar_420)
        FFSWAP(uint8_t*, planes[1], planes[2]);

    av_image_copy(planes, strides,
                  (const uint8_t**)frame->data, frame->linesize, frame->format,
                  avctx->width, avctx->height);

    return 0;
}
1721 
/**
 * Pick a slot in ctx->registered_frames for a new registration.
 *
 * While the table is not full the next unused index is handed out and the
 * count is incremented. Once it is full, the table is scanned twice: the
 * first pass only accepts unmapped slots that hold no registration, the
 * second pass takes the first unmapped slot and unregisters whatever
 * resource it still holds.
 *
 * @param avctx codec context
 * @return slot index on success, a negative error code if unregistering
 *         failed or every slot is currently mapped
 */
static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    NVENCSTATUS status;
    int pass, slot;

    if (ctx->nb_registered_frames != FF_ARRAY_ELEMS(ctx->registered_frames))
        return ctx->nb_registered_frames++;

    for (pass = 0; pass < 2; pass++) {
        for (slot = 0; slot < ctx->nb_registered_frames; slot++) {
            if (ctx->registered_frames[slot].mapped)
                continue;

            if (ctx->registered_frames[slot].regptr) {
                /* Pass 0 prefers slots that need no unregistering. */
                if (pass == 0)
                    continue;

                status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[slot].regptr);
                if (status != NV_ENC_SUCCESS)
                    return nvenc_print_error(avctx, status, "Failed unregistering unused input resource");

                ctx->registered_frames[slot].ptr    = NULL;
                ctx->registered_frames[slot].regptr = NULL;
            }

            return slot;
        }
    }

    av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
    return AVERROR(ENOMEM);
}
1755 
/**
 * Find or create the NVENC registration for a hardware input frame.
 *
 * An existing registration is reused when its pointer matches
 * frame->data[0] (for D3D11 input the index stored in frame->data[1] must
 * match as well). Otherwise a slot is obtained from
 * nvenc_find_free_reg_resource() and the resource is registered with the
 * encoder using the dimensions and software format of the frame's hardware
 * frames context.
 *
 * @param avctx codec context
 * @param frame hardware frame to register; its hw_frames_ctx is dereferenced
 * @return index into ctx->registered_frames on success, a negative error
 *         code on failure
 */
static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
    /* Zero-initialise the request: only some members are assigned below
     * (subResourceIndex is set for D3D11 only, yet it is read back for
     * every input type), and the remaining members must not reach the
     * driver as stack garbage. */
    NV_ENC_REGISTER_RESOURCE reg = { 0 };
    int i, idx, ret;

    for (i = 0; i < ctx->nb_registered_frames; i++) {
        if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
            return i;
        else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
            return i;
    }

    idx = nvenc_find_free_reg_resource(avctx);
    if (idx < 0)
        return idx;

    reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
    reg.width              = frames_ctx->width;
    reg.height             = frames_ctx->height;
    reg.pitch              = frame->linesize[0];
    reg.resourceToRegister = frame->data[0];

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        reg.resourceType   = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
    }
    else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
        reg.resourceType     = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
        /* For D3D11 frames data[1] carries an index, not a pointer. */
        reg.subResourceIndex = (intptr_t)frame->data[1];
    }

    reg.bufferFormat       = nvenc_map_buffer_format(frames_ctx->sw_format);
    if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
               av_get_pix_fmt_name(frames_ctx->sw_format));
        return AVERROR(EINVAL);
    }

    ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
    if (ret != NV_ENC_SUCCESS) {
        nvenc_print_error(avctx, ret, "Error registering an input resource");
        return AVERROR_UNKNOWN;
    }

    /* Remember the registration so later frames with the same backing
     * resource can reuse it. */
    ctx->registered_frames[idx].ptr       = frame->data[0];
    ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
    ctx->registered_frames[idx].regptr    = reg.registeredResource;
    return idx;
}
1809 
/**
 * Make the picture of a frame available to the encoder through a surface.
 *
 * Software input: the surface's input buffer is locked, the picture is
 * copied into it and the buffer is unlocked again.
 *
 * CUDA/D3D11 input: the frame is registered (or an existing registration is
 * found), a reference to it is stored in the surface, the registered
 * resource is mapped if it is not mapped yet, its map counter is
 * incremented and the surface is pointed at the mapped resource.
 *
 * @param avctx       codec context
 * @param frame       input frame
 * @param nvenc_frame surface that will carry the picture
 * @return 0 on success, a negative error code on failure
 */
static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
                                      NvencSurface *nvenc_frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NVENCSTATUS status;
    int err, reg_idx;

    if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11) {
        /* Software path: lock, copy, unlock. */
        NV_ENC_LOCK_INPUT_BUFFER lock = { 0 };

        lock.version     = NV_ENC_LOCK_INPUT_BUFFER_VER;
        lock.inputBuffer = nvenc_frame->input_surface;

        status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lock);
        if (status != NV_ENC_SUCCESS)
            return nvenc_print_error(avctx, status, "Failed locking nvenc input buffer");

        nvenc_frame->pitch = lock.pitch;
        err = nvenc_copy_frame(avctx, nvenc_frame, &lock, frame);

        /* The buffer is unlocked even if the copy failed; an unlock
         * failure takes precedence over the copy result. */
        status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
        if (status != NV_ENC_SUCCESS)
            return nvenc_print_error(avctx, status, "Failed unlocking input buffer!");

        return err;
    }

    /* Hardware path: register, reference, map. */
    reg_idx = nvenc_register_frame(avctx, frame);
    if (reg_idx < 0) {
        av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
        return reg_idx;
    }

    err = av_frame_ref(nvenc_frame->in_ref, frame);
    if (err < 0)
        return err;

    if (!ctx->registered_frames[reg_idx].mapped) {
        ctx->registered_frames[reg_idx].in_map.version            = NV_ENC_MAP_INPUT_RESOURCE_VER;
        ctx->registered_frames[reg_idx].in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;

        status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &ctx->registered_frames[reg_idx].in_map);
        if (status != NV_ENC_SUCCESS) {
            av_frame_unref(nvenc_frame->in_ref);
            return nvenc_print_error(avctx, status, "Error mapping an input resource");
        }
    }

    /* One more surface now depends on this mapping. */
    ctx->registered_frames[reg_idx].mapped += 1;

    nvenc_frame->reg_idx       = reg_idx;
    nvenc_frame->input_surface = ctx->registered_frames[reg_idx].in_map.mappedResource;
    nvenc_frame->format        = ctx->registered_frames[reg_idx].in_map.mappedBufferFmt;
    nvenc_frame->pitch         = frame->linesize[0];

    return 0;
}
1871 
/**
 * Fill the codec-specific part of the per-picture parameters.
 *
 * Copies the slice mode settings of the active encode configuration into
 * the H.264 or HEVC picture parameters and, if any SEI payloads were
 * prepared, attaches them. Other codec ids leave params untouched.
 *
 * @param avctx     codec context; the codec id selects the branch
 * @param params    picture parameters to update
 * @param sei_data  array of SEI payloads
 * @param sei_count number of valid entries in sei_data
 */
static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
                                            NV_ENC_PIC_PARAMS *params,
                                            NV_ENC_SEI_PAYLOAD *sei_data,
                                            int sei_count)
{
    NvencContext *ctx = avctx->priv_data;
    const int have_sei = sei_count > 0;

    if (avctx->codec->id == AV_CODEC_ID_H264) {
        params->codecPicParams.h264PicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
        params->codecPicParams.h264PicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;

        if (have_sei) {
            params->codecPicParams.h264PicParams.seiPayloadArray    = sei_data;
            params->codecPicParams.h264PicParams.seiPayloadArrayCnt = sei_count;
        }
    } else if (avctx->codec->id == AV_CODEC_ID_HEVC) {
        params->codecPicParams.hevcPicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
        params->codecPicParams.hevcPicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;

        if (have_sei) {
            params->codecPicParams.hevcPicParams.seiPayloadArray    = sei_data;
            params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = sei_count;
        }
    }
}
1904 
/**
 * Append one timestamp to a FIFO.
 *
 * @param queue     FIFO to write to
 * @param timestamp value to append
 */
static inline void timestamp_queue_enqueue(AVFifoBuffer* queue, int64_t timestamp)
{
    int64_t value = timestamp;

    av_fifo_generic_write(queue, &value, sizeof(value), NULL);
}
1909 
/**
 * Remove and return the oldest timestamp of a FIFO.
 *
 * @param queue FIFO to read from
 * @return the dequeued timestamp, or AV_NOPTS_VALUE if the FIFO is empty
 */
static inline int64_t timestamp_queue_dequeue(AVFifoBuffer* queue)
{
    int64_t oldest = AV_NOPTS_VALUE;

    if (av_fifo_size(queue) <= 0)
        return oldest;

    av_fifo_generic_read(queue, &oldest, sizeof(oldest), NULL);

    return oldest;
}
1918 
/**
 * Set pts and dts of an output packet.
 *
 * The pts is the output timestamp reported by the encoder. The dts is the
 * oldest queued input timestamp, moved back by
 * max(frameIntervalP - 1, 0) * max(ticks_per_frame, 1).
 *
 * If the timestamp queue is empty the dequeue yields AV_NOPTS_VALUE; in
 * that case the dts is left at AV_NOPTS_VALUE. Applying the offset to it
 * would overflow int64_t (undefined behaviour) and turn the "no timestamp"
 * marker into a bogus value.
 *
 * @param avctx  codec context
 * @param params result of locking the output bitstream
 * @param pkt    packet whose timestamps are written
 * @return always 0
 */
static int nvenc_set_timestamp(AVCodecContext *avctx,
                               NV_ENC_LOCK_BITSTREAM *params,
                               AVPacket *pkt)
{
    NvencContext *ctx = avctx->priv_data;

    pkt->pts = params->outputTimeStamp;
    pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);

    /* Only a real timestamp may be shifted. */
    if (pkt->dts != AV_NOPTS_VALUE)
        pkt->dts -= FFMAX(ctx->encode_config.frameIntervalP - 1, 0) * FFMAX(avctx->ticks_per_frame, 1);

    return 0;
}
1932 
/**
 * Turn the finished output of one surface into a packet.
 *
 * Steps, in order:
 *  - allocate a zeroed slice-offset array sized by the configured
 *    sliceModeData of the active codec,
 *  - lock the surface's bitstream buffer, copy its contents into a freshly
 *    obtained packet buffer and unlock it,
 *  - for CUDA/D3D11 input, drop one use of the registered input resource
 *    (unmapping it when the counter reaches zero), release the frame
 *    reference and clear the surface's input pointer,
 *  - translate the picture type, flag IDR pictures as key frames, attach
 *    encoder statistics and set the packet timestamps.
 *
 * On every failure before the timestamps are set, one entry is removed
 * from the timestamp queue so that it stays aligned with the output.
 *
 * @param avctx      codec context
 * @param pkt        packet to fill
 * @param tmpoutsurf surface whose output is ready
 * @return 0 on success, a negative error code on failure
 */
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;

    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    uint32_t *slice_offsets = NULL;
    uint32_t slice_data;
    enum AVPictureType pict_type;
    NVENCSTATUS status;
    int ret = 0;

    if (avctx->codec->id == AV_CODEC_ID_H264) {
        slice_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
    } else if (avctx->codec->id == AV_CODEC_ID_H265) {
        slice_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
    } else {
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        ret = AVERROR(EINVAL);
        goto drop_timestamp;
    }

    slice_offsets = av_mallocz(slice_data * sizeof(*slice_offsets));
    if (!slice_offsets) {
        ret = AVERROR(ENOMEM);
        goto drop_timestamp;
    }

    lock_params.version         = NV_ENC_LOCK_BITSTREAM_VER;
    lock_params.doNotWait       = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets    = slice_offsets;

    status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (status != NV_ENC_SUCCESS) {
        ret = nvenc_print_error(avctx, status, "Failed locking bitstream buffer");
        goto drop_timestamp;
    }

    ret = ff_get_encode_buffer(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
    if (ret < 0) {
        /* Do not leave the bitstream locked when no packet could be had. */
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto drop_timestamp;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (status != NV_ENC_SUCCESS) {
        ret = nvenc_print_error(avctx, status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
        goto drop_timestamp;
    }

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
        /* This surface no longer needs its mapped input resource. */
        ctx->registered_frames[tmpoutsurf->reg_idx].mapped -= 1;

        if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
            ret = AVERROR_BUG;
            goto drop_timestamp;
        }

        if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped == 0) {
            status = p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].in_map.mappedResource);
            if (status != NV_ENC_SUCCESS) {
                ret = nvenc_print_error(avctx, status, "Failed unmapping input resource");
                goto drop_timestamp;
            }
        }

        av_frame_unref(tmpoutsurf->in_ref);

        tmpoutsurf->input_surface = NULL;
    }

    /* IDR pictures are intra pictures that additionally mark a key frame. */
    if (lock_params.pictureType == NV_ENC_PIC_TYPE_IDR)
        pkt->flags |= AV_PKT_FLAG_KEY;

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
    case NV_ENC_PIC_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        ret = AVERROR_EXTERNAL;
        goto drop_timestamp;
    }

#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ff_side_data_set_encoder_stats(pkt,
        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

    /* The timestamp helper consumes the queue entry itself, so neither
     * its success nor its failure goes through drop_timestamp. */
    ret = nvenc_set_timestamp(avctx, &lock_params, pkt);
    if (ret >= 0)
        ret = 0;
    goto release;

drop_timestamp:
    timestamp_queue_dequeue(ctx->timestamp_list);

release:
    av_free(slice_offsets);

    return ret;
}
2059 
/**
 * Tell whether a finished surface should be handed out now.
 *
 * @param avctx codec context
 * @param flush non-zero to report any ready surface immediately
 * @return non-zero if at least one surface is ready and either flush is
 *         set or ready plus pending surfaces reach ctx->async_depth;
 *         0 otherwise
 */
static int output_ready(AVCodecContext *avctx, int flush)
{
    NvencContext *ctx = avctx->priv_data;
    const int ready   = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*);
    const int pending = av_fifo_size(ctx->output_surface_queue)       / sizeof(NvencSurface*);

    if (ready <= 0)
        return 0;

    if (flush)
        return 1;

    return ready + pending >= ctx->async_depth;
}
2071 
/**
 * Reconfigure the running encoder if aspect ratio or rate settings changed.
 *
 * The display aspect ratio computed for avctx is compared with the one
 * stored in ctx->init_encode_params. Unless the rate control mode is
 * constant QP or dynamic bitrate changes are unsupported, the average
 * bitrate, maximum bitrate and VBV buffer size of avctx (when positive)
 * are compared with the current configuration as well. Any difference
 * triggers one nvEncReconfigureEncoder() call; rate changes additionally
 * request an encoder reset and a forced IDR picture. A failed
 * reconfiguration is logged only; on success the new values are recorded
 * in ctx.
 *
 * @param avctx codec context carrying the requested settings
 * @param frame current input frame (not used here)
 */
static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_RECONFIGURE_PARAMS params = { 0 };
    NVENCSTATUS status;
    int dar_changed = 0;
    int rate_changed = 0;
    int dar_w, dar_h;

    params.version            = NV_ENC_RECONFIGURE_PARAMS_VER;
    params.reInitEncodeParams = ctx->init_encode_params;

    compute_dar(avctx, &dar_w, &dar_h);
    if (dar_w != ctx->init_encode_params.darWidth || dar_h != ctx->init_encode_params.darHeight) {
        av_log(avctx, AV_LOG_VERBOSE,
               "aspect ratio change (DAR): %d:%d -> %d:%d\n",
               ctx->init_encode_params.darWidth,
               ctx->init_encode_params.darHeight, dar_w, dar_h);

        params.reInitEncodeParams.darHeight = dar_h;
        params.reInitEncodeParams.darWidth  = dar_w;

        dar_changed = 1;
    }

    if (ctx->rc != NV_ENC_PARAMS_RC_CONSTQP && ctx->support_dyn_bitrate) {
        if (avctx->bit_rate > 0 && params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate != avctx->bit_rate) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "avg bitrate change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate,
                   (uint32_t)avctx->bit_rate);

            params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = avctx->bit_rate;
            rate_changed = 1;
        }

        if (avctx->rc_max_rate > 0 && ctx->encode_config.rcParams.maxBitRate != avctx->rc_max_rate) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "max bitrate change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate,
                   (uint32_t)avctx->rc_max_rate);

            params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate = avctx->rc_max_rate;
            rate_changed = 1;
        }

        if (avctx->rc_buffer_size > 0 && ctx->encode_config.rcParams.vbvBufferSize != avctx->rc_buffer_size) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "vbv buffer size change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize,
                   avctx->rc_buffer_size);

            params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize = avctx->rc_buffer_size;
            rate_changed = 1;
        }
    }

    if (rate_changed) {
        params.resetEncoder = 1;
        params.forceIDR     = 1;
    } else {
        /* Without rate changes no encode configuration is passed along. */
        params.reInitEncodeParams.encodeConfig = NULL;
    }

    if (!dar_changed && !rate_changed)
        return;

    status = p_nvenc->nvEncReconfigureEncoder(ctx->nvencoder, &params);
    if (status != NV_ENC_SUCCESS) {
        nvenc_print_error(avctx, status, "failed to reconfigure nvenc");
        return;
    }

    if (dar_changed) {
        ctx->init_encode_params.darHeight = dar_h;
        ctx->init_encode_params.darWidth  = dar_w;
    }

    if (rate_changed) {
        ctx->encode_config.rcParams.averageBitRate = params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate;
        ctx->encode_config.rcParams.maxBitRate     = params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate;
        ctx->encode_config.rcParams.vbvBufferSize  = params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize;
    }
}
2163 
/**
 * Submit one input frame, or an end-of-stream marker, to the encoder.
 *
 * With a frame that carries data, a free surface is taken, the encoder is
 * reconfigured if needed, the picture is uploaded and the picture
 * parameters (structure, forced IDR/intra, timestamp, optional A53 closed
 * caption and S12M timecode SEI payloads) are filled in. Without such a
 * frame an end-of-stream picture is submitted instead.
 *
 * After a successful submission the surface and the frame pts are queued;
 * when the encoder reports NV_ENC_SUCCESS (as opposed to "need more
 * input") all pending surfaces are moved to the ready queue.
 *
 * Resource handling on early failures:
 *  - if the CUDA context push or the upload fails, the surface has not
 *    been handed to the encoder, so it is returned to the unused queue
 *    instead of being lost;
 *  - if the context push before the encode call fails, the SEI payload
 *    buffers allocated above are freed before returning.
 *
 * @param avctx codec context
 * @param frame input frame, or NULL / a frame without buffers to signal
 *              end of stream
 * @return 0 on success, AVERROR(EAGAIN) if no surface is free, another
 *         negative error code on failure
 */
static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    NVENCSTATUS nv_status;
    NvencSurface *tmp_out_surf, *in_surf;
    int res, res2;
    NV_ENC_SEI_PAYLOAD sei_data[8];
    int sei_count = 0;
    int i;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PIC_PARAMS pic_params = { 0 };
    pic_params.version = NV_ENC_PIC_PARAMS_VER;

    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
        return AVERROR(EINVAL);

    if (frame && frame->buf[0]) {
        in_surf = get_free_frame(ctx);
        if (!in_surf)
            return AVERROR(EAGAIN);

        res = nvenc_push_context(avctx);
        if (res < 0) {
            /* Nothing touched the surface yet: give it back. */
            av_fifo_generic_write(ctx->unused_surface_queue, &in_surf, sizeof(in_surf), NULL);
            return res;
        }

        reconfig_encoder(avctx, frame);

        res = nvenc_upload_frame(avctx, frame, in_surf);

        res2 = nvenc_pop_context(avctx);

        if (res) {
            /* A failed upload leaves the surface without mapping or frame
             * reference, so it can be reused right away. A context pop
             * failure still takes precedence as the reported error. */
            av_fifo_generic_write(ctx->unused_surface_queue, &in_surf, sizeof(in_surf), NULL);
            return res2 < 0 ? res2 : res;
        }

        if (res2 < 0)
            return res2;

        pic_params.inputBuffer = in_surf->input_surface;
        pic_params.bufferFmt = in_surf->format;
        pic_params.inputWidth = in_surf->width;
        pic_params.inputHeight = in_surf->height;
        pic_params.inputPitch = in_surf->pitch;
        pic_params.outputBitstream = in_surf->output_surface;

        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
            if (frame->top_field_first)
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
            else
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
        } else {
            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
        }

        /* A frame flagged as I picture forces an IDR or intra picture,
         * depending on the forced_idr setting (negative disables both). */
        if (ctx->forced_idr >= 0 && frame->pict_type == AV_PICTURE_TYPE_I) {
            pic_params.encodePicFlags =
                ctx->forced_idr ? NV_ENC_PIC_FLAG_FORCEIDR : NV_ENC_PIC_FLAG_FORCEINTRA;
        } else {
            pic_params.encodePicFlags = 0;
        }

        pic_params.inputTimeStamp = frame->pts;

        if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
            void *a53_data = NULL;
            size_t a53_size = 0;

            if (ff_alloc_a53_sei(frame, 0, (void**)&a53_data, &a53_size) < 0) {
                av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
            }

            if (a53_data) {
                sei_data[sei_count].payloadSize = (uint32_t)a53_size;
                sei_data[sei_count].payloadType = 4;
                sei_data[sei_count].payload = (uint8_t*)a53_data;
                sei_count ++;
            }
        }

        if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
            void *tc_data = NULL;
            size_t tc_size = 0;

            if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, (void**)&tc_data, &tc_size) < 0) {
                av_log(ctx, AV_LOG_ERROR, "Not enough memory for timecode sei, skipping\n");
            }

            if (tc_data) {
                sei_data[sei_count].payloadSize = (uint32_t)tc_size;
                sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
                sei_data[sei_count].payload = (uint8_t*)tc_data;
                sei_count ++;
            }
        }

        nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data, sei_count);
    } else {
        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
    }

    res = nvenc_push_context(avctx);
    if (res < 0) {
        /* The payloads are owned by this function; do not leak them. */
        for (i = 0; i < sei_count; i++)
            av_freep(&sei_data[i].payload);
        return res;
    }

    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);

    /* The payloads are freed right after the encode call returns. */
    for (i = 0; i < sei_count; i++)
        av_freep(&sei_data[i].payload);

    res = nvenc_pop_context(avctx);
    if (res < 0)
        return res;

    if (nv_status != NV_ENC_SUCCESS &&
        nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
        return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");

    if (frame && frame->buf[0]) {
        av_fifo_generic_write(ctx->output_surface_queue, &in_surf, sizeof(in_surf), NULL);
        timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
    }

    /* all the pending buffers are now ready for output */
    if (nv_status == NV_ENC_SUCCESS) {
        while (av_fifo_size(ctx->output_surface_queue) > 0) {
            av_fifo_generic_read(ctx->output_surface_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
            av_fifo_generic_write(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
        }
    }

    return 0;
}
2297 
/**
 * Produce the next encoded packet.
 *
 * If ctx->frame holds no data a new input frame is requested first. The
 * frame is then submitted; it is released after a successful submission
 * and kept for a later call when the submission reported EAGAIN. Finally,
 * if output is ready, the oldest ready surface is converted into pkt and
 * returned to the unused-surface queue.
 *
 * @param avctx codec context
 * @param pkt   packet to fill
 * @return 0 if pkt was filled, AVERROR(EAGAIN) if more input is needed,
 *         AVERROR_EOF if draining and nothing is left, another negative
 *         error code on failure
 */
int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
{
    NvencContext *ctx = avctx->priv_data;
    AVFrame *frame = ctx->frame;
    NvencSurface *surface;
    int ret, pop_ret;

    if (!ctx->nvencoder || (!ctx->cu_context && !ctx->d3d11_device))
        return AVERROR(EINVAL);

    if (!frame->buf[0]) {
        /* End of input is not an error here: an empty frame makes the
         * submission below send the end-of-stream marker. */
        ret = ff_encode_get_frame(avctx, frame);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    ret = nvenc_send_frame(avctx, frame);
    if (ret >= 0)
        av_frame_unref(frame);
    else if (ret != AVERROR(EAGAIN))
        return ret;

    if (!output_ready(avctx, avctx->internal->draining))
        return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);

    av_fifo_generic_read(ctx->output_surface_ready_queue, &surface, sizeof(surface), NULL);

    ret = nvenc_push_context(avctx);
    if (ret < 0)
        return ret;

    ret = process_output_surface(avctx, pkt, surface);

    /* A failure to pop the context is reported before a processing error. */
    pop_ret = nvenc_pop_context(avctx);
    if (pop_ret < 0)
        return pop_ret;

    if (ret)
        return ret;

    av_fifo_generic_write(ctx->unused_surface_queue, &surface, sizeof(surface), NULL);

    return 0;
}
2348 
ff_nvenc_encode_flush(AVCodecContext * avctx)2349 av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
2350 {
2351     NvencContext *ctx = avctx->priv_data;
2352 
2353     nvenc_send_frame(avctx, NULL);
2354     av_fifo_reset(ctx->timestamp_list);
2355 }
2356