/*
 * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <VideoToolbox/VideoToolbox.h>
#include <CoreVideo/CoreVideo.h>
#include <CoreMedia/CoreMedia.h>
#include <TargetConditionals.h>
#include <Availability.h>
#include "avcodec.h"
#include "libavutil/opt.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavcodec/avcodec.h"
#include "libavutil/pixdesc.h"
#include "libavutil/hwcontext_videotoolbox.h"
#include "codec_internal.h"
#include "internal.h"
#include <pthread.h>
#include "atsc_a53.h"
#include "encode.h"
#include "h264.h"
#include "h264_sei.h"
#include <dlfcn.h>

#if !HAVE_KCMVIDEOCODECTYPE_HEVC
enum { kCMVideoCodecType_HEVC = 'hvc1' };
#endif

#if !HAVE_KCMVIDEOCODECTYPE_HEVCWITHALPHA
enum { kCMVideoCodecType_HEVCWithAlpha = 'muxa' };
#endif

#if !HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE
enum { kCVPixelFormatType_420YpCbCr10BiPlanarFullRange = 'xf20' };
enum { kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange = 'x420' };
#endif

#ifndef TARGET_CPU_ARM64
#   define TARGET_CPU_ARM64 0
#endif

typedef OSStatus (*getParameterSetAtIndex)(CMFormatDescriptionRef videoDesc,
                                           size_t parameterSetIndex,
                                           const uint8_t **parameterSetPointerOut,
                                           size_t *parameterSetSizeOut,
                                           size_t *parameterSetCountOut,
                                           int *NALUnitHeaderLengthOut);

//These symbols may not be present
static struct{
    CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
    CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
    CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;

    CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
    CFStringRef kVTH264EntropyMode_CAVLC;
    CFStringRef kVTH264EntropyMode_CABAC;

    CFStringRef kVTProfileLevel_H264_Baseline_4_0;
    CFStringRef kVTProfileLevel_H264_Baseline_4_2;
    CFStringRef kVTProfileLevel_H264_Baseline_5_0;
    CFStringRef kVTProfileLevel_H264_Baseline_5_1;
    CFStringRef kVTProfileLevel_H264_Baseline_5_2;
    CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
    CFStringRef kVTProfileLevel_H264_Main_4_2;
    CFStringRef kVTProfileLevel_H264_Main_5_1;
    CFStringRef kVTProfileLevel_H264_Main_5_2;
    CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
    CFStringRef kVTProfileLevel_H264_High_3_0;
    CFStringRef kVTProfileLevel_H264_High_3_1;
    CFStringRef kVTProfileLevel_H264_High_3_2;
    CFStringRef kVTProfileLevel_H264_High_4_0;
    CFStringRef kVTProfileLevel_H264_High_4_1;
    CFStringRef kVTProfileLevel_H264_High_4_2;
    CFStringRef kVTProfileLevel_H264_High_5_1;
    CFStringRef kVTProfileLevel_H264_High_5_2;
    CFStringRef kVTProfileLevel_H264_High_AutoLevel;
    CFStringRef kVTProfileLevel_H264_Extended_5_0;
    CFStringRef kVTProfileLevel_H264_Extended_AutoLevel;

    CFStringRef kVTProfileLevel_HEVC_Main_AutoLevel;
    CFStringRef kVTProfileLevel_HEVC_Main10_AutoLevel;

    CFStringRef kVTCompressionPropertyKey_RealTime;
    CFStringRef kVTCompressionPropertyKey_TargetQualityForAlpha;
    CFStringRef kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality;

    CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
    CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;

    getParameterSetAtIndex CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
} compat_keys;

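/*
 * GET_SYM() resolves one of the optional CFString constants above at runtime.
 * If the symbol is exported by the loaded frameworks, the real constant is
 * used; otherwise compat_keys falls back to a CFSTR() literal carrying the
 * same string value. For example, GET_SYM(kVTH264EntropyMode_CABAC, "CABAC")
 * behaves roughly like:
 *
 *     CFStringRef *handle = dlsym(RTLD_DEFAULT, "kVTH264EntropyMode_CABAC");
 *     compat_keys.kVTH264EntropyMode_CABAC = handle ? *handle : CFSTR("CABAC");
 */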
#define GET_SYM(symbol, defaultVal)                                     \
do{                                                                     \
    CFStringRef* handle = (CFStringRef*)dlsym(RTLD_DEFAULT, #symbol);   \
    if(!handle)                                                         \
        compat_keys.symbol = CFSTR(defaultVal);                         \
    else                                                                \
        compat_keys.symbol = *handle;                                   \
}while(0)

static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;

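/* Resolve every optional VideoToolbox/CoreMedia symbol in one pass; once_ctrl
 * above is the pthread_once() guard intended to run this a single time. */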
static void loadVTEncSymbols(){
    compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex =
        (getParameterSetAtIndex)dlsym(
            RTLD_DEFAULT,
            "CMVideoFormatDescriptionGetHEVCParameterSetAtIndex"
        );

    GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020,   "ITU_R_2020");
    GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
    GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020,      "ITU_R_2020");

    GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
    GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
    GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");

    GET_SYM(kVTProfileLevel_H264_Baseline_4_0,       "H264_Baseline_4_0");
    GET_SYM(kVTProfileLevel_H264_Baseline_4_2,       "H264_Baseline_4_2");
    GET_SYM(kVTProfileLevel_H264_Baseline_5_0,       "H264_Baseline_5_0");
    GET_SYM(kVTProfileLevel_H264_Baseline_5_1,       "H264_Baseline_5_1");
    GET_SYM(kVTProfileLevel_H264_Baseline_5_2,       "H264_Baseline_5_2");
    GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
    GET_SYM(kVTProfileLevel_H264_Main_4_2,           "H264_Main_4_2");
    GET_SYM(kVTProfileLevel_H264_Main_5_1,           "H264_Main_5_1");
    GET_SYM(kVTProfileLevel_H264_Main_5_2,           "H264_Main_5_2");
    GET_SYM(kVTProfileLevel_H264_Main_AutoLevel,     "H264_Main_AutoLevel");
    GET_SYM(kVTProfileLevel_H264_High_3_0,           "H264_High_3_0");
    GET_SYM(kVTProfileLevel_H264_High_3_1,           "H264_High_3_1");
    GET_SYM(kVTProfileLevel_H264_High_3_2,           "H264_High_3_2");
    GET_SYM(kVTProfileLevel_H264_High_4_0,           "H264_High_4_0");
    GET_SYM(kVTProfileLevel_H264_High_4_1,           "H264_High_4_1");
    GET_SYM(kVTProfileLevel_H264_High_4_2,           "H264_High_4_2");
    GET_SYM(kVTProfileLevel_H264_High_5_1,           "H264_High_5_1");
    GET_SYM(kVTProfileLevel_H264_High_5_2,           "H264_High_5_2");
    GET_SYM(kVTProfileLevel_H264_High_AutoLevel,     "H264_High_AutoLevel");
    GET_SYM(kVTProfileLevel_H264_Extended_5_0,       "H264_Extended_5_0");
    GET_SYM(kVTProfileLevel_H264_Extended_AutoLevel, "H264_Extended_AutoLevel");

    GET_SYM(kVTProfileLevel_HEVC_Main_AutoLevel,     "HEVC_Main_AutoLevel");
    GET_SYM(kVTProfileLevel_HEVC_Main10_AutoLevel,   "HEVC_Main10_AutoLevel");

    GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
    GET_SYM(kVTCompressionPropertyKey_TargetQualityForAlpha,
            "TargetQualityForAlpha");
    GET_SYM(kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
            "PrioritizeEncodingSpeedOverQuality");

    GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
            "EnableHardwareAcceleratedVideoEncoder");
    GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
            "RequireHardwareAcceleratedVideoEncoder");
}

typedef enum VT_H264Profile {
    H264_PROF_AUTO,
    H264_PROF_BASELINE,
    H264_PROF_MAIN,
    H264_PROF_HIGH,
    H264_PROF_EXTENDED,
    H264_PROF_COUNT
} VT_H264Profile;

typedef enum VTH264Entropy{
    VT_ENTROPY_NOT_SET,
    VT_CAVLC,
    VT_CABAC
} VTH264Entropy;

typedef enum VT_HEVCProfile {
    HEVC_PROF_AUTO,
    HEVC_PROF_MAIN,
    HEVC_PROF_MAIN10,
    HEVC_PROF_COUNT
} VT_HEVCProfile;

static const uint8_t start_code[] = { 0, 0, 0, 1 };

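/* Per-frame side data (e.g. A/53 closed-caption SEI, see the a53_cc option
 * field below) carried alongside the encoded sample buffer. */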
typedef struct ExtraSEI {
  void *data;
  size_t size;
} ExtraSEI;

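/* Node of the singly linked FIFO that hands encoded sample buffers from the
 * VideoToolbox output callback to the thread draining packets. */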
typedef struct BufNode {
    CMSampleBufferRef cm_buffer;
    ExtraSEI *sei;
    struct BufNode* next;
    int error;
} BufNode;

typedef struct VTEncContext {
    AVClass *class;
    enum AVCodecID codec_id;
    VTCompressionSessionRef session;
    CFStringRef ycbcr_matrix;
    CFStringRef color_primaries;
    CFStringRef transfer_function;
    getParameterSetAtIndex get_param_set_func;

    pthread_mutex_t lock;
    pthread_cond_t  cv_sample_sent;

    int async_error;

    BufNode *q_head;
    BufNode *q_tail;

    int64_t frame_ct_out;
    int64_t frame_ct_in;

    int64_t first_pts;
    int64_t dts_delta;

    int64_t profile;
    int level;
    int entropy;
    int realtime;
    int frames_before;
    int frames_after;

    int allow_sw;
    int require_sw;
    double alpha_quality;
    int prio_speed;

    bool flushing;
    int has_b_frames;
    bool warned_color_range;

    /* can't be bool type since AVOption will access it as int */
    int a53_cc;
} VTEncContext;

static int vtenc_populate_extradata(AVCodecContext   *avctx,
                                    CMVideoCodecType codec_type,
                                    CFStringRef      profile_level,
                                    CFNumberRef      gamma_level,
                                    CFDictionaryRef  enc_info,
                                    CFDictionaryRef  pixel_buffer_info);

/**
 * NULL-safe release of *refPtr, and sets value to NULL.
 */
static void vt_release_num(CFNumberRef* refPtr){
    if (!*refPtr) {
        return;
    }

    CFRelease(*refPtr);
    *refPtr = NULL;
}

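/* Record an error raised from the output callback and drop any queued
 * samples; the next vtenc_q_pop() call reports the error to the caller. */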
static void set_async_error(VTEncContext *vtctx, int err)
{
    BufNode *info;

    pthread_mutex_lock(&vtctx->lock);

    vtctx->async_error = err;

    info = vtctx->q_head;
    vtctx->q_head = vtctx->q_tail = NULL;

    while (info) {
        BufNode *next = info->next;
        CFRelease(info->cm_buffer);
        av_free(info);
        info = next;
    }

    pthread_mutex_unlock(&vtctx->lock);
}

static void clear_frame_queue(VTEncContext *vtctx)
{
    set_async_error(vtctx, 0);
}

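/*
 * Dequeue the next encoded sample. If wait is true, this blocks until a
 * sample arrives, an async error is recorded, or the encoder is flushing.
 * On success *buf is either a retained CMSampleBufferRef (ownership passes
 * to the caller, along with *sei when requested) or NULL when nothing is
 * left to return.
 */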
static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei)
{
    BufNode *info;

    pthread_mutex_lock(&vtctx->lock);

    if (vtctx->async_error) {
        pthread_mutex_unlock(&vtctx->lock);
        return vtctx->async_error;
    }

    if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
        *buf = NULL;

        pthread_mutex_unlock(&vtctx->lock);
        return 0;
    }

    while (!vtctx->q_head && !vtctx->async_error && wait && !vtctx->flushing) {
        pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
    }

    if (!vtctx->q_head) {
        pthread_mutex_unlock(&vtctx->lock);
        *buf = NULL;
        return 0;
    }

    info = vtctx->q_head;
    vtctx->q_head = vtctx->q_head->next;
    if (!vtctx->q_head) {
        vtctx->q_tail = NULL;
    }

    vtctx->frame_ct_out++;
    pthread_mutex_unlock(&vtctx->lock);

    *buf = info->cm_buffer;
    if (sei && *buf) {
        *sei = info->sei;
    } else if (info->sei) {
        if (info->sei->data) av_free(info->sei->data);
        av_free(info->sei);
    }
    av_free(info);


    return 0;
}

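/* Called from the output callback: retain the sample buffer, append it to
 * the FIFO and wake up a consumer blocked in vtenc_q_pop(). */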
static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei)
{
    BufNode *info = av_malloc(sizeof(BufNode));
    if (!info) {
        set_async_error(vtctx, AVERROR(ENOMEM));
        return;
    }

    CFRetain(buffer);
    info->cm_buffer = buffer;
    info->sei = sei;
    info->next = NULL;

    pthread_mutex_lock(&vtctx->lock);

    if (!vtctx->q_head) {
        vtctx->q_head = info;
    } else {
        vtctx->q_tail->next = info;
    }

    vtctx->q_tail = info;

    pthread_cond_signal(&vtctx->cv_sample_sent);
    pthread_mutex_unlock(&vtctx->lock);
}

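/*
 * Count the NAL units in a length-prefixed (AVCC/HVCC-style) sample buffer.
 * The payload is laid out as:
 *
 *     [length (length_code_size bytes, big endian)][NAL][length][NAL]...
 *
 * length_code_size must be at most 4 bytes.
 */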
static int count_nalus(size_t length_code_size,
                       CMSampleBufferRef sample_buffer,
                       int *count)
{
    size_t offset = 0;
    int status;
    int nalu_ct = 0;
    uint8_t size_buf[4];
    size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
    CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);

    if (length_code_size > 4)
        return AVERROR_INVALIDDATA;

    while (offset < src_size) {
        size_t curr_src_len;
        size_t box_len = 0;
        size_t i;

        status = CMBlockBufferCopyDataBytes(block,
                                            offset,
                                            length_code_size,
                                            size_buf);

        for (i = 0; i < length_code_size; i++) {
            box_len <<= 8;
            box_len |= size_buf[i];
        }

        curr_src_len = box_len + length_code_size;
        offset += curr_src_len;

        nalu_ct++;
    }

    *count = nalu_ct;
    return 0;
}

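/* Map the AVCodecID (plus the requested ProRes profile and whether alpha is
 * wanted) to the CMVideoCodecType FourCC that VideoToolbox expects. */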
static CMVideoCodecType get_cm_codec_type(AVCodecContext *avctx,
                                          int64_t profile,
                                          double alpha_quality)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX ? avctx->sw_pix_fmt : avctx->pix_fmt);
    switch (avctx->codec_id) {
    case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
    case AV_CODEC_ID_HEVC:
        if (desc && (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpha_quality > 0.0) {
            return kCMVideoCodecType_HEVCWithAlpha;
        }
        return kCMVideoCodecType_HEVC;
    case AV_CODEC_ID_PRORES:
        switch (profile) {
        case FF_PROFILE_PRORES_PROXY:
            return MKBETAG('a','p','c','o'); // kCMVideoCodecType_AppleProRes422Proxy
        case FF_PROFILE_PRORES_LT:
            return MKBETAG('a','p','c','s'); // kCMVideoCodecType_AppleProRes422LT
        case FF_PROFILE_PRORES_STANDARD:
            return MKBETAG('a','p','c','n'); // kCMVideoCodecType_AppleProRes422
        case FF_PROFILE_PRORES_HQ:
            return MKBETAG('a','p','c','h'); // kCMVideoCodecType_AppleProRes422HQ
        case FF_PROFILE_PRORES_4444:
            return MKBETAG('a','p','4','h'); // kCMVideoCodecType_AppleProRes4444
        case FF_PROFILE_PRORES_XQ:
            return MKBETAG('a','p','4','x'); // kCMVideoCodecType_AppleProRes4444XQ

        default:
            av_log(avctx, AV_LOG_ERROR, "Unknown profile ID: %"PRId64", using auto\n", profile);
        case FF_PROFILE_UNKNOWN:
            if (desc &&
                ((desc->flags & AV_PIX_FMT_FLAG_ALPHA) ||
                  desc->log2_chroma_w == 0))
                return MKBETAG('a','p','4','h'); // kCMVideoCodecType_AppleProRes4444
            else
                return MKBETAG('a','p','c','n'); // kCMVideoCodecType_AppleProRes422
        }
    default:               return 0;
    }
}

/**
 * Compute the total number of bytes needed to store every parameter set in
 * vid_fmt, with each set prefixed by an Annex B start code. copy_param_sets()
 * below then writes the sets into a buffer of at least that size.
 */
static int get_params_size(
    AVCodecContext              *avctx,
    CMVideoFormatDescriptionRef vid_fmt,
    size_t                      *size)
{
    VTEncContext *vtctx = avctx->priv_data;
    size_t total_size = 0;
    size_t ps_count;
    int is_count_bad = 0;
    size_t i;
    int status;
    status = vtctx->get_param_set_func(vid_fmt,
                                       0,
                                       NULL,
                                       NULL,
                                       &ps_count,
                                       NULL);
    if (status) {
        is_count_bad = 1;
        ps_count     = 0;
        status       = 0;
    }

    for (i = 0; i < ps_count || is_count_bad; i++) {
        const uint8_t *ps;
        size_t ps_size;
        status = vtctx->get_param_set_func(vid_fmt,
                                           i,
                                           &ps,
                                           &ps_size,
                                           NULL,
                                           NULL);
        if (status) {
            /*
             * When ps_count is invalid, status != 0 ends the loop normally
             * unless we didn't get any parameter sets.
             */
            if (i > 0 && is_count_bad) status = 0;

            break;
        }

        total_size += ps_size + sizeof(start_code);
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    *size = total_size;
    return 0;
}

static int copy_param_sets(
    AVCodecContext              *avctx,
    CMVideoFormatDescriptionRef vid_fmt,
    uint8_t                     *dst,
    size_t                      dst_size)
{
    VTEncContext *vtctx = avctx->priv_data;
    size_t ps_count;
    int is_count_bad = 0;
    int status;
    size_t offset = 0;
    size_t i;

    status = vtctx->get_param_set_func(vid_fmt,
                                       0,
                                       NULL,
                                       NULL,
                                       &ps_count,
                                       NULL);
    if (status) {
        is_count_bad = 1;
        ps_count     = 0;
        status       = 0;
    }


    for (i = 0; i < ps_count || is_count_bad; i++) {
        const uint8_t *ps;
        size_t ps_size;
        size_t next_offset;

        status = vtctx->get_param_set_func(vid_fmt,
                                           i,
                                           &ps,
                                           &ps_size,
                                           NULL,
                                           NULL);
        if (status) {
            if (i > 0 && is_count_bad) status = 0;

            break;
        }

        next_offset = offset + sizeof(start_code) + ps_size;
        if (dst_size < next_offset) {
            av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
            return AVERROR_BUFFER_TOO_SMALL;
        }

        memcpy(dst + offset, start_code, sizeof(start_code));
        offset += sizeof(start_code);

        memcpy(dst + offset, ps, ps_size);
        offset = next_offset;
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    return 0;
}

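/* Fill avctx->extradata from the first output sample: start-code-prefixed
 * parameter sets for H.264/HEVC, or the verbatim sample description extension
 * when no parameter-set accessor is available (the ProRes path). */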
static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
{
    VTEncContext *vtctx = avctx->priv_data;
    CMVideoFormatDescriptionRef vid_fmt;
    size_t total_size;
    int status;

    vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
    if (!vid_fmt) {
        av_log(avctx, AV_LOG_ERROR, "No video format.\n");
        return AVERROR_EXTERNAL;
    }

    if (vtctx->get_param_set_func) {
        status = get_params_size(avctx, vid_fmt, &total_size);
        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
            return status;
        }

        avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!avctx->extradata) {
            return AVERROR(ENOMEM);
        }
        avctx->extradata_size = total_size;

        status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
            return status;
        }
    } else {
        CFDataRef data = CMFormatDescriptionGetExtension(vid_fmt, kCMFormatDescriptionExtension_VerbatimSampleDescription);
        if (data && CFGetTypeID(data) == CFDataGetTypeID()) {
            CFIndex size = CFDataGetLength(data);

            avctx->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!avctx->extradata)
                return AVERROR(ENOMEM);
            avctx->extradata_size = size;

            CFDataGetBytes(data, CFRangeMake(0, size), avctx->extradata);
        }
    }

    return 0;
}

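/*
 * VTCompressionSession output callback. VideoToolbox may invoke this on its
 * own thread, so failures are only recorded via set_async_error() and
 * successful samples are handed over through the locked FIFO. The
 * sourceFrameCtx pointer carries the ExtraSEI attached when the frame was
 * submitted.
 */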
static void vtenc_output_callback(
    void *ctx,
    void *sourceFrameCtx,
    OSStatus status,
    VTEncodeInfoFlags flags,
    CMSampleBufferRef sample_buffer)
{
    AVCodecContext *avctx = ctx;
    VTEncContext   *vtctx = avctx->priv_data;
    ExtraSEI *sei = sourceFrameCtx;

    if (vtctx->async_error) {
        return;
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
        set_async_error(vtctx, AVERROR_EXTERNAL);
        return;
    }

    if (!sample_buffer) {
        return;
    }

    if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
        int set_status = set_extradata(avctx, sample_buffer);
        if (set_status) {
            set_async_error(vtctx, set_status);
            return;
        }
    }

    vtenc_q_push(vtctx, sample_buffer, sei);
}

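/* Query the size in bytes of the NAL unit length field used by the sample
 * buffer's format description. */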
static int get_length_code_size(
    AVCodecContext    *avctx,
    CMSampleBufferRef sample_buffer,
    size_t            *size)
{
    VTEncContext *vtctx = avctx->priv_data;
    CMVideoFormatDescriptionRef vid_fmt;
    int isize;
    int status;

    vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
    if (!vid_fmt) {
        av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
        return AVERROR_EXTERNAL;
    }

    status = vtctx->get_param_set_func(vid_fmt,
                                       0,
                                       NULL,
                                       NULL,
                                       NULL,
                                       &isize);
    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    *size = isize;
    return 0;
}

/*
 * Returns true on success.
 *
 * If profile_level_val is NULL and this method returns true, don't specify the
 * profile/level to the encoder.
 */
static bool get_vt_h264_profile_level(AVCodecContext *avctx,
                                      CFStringRef    *profile_level_val)
{
    VTEncContext *vtctx = avctx->priv_data;
    int64_t profile = vtctx->profile;

    if (profile == H264_PROF_AUTO && vtctx->level) {
        //Need to pick a profile if level is not auto-selected.
        profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
    }

    *profile_level_val = NULL;

    switch (profile) {
        case H264_PROF_AUTO:
            return true;

        case H264_PROF_BASELINE:
            switch (vtctx->level) {
                case  0: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
                case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
                case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
                case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
                case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
                case 40: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_4_0;       break;
                case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
                case 42: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_4_2;       break;
                case 50: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_5_0;       break;
                case 51: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_5_1;       break;
                case 52: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Baseline_5_2;       break;
            }
            break;

        case H264_PROF_MAIN:
            switch (vtctx->level) {
                case  0: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
                case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
                case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
                case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
                case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
                case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
                case 42: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Main_4_2;       break;
                case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
                case 51: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Main_5_1;       break;
                case 52: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Main_5_2;       break;
            }
            break;

        case H264_PROF_HIGH:
            switch (vtctx->level) {
                case  0: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
                case 30: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_3_0;       break;
                case 31: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_3_1;       break;
                case 32: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_3_2;       break;
                case 40: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_4_0;       break;
                case 41: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_4_1;       break;
                case 42: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_4_2;       break;
                case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
                case 51: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_5_1;       break;
                case 52: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_High_5_2;       break;
            }
            break;
        case H264_PROF_EXTENDED:
            switch (vtctx->level) {
                case  0: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Extended_AutoLevel; break;
                case 50: *profile_level_val =
                                  compat_keys.kVTProfileLevel_H264_Extended_5_0;       break;
            }
            break;
    }

    if (!*profile_level_val) {
        av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
        return false;
    }

    return true;
}

/*
 * Returns true on success.
 *
 * If profile_level_val is NULL and this method returns true, don't specify the
 * profile/level to the encoder.
 */
static bool get_vt_hevc_profile_level(AVCodecContext *avctx,
                                      CFStringRef    *profile_level_val)
{
    VTEncContext *vtctx = avctx->priv_data;
    int64_t profile = vtctx->profile;

    *profile_level_val = NULL;

    switch (profile) {
        case HEVC_PROF_AUTO:
            return true;
        case HEVC_PROF_MAIN:
            *profile_level_val =
                compat_keys.kVTProfileLevel_HEVC_Main_AutoLevel;
            break;
        case HEVC_PROF_MAIN10:
            *profile_level_val =
                compat_keys.kVTProfileLevel_HEVC_Main10_AutoLevel;
            break;
    }

    if (!*profile_level_val) {
        av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
        return false;
    }

    return true;
}

static int get_cv_pixel_format(AVCodecContext* avctx,
                               enum AVPixelFormat fmt,
                               enum AVColorRange range,
                               int* av_pixel_format,
                               int* range_guessed)
{
    if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
                                        range != AVCOL_RANGE_JPEG;

    //MPEG range is used when no range is set
    *av_pixel_format = av_map_videotoolbox_format_from_pixfmt2(fmt, range == AVCOL_RANGE_JPEG);

    return *av_pixel_format ? 0 : AVERROR(EINVAL);
}

static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
    VTEncContext *vtctx = avctx->priv_data;

    if (vtctx->color_primaries) {
        CFDictionarySetValue(dict,
                             kCVImageBufferColorPrimariesKey,
                             vtctx->color_primaries);
    }

    if (vtctx->transfer_function) {
        CFDictionarySetValue(dict,
                             kCVImageBufferTransferFunctionKey,
                             vtctx->transfer_function);
    }

    if (vtctx->ycbcr_matrix) {
        CFDictionarySetValue(dict,
                             kCVImageBufferYCbCrMatrixKey,
                             vtctx->ycbcr_matrix);
    }
}

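/* Build the pixel buffer attributes dictionary (pixel format, width, height
 * and color attachments) handed to VTCompressionSessionCreate() as source
 * image buffer attributes. */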
static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
                                       CFMutableDictionaryRef* dict)
{
    CFNumberRef cv_color_format_num = NULL;
    CFNumberRef width_num = NULL;
    CFNumberRef height_num = NULL;
    CFMutableDictionaryRef pixel_buffer_info = NULL;
    int cv_color_format;
    int status = get_cv_pixel_format(avctx,
                                     avctx->pix_fmt,
                                     avctx->color_range,
                                     &cv_color_format,
                                     NULL);
    if (status) return status;

    pixel_buffer_info = CFDictionaryCreateMutable(
                            kCFAllocatorDefault,
                            20,
                            &kCFCopyStringDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);

    if (!pixel_buffer_info) goto pbinfo_nomem;

    cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
                                         kCFNumberSInt32Type,
                                         &cv_color_format);
    if (!cv_color_format_num) goto pbinfo_nomem;

    CFDictionarySetValue(pixel_buffer_info,
                         kCVPixelBufferPixelFormatTypeKey,
                         cv_color_format_num);
    vt_release_num(&cv_color_format_num);

    width_num = CFNumberCreate(kCFAllocatorDefault,
                               kCFNumberSInt32Type,
                               &avctx->width);
    if (!width_num) goto pbinfo_nomem;

    CFDictionarySetValue(pixel_buffer_info,
                         kCVPixelBufferWidthKey,
                         width_num);
    vt_release_num(&width_num);

    height_num = CFNumberCreate(kCFAllocatorDefault,
                                kCFNumberSInt32Type,
                                &avctx->height);
    if (!height_num) goto pbinfo_nomem;

    CFDictionarySetValue(pixel_buffer_info,
                         kCVPixelBufferHeightKey,
                         height_num);
    vt_release_num(&height_num);

    add_color_attr(avctx, pixel_buffer_info);

    *dict = pixel_buffer_info;
    return 0;

pbinfo_nomem:
    vt_release_num(&cv_color_format_num);
    vt_release_num(&width_num);
    vt_release_num(&height_num);
    if (pixel_buffer_info) CFRelease(pixel_buffer_info);

    return AVERROR(ENOMEM);
}

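/*
 * The three helpers below translate FFmpeg color metadata (primaries,
 * transfer characteristic, YCbCr matrix) into the corresponding CoreVideo
 * attachment constants. A NULL output means "unspecified"; unsupported
 * values are logged and rejected with -1.
 */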
static int get_cv_color_primaries(AVCodecContext *avctx,
                                  CFStringRef *primaries)
{
    enum AVColorPrimaries pri = avctx->color_primaries;
    switch (pri) {
        case AVCOL_PRI_UNSPECIFIED:
            *primaries = NULL;
            break;

        case AVCOL_PRI_BT470BG:
            *primaries = kCVImageBufferColorPrimaries_EBU_3213;
            break;

        case AVCOL_PRI_SMPTE170M:
            *primaries = kCVImageBufferColorPrimaries_SMPTE_C;
            break;

        case AVCOL_PRI_BT709:
            *primaries = kCVImageBufferColorPrimaries_ITU_R_709_2;
            break;

        case AVCOL_PRI_BT2020:
            *primaries = compat_keys.kCVImageBufferColorPrimaries_ITU_R_2020;
            break;

        default:
            av_log(avctx, AV_LOG_ERROR, "Color primaries %s is not supported.\n", av_color_primaries_name(pri));
            *primaries = NULL;
            return -1;
    }

    return 0;
}

static int get_cv_transfer_function(AVCodecContext *avctx,
                                    CFStringRef *transfer_fnc,
                                    CFNumberRef *gamma_level)
{
    enum AVColorTransferCharacteristic trc = avctx->color_trc;
    Float32 gamma;
    *gamma_level = NULL;

    switch (trc) {
        case AVCOL_TRC_UNSPECIFIED:
            *transfer_fnc = NULL;
            break;

        case AVCOL_TRC_BT709:
            *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_709_2;
            break;

        case AVCOL_TRC_SMPTE240M:
            *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
            break;

#if HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ
        case AVCOL_TRC_SMPTE2084:
            *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ;
            break;
#endif
#if HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR
        case AVCOL_TRC_LINEAR:
            *transfer_fnc = kCVImageBufferTransferFunction_Linear;
            break;
#endif
#if HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG
        case AVCOL_TRC_ARIB_STD_B67:
            *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_2100_HLG;
            break;
#endif

        case AVCOL_TRC_GAMMA22:
            gamma = 2.2;
            *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
            *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
            break;

        case AVCOL_TRC_GAMMA28:
            gamma = 2.8;
            *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
            *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
            break;

        case AVCOL_TRC_BT2020_10:
        case AVCOL_TRC_BT2020_12:
            *transfer_fnc = compat_keys.kCVImageBufferTransferFunction_ITU_R_2020;
            break;

        default:
            *transfer_fnc = NULL;
            av_log(avctx, AV_LOG_ERROR, "Transfer function %s is not supported.\n", av_color_transfer_name(trc));
            return -1;
    }

    return 0;
}

static int get_cv_ycbcr_matrix(AVCodecContext *avctx, CFStringRef *matrix) {
    switch(avctx->colorspace) {
        case AVCOL_SPC_BT709:
            *matrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
            break;

        case AVCOL_SPC_UNSPECIFIED:
        case AVCOL_SPC_RGB:
            *matrix = NULL;
            break;

        case AVCOL_SPC_BT470BG:
        case AVCOL_SPC_SMPTE170M:
            *matrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
            break;

        case AVCOL_SPC_SMPTE240M:
            *matrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
            break;

        case AVCOL_SPC_BT2020_NCL:
            *matrix = compat_keys.kCVImageBufferYCbCrMatrix_ITU_R_2020;
            break;

        default:
            av_log(avctx, AV_LOG_ERROR, "Color space %s is not supported.\n", av_color_space_name(avctx->colorspace));
            return -1;
    }

    return 0;
}

// constant quality only on Macs with Apple Silicon
static bool vtenc_qscale_enabled(void)
{
    return !TARGET_OS_IPHONE && TARGET_CPU_ARM64;
}

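/*
 * Create the VTCompressionSession and apply every configured property:
 * rate control (constant quality or average/max bitrate), alpha quality,
 * profile/level, key-frame interval, frame reordering, pixel aspect ratio,
 * color metadata, entropy mode and the realtime flag. Properties a given
 * encoder does not support are downgraded to warnings where the stream can
 * still be produced.
 */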
static int vtenc_create_encoder(AVCodecContext   *avctx,
                                CMVideoCodecType codec_type,
                                CFStringRef      profile_level,
                                CFNumberRef      gamma_level,
                                CFDictionaryRef  enc_info,
                                CFDictionaryRef  pixel_buffer_info,
                                VTCompressionSessionRef *session)
{
    VTEncContext *vtctx = avctx->priv_data;
    SInt32       bit_rate = avctx->bit_rate;
    SInt32       max_rate = avctx->rc_max_rate;
    Float32      quality = avctx->global_quality / FF_QP2LAMBDA;
    CFNumberRef  bit_rate_num;
    CFNumberRef  quality_num;
    CFNumberRef  bytes_per_second;
    CFNumberRef  one_second;
    CFArrayRef   data_rate_limits;
    int64_t      bytes_per_second_value = 0;
    int64_t      one_second_value = 0;
    void         *nums[2];

    int status = VTCompressionSessionCreate(kCFAllocatorDefault,
                                            avctx->width,
                                            avctx->height,
                                            codec_type,
                                            enc_info,
                                            pixel_buffer_info,
                                            kCFAllocatorDefault,
                                            vtenc_output_callback,
                                            avctx,
                                            session);

    if (status || !vtctx->session) {
        av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);

#if !TARGET_OS_IPHONE
        if (!vtctx->allow_sw) {
            av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
        }
#endif

        return AVERROR_EXTERNAL;
    }

    if (avctx->flags & AV_CODEC_FLAG_QSCALE && !vtenc_qscale_enabled()) {
        av_log(avctx, AV_LOG_ERROR, "Error: -q:v qscale not available for encoder. Use -b:v bitrate instead.\n");
        return AVERROR_EXTERNAL;
    }

    if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
        quality = quality >= 100 ? 1.0 : quality / 100;
        quality_num = CFNumberCreate(kCFAllocatorDefault,
                                     kCFNumberFloat32Type,
                                     &quality);
        if (!quality_num) return AVERROR(ENOMEM);

        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_Quality,
                                      quality_num);
        CFRelease(quality_num);
    } else if (avctx->codec_id != AV_CODEC_ID_PRORES) {
        bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
                                      kCFNumberSInt32Type,
                                      &bit_rate);
        if (!bit_rate_num) return AVERROR(ENOMEM);

        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_AverageBitRate,
                                      bit_rate_num);
        CFRelease(bit_rate_num);
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    if (vtctx->prio_speed >= 0) {
        status = VTSessionSetProperty(vtctx->session,
                                      compat_keys.kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
                                      vtctx->prio_speed ? kCFBooleanTrue : kCFBooleanFalse);
        if (status) {
            av_log(avctx, AV_LOG_WARNING, "PrioritizeEncodingSpeedOverQuality property is not supported on this device. Ignoring.\n");
        }
    }

    if ((vtctx->codec_id == AV_CODEC_ID_H264 || vtctx->codec_id == AV_CODEC_ID_HEVC)
            && max_rate > 0) {
        bytes_per_second_value = max_rate >> 3;
        bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
                                          kCFNumberSInt64Type,
                                          &bytes_per_second_value);
        if (!bytes_per_second) {
            return AVERROR(ENOMEM);
        }
        one_second_value = 1;
        one_second = CFNumberCreate(kCFAllocatorDefault,
                                    kCFNumberSInt64Type,
                                    &one_second_value);
        if (!one_second) {
            CFRelease(bytes_per_second);
            return AVERROR(ENOMEM);
        }
        nums[0] = (void *)bytes_per_second;
        nums[1] = (void *)one_second;
        data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
                                         (const void **)nums,
                                         2,
                                         &kCFTypeArrayCallBacks);

        if (!data_rate_limits) {
            CFRelease(bytes_per_second);
            CFRelease(one_second);
            return AVERROR(ENOMEM);
        }
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_DataRateLimits,
                                      data_rate_limits);

        CFRelease(bytes_per_second);
        CFRelease(one_second);
        CFRelease(data_rate_limits);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
            // kVTCompressionPropertyKey_DataRateLimits is available for HEVC
            // now but not on old release. There is no document about since
            // when. So ignore the error if it failed for hevc.
            if (vtctx->codec_id != AV_CODEC_ID_HEVC)
                return AVERROR_EXTERNAL;
        }
    }

    if (vtctx->codec_id == AV_CODEC_ID_HEVC) {
        if (avctx->pix_fmt == AV_PIX_FMT_BGRA && vtctx->alpha_quality > 0.0) {
            CFNumberRef alpha_quality_num = CFNumberCreate(kCFAllocatorDefault,
                                                           kCFNumberDoubleType,
                                                           &vtctx->alpha_quality);
            if (!alpha_quality_num) return AVERROR(ENOMEM);

            status = VTSessionSetProperty(vtctx->session,
                                          compat_keys.kVTCompressionPropertyKey_TargetQualityForAlpha,
                                          alpha_quality_num);
            CFRelease(alpha_quality_num);
        }
    }

    if (profile_level) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_ProfileLevel,
                                      profile_level);
        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d. Output will be encoded using a supported profile/level combination.\n", status);
        }
    }

    if (avctx->gop_size > 0 && avctx->codec_id != AV_CODEC_ID_PRORES) {
        CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
                                              kCFNumberIntType,
                                              &avctx->gop_size);
        if (!interval) {
            return AVERROR(ENOMEM);
        }

        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_MaxKeyFrameInterval,
                                      interval);
        CFRelease(interval);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
            return AVERROR_EXTERNAL;
        }
    }

    if (vtctx->frames_before) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_MoreFramesBeforeStart,
                                      kCFBooleanTrue);

        if (status == kVTPropertyNotSupportedErr) {
            av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
        } else if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
        }
    }

    if (vtctx->frames_after) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_MoreFramesAfterEnd,
                                      kCFBooleanTrue);

        if (status == kVTPropertyNotSupportedErr) {
            av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
        } else if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
        }
    }

    if (avctx->sample_aspect_ratio.num != 0) {
        CFNumberRef num;
        CFNumberRef den;
        CFMutableDictionaryRef par;
        AVRational *avpar = &avctx->sample_aspect_ratio;

        av_reduce(&avpar->num, &avpar->den,
                   avpar->num,  avpar->den,
                  0xFFFFFFFF);

        num = CFNumberCreate(kCFAllocatorDefault,
                             kCFNumberIntType,
                             &avpar->num);

        den = CFNumberCreate(kCFAllocatorDefault,
                             kCFNumberIntType,
                             &avpar->den);



        par = CFDictionaryCreateMutable(kCFAllocatorDefault,
                                        2,
                                        &kCFCopyStringDictionaryKeyCallBacks,
                                        &kCFTypeDictionaryValueCallBacks);

        if (!par || !num || !den) {
            if (par) CFRelease(par);
            if (num) CFRelease(num);
            if (den) CFRelease(den);

            return AVERROR(ENOMEM);
        }

        CFDictionarySetValue(
            par,
            kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
            num);

        CFDictionarySetValue(
            par,
            kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
            den);

        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_PixelAspectRatio,
                                      par);

        CFRelease(par);
        CFRelease(num);
        CFRelease(den);

        if (status) {
            av_log(avctx,
                   AV_LOG_ERROR,
                   "Error setting pixel aspect ratio to %d:%d: %d.\n",
                   avctx->sample_aspect_ratio.num,
                   avctx->sample_aspect_ratio.den,
                   status);

            return AVERROR_EXTERNAL;
        }
    }


    if (vtctx->transfer_function) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_TransferFunction,
                                      vtctx->transfer_function);

        if (status) {
            av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
        }
    }


    if (vtctx->ycbcr_matrix) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_YCbCrMatrix,
                                      vtctx->ycbcr_matrix);

        if (status) {
            av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
        }
    }


    if (vtctx->color_primaries) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_ColorPrimaries,
                                      vtctx->color_primaries);

        if (status) {
            av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
        }
    }

    if (gamma_level) {
        status = VTSessionSetProperty(vtctx->session,
                                      kCVImageBufferGammaLevelKey,
                                      gamma_level);

        if (status) {
            av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
        }
    }

    if (!vtctx->has_b_frames && avctx->codec_id != AV_CODEC_ID_PRORES) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_AllowFrameReordering,
                                      kCFBooleanFalse);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
            return AVERROR_EXTERNAL;
        }
    }

    if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
        CFStringRef entropy = vtctx->entropy == VT_CABAC ?
                                compat_keys.kVTH264EntropyMode_CABAC:
                                compat_keys.kVTH264EntropyMode_CAVLC;

        status = VTSessionSetProperty(vtctx->session,
                                      compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
                                      entropy);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
        }
    }

    if (vtctx->realtime >= 0) {
        status = VTSessionSetProperty(vtctx->session,
                                      compat_keys.kVTCompressionPropertyKey_RealTime,
                                      vtctx->realtime ? kCFBooleanTrue : kCFBooleanFalse);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
        }
    }

    status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    return 0;
}

1425 static int vtenc_configure_encoder(AVCodecContext *avctx)
1426 {
1427     CFMutableDictionaryRef enc_info;
1428     CFMutableDictionaryRef pixel_buffer_info;
1429     CMVideoCodecType       codec_type;
1430     VTEncContext           *vtctx = avctx->priv_data;
1431     CFStringRef            profile_level = NULL;
1432     CFNumberRef            gamma_level = NULL;
1433     int                    status;
1434 
1435     codec_type = get_cm_codec_type(avctx, vtctx->profile, vtctx->alpha_quality);
1436     if (!codec_type) {
1437         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1438         return AVERROR(EINVAL);
1439     }
1440 
1441 #if defined(MAC_OS_X_VERSION_10_9) && !TARGET_OS_IPHONE && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_9)
1442     if (avctx->codec_id == AV_CODEC_ID_PRORES) {
1443         if (__builtin_available(macOS 10.10, *)) {
1444             VTRegisterProfessionalVideoWorkflowVideoEncoders();
1445         }
1446     }
1447 #endif
1448 
1449     vtctx->codec_id = avctx->codec_id;
1450 
1451     if (vtctx->codec_id == AV_CODEC_ID_H264) {
1452         vtctx->get_param_set_func = CMVideoFormatDescriptionGetH264ParameterSetAtIndex;
1453 
1454         vtctx->has_b_frames = avctx->max_b_frames > 0;
1455         if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
1456             av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1457             vtctx->has_b_frames = 0;
1458         }
1459 
1460         if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
1461             av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1462             vtctx->entropy = VT_ENTROPY_NOT_SET;
1463         }
1464 
1465         if (!get_vt_h264_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1466     } else if (vtctx->codec_id == AV_CODEC_ID_HEVC) {
1467         vtctx->get_param_set_func = compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
1468         if (!vtctx->get_param_set_func) return AVERROR(EINVAL);
1469         if (!get_vt_hevc_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1470         // HEVC has b-pyramid
1471         vtctx->has_b_frames = avctx->max_b_frames > 0 ? 2 : 0;
1472     } else if (vtctx->codec_id == AV_CODEC_ID_PRORES) {
1473         avctx->codec_tag = av_bswap32(codec_type);
1474     }
1475 
1476     enc_info = CFDictionaryCreateMutable(
1477         kCFAllocatorDefault,
1478         20,
1479         &kCFCopyStringDictionaryKeyCallBacks,
1480         &kCFTypeDictionaryValueCallBacks
1481     );
1482 
1483     if (!enc_info) return AVERROR(ENOMEM);
1484 
1485 #if !TARGET_OS_IPHONE
1486     if(vtctx->require_sw) {
1487         CFDictionarySetValue(enc_info,
1488                              compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1489                              kCFBooleanFalse);
1490     } else if (!vtctx->allow_sw) {
1491         CFDictionarySetValue(enc_info,
1492                              compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1493                              kCFBooleanTrue);
1494     } else {
1495         CFDictionarySetValue(enc_info,
1496                              compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1497                              kCFBooleanTrue);
1498     }
1499 #endif
1500 
1501     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1502         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1503         if (status)
1504             goto init_cleanup;
1505     } else {
1506         pixel_buffer_info = NULL;
1507     }
1508 
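         /* A dts_delta of -1 means the PTS->DTS offset is unknown until enough
          * frames have been submitted to measure the encoder's reordering delay. */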
1509     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1510 
1511     get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
1512     get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
1513     get_cv_color_primaries(avctx, &vtctx->color_primaries);
1514 
1515 
1516     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1517         status = vtenc_populate_extradata(avctx,
1518                                           codec_type,
1519                                           profile_level,
1520                                           gamma_level,
1521                                           enc_info,
1522                                           pixel_buffer_info);
1523         if (status)
1524             goto init_cleanup;
1525     }
1526 
1527     status = vtenc_create_encoder(avctx,
1528                                   codec_type,
1529                                   profile_level,
1530                                   gamma_level,
1531                                   enc_info,
1532                                   pixel_buffer_info,
1533                                   &vtctx->session);
1534 
1535 init_cleanup:
1536     if (gamma_level)
1537         CFRelease(gamma_level);
1538 
1539     if (pixel_buffer_info)
1540         CFRelease(pixel_buffer_info);
1541 
1542     CFRelease(enc_info);
1543 
1544     return status;
1545 }
1546 
1547 static av_cold int vtenc_init(AVCodecContext *avctx)
1548 {
1549     VTEncContext    *vtctx = avctx->priv_data;
1550     CFBooleanRef    has_b_frames_cfbool;
1551     int             status;
1552 
1553     pthread_once(&once_ctrl, loadVTEncSymbols);
1554 
1555     pthread_mutex_init(&vtctx->lock, NULL);
1556     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1557 
1558     vtctx->session = NULL;
1559     status = vtenc_configure_encoder(avctx);
1560     if (status) return status;
1561 
1562     status = VTSessionCopyProperty(vtctx->session,
1563                                    kVTCompressionPropertyKey_AllowFrameReordering,
1564                                    kCFAllocatorDefault,
1565                                    &has_b_frames_cfbool);
1566 
1567     if (!status && has_b_frames_cfbool) {
1568         //Some devices don't output B-frames for main profile, even if requested.
1569         // HEVC has b-pyramid
1570         if (CFBooleanGetValue(has_b_frames_cfbool))
1571             vtctx->has_b_frames = avctx->codec_id == AV_CODEC_ID_HEVC ? 2 : 1;
1572         else
1573             vtctx->has_b_frames = 0;
1574         CFRelease(has_b_frames_cfbool);
1575     }
1576     avctx->has_b_frames = vtctx->has_b_frames;
1577 
1578     return 0;
1579 }
1580 
1581 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1582 {
1583     CFArrayRef      attachments;
1584     CFDictionaryRef attachment;
1585     CFBooleanRef    not_sync;
1586     CFIndex         len;
1587 
1588     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1589     len = !attachments ? 0 : CFArrayGetCount(attachments);
1590 
1591     if (!len) {
1592         *is_key_frame = true;
1593         return;
1594     }
1595 
1596     attachment = CFArrayGetValueAtIndex(attachments, 0);
1597 
1598     if (CFDictionaryGetValueIfPresent(attachment,
1599                                       kCMSampleAttachmentKey_NotSync,
1600                                       (const void **)&not_sync))
1601     {
1602         *is_key_frame = !CFBooleanGetValue(not_sync);
1603     } else {
1604         *is_key_frame = true;
1605     }
1606 }
1607 
1608 static int is_post_sei_nal_type(int nal_type){
1609     return nal_type != H264_NAL_SEI &&
1610            nal_type != H264_NAL_SPS &&
1611            nal_type != H264_NAL_PPS &&
1612            nal_type != H264_NAL_AUD;
1613 }
1614 
1615 /*
1616  * Finds the end of the SEI messages in an SEI NAL unit and stores it in *sei_end.
1617  * Returns the byte count spanned (0 if the NAL unit is not SEI, negative on malformed data).
1618  */
1619 static int find_sei_end(AVCodecContext *avctx,
1620                         uint8_t        *nal_data,
1621                         size_t          nal_size,
1622                         uint8_t       **sei_end)
1623 {
1624     int nal_type;
1625     size_t sei_payload_size = 0;
1626     int sei_payload_type = 0;
1627     *sei_end = NULL;
1628     uint8_t *nal_start = nal_data;
1629 
1630     if (!nal_size)
1631         return 0;
1632 
1633     nal_type = *nal_data & 0x1F;
1634     if (nal_type != H264_NAL_SEI)
1635         return 0;
1636 
1637     nal_data++;
1638     nal_size--;
1639 
1640     if (nal_data[nal_size - 1] == 0x80)
1641         nal_size--;
1642 
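         /* Walk the SEI messages: accumulate the payload type and payload size
          * bytes of each message, then skip over the payload itself. */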
1643     while (nal_size > 0 && *nal_data > 0) {
1644         do{
1645             sei_payload_type += *nal_data;
1646             nal_data++;
1647             nal_size--;
1648         } while (nal_size > 0 && *nal_data == 0xFF);
1649 
1650         if (!nal_size) {
1651             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1652             return AVERROR_INVALIDDATA;
1653         }
1654 
1655         do{
1656             sei_payload_size += *nal_data;
1657             nal_data++;
1658             nal_size--;
1659         } while (nal_size > 0 && *nal_data == 0xFF);
1660 
1661         if (nal_size < sei_payload_size) {
1662             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1663             return AVERROR_INVALIDDATA;
1664         }
1665 
1666         nal_data += sei_payload_size;
1667         nal_size -= sei_payload_size;
1668     }
1669 
1670     *sei_end = nal_data;
1671 
1672     return nal_data - nal_start + 1;
1673 }
1674 
1675 /**
1676  * Copies the data inserting emulation prevention bytes as needed.
1677  * Existing data in the destination can be taken into account by providing
1678  * dst with a dst_offset > 0.
1679  *
1680  * @return The number of bytes copied on success. On failure, the negative of
1681  *         the number of bytes needed to copy src is returned.
1682  */
1683 static int copy_emulation_prev(const uint8_t *src,
1684                                size_t         src_size,
1685                                uint8_t       *dst,
1686                                ssize_t        dst_offset,
1687                                size_t         dst_size)
1688 {
1689     int zeros = 0;
1690     int wrote_bytes;
1691     uint8_t* dst_start;
1692     uint8_t* dst_end = dst + dst_size;
1693     const uint8_t* src_end = src + src_size;
1694     int start_at = dst_offset > 2 ? dst_offset - 2 : 0;
1695     int i;
1696     for (i = start_at; i < dst_offset && i < dst_size; i++) {
1697         if (!dst[i])
1698             zeros++;
1699         else
1700             zeros = 0;
1701     }
1702 
1703     dst += dst_offset;
1704     dst_start = dst;
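         /* Copy src to dst, inserting an emulation prevention byte (0x03) after any
          * two consecutive zero bytes that would otherwise be followed by a byte <= 3.
          * Writes past dst_end are counted but not performed, so a negative return
          * value tells the caller how much space is actually needed. */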
1705     for (; src < src_end; src++, dst++) {
1706         if (zeros == 2) {
1707             int insert_ep3_byte = *src <= 3;
1708             if (insert_ep3_byte) {
1709                 if (dst < dst_end)
1710                     *dst = 3;
1711                 dst++;
1712             }
1713 
1714             zeros = 0;
1715         }
1716 
1717         if (dst < dst_end)
1718             *dst = *src;
1719 
1720         if (!*src)
1721             zeros++;
1722         else
1723             zeros = 0;
1724     }
1725 
1726     wrote_bytes = dst - dst_start;
1727 
1728     if (dst > dst_end)
1729         return -wrote_bytes;
1730 
1731     return wrote_bytes;
1732 }
1733 
1734 static int write_sei(const ExtraSEI *sei,
1735                      int             sei_type,
1736                      uint8_t        *dst,
1737                      size_t          dst_size)
1738 {
1739     uint8_t *sei_start = dst;
1740     size_t remaining_sei_size = sei->size;
1741     size_t remaining_dst_size = dst_size;
1742     int header_bytes;
1743     int bytes_written;
1744     ssize_t offset;
1745 
1746     if (!remaining_dst_size)
1747         return AVERROR_BUFFER_TOO_SMALL;
1748 
1749     while (sei_type && remaining_dst_size != 0) {
1750         int sei_byte = sei_type > 255 ? 255 : sei_type;
1751         *dst = sei_byte;
1752 
1753         sei_type -= sei_byte;
1754         dst++;
1755         remaining_dst_size--;
1756     }
1757 
1758     if (!dst_size)
1759         return AVERROR_BUFFER_TOO_SMALL;
1760 
1761     while (remaining_sei_size && remaining_dst_size != 0) {
1762         int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1763         *dst = size_byte;
1764 
1765         remaining_sei_size -= size_byte;
1766         dst++;
1767         remaining_dst_size--;
1768     }
1769 
1770     if (remaining_dst_size < sei->size)
1771         return AVERROR_BUFFER_TOO_SMALL;
1772 
1773     header_bytes = dst - sei_start;
1774 
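         /* Copy the SEI payload with emulation prevention bytes; the header bytes
          * already written are passed as dst_offset so copy_emulation_prev can take
          * them into account. */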
1775     offset = header_bytes;
1776     bytes_written = copy_emulation_prev(sei->data,
1777                                         sei->size,
1778                                         sei_start,
1779                                         offset,
1780                                         dst_size);
1781     if (bytes_written < 0)
1782         return AVERROR_BUFFER_TOO_SMALL;
1783 
1784     bytes_written += header_bytes;
1785     return bytes_written;
1786 }
1787 
1788 /**
1789  * Copies NAL units and replaces length codes with
1790  * H.264 Annex B start codes. On failure, the contents of
1791  * dst_data may have been modified.
1792  *
1793  * @param length_code_size Byte length of each length code
1794  * @param sample_buffer NAL units prefixed with length codes.
1795  * @param sei Optional A53 closed captions SEI data.
1796  * @param dst_data Must be zeroed before calling this function.
1797  *                 Contains the copied NAL units prefixed with
1798  *                 start codes when the function returns
1799  *                 successfully.
1800  * @param dst_size Length of dst_data
1801  * @return 0 on success
1802  *         AVERROR_INVALIDDATA if length_code_size is invalid
1803  *         AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1804  *         or if a length_code in src_data specifies data beyond
1805  *         the end of its buffer.
1806  */
1807 static int copy_replace_length_codes(
1808     AVCodecContext *avctx,
1809     size_t        length_code_size,
1810     CMSampleBufferRef sample_buffer,
1811     ExtraSEI      *sei,
1812     uint8_t       *dst_data,
1813     size_t        dst_size)
1814 {
1815     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1816     size_t remaining_src_size = src_size;
1817     size_t remaining_dst_size = dst_size;
1818     size_t src_offset = 0;
1819     int wrote_sei = 0;
1820     int status;
1821     uint8_t size_buf[4];
1822     uint8_t nal_type;
1823     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
1824 
1825     if (length_code_size > 4) {
1826         return AVERROR_INVALIDDATA;
1827     }
1828 
1829     while (remaining_src_size > 0) {
1830         size_t curr_src_len;
1831         size_t curr_dst_len;
1832         size_t box_len = 0;
1833         size_t i;
1834 
1835         uint8_t       *dst_box;
1836 
1837         status = CMBlockBufferCopyDataBytes(block,
1838                                             src_offset,
1839                                             length_code_size,
1840                                             size_buf);
1841         if (status) {
1842             av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
1843             return AVERROR_EXTERNAL;
1844         }
1845 
1846         status = CMBlockBufferCopyDataBytes(block,
1847                                             src_offset + length_code_size,
1848                                             1,
1849                                             &nal_type);
1850 
1851         if (status) {
1852             av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
1853             return AVERROR_EXTERNAL;
1854         }
1855 
1856         nal_type &= 0x1F;
1857 
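             /* Reassemble the big-endian length code into the NAL unit size. */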
1858         for (i = 0; i < length_code_size; i++) {
1859             box_len <<= 8;
1860             box_len |= size_buf[i];
1861         }
1862 
1863         if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
1864             //No SEI NAL unit - insert.
1865             int wrote_bytes;
1866 
1867             memcpy(dst_data, start_code, sizeof(start_code));
1868             dst_data += sizeof(start_code);
1869             remaining_dst_size -= sizeof(start_code);
1870 
1871             *dst_data = H264_NAL_SEI;
1872             dst_data++;
1873             remaining_dst_size--;
1874 
1875             wrote_bytes = write_sei(sei,
1876                                     SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35,
1877                                     dst_data,
1878                                     remaining_dst_size);
1879 
1880             if (wrote_bytes < 0)
1881                 return wrote_bytes;
1882 
1883             remaining_dst_size -= wrote_bytes;
1884             dst_data += wrote_bytes;
1885 
1886             if (remaining_dst_size <= 0)
1887                 return AVERROR_BUFFER_TOO_SMALL;
1888 
1889             *dst_data = 0x80;
1890 
1891             dst_data++;
1892             remaining_dst_size--;
1893 
1894             wrote_sei = 1;
1895         }
1896 
1897         curr_src_len = box_len + length_code_size;
1898         curr_dst_len = box_len + sizeof(start_code);
1899 
1900         if (remaining_src_size < curr_src_len) {
1901             return AVERROR_BUFFER_TOO_SMALL;
1902         }
1903 
1904         if (remaining_dst_size < curr_dst_len) {
1905             return AVERROR_BUFFER_TOO_SMALL;
1906         }
1907 
1908         dst_box = dst_data + sizeof(start_code);
1909 
1910         memcpy(dst_data, start_code, sizeof(start_code));
1911         status = CMBlockBufferCopyDataBytes(block,
1912                                             src_offset + length_code_size,
1913                                             box_len,
1914                                             dst_box);
1915 
1916         if (status) {
1917             av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
1918             return AVERROR_EXTERNAL;
1919         }
1920 
1921         if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
1922             //Found SEI NAL unit - append.
1923             int wrote_bytes;
1924             int old_sei_length;
1925             int extra_bytes;
1926             uint8_t *new_sei;
1927             old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
1928             if (old_sei_length < 0)
1929                 return old_sei_length;
1930 
1931             wrote_bytes = write_sei(sei,
1932                                     SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35,
1933                                     new_sei,
1934                                     remaining_dst_size - old_sei_length);
1935             if (wrote_bytes < 0)
1936                 return wrote_bytes;
1937 
1938             if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
1939                 return AVERROR_BUFFER_TOO_SMALL;
1940 
1941             new_sei[wrote_bytes++] = 0x80;
1942             extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
1943 
1944             dst_data += extra_bytes;
1945             remaining_dst_size -= extra_bytes;
1946 
1947             wrote_sei = 1;
1948         }
1949 
1950         src_offset += curr_src_len;
1951         dst_data += curr_dst_len;
1952 
1953         remaining_src_size -= curr_src_len;
1954         remaining_dst_size -= curr_dst_len;
1955     }
1956 
1957     return 0;
1958 }
1959 
1960 /**
1961  * Returns a sufficient number of bytes to contain the sei data.
1962  * It may be greater than the minimum required.
1963  */
1964 static int get_sei_msg_bytes(const ExtraSEI* sei, int type){
1965     int copied_size;
1966     if (sei->size == 0)
1967         return 0;
1968 
1969     copied_size = -copy_emulation_prev(sei->data,
1970                                        sei->size,
1971                                        NULL,
1972                                        0,
1973                                        0);
1974 
1975     if ((sei->size % 255) == 0) //may result in an extra byte
1976         copied_size++;
1977 
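         /* emulation-prevented payload + payload size bytes + payload type bytes */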
1978     return copied_size + sei->size / 255 + 1 + type / 255 + 1;
1979 }
1980 
1981 static int vtenc_cm_to_avpacket(
1982     AVCodecContext    *avctx,
1983     CMSampleBufferRef sample_buffer,
1984     AVPacket          *pkt,
1985     ExtraSEI          *sei)
1986 {
1987     VTEncContext *vtctx = avctx->priv_data;
1988 
1989     int     status;
1990     bool    is_key_frame;
1991     bool    add_header;
1992     size_t  length_code_size;
1993     size_t  header_size = 0;
1994     size_t  in_buf_size;
1995     size_t  out_buf_size;
1996     size_t  sei_nalu_size = 0;
1997     int64_t dts_delta;
1998     int64_t time_base_num;
1999     int nalu_count;
2000     CMTime  pts;
2001     CMTime  dts;
2002     CMVideoFormatDescriptionRef vid_fmt;
2003 
2004     vtenc_get_frame_info(sample_buffer, &is_key_frame);
2005 
2006     if (vtctx->get_param_set_func) {
2007         status = get_length_code_size(avctx, sample_buffer, &length_code_size);
2008         if (status) return status;
2009 
2010         add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
2011 
2012         if (add_header) {
2013             vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
2014             if (!vid_fmt) {
2015                 av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
2016                 return AVERROR_EXTERNAL;
2017             }
2018 
2019             status = get_params_size(avctx, vid_fmt, &header_size);
2020             if (status) return status;
2021         }
2022 
2023         status = count_nalus(length_code_size, sample_buffer, &nalu_count);
2024         if(status)
2025             return status;
2026 
2027         if (sei) {
2028             size_t msg_size = get_sei_msg_bytes(sei,
2029                                                 SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35);
2030 
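                 /* start code + NAL header byte + SEI message + trailing rbsp stop byte */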
2031             sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
2032         }
2033 
2034         in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
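             /* Parameter sets (if any) + sample data + inserted SEI NAL unit, adjusted
              * for replacing each length code with an Annex B start code. */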
2035         out_buf_size = header_size +
2036                        in_buf_size +
2037                        sei_nalu_size +
2038                        nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
2039 
2040         status = ff_get_encode_buffer(avctx, pkt, out_buf_size, 0);
2041         if (status < 0)
2042             return status;
2043 
2044         if (add_header) {
2045             status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
2046             if(status) return status;
2047         }
2048 
2049         status = copy_replace_length_codes(
2050             avctx,
2051             length_code_size,
2052             sample_buffer,
2053             sei,
2054             pkt->data + header_size,
2055             pkt->size - header_size
2056         );
2057 
2058         if (status) {
2059             av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
2060             return status;
2061         }
2062     } else {
2063         size_t len;
2064         CMBlockBufferRef buf = CMSampleBufferGetDataBuffer(sample_buffer);
2065         if (!buf) {
2066             av_log(avctx, AV_LOG_ERROR, "Error getting block buffer\n");
2067             return AVERROR_EXTERNAL;
2068         }
2069 
2070         len = CMBlockBufferGetDataLength(buf);
2071 
2072         status = ff_get_encode_buffer(avctx, pkt, len, 0);
2073         if (status < 0)
2074             return status;
2075 
2076         status = CMBlockBufferCopyDataBytes(buf, 0, len, pkt->data);
2077         if (status) {
2078             av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
2079             return AVERROR_EXTERNAL;
2080         }
2081     }
2082 
2083     if (is_key_frame) {
2084         pkt->flags |= AV_PKT_FLAG_KEY;
2085     }
2086 
2087     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
2088     dts = CMSampleBufferGetDecodeTimeStamp      (sample_buffer);
2089 
2090     if (CMTIME_IS_INVALID(dts)) {
2091         if (!vtctx->has_b_frames) {
2092             dts = pts;
2093         } else {
2094             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
2095             return AVERROR_EXTERNAL;
2096         }
2097     }
2098 
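         /* CMTime values were created with value = pts * time_base.num, so dividing by
          * time_base.num returns them to the codec time base; DTS is shifted back by the
          * reordering delay so it does not exceed PTS when B-frames are present. */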
2099     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
2100     time_base_num = avctx->time_base.num;
2101     pkt->pts = pts.value / time_base_num;
2102     pkt->dts = dts.value / time_base_num - dts_delta;
2103 
2104     return 0;
2105 }
2106 
2107 /*
2108  * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
2109  * containing all planes if so.
2110  */
2111 static int get_cv_pixel_info(
2112     AVCodecContext *avctx,
2113     const AVFrame  *frame,
2114     int            *color,
2115     int            *plane_count,
2116     size_t         *widths,
2117     size_t         *heights,
2118     size_t         *strides,
2119     size_t         *contiguous_buf_size)
2120 {
2121     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
2122     VTEncContext *vtctx = avctx->priv_data;
2123     int av_format       = frame->format;
2124     int av_color_range  = frame->color_range;
2125     int i;
2126     int range_guessed;
2127     int status;
2128 
2129     if (!desc)
2130         return AVERROR(EINVAL);
2131 
2132     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
2133     if (status) {
2134         av_log(avctx,
2135             AV_LOG_ERROR,
2136             "Could not get pixel format for color format '%s' range '%s'.\n",
2137             av_get_pix_fmt_name(av_format),
2138             av_color_range > AVCOL_RANGE_UNSPECIFIED &&
2139             av_color_range < AVCOL_RANGE_NB ?
2140                av_color_range_name(av_color_range) :
2141                "Unknown");
2142 
2143         return AVERROR(EINVAL);
2144     }
2145 
2146     if (range_guessed) {
2147         if (!vtctx->warned_color_range) {
2148             vtctx->warned_color_range = true;
2149             av_log(avctx,
2150                    AV_LOG_WARNING,
2151                    "Color range not set for %s. Using MPEG range.\n",
2152                    av_get_pix_fmt_name(av_format));
2153         }
2154     }
2155 
2156     *plane_count = av_pix_fmt_count_planes(avctx->pix_fmt);
2157 
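         /* Compute per-plane dimensions; chroma planes are rounded up according to the
          * format's subsampling factors, while the alpha plane is never subsampled. */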
2158     for (i = 0; i < desc->nb_components; i++) {
2159         int p = desc->comp[i].plane;
2160         bool hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA);
2161         bool isAlpha = hasAlpha && (p + 1 == *plane_count);
2162         bool isChroma = (p != 0) && !isAlpha;
2163         int shiftw = isChroma ? desc->log2_chroma_w : 0;
2164         int shifth = isChroma ? desc->log2_chroma_h : 0;
2165         widths[p]  = (avctx->width  + ((1 << shiftw) >> 1)) >> shiftw;
2166         heights[p] = (avctx->height + ((1 << shifth) >> 1)) >> shifth;
2167         strides[p] = frame->linesize[p];
2168     }
2169 
2170     *contiguous_buf_size = 0;
2171     for (i = 0; i < *plane_count; i++) {
2172         if (i < *plane_count - 1 &&
2173             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
2174             *contiguous_buf_size = 0;
2175             break;
2176         }
2177 
2178         *contiguous_buf_size += strides[i] * heights[i];
2179     }
2180 
2181     return 0;
2182 }
2183 
2184 //Not used on OSX - frame is never copied.
2185 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
2186                                         const AVFrame    *frame,
2187                                         CVPixelBufferRef cv_img,
2188                                         const size_t     *plane_strides,
2189                                         const size_t     *plane_rows)
2190 {
2191     int i, j;
2192     size_t plane_count;
2193     int status;
2194     int rows;
2195     int src_stride;
2196     int dst_stride;
2197     uint8_t *src_addr;
2198     uint8_t *dst_addr;
2199     size_t copy_bytes;
2200 
2201     status = CVPixelBufferLockBaseAddress(cv_img, 0);
2202     if (status) {
2203         av_log(
2204             avctx,
2205             AV_LOG_ERROR,
2206             "Error: Could not lock base address of CVPixelBuffer: %d.\n",
2207             status
2208         );
2209     }
2210 
2211     if (CVPixelBufferIsPlanar(cv_img)) {
2212         plane_count = CVPixelBufferGetPlaneCount(cv_img);
2213         for (i = 0; frame->data[i]; i++) {
2214             if (i == plane_count) {
2215                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
2216                 av_log(avctx,
2217                     AV_LOG_ERROR,
2218                     "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
2219                 );
2220 
2221                 return AVERROR_EXTERNAL;
2222             }
2223 
2224             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
2225             src_addr = (uint8_t*)frame->data[i];
2226             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
2227             src_stride = plane_strides[i];
2228             rows = plane_rows[i];
2229 
2230             if (dst_stride == src_stride) {
2231                 memcpy(dst_addr, src_addr, src_stride * rows);
2232             } else {
2233                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2234 
2235                 for (j = 0; j < rows; j++) {
2236                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2237                 }
2238             }
2239         }
2240     } else {
2241         if (frame->data[1]) {
2242             CVPixelBufferUnlockBaseAddress(cv_img, 0);
2243             av_log(avctx,
2244                 AV_LOG_ERROR,
2245                 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
2246             );
2247 
2248             return AVERROR_EXTERNAL;
2249         }
2250 
2251         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
2252         src_addr = (uint8_t*)frame->data[0];
2253         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
2254         src_stride = plane_strides[0];
2255         rows = plane_rows[0];
2256 
2257         if (dst_stride == src_stride) {
2258             memcpy(dst_addr, src_addr, src_stride * rows);
2259         } else {
2260             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2261 
2262             for (j = 0; j < rows; j++) {
2263                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2264             }
2265         }
2266     }
2267 
2268     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
2269     if (status) {
2270         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
2271         return AVERROR_EXTERNAL;
2272     }
2273 
2274     return 0;
2275 }
2276 
2277 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
2278                                   const AVFrame    *frame,
2279                                   CVPixelBufferRef *cv_img)
2280 {
2281     int plane_count;
2282     int color;
2283     size_t widths [AV_NUM_DATA_POINTERS];
2284     size_t heights[AV_NUM_DATA_POINTERS];
2285     size_t strides[AV_NUM_DATA_POINTERS];
2286     int status;
2287     size_t contiguous_buf_size;
2288     CVPixelBufferPoolRef pix_buf_pool;
2289     VTEncContext* vtctx = avctx->priv_data;
2290 
2291     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2292         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2293 
2294         *cv_img = (CVPixelBufferRef)frame->data[3];
2295         av_assert0(*cv_img);
2296 
2297         CFRetain(*cv_img);
2298         return 0;
2299     }
2300 
2301     memset(widths,  0, sizeof(widths));
2302     memset(heights, 0, sizeof(heights));
2303     memset(strides, 0, sizeof(strides));
2304 
2305     status = get_cv_pixel_info(
2306         avctx,
2307         frame,
2308         &color,
2309         &plane_count,
2310         widths,
2311         heights,
2312         strides,
2313         &contiguous_buf_size
2314     );
2315 
2316     if (status) {
2317         av_log(
2318             avctx,
2319             AV_LOG_ERROR,
2320             "Error: Cannot convert format %d color_range %d: %d\n",
2321             frame->format,
2322             frame->color_range,
2323             status
2324         );
2325 
2326         return AVERROR_EXTERNAL;
2327     }
2328 
2329     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2330     if (!pix_buf_pool) {
2331         /* On iOS, the VT session is invalidated when the APP switches from
2332          * foreground to background and vice versa. Fetch the actual error code
2333          * of the VT session to detect that case and restart the VT session
2334          * accordingly. */
2335         OSStatus vtstatus;
2336 
2337         vtstatus = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
2338         if (vtstatus == kVTInvalidSessionErr) {
2339             CFRelease(vtctx->session);
2340             vtctx->session = NULL;
2341             status = vtenc_configure_encoder(avctx);
2342             if (status == 0)
2343                 pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2344         }
2345         if (!pix_buf_pool) {
2346             av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2347             return AVERROR_EXTERNAL;
2348         }
2349         else
2350             av_log(avctx, AV_LOG_WARNING, "VT session restarted because of a "
2351                    "kVTInvalidSessionErr error.\n");
2352     }
2353 
2354     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2355                                                 pix_buf_pool,
2356                                                 cv_img);
2357 
2358 
2359     if (status) {
2360         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2361         return AVERROR_EXTERNAL;
2362     }
2363 
2364     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2365     if (status) {
2366         CFRelease(*cv_img);
2367         *cv_img = NULL;
2368         return status;
2369     }
2370 
2371     return 0;
2372 }
2373 
2374 static int create_encoder_dict_h264(const AVFrame *frame,
2375                                     CFDictionaryRef* dict_out)
2376 {
2377     CFDictionaryRef dict = NULL;
2378     if (frame->pict_type == AV_PICTURE_TYPE_I) {
2379         const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2380         const void *vals[] = { kCFBooleanTrue };
2381 
2382         dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2383         if(!dict) return AVERROR(ENOMEM);
2384     }
2385 
2386     *dict_out = dict;
2387     return 0;
2388 }
2389 
2390 static int vtenc_send_frame(AVCodecContext *avctx,
2391                             VTEncContext   *vtctx,
2392                             const AVFrame  *frame)
2393 {
2394     CMTime time;
2395     CFDictionaryRef frame_dict;
2396     CVPixelBufferRef cv_img = NULL;
2397     AVFrameSideData *side_data = NULL;
2398     ExtraSEI *sei = NULL;
2399     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
2400 
2401     if (status) return status;
2402 
2403     status = create_encoder_dict_h264(frame, &frame_dict);
2404     if (status) {
2405         CFRelease(cv_img);
2406         return status;
2407     }
2408 
2409 #if CONFIG_ATSC_A53
2410     side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2411     if (vtctx->a53_cc && side_data && side_data->size) {
2412         sei = av_mallocz(sizeof(*sei));
2413         if (!sei) {
2414             av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2415         } else {
2416             int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size);
2417             if (ret < 0) {
2418                 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2419                 av_free(sei);
2420                 sei = NULL;
2421             }
2422         }
2423     }
2424 #endif
2425 
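         /* Express the frame's PTS in seconds as a CMTime: value = pts * time_base.num,
          * timescale = time_base.den. */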
2426     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2427     status = VTCompressionSessionEncodeFrame(
2428         vtctx->session,
2429         cv_img,
2430         time,
2431         kCMTimeInvalid,
2432         frame_dict,
2433         sei,
2434         NULL
2435     );
2436 
2437     if (frame_dict) CFRelease(frame_dict);
2438     CFRelease(cv_img);
2439 
2440     if (status) {
2441         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2442         return AVERROR_EXTERNAL;
2443     }
2444 
2445     return 0;
2446 }
2447 
2448 static av_cold int vtenc_frame(
2449     AVCodecContext *avctx,
2450     AVPacket       *pkt,
2451     const AVFrame  *frame,
2452     int            *got_packet)
2453 {
2454     VTEncContext *vtctx = avctx->priv_data;
2455     bool get_frame;
2456     int status;
2457     CMSampleBufferRef buf = NULL;
2458     ExtraSEI *sei = NULL;
2459 
2460     if (frame) {
2461         status = vtenc_send_frame(avctx, vtctx, frame);
2462 
2463         if (status) {
2464             status = AVERROR_EXTERNAL;
2465             goto end_nopkt;
2466         }
2467 
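             /* Record the first PTS; once has_b_frames frames have been submitted,
              * derive dts_delta, the reordering delay used to offset DTS in output packets. */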
2468         if (vtctx->frame_ct_in == 0) {
2469             vtctx->first_pts = frame->pts;
2470         } else if(vtctx->frame_ct_in == vtctx->has_b_frames) {
2471             vtctx->dts_delta = frame->pts - vtctx->first_pts;
2472         }
2473 
2474         vtctx->frame_ct_in++;
2475     } else if(!vtctx->flushing) {
2476         vtctx->flushing = true;
2477 
2478         status = VTCompressionSessionCompleteFrames(vtctx->session,
2479                                                     kCMTimeIndefinite);
2480 
2481         if (status) {
2482             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2483             status = AVERROR_EXTERNAL;
2484             goto end_nopkt;
2485         }
2486     }
2487 
2488     *got_packet = 0;
2489     get_frame = vtctx->dts_delta >= 0 || !frame;
2490     if (!get_frame) {
2491         status = 0;
2492         goto end_nopkt;
2493     }
2494 
2495     status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2496     if (status) goto end_nopkt;
2497     if (!buf)   goto end_nopkt;
2498 
2499     status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
2500     if (sei) {
2501         if (sei->data) av_free(sei->data);
2502         av_free(sei);
2503     }
2504     CFRelease(buf);
2505     if (status) goto end_nopkt;
2506 
2507     *got_packet = 1;
2508     return 0;
2509 
2510 end_nopkt:
2511     av_packet_unref(pkt);
2512     return status;
2513 }
2514 
2515 static int vtenc_populate_extradata(AVCodecContext   *avctx,
2516                                     CMVideoCodecType codec_type,
2517                                     CFStringRef      profile_level,
2518                                     CFNumberRef      gamma_level,
2519                                     CFDictionaryRef  enc_info,
2520                                     CFDictionaryRef  pixel_buffer_info)
2521 {
2522     VTEncContext *vtctx = avctx->priv_data;
2523     int status;
2524     CVPixelBufferPoolRef pool = NULL;
2525     CVPixelBufferRef pix_buf = NULL;
2526     CMTime time;
2527     CMSampleBufferRef buf = NULL;
2528 
2529     status = vtenc_create_encoder(avctx,
2530                                   codec_type,
2531                                   profile_level,
2532                                   gamma_level,
2533                                   enc_info,
2534                                   pixel_buffer_info,
2535                                   &vtctx->session);
2536     if (status)
2537         goto pe_cleanup;
2538 
2539     pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2540     if(!pool){
2541         av_log(avctx, AV_LOG_ERROR, "Error getting pixel buffer pool.\n");
2542         goto pe_cleanup;
2543     }
2544 
2545     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2546                                                 pool,
2547                                                 &pix_buf);
2548 
2549     if(status != kCVReturnSuccess){
2550         av_log(avctx, AV_LOG_ERROR, "Error creating frame from pool: %d\n", status);
2551         goto pe_cleanup;
2552     }
2553 
2554     time = CMTimeMake(0, avctx->time_base.den);
2555     status = VTCompressionSessionEncodeFrame(vtctx->session,
2556                                              pix_buf,
2557                                              time,
2558                                              kCMTimeInvalid,
2559                                              NULL,
2560                                              NULL,
2561                                              NULL);
2562 
2563     if (status) {
2564         av_log(avctx,
2565                AV_LOG_ERROR,
2566                "Error sending frame for extradata: %d\n",
2567                status);
2568 
2569         goto pe_cleanup;
2570     }
2571 
2572     //Populates extradata - output frames are flushed and param sets are available.
2573     status = VTCompressionSessionCompleteFrames(vtctx->session,
2574                                                 kCMTimeIndefinite);
2575 
2576     if (status)
2577         goto pe_cleanup;
2578 
2579     status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2580     if (status) {
2581         av_log(avctx, AV_LOG_ERROR, "popping: %d\n", status);
2582         goto pe_cleanup;
2583     }
2584 
2585     CFRelease(buf);
2586 
2587 
2588 
2589 pe_cleanup:
2590     CVPixelBufferRelease(pix_buf);
2591     if(vtctx->session)
2592         CFRelease(vtctx->session);
2593 
2594     vtctx->session = NULL;
2595     vtctx->frame_ct_out = 0;
2596 
2597     av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2598 
2599     return status;
2600 }
2601 
2602 static av_cold int vtenc_close(AVCodecContext *avctx)
2603 {
2604     VTEncContext *vtctx = avctx->priv_data;
2605 
2606     if(!vtctx->session) {
2607         pthread_cond_destroy(&vtctx->cv_sample_sent);
2608         pthread_mutex_destroy(&vtctx->lock);
2609         return 0;
2610     }
2611 
2612     VTCompressionSessionCompleteFrames(vtctx->session,
2613                                        kCMTimeIndefinite);
2614     clear_frame_queue(vtctx);
2615     pthread_cond_destroy(&vtctx->cv_sample_sent);
2616     pthread_mutex_destroy(&vtctx->lock);
2617     CFRelease(vtctx->session);
2618     vtctx->session = NULL;
2619 
2620     if (vtctx->color_primaries) {
2621         CFRelease(vtctx->color_primaries);
2622         vtctx->color_primaries = NULL;
2623     }
2624 
2625     if (vtctx->transfer_function) {
2626         CFRelease(vtctx->transfer_function);
2627         vtctx->transfer_function = NULL;
2628     }
2629 
2630     if (vtctx->ycbcr_matrix) {
2631         CFRelease(vtctx->ycbcr_matrix);
2632         vtctx->ycbcr_matrix = NULL;
2633     }
2634 
2635     return 0;
2636 }
2637 
2638 static const enum AVPixelFormat avc_pix_fmts[] = {
2639     AV_PIX_FMT_VIDEOTOOLBOX,
2640     AV_PIX_FMT_NV12,
2641     AV_PIX_FMT_YUV420P,
2642     AV_PIX_FMT_NONE
2643 };
2644 
2645 static const enum AVPixelFormat hevc_pix_fmts[] = {
2646     AV_PIX_FMT_VIDEOTOOLBOX,
2647     AV_PIX_FMT_NV12,
2648     AV_PIX_FMT_YUV420P,
2649     AV_PIX_FMT_BGRA,
2650     AV_PIX_FMT_P010LE,
2651     AV_PIX_FMT_NONE
2652 };
2653 
2654 static const enum AVPixelFormat prores_pix_fmts[] = {
2655     AV_PIX_FMT_VIDEOTOOLBOX,
2656     AV_PIX_FMT_YUV420P,
2657 #ifdef kCFCoreFoundationVersionNumber10_7
2658     AV_PIX_FMT_NV12,
2659     AV_PIX_FMT_AYUV64,
2660 #endif
2661     AV_PIX_FMT_UYVY422,
2662 #if HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE
2663     AV_PIX_FMT_P010,
2664 #endif
2665 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR8BIPLANARVIDEORANGE
2666     AV_PIX_FMT_NV16,
2667 #endif
2668 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR10BIPLANARVIDEORANGE
2669     AV_PIX_FMT_P210,
2670 #endif
2671 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR16BIPLANARVIDEORANGE
2672     AV_PIX_FMT_P216,
2673 #endif
2674 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR8BIPLANARVIDEORANGE
2675     AV_PIX_FMT_NV24,
2676 #endif
2677 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR10BIPLANARVIDEORANGE
2678     AV_PIX_FMT_P410,
2679 #endif
2680 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR16BIPLANARVIDEORANGE
2681     AV_PIX_FMT_P416,
2682 #endif
2683     AV_PIX_FMT_BGRA,
2684     AV_PIX_FMT_NONE
2685 };
2686 
2687 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
2688 #define COMMON_OPTIONS \
2689     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL, \
2690         { .i64 = 0 }, 0, 1, VE }, \
2691     { "require_sw", "Require software encoding", OFFSET(require_sw), AV_OPT_TYPE_BOOL, \
2692         { .i64 = 0 }, 0, 1, VE }, \
2693     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).", \
2694         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, -1, 1, VE }, \
2695     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.", \
2696         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
2697     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.", \
2698         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
2699     { "prio_speed", "prioritize encoding speed", OFFSET(prio_speed), AV_OPT_TYPE_BOOL, \
2700         { .i64 = -1 }, -1, 1, VE }, \
2701 
2702 #define OFFSET(x) offsetof(VTEncContext, x)
2703 static const AVOption h264_options[] = {
2704     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT64, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
2705     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
2706     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
2707     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
2708     { "extended", "Extended Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_EXTENDED }, INT_MIN, INT_MAX, VE, "profile" },
2709 
2710     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
2711     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
2712     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
2713     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
2714     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
2715     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
2716     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
2717     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
2718     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
2719     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
2720     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
2721 
2722     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
2723     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2724     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2725     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2726     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2727 
2728     { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2729 
2730     COMMON_OPTIONS
2731     { NULL },
2732 };
2733 
2734 static const AVClass h264_videotoolbox_class = {
2735     .class_name = "h264_videotoolbox",
2736     .item_name  = av_default_item_name,
2737     .option     = h264_options,
2738     .version    = LIBAVUTIL_VERSION_INT,
2739 };
2740 
2741 const FFCodec ff_h264_videotoolbox_encoder = {
2742     .p.name           = "h264_videotoolbox",
2743     .p.long_name      = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
2744     .p.type           = AVMEDIA_TYPE_VIDEO,
2745     .p.id             = AV_CODEC_ID_H264,
2746     .p.capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2747     .priv_data_size   = sizeof(VTEncContext),
2748     .p.pix_fmts       = avc_pix_fmts,
2749     .init             = vtenc_init,
2750     FF_CODEC_ENCODE_CB(vtenc_frame),
2751     .close            = vtenc_close,
2752     .p.priv_class     = &h264_videotoolbox_class,
2753     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2754                         FF_CODEC_CAP_INIT_CLEANUP,
2755 };
2756 
2757 static const AVOption hevc_options[] = {
2758     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT64, { .i64 = HEVC_PROF_AUTO }, HEVC_PROF_AUTO, HEVC_PROF_COUNT, VE, "profile" },
2759     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN   }, INT_MIN, INT_MAX, VE, "profile" },
2760     { "main10",   "Main10 Profile",   0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN10 }, INT_MIN, INT_MAX, VE, "profile" },
2761 
2762     { "alpha_quality", "Compression quality for the alpha channel", OFFSET(alpha_quality), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, 0.0, 1.0, VE },
2763 
2764     COMMON_OPTIONS
2765     { NULL },
2766 };
2767 
2768 static const AVClass hevc_videotoolbox_class = {
2769     .class_name = "hevc_videotoolbox",
2770     .item_name  = av_default_item_name,
2771     .option     = hevc_options,
2772     .version    = LIBAVUTIL_VERSION_INT,
2773 };
2774 
2775 const FFCodec ff_hevc_videotoolbox_encoder = {
2776     .p.name           = "hevc_videotoolbox",
2777     .p.long_name      = NULL_IF_CONFIG_SMALL("VideoToolbox H.265 Encoder"),
2778     .p.type           = AVMEDIA_TYPE_VIDEO,
2779     .p.id             = AV_CODEC_ID_HEVC,
2780     .p.capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
2781                         AV_CODEC_CAP_HARDWARE,
2782     .priv_data_size   = sizeof(VTEncContext),
2783     .p.pix_fmts       = hevc_pix_fmts,
2784     .init             = vtenc_init,
2785     FF_CODEC_ENCODE_CB(vtenc_frame),
2786     .close            = vtenc_close,
2787     .p.priv_class     = &hevc_videotoolbox_class,
2788     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2789                         FF_CODEC_CAP_INIT_CLEANUP,
2790     .p.wrapper_name   = "videotoolbox",
2791 };
2792 
2793 static const AVOption prores_options[] = {
2794     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT64, { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, FF_PROFILE_PRORES_XQ, VE, "profile" },
2795     { "auto",     "Automatically determine based on input format", 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_UNKNOWN },            INT_MIN, INT_MAX, VE, "profile" },
2796     { "proxy",    "ProRes 422 Proxy",                              0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_PROXY },       INT_MIN, INT_MAX, VE, "profile" },
2797     { "lt",       "ProRes 422 LT",                                 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_LT },          INT_MIN, INT_MAX, VE, "profile" },
2798     { "standard", "ProRes 422",                                    0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_STANDARD },    INT_MIN, INT_MAX, VE, "profile" },
2799     { "hq",       "ProRes 422 HQ",                                 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_HQ },          INT_MIN, INT_MAX, VE, "profile" },
2800     { "4444",     "ProRes 4444",                                   0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_4444 },        INT_MIN, INT_MAX, VE, "profile" },
2801     { "xq",       "ProRes 4444 XQ",                                0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_PRORES_XQ },          INT_MIN, INT_MAX, VE, "profile" },
2802 
2803     COMMON_OPTIONS
2804     { NULL },
2805 };
2806 
2807 static const AVClass prores_videotoolbox_class = {
2808     .class_name = "prores_videotoolbox",
2809     .item_name  = av_default_item_name,
2810     .option     = prores_options,
2811     .version    = LIBAVUTIL_VERSION_INT,
2812 };
2813 
2814 const FFCodec ff_prores_videotoolbox_encoder = {
2815     .p.name           = "prores_videotoolbox",
2816     .p.long_name      = NULL_IF_CONFIG_SMALL("VideoToolbox ProRes Encoder"),
2817     .p.type           = AVMEDIA_TYPE_VIDEO,
2818     .p.id             = AV_CODEC_ID_PRORES,
2819     .p.capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
2820                         AV_CODEC_CAP_HARDWARE,
2821     .priv_data_size   = sizeof(VTEncContext),
2822     .p.pix_fmts       = prores_pix_fmts,
2823     .init             = vtenc_init,
2824     FF_CODEC_ENCODE_CB(vtenc_frame),
2825     .close            = vtenc_close,
2826     .p.priv_class     = &prores_videotoolbox_class,
2827     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2828                         FF_CODEC_CAP_INIT_CLEANUP,
2829     .p.wrapper_name   = "videotoolbox",
2830 };
2831