1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #define COBJMACROS
20 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
21 #undef _WIN32_WINNT
22 #define _WIN32_WINNT 0x0602
23 #endif
24 
25 #include "encode.h"
26 #include "mf_utils.h"
27 #include "libavutil/imgutils.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/time.h"
30 #include "internal.h"
31 
32 typedef struct MFContext {
33     AVClass *av_class;
34     AVFrame *frame;
35     int is_video, is_audio;
36     GUID main_subtype;
37     IMFTransform *mft;
38     IMFMediaEventGenerator *async_events;
39     DWORD in_stream_id, out_stream_id;
40     MFT_INPUT_STREAM_INFO in_info;
41     MFT_OUTPUT_STREAM_INFO out_info;
42     int out_stream_provides_samples;
43     int draining, draining_done;
44     int sample_sent;
45     int async_need_input, async_have_output, async_marker;
46     int64_t reorder_delay;
47     ICodecAPI *codec_api;
48     // set by AVOption
49     int opt_enc_rc;
50     int opt_enc_quality;
51     int opt_enc_scenario;
52     int opt_enc_hw;
53 } MFContext;
54 
55 static int mf_choose_output_type(AVCodecContext *avctx);
56 static int mf_setup_context(AVCodecContext *avctx);
57 
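// Media Foundation uses 100-nanosecond units for sample times and durations,
// hence the fixed 1/10000000 timebase below.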
58 #define MF_TIMEBASE (AVRational){1, 10000000}
59 // Sentinel value only used by us.
60 #define MF_INVALID_TIME AV_NOPTS_VALUE
61 
62 static int mf_wait_events(AVCodecContext *avctx)
63 {
64     MFContext *c = avctx->priv_data;
65 
66     if (!c->async_events)
67         return 0;
68 
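    // For async MFTs, readiness is signaled through IMFMediaEventGenerator;
    // block here until an event of interest has been received.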
69     while (!(c->async_need_input || c->async_have_output || c->draining_done || c->async_marker)) {
70         IMFMediaEvent *ev = NULL;
71         MediaEventType ev_id = 0;
72         HRESULT hr = IMFMediaEventGenerator_GetEvent(c->async_events, 0, &ev);
73         if (FAILED(hr)) {
74             av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
75                    ff_hr_str(hr));
76             return AVERROR_EXTERNAL;
77         }
78         IMFMediaEvent_GetType(ev, &ev_id);
79         switch (ev_id) {
80         case ff_METransformNeedInput:
81             if (!c->draining)
82                 c->async_need_input = 1;
83             break;
84         case ff_METransformHaveOutput:
85             c->async_have_output = 1;
86             break;
87         case ff_METransformDrainComplete:
88             c->draining_done = 1;
89             break;
90         case ff_METransformMarker:
91             c->async_marker = 1;
92             break;
93         default: ;
94         }
95         IMFMediaEvent_Release(ev);
96     }
97 
98     return 0;
99 }
100 
101 static AVRational mf_get_tb(AVCodecContext *avctx)
102 {
103     if (avctx->pkt_timebase.num > 0 && avctx->pkt_timebase.den > 0)
104         return avctx->pkt_timebase;
105     if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
106         return avctx->time_base;
107     return MF_TIMEBASE;
108 }
109 
110 static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
111 {
112     if (av_pts == AV_NOPTS_VALUE)
113         return MF_INVALID_TIME;
114     return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
115 }
116 
117 static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
118 {
119     LONGLONG stime = mf_to_mf_time(avctx, av_pts);
120     if (stime != MF_INVALID_TIME)
121         IMFSample_SetSampleTime(sample, stime);
122 }
123 
124 static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
125 {
126     return av_rescale_q(stime, MF_TIMEBASE, mf_get_tb(avctx));
127 }
128 
129 static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
130 {
131     LONGLONG pts;
132     HRESULT hr = IMFSample_GetSampleTime(sample, &pts);
133     if (FAILED(hr))
134         return AV_NOPTS_VALUE;
135     return mf_from_mf_time(avctx, pts);
136 }
137 
138 static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
139 {
140     MFContext *c = avctx->priv_data;
141     HRESULT hr;
142     UINT32 sz;
143 
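    // MF_MT_USER_DATA carries the codec-specific data that would follow the
    // WAVEFORMATEX header; export it as extradata (MP3/AC3 need none).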
144     if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
145         hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
146         if (!FAILED(hr) && sz > 0) {
147             avctx->extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
148             if (!avctx->extradata)
149                 return AVERROR(ENOMEM);
150             avctx->extradata_size = sz;
151             hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, avctx->extradata, sz, NULL);
152             if (FAILED(hr))
153                 return AVERROR_EXTERNAL;
154 
155             if (avctx->codec_id == AV_CODEC_ID_AAC && avctx->extradata_size >= 12) {
156                 // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
157                 avctx->extradata_size = avctx->extradata_size - 12;
158                 memmove(avctx->extradata, avctx->extradata + 12, avctx->extradata_size);
159             }
160         }
161     }
162 
163     // I don't know where it's documented that we need this. It happens with the
164     // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
165     // (Certainly any lossy codec will have frames much smaller than 1 second.)
166     if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
167         hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
168         if (!FAILED(hr)) {
169             av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
170                    "assuming %d bytes instead.\n", (int)sz);
171             c->out_info.cbSize = sz;
172         }
173     }
174 
175     return 0;
176 }
177 
178 static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
179 {
180     HRESULT hr;
181     UINT32 sz;
182 
183     hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &sz);
184     if (!FAILED(hr) && sz > 0) {
185         uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
186         if (!extradata)
187             return AVERROR(ENOMEM);
188         hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, extradata, sz, NULL);
189         if (FAILED(hr)) {
190             av_free(extradata);
191             return AVERROR_EXTERNAL;
192         }
193         av_freep(&avctx->extradata);
194         avctx->extradata = extradata;
195         avctx->extradata_size = sz;
196     }
197 
198     return 0;
199 }
200 
201 static int mf_output_type_get(AVCodecContext *avctx)
202 {
203     MFContext *c = avctx->priv_data;
204     HRESULT hr;
205     IMFMediaType *type;
206     int ret;
207 
208     hr = IMFTransform_GetOutputCurrentType(c->mft, c->out_stream_id, &type);
209     if (FAILED(hr)) {
210         av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
211         return AVERROR_EXTERNAL;
212     }
213 
214     av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
215     ff_media_type_dump(avctx, type);
216 
217     ret = 0;
218     if (c->is_video) {
219         ret = mf_encv_output_type_get(avctx, type);
220     } else if (c->is_audio) {
221         ret = mf_enca_output_type_get(avctx, type);
222     }
223 
224     if (ret < 0)
225         av_log(avctx, AV_LOG_ERROR, "output type not supported\n");
226 
227     IMFMediaType_Release(type);
228     return ret;
229 }
230 
231 static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
232 {
233     MFContext *c = avctx->priv_data;
234     HRESULT hr;
235     int ret;
236     DWORD len;
237     IMFMediaBuffer *buffer;
238     BYTE *data;
239     UINT64 t;
240     UINT32 t32;
241 
242     hr = IMFSample_GetTotalLength(sample, &len);
243     if (FAILED(hr))
244         return AVERROR_EXTERNAL;
245 
246     if ((ret = ff_get_encode_buffer(avctx, avpkt, len, 0)) < 0)
247         return ret;
248 
249     hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
250     if (FAILED(hr))
251         return AVERROR_EXTERNAL;
252 
253     hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
254     if (FAILED(hr)) {
255         IMFMediaBuffer_Release(buffer);
256         return AVERROR_EXTERNAL;
257     }
258 
259     memcpy(avpkt->data, data, len);
260 
261     IMFMediaBuffer_Unlock(buffer);
262     IMFMediaBuffer_Release(buffer);
263 
264     avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);
265 
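    // MFSampleExtension_CleanPoint marks sync points (keyframes); audio
    // packets are always treated as keyframes.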
266     hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
267     if (c->is_audio || (!FAILED(hr) && t32 != 0))
268         avpkt->flags |= AV_PKT_FLAG_KEY;
269 
270     hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
271     if (!FAILED(hr)) {
272         avpkt->dts = mf_from_mf_time(avctx, t);
273         // At least on Qualcomm's HEVC encoder on SD 835, the output dts
274         // starts from the input pts of the first frame, while the output pts
275         // is shifted forward. Therefore, shift the output values back so that
276         // the output pts matches the input.
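        // (E.g. if the first packet has pts=3 and dts=0, reorder_delay is 3;
        // after the shift, pts=0 matches the input and dts becomes -3.)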
277         if (c->reorder_delay == AV_NOPTS_VALUE)
278             c->reorder_delay = avpkt->pts - avpkt->dts;
279         avpkt->dts -= c->reorder_delay;
280         avpkt->pts -= c->reorder_delay;
281     }
282 
283     return 0;
284 }
285 
286 static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
287 {
288     MFContext *c = avctx->priv_data;
289     size_t len;
290     size_t bps;
291     IMFSample *sample;
292 
293     bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels;
294     len = frame->nb_samples * bps;
295 
296     sample = ff_create_memory_sample(frame->data[0], len, c->in_info.cbAlignment);
297     if (sample)
298         IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
299     return sample;
300 }
301 
302 static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
303 {
304     MFContext *c = avctx->priv_data;
305     IMFSample *sample;
306     IMFMediaBuffer *buffer;
307     BYTE *data;
308     HRESULT hr;
309     int ret;
310     int size;
311 
312     size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
313     if (size < 0)
314         return NULL;
315 
316     sample = ff_create_memory_sample(NULL, size, c->in_info.cbAlignment);
317     if (!sample)
318         return NULL;
319 
320     hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
321     if (FAILED(hr)) {
322         IMFSample_Release(sample);
323         return NULL;
324     }
325 
326     hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
327     if (FAILED(hr)) {
328         IMFMediaBuffer_Release(buffer);
329         IMFSample_Release(sample);
330         return NULL;
331     }
332 
333     ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
334                                   avctx->pix_fmt, avctx->width, avctx->height, 1);
335     IMFMediaBuffer_SetCurrentLength(buffer, size);
336     IMFMediaBuffer_Unlock(buffer);
337     IMFMediaBuffer_Release(buffer);
338     if (ret < 0) {
339         IMFSample_Release(sample);
340         return NULL;
341     }
342 
343     IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->pkt_duration));
344 
345     return sample;
346 }
347 
348 static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
349 {
350     MFContext *c = avctx->priv_data;
351     IMFSample *sample;
352 
353     if (c->is_audio) {
354         sample = mf_a_avframe_to_sample(avctx, frame);
355     } else {
356         sample = mf_v_avframe_to_sample(avctx, frame);
357     }
358 
359     if (sample)
360         mf_sample_set_pts(avctx, sample, frame->pts);
361 
362     return sample;
363 }
364 
365 static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
366 {
367     MFContext *c = avctx->priv_data;
368     HRESULT hr;
369     int ret;
370 
371     if (sample) {
372         if (c->async_events) {
373             if ((ret = mf_wait_events(avctx)) < 0)
374                 return ret;
375             if (!c->async_need_input)
376                 return AVERROR(EAGAIN);
377         }
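        // Mark the first sample fed to the MFT as a discontinuity (start of a
        // new stream); some MFTs seem to expect this.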
378         if (!c->sample_sent)
379             IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
380         c->sample_sent = 1;
381         hr = IMFTransform_ProcessInput(c->mft, c->in_stream_id, sample, 0);
382         if (hr == MF_E_NOTACCEPTING) {
383             return AVERROR(EAGAIN);
384         } else if (FAILED(hr)) {
385             av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
386             return AVERROR_EXTERNAL;
387         }
388         c->async_need_input = 0;
389     } else if (!c->draining) {
390         hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
391         if (FAILED(hr))
392             av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
393         // Some MFTs (AC3) will send a frame after each drain command (???), so
394         // this is required to make draining actually terminate.
395         c->draining = 1;
396         c->async_need_input = 0;
397     } else {
398         return AVERROR_EOF;
399     }
400     return 0;
401 }
402 
403 static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
404 {
405     MFContext *c = avctx->priv_data;
406     HRESULT hr;
407     DWORD st;
408     MFT_OUTPUT_DATA_BUFFER out_buffers;
409     IMFSample *sample;
410     int ret = 0;
411 
412     while (1) {
413         *out_sample = NULL;
414         sample = NULL;
415 
416         if (c->async_events) {
417             if ((ret = mf_wait_events(avctx)) < 0)
418                 return ret;
419             if (!c->async_have_output || c->draining_done) {
420                 ret = 0;
421                 break;
422             }
423         }
424 
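        // Unless the MFT provides its own output samples, allocate one with
        // the size/alignment it advertised in MFT_OUTPUT_STREAM_INFO.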
425         if (!c->out_stream_provides_samples) {
426             sample = ff_create_memory_sample(NULL, c->out_info.cbSize, c->out_info.cbAlignment);
427             if (!sample)
428                 return AVERROR(ENOMEM);
429         }
430 
431         out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
432             .dwStreamID = c->out_stream_id,
433             .pSample = sample,
434         };
435 
436         st = 0;
437         hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);
438 
439         if (out_buffers.pEvents)
440             IMFCollection_Release(out_buffers.pEvents);
441 
442         if (!FAILED(hr)) {
443             *out_sample = out_buffers.pSample;
444             ret = 0;
445             break;
446         }
447 
448         if (out_buffers.pSample)
449             IMFSample_Release(out_buffers.pSample);
450 
451         if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
452             if (c->draining)
453                 c->draining_done = 1;
454             ret = 0;
455         } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
456             av_log(avctx, AV_LOG_WARNING, "stream format change\n");
457             ret = mf_choose_output_type(avctx);
458             if (ret == 0) // we don't expect renegotiating the input type
459                 ret = AVERROR_EXTERNAL;
460             if (ret > 0) {
461                 ret = mf_setup_context(avctx);
462                 if (ret >= 0) {
463                     c->async_have_output = 0;
464                     continue;
465                 }
466             }
467         } else {
468             av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
469             ret = AVERROR_EXTERNAL;
470         }
471 
472         break;
473     }
474 
475     c->async_have_output = 0;
476 
477     if (ret >= 0 && !*out_sample)
478         ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
479 
480     return ret;
481 }
482 
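// receive_packet flow: pull a frame from the encode queue, convert and feed
// it to the MFT, then try to fetch an encoded sample and wrap it in an
// AVPacket.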
483 static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
484 {
485     MFContext *c = avctx->priv_data;
486     IMFSample *sample = NULL;
487     int ret;
488 
489     if (!c->frame->buf[0]) {
490         ret = ff_encode_get_frame(avctx, c->frame);
491         if (ret < 0 && ret != AVERROR_EOF)
492             return ret;
493     }
494 
495     if (c->frame->buf[0]) {
496         sample = mf_avframe_to_sample(avctx, c->frame);
497         if (!sample) {
498             av_frame_unref(c->frame);
499             return AVERROR(ENOMEM);
500         }
501         if (c->is_video && c->codec_api) {
502             if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
503                 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
504         }
505     }
506 
507     ret = mf_send_sample(avctx, sample);
508     if (sample)
509         IMFSample_Release(sample);
510     if (ret != AVERROR(EAGAIN))
511         av_frame_unref(c->frame);
512     if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
513         return ret;
514 
515     ret = mf_receive_sample(avctx, &sample);
516     if (ret < 0)
517         return ret;
518 
519     ret = mf_sample_to_avpacket(avctx, sample, avpkt);
520     IMFSample_Release(sample);
521 
522     return ret;
523 }
524 
525 // Most encoders seem to enumerate supported audio formats on the output types,
526 // at least as far as channel configuration and sample rate is concerned. Pick
527 // the one which seems to match best.
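// The high score bits reward matches on sample rate, channel count and
// subtype; the low bits break ties on how close the bitrate is.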
528 static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
529 {
530     MFContext *c = avctx->priv_data;
531     HRESULT hr;
532     UINT32 t;
533     GUID tg;
534     int64_t score = 0;
535 
536     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
537     if (!FAILED(hr) && t == avctx->sample_rate)
538         score |= 1LL << 32;
539 
540     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
541     if (!FAILED(hr) && t == avctx->channels)
542         score |= 2LL << 32;
543 
544     hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
545     if (!FAILED(hr)) {
546         if (IsEqualGUID(&c->main_subtype, &tg))
547             score |= 4LL << 32;
548     }
549 
550     // Select the bitrate (lowest priority).
551     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
552     if (!FAILED(hr)) {
553         int diff = (int)t - avctx->bit_rate / 8;
554         if (diff >= 0) {
555             score |= (1LL << 31) - diff; // prefer lower bitrate
556         } else {
557             score |= (1LL << 30) + diff; // prefer higher bitrate
558         }
559     }
560 
561     hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
562     if (!FAILED(hr) && t != 0)
563         return -1;
564 
565     return score;
566 }
567 
568 static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
569 {
570     // (some encoders allow adjusting this freely, but it can also cause failure
571     //  to set the output type - so it's commented for being too fragile)
572     //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
573     //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
574 
575     return 0;
576 }
577 
578 static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
579 {
580     HRESULT hr;
581     UINT32 t;
582     int64_t score = 0;
583 
584     enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
585     if (sformat == AV_SAMPLE_FMT_NONE)
586         return -1; // can not use
587 
588     if (sformat == avctx->sample_fmt)
589         score |= 1;
590 
591     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
592     if (!FAILED(hr) && t == avctx->sample_rate)
593         score |= 2;
594 
595     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
596     if (!FAILED(hr) && t == avctx->channels)
597         score |= 4;
598 
599     return score;
600 }
601 
602 static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
603 {
604     HRESULT hr;
605     UINT32 t;
606 
607     enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
608     if (sformat != avctx->sample_fmt) {
609         av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
610         return AVERROR(EINVAL);
611     }
612 
613     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
614     if (FAILED(hr) || t != avctx->sample_rate) {
615         av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
616         return AVERROR(EINVAL);
617     }
618 
619     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
620     if (FAILED(hr) || t != avctx->channels) {
621         av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
622         return AVERROR(EINVAL);
623     }
624 
625     return 0;
626 }
627 
628 static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
629 {
630     MFContext *c = avctx->priv_data;
631     GUID tg;
632     HRESULT hr;
633     int score = -1;
634 
635     hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
636     if (!FAILED(hr)) {
637         if (IsEqualGUID(&c->main_subtype, &tg))
638             score = 1;
639     }
640 
641     return score;
642 }
643 
644 static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
645 {
646     MFContext *c = avctx->priv_data;
647     AVRational framerate;
648 
649     ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
650     IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
651 
652     if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
653         framerate = avctx->framerate;
654     } else {
655         framerate = av_inv_q(avctx->time_base);
656         framerate.den *= avctx->ticks_per_frame;
657     }
658 
659     ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, framerate.num, framerate.den);
660 
661     // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
662     if (avctx->codec_id == AV_CODEC_ID_H264) {
663         UINT32 profile = ff_eAVEncH264VProfile_Base;
664         switch (avctx->profile) {
665         case FF_PROFILE_H264_MAIN:
666             profile = ff_eAVEncH264VProfile_Main;
667             break;
668         case FF_PROFILE_H264_HIGH:
669             profile = ff_eAVEncH264VProfile_High;
670             break;
671         }
672         IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
673     }
674 
675     IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
676 
677     // Note that some of the ICodecAPI options must be set before SetOutputType.
678     if (c->codec_api) {
679         if (avctx->bit_rate)
680             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));
681 
682         if (c->opt_enc_rc >= 0)
683             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));
684 
685         if (c->opt_enc_quality >= 0)
686             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));
687 
688         // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
689         // defaults this to 1, and that setting is buggy with many of the
690         // rate control modes. (0 or 2 b-frames works fine with most rate
691         // control modes, but 2 seems buggy with the u_vbr mode.) Setting
692         // "scenario" to "camera_record" sets it in CFR mode (where the default
693         // is VFR), which makes the encoder avoid dropping frames.
694         ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
695         avctx->has_b_frames = avctx->max_b_frames > 0;
696 
697         ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));
698 
699         if (c->opt_enc_scenario >= 0)
700             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
701     }
702 
703     return 0;
704 }
705 
706 static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
707 {
708     enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
709     if (pix_fmt != avctx->pix_fmt)
710         return -1; // can not use
711 
712     return 0;
713 }
714 
715 static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
716 {
717     enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
718     if (pix_fmt != avctx->pix_fmt) {
719         av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
720         return AVERROR(EINVAL);
721     }
722 
723     //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
724 
725     return 0;
726 }
727 
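// Enumerate the available output types, score them, and set the best match
// (falling back to a newly created empty media type if none are enumerated).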
728 static int mf_choose_output_type(AVCodecContext *avctx)
729 {
730     MFContext *c = avctx->priv_data;
731     HRESULT hr;
732     int ret;
733     IMFMediaType *out_type = NULL;
734     int64_t out_type_score = -1;
735     int out_type_index = -1;
736     int n;
737 
738     av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
739     for (n = 0; ; n++) {
740         IMFMediaType *type;
741         int64_t score = -1;
742 
743         hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
744         if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
745             break;
746         if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
747             av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
748             ret = 0;
749             goto done;
750         }
751         if (FAILED(hr)) {
752             av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
753             ret = AVERROR_EXTERNAL;
754             goto done;
755         }
756 
757         av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
758         ff_media_type_dump(avctx, type);
759 
760         if (c->is_video) {
761             score = mf_encv_output_score(avctx, type);
762         } else if (c->is_audio) {
763             score = mf_enca_output_score(avctx, type);
764         }
765 
766         if (score > out_type_score) {
767             if (out_type)
768                 IMFMediaType_Release(out_type);
769             out_type = type;
770             out_type_score = score;
771             out_type_index = n;
772             IMFMediaType_AddRef(out_type);
773         }
774 
775         IMFMediaType_Release(type);
776     }
777 
778     if (out_type) {
779         av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
780     } else {
781         hr = MFCreateMediaType(&out_type);
782         if (FAILED(hr)) {
783             ret = AVERROR(ENOMEM);
784             goto done;
785         }
786     }
787 
788     ret = 0;
789     if (c->is_video) {
790         ret = mf_encv_output_adjust(avctx, out_type);
791     } else if (c->is_audio) {
792         ret = mf_enca_output_adjust(avctx, out_type);
793     }
794 
795     if (ret >= 0) {
796         av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
797         ff_media_type_dump(avctx, out_type);
798 
799         hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
800         if (!FAILED(hr)) {
801             ret = 1;
802         } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
803             av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
804             ret = 0;
805         } else {
806             av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
807             ret = AVERROR_EXTERNAL;
808         }
809     }
810 
811 done:
812     if (out_type)
813         IMFMediaType_Release(out_type);
814     return ret;
815 }
816 
817 static int mf_choose_input_type(AVCodecContext *avctx)
818 {
819     MFContext *c = avctx->priv_data;
820     HRESULT hr;
821     int ret;
822     IMFMediaType *in_type = NULL;
823     int64_t in_type_score = -1;
824     int in_type_index = -1;
825     int n;
826 
827     av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
828     for (n = 0; ; n++) {
829         IMFMediaType *type = NULL;
830         int64_t score = -1;
831 
832         hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
833         if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
834             break;
835         if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
836             av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
837             ret = 0;
838             goto done;
839         }
840         if (FAILED(hr)) {
841             av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
842             ret = AVERROR_EXTERNAL;
843             goto done;
844         }
845 
846         av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
847         ff_media_type_dump(avctx, type);
848 
849         if (c->is_video) {
850             score = mf_encv_input_score(avctx, type);
851         } else if (c->is_audio) {
852             score = mf_enca_input_score(avctx, type);
853         }
854 
855         if (score > in_type_score) {
856             if (in_type)
857                 IMFMediaType_Release(in_type);
858             in_type = type;
859             in_type_score = score;
860             in_type_index = n;
861             IMFMediaType_AddRef(in_type);
862         }
863 
864         IMFMediaType_Release(type);
865     }
866 
867     if (in_type) {
868         av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
869     } else {
870         // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
871         av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
872         ret = 0;
873         goto done;
874     }
875 
876     ret = 0;
877     if (c->is_video) {
878         ret = mf_encv_input_adjust(avctx, in_type);
879     } else if (c->is_audio) {
880         ret = mf_enca_input_adjust(avctx, in_type);
881     }
882 
883     if (ret >= 0) {
884         av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
885         ff_media_type_dump(avctx, in_type);
886 
887         hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
888         if (!FAILED(hr)) {
889             ret = 1;
890         } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
891             av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
892             ret = 0;
893         } else {
894             av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
895             ret = AVERROR_EXTERNAL;
896         }
897     }
898 
899 done:
900     if (in_type)
901         IMFMediaType_Release(in_type);
902     return ret;
903 }
904 
905 static int mf_negotiate_types(AVCodecContext *avctx)
906 {
907     // This follows steps 1-5 on:
908     //  https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
909     // If every MFT implementer does this correctly, this loop should at worst
910     // be repeated once.
911     int need_input = 1, need_output = 1;
912     int n;
913     for (n = 0; n < 2 && (need_input || need_output); n++) {
914         int ret;
915         ret = mf_choose_input_type(avctx);
916         if (ret < 0)
917             return ret;
918         need_input = ret < 1;
919         ret = mf_choose_output_type(avctx);
920         if (ret < 0)
921             return ret;
922         need_output = ret < 1;
923     }
924     if (need_input || need_output) {
925         av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
926                need_input, need_output);
927         return AVERROR_EXTERNAL;
928     }
929     return 0;
930 }
931 
932 static int mf_setup_context(AVCodecContext *avctx)
933 {
934     MFContext *c = avctx->priv_data;
935     HRESULT hr;
936     int ret;
937 
938     hr = IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info);
939     if (FAILED(hr))
940         return AVERROR_EXTERNAL;
941     av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
942            (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);
943 
944     hr = IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info);
945     if (FAILED(hr))
946         return AVERROR_EXTERNAL;
947     c->out_stream_provides_samples =
948         (c->out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
949         (c->out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
950     av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
951            (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
952            c->out_stream_provides_samples ? " (provides samples)" : "");
953 
954     if ((ret = mf_output_type_get(avctx)) < 0)
955         return ret;
956 
957     return 0;
958 }
959 
960 static int mf_unlock_async(AVCodecContext *avctx)
961 {
962     MFContext *c = avctx->priv_data;
963     HRESULT hr;
964     IMFAttributes *attrs;
965     UINT32 v;
966     int res = AVERROR_EXTERNAL;
967 
968     // For hw encoding we unfortunately need to use async mode, otherwise
969     // play it safe and avoid it.
970     if (!(c->is_video && c->opt_enc_hw))
971         return 0;
972 
973     hr = IMFTransform_GetAttributes(c->mft, &attrs);
974     if (FAILED(hr)) {
975         av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
976         goto err;
977     }
978 
979     hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
980     if (FAILED(hr)) {
981         av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
982         goto err;
983     }
984 
985     if (!v) {
986         av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
987         goto err;
988     }
989 
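    // Asynchronous MFTs must be explicitly unlocked with
    // MF_TRANSFORM_ASYNC_UNLOCK before they accept further calls.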
990     hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
991     if (FAILED(hr)) {
992         av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
993         goto err;
994     }
995 
996     hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
997     if (FAILED(hr)) {
998         av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
999         goto err;
1000     }
1001 
1002     res = 0;
1003 
1004 err:
1005     IMFAttributes_Release(attrs);
1006     return res;
1007 }
1008 
1009 static int mf_create(void *log, IMFTransform **mft, const AVCodec *codec, int use_hw)
1010 {
1011     int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
1012     const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
1013     MFT_REGISTER_TYPE_INFO reg = {0};
1014     GUID category;
1015     int ret;
1016 
1017     *mft = NULL;
1018 
1019     if (!subtype)
1020         return AVERROR(ENOSYS);
1021 
1022     reg.guidSubtype = *subtype;
1023 
1024     if (is_audio) {
1025         reg.guidMajorType = MFMediaType_Audio;
1026         category = MFT_CATEGORY_AUDIO_ENCODER;
1027     } else {
1028         reg.guidMajorType = MFMediaType_Video;
1029         category = MFT_CATEGORY_VIDEO_ENCODER;
1030     }
1031 
1032     if ((ret = ff_instantiate_mf(log, category, NULL, &reg, use_hw, mft)) < 0)
1033         return ret;
1034 
1035     return 0;
1036 }
1037 
1038 static int mf_init(AVCodecContext *avctx)
1039 {
1040     MFContext *c = avctx->priv_data;
1041     HRESULT hr;
1042     int ret;
1043     const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
1044     int use_hw = 0;
1045 
1046     c->frame = av_frame_alloc();
1047     if (!c->frame)
1048         return AVERROR(ENOMEM);
1049 
1050     c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
1051     c->is_video = !c->is_audio;
1052     c->reorder_delay = AV_NOPTS_VALUE;
1053 
1054     if (c->is_video && c->opt_enc_hw)
1055         use_hw = 1;
1056 
1057     if (!subtype)
1058         return AVERROR(ENOSYS);
1059 
1060     c->main_subtype = *subtype;
1061 
1062     if ((ret = mf_create(avctx, &c->mft, avctx->codec, use_hw)) < 0)
1063         return ret;
1064 
1065     if ((ret = mf_unlock_async(avctx)) < 0)
1066         return ret;
1067 
1068     hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
1069     if (!FAILED(hr))
1070         av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");
1071 
1072 
1073     hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
1074     if (hr == E_NOTIMPL) {
1075         c->in_stream_id = c->out_stream_id = 0;
1076     } else if (FAILED(hr)) {
1077         av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
1078         return AVERROR_EXTERNAL;
1079     }
1080 
1081     if ((ret = mf_negotiate_types(avctx)) < 0)
1082         return ret;
1083 
1084     if ((ret = mf_setup_context(avctx)) < 0)
1085         return ret;
1086 
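    // Notify the MFT that streaming begins; async MFTs start sending
    // METransformNeedInput events only after NOTIFY_START_OF_STREAM.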
1087     hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
1088     if (FAILED(hr)) {
1089         av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
1090         return AVERROR_EXTERNAL;
1091     }
1092 
1093     hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
1094     if (FAILED(hr)) {
1095         av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
1096         return AVERROR_EXTERNAL;
1097     }
1098 
1099     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
1100         c->is_video && !avctx->extradata) {
1101         int sleep = 10000, total = 0;
1102         av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
1103         while (total < 70*1000) {
1104             // The Qualcomm H264 encoder on SD835 doesn't provide extradata
1105             // immediately, but it becomes available soon after init (without
1106             // any waitable event). In practice, it's available after less
1107             // than 10 ms, but wait for up to 70 ms before giving up.
1108             // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
1109             // of the QSV H264 encoder at least) don't provide extradata this
1110             // way at all, not even after encoding a frame - it's only
1111             // available prepended to frames.
1112             av_usleep(sleep);
1113             total += sleep;
1114             mf_output_type_get(avctx);
1115             if (avctx->extradata)
1116                 break;
1117             sleep *= 2;
1118         }
1119         av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
1120                avctx->extradata ? "Got" : "Didn't get", total / 1000);
1121     }
1122 
1123     return 0;
1124 }
1125 
1126 static int mf_close(AVCodecContext *avctx)
1127 {
1128     MFContext *c = avctx->priv_data;
1129 
1130     if (c->codec_api)
1131         ICodecAPI_Release(c->codec_api);
1132 
1133     if (c->async_events)
1134         IMFMediaEventGenerator_Release(c->async_events);
1135 
1136     ff_free_mf(&c->mft);
1137 
1138     av_frame_free(&c->frame);
1139 
1140     av_freep(&avctx->extradata);
1141     avctx->extradata_size = 0;
1142 
1143     return 0;
1144 }
1145 
1146 #define OFFSET(x) offsetof(MFContext, x)
1147 
1148 #define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, EXTRA) \
1149     static const AVClass ff_ ## NAME ## _mf_encoder_class = {                  \
1150         .class_name = #NAME "_mf",                                             \
1151         .item_name  = av_default_item_name,                                    \
1152         .option     = OPTS,                                                    \
1153         .version    = LIBAVUTIL_VERSION_INT,                                   \
1154     };                                                                         \
1155     AVCodec ff_ ## NAME ## _mf_encoder = {                                     \
1156         .priv_class     = &ff_ ## NAME ## _mf_encoder_class,                   \
1157         .name           = #NAME "_mf",                                         \
1158         .long_name      = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"),    \
1159         .type           = AVMEDIA_TYPE_ ## MEDIATYPE,                          \
1160         .id             = AV_CODEC_ID_ ## ID,                                  \
1161         .priv_data_size = sizeof(MFContext),                                   \
1162         .init           = mf_init,                                             \
1163         .close          = mf_close,                                            \
1164         .receive_packet = mf_receive_packet,                                   \
1165         EXTRA                                                                  \
1166         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
1167                           AV_CODEC_CAP_DR1,                                    \
1168         .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |                       \
1169                           FF_CODEC_CAP_INIT_CLEANUP,                           \
1170     };
1171 
1172 #define AFMTS \
1173         .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,    \
1174                                                          AV_SAMPLE_FMT_NONE },
1175 
1176 MF_ENCODER(AUDIO, aac,         AAC, NULL, AFMTS);
1177 MF_ENCODER(AUDIO, ac3,         AC3, NULL, AFMTS);
1178 MF_ENCODER(AUDIO, mp3,         MP3, NULL, AFMTS);
1179 
1180 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1181 static const AVOption venc_opts[] = {
1182     {"rate_control",  "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
1183     { "default",      "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
1184     { "cbr",          "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
1185     { "pc_vbr",       "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
1186     { "u_vbr",        "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
1187     { "quality",      "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
1188     // The following rate_control modes require Windows 8.
1189     { "ld_vbr",       "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
1190     { "g_vbr",        "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
1191     { "gld_vbr",      "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},
1192 
1193     {"scenario",          "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
1194     { "default",          "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
1195     { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
1196     { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
1197     { "archive",          "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
1198     { "live_streaming",   "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
1199     { "camera_record",    "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
1200     { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},
1201 
1202     {"quality",       "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
1203     {"hw_encoding",   "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
1204     {NULL}
1205 };
1206 
1207 #define VFMTS \
1208         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,       \
1209                                                         AV_PIX_FMT_YUV420P,    \
1210                                                         AV_PIX_FMT_NONE },
1211 
1212 MF_ENCODER(VIDEO, h264,        H264, venc_opts, VFMTS);
1213 MF_ENCODER(VIDEO, hevc,        HEVC, venc_opts, VFMTS);
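// Illustrative usage of these wrappers (not part of this file), assuming an
// MF-enabled build:
//   ffmpeg -i in.wav -c:a aac_mf out.m4a
//   ffmpeg -i in.mp4 -c:v h264_mf -b:v 4M -rate_control cbr -hw_encoding 1 out.mp4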
1214