• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #define COBJMACROS
20 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
21 #undef _WIN32_WINNT
22 #define _WIN32_WINNT 0x0602
23 #endif
24 
25 #include "encode.h"
26 #include "mf_utils.h"
27 #include "libavutil/imgutils.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/time.h"
30 #include "codec_internal.h"
31 #include "internal.h"
32 #include "compat/w32dlfcn.h"
33 
34 typedef struct MFContext {
35     AVClass *av_class;
36     HMODULE library;
37     MFFunctions functions;
38     AVFrame *frame;
39     int is_video, is_audio;
40     GUID main_subtype;
41     IMFTransform *mft;
42     IMFMediaEventGenerator *async_events;
43     DWORD in_stream_id, out_stream_id;
44     MFT_INPUT_STREAM_INFO in_info;
45     MFT_OUTPUT_STREAM_INFO out_info;
46     int out_stream_provides_samples;
47     int draining, draining_done;
48     int sample_sent;
49     int async_need_input, async_have_output, async_marker;
50     int64_t reorder_delay;
51     ICodecAPI *codec_api;
52     // set by AVOption
53     int opt_enc_rc;
54     int opt_enc_quality;
55     int opt_enc_scenario;
56     int opt_enc_hw;
57 } MFContext;
58 
59 static int mf_choose_output_type(AVCodecContext *avctx);
60 static int mf_setup_context(AVCodecContext *avctx);
61 
62 #define MF_TIMEBASE (AVRational){1, 10000000}
63 // Sentinel value only used by us.
64 #define MF_INVALID_TIME AV_NOPTS_VALUE
65 
/**
 * Pull events from the transform's event queue until one of the state flags
 * (need input, have output, drain complete, marker) is set.
 *
 * Does nothing when the context has no event generator.
 *
 * @return 0 on success, AVERROR_EXTERNAL if fetching an event failed.
 */
static int mf_wait_events(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;

    if (!ctx->async_events)
        return 0;

    for (;;) {
        IMFMediaEvent *event = NULL;
        MediaEventType type = 0;
        HRESULT hr;

        if (ctx->async_need_input || ctx->async_have_output ||
            ctx->draining_done || ctx->async_marker)
            break;

        hr = IMFMediaEventGenerator_GetEvent(ctx->async_events, 0, &event);
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
                   ff_hr_str(hr));
            return AVERROR_EXTERNAL;
        }

        IMFMediaEvent_GetType(event, &type);
        if (type == ff_METransformNeedInput) {
            // Input requests are ignored once draining has started.
            if (!ctx->draining)
                ctx->async_need_input = 1;
        } else if (type == ff_METransformHaveOutput) {
            ctx->async_have_output = 1;
        } else if (type == ff_METransformDrainComplete) {
            ctx->draining_done = 1;
        } else if (type == ff_METransformMarker) {
            ctx->async_marker = 1;
        }
        // Any other event type is dropped.

        IMFMediaEvent_Release(event);
    }

    return 0;
}
104 
mf_get_tb(AVCodecContext * avctx)105 static AVRational mf_get_tb(AVCodecContext *avctx)
106 {
107     if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
108         return avctx->time_base;
109     return MF_TIMEBASE;
110 }
111 
mf_to_mf_time(AVCodecContext * avctx,int64_t av_pts)112 static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
113 {
114     if (av_pts == AV_NOPTS_VALUE)
115         return MF_INVALID_TIME;
116     return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
117 }
118 
/**
 * Set the sample time from an encoder-time-base pts.
 * The sample is left untouched when the pts is unset.
 */
static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
{
    const LONGLONG mf_pts = mf_to_mf_time(avctx, av_pts);

    if (mf_pts == MF_INVALID_TIME)
        return;
    IMFSample_SetSampleTime(sample, mf_pts);
}
125 
/** Rescale a timestamp from MF_TIMEBASE to the encoder time base. */
static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
{
    const AVRational dst_tb = mf_get_tb(avctx);

    return av_rescale_q(stime, MF_TIMEBASE, dst_tb);
}
130 
/**
 * Read the sample time and convert it to the encoder time base.
 *
 * @return the converted pts, or AV_NOPTS_VALUE if the sample has no time.
 */
static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
{
    LONGLONG mf_pts;

    if (FAILED(IMFSample_GetSampleTime(sample, &mf_pts)))
        return AV_NOPTS_VALUE;
    return mf_from_mf_time(avctx, mf_pts);
}
139 
/**
 * Pull audio-specific information out of the negotiated output type.
 *
 * For codecs other than MP3 and AC3 the MF_MT_USER_DATA blob, if present,
 * becomes the codec extradata (with the 12-byte HEAACWAVEINFO prefix removed
 * for AAC). When the MFT reported a zero output buffer size and does not
 * provide its own samples, the average bytes-per-second value is used as
 * the buffer size instead.
 *
 * The new extradata is assembled in a temporary buffer and only installed
 * once it has been read successfully; any previously set extradata is freed,
 * so calling this more than once does not leak.
 *
 * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure.
 */
static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    UINT32 sz;

    if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
        hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
        if (!FAILED(hr) && sz > 0) {
            uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!extradata)
                return AVERROR(ENOMEM);
            hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, extradata, sz, NULL);
            if (FAILED(hr)) {
                av_free(extradata);
                return AVERROR_EXTERNAL;
            }

            if (avctx->codec_id == AV_CODEC_ID_AAC && sz >= 12) {
                // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
                sz -= 12;
                memmove(extradata, extradata + 12, sz);
                // Re-zero the tail so the padding after the data stays clean.
                memset(extradata + sz, 0, 12);
            }

            av_freep(&avctx->extradata);
            avctx->extradata      = extradata;
            avctx->extradata_size = sz;
        }
    }

    // I don't know where it's documented that we need this. It happens with the
    // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
    // (Certainly any lossy codec will have frames much smaller than 1 second.)
    if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
        hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
        if (!FAILED(hr)) {
            av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
                   "assuming %d bytes instead.\n", (int)sz);
            c->out_info.cbSize = sz;
        }
    }

    return 0;
}
179 
/**
 * Install the MF_MT_MPEG_SEQUENCE_HEADER blob of the output type, if any,
 * as the codec extradata, replacing whatever was set before.
 *
 * @return 0 on success (including "no blob present"), AVERROR(ENOMEM) or
 *         AVERROR_EXTERNAL on failure.
 */
static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
{
    UINT32 size;
    uint8_t *header;
    HRESULT hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &size);

    // Nothing to do when the attribute is missing or empty.
    if (FAILED(hr) || size == 0)
        return 0;

    header = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!header)
        return AVERROR(ENOMEM);

    hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, header, size, NULL);
    if (FAILED(hr)) {
        av_free(header);
        return AVERROR_EXTERNAL;
    }

    // Only swap in the new buffer once it has been filled successfully.
    av_freep(&avctx->extradata);
    avctx->extradata_size = size;
    avctx->extradata      = header;

    return 0;
}
202 
/**
 * Query the transform's current output type, log it, and hand it to the
 * audio or video specific handler.
 *
 * @return the handler's result (0 if neither audio nor video), or
 *         AVERROR_EXTERNAL when the type cannot be queried.
 */
static int mf_output_type_get(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;
    IMFMediaType *cur_type;
    int err = 0;
    HRESULT hr = IMFTransform_GetOutputCurrentType(ctx->mft, ctx->out_stream_id, &cur_type);

    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
    ff_media_type_dump(avctx, cur_type);

    if (ctx->is_video)
        err = mf_encv_output_type_get(avctx, cur_type);
    else if (ctx->is_audio)
        err = mf_enca_output_type_get(avctx, cur_type);

    if (err < 0)
        av_log(avctx, AV_LOG_ERROR, "output type not supported\n");

    IMFMediaType_Release(cur_type);
    return err;
}
232 
/**
 * Copy the payload and timing information of an MFT output sample into a
 * packet.
 *
 * The packet buffer is obtained with ff_get_encode_buffer(). pts and dts are
 * taken from the sample time; if the sample carries a decode timestamp, dts
 * comes from it and both values are shifted back by the reorder delay
 * measured on the first such packet. Audio packets are always flagged as
 * keyframes, video packets only when the sample is marked as a clean point.
 *
 * @return 0 on success, a negative AVERROR code on failure.
 */
static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;
    DWORD len;
    IMFMediaBuffer *buffer;
    BYTE *data;
    UINT64 t;
    UINT32 t32;

    hr = IMFSample_GetTotalLength(sample, &len);
    if (FAILED(hr))
        return AVERROR_EXTERNAL;

    if ((ret = ff_get_encode_buffer(avctx, avpkt, len, 0)) < 0)
        return ret;

    // Check this call's own result; on failure `buffer` is not valid.
    hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
    if (FAILED(hr))
        return AVERROR_EXTERNAL;

    hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
    if (FAILED(hr)) {
        IMFMediaBuffer_Release(buffer);
        return AVERROR_EXTERNAL;
    }

    memcpy(avpkt->data, data, len);

    IMFMediaBuffer_Unlock(buffer);
    IMFMediaBuffer_Release(buffer);

    avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);

    hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
    if (c->is_audio || (!FAILED(hr) && t32 != 0))
        avpkt->flags |= AV_PKT_FLAG_KEY;

    hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
    if (!FAILED(hr)) {
        avpkt->dts = mf_from_mf_time(avctx, t);
        // At least on Qualcomm's HEVC encoder on SD 835, the output dts
        // starts from the input pts of the first frame, while the output pts
        // is shifted forward. Therefore, shift the output values back so that
        // the output pts matches the input.
        if (c->reorder_delay == AV_NOPTS_VALUE)
            c->reorder_delay = avpkt->pts - avpkt->dts;
        avpkt->dts -= c->reorder_delay;
        avpkt->pts -= c->reorder_delay;
    }

    return 0;
}
287 
mf_a_avframe_to_sample(AVCodecContext * avctx,const AVFrame * frame)288 static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
289 {
290     MFContext *c = avctx->priv_data;
291     size_t len;
292     size_t bps;
293     IMFSample *sample;
294 
295     bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->ch_layout.nb_channels;
296     len = frame->nb_samples * bps;
297 
298     sample = ff_create_memory_sample(&c->functions, frame->data[0], len,
299                                      c->in_info.cbAlignment);
300     if (sample)
301         IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
302     return sample;
303 }
304 
mf_v_avframe_to_sample(AVCodecContext * avctx,const AVFrame * frame)305 static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
306 {
307     MFContext *c = avctx->priv_data;
308     IMFSample *sample;
309     IMFMediaBuffer *buffer;
310     BYTE *data;
311     HRESULT hr;
312     int ret;
313     int size;
314 
315     size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
316     if (size < 0)
317         return NULL;
318 
319     sample = ff_create_memory_sample(&c->functions, NULL, size,
320                                      c->in_info.cbAlignment);
321     if (!sample)
322         return NULL;
323 
324     hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
325     if (FAILED(hr)) {
326         IMFSample_Release(sample);
327         return NULL;
328     }
329 
330     hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
331     if (FAILED(hr)) {
332         IMFMediaBuffer_Release(buffer);
333         IMFSample_Release(sample);
334         return NULL;
335     }
336 
337     ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
338                                   avctx->pix_fmt, avctx->width, avctx->height, 1);
339     IMFMediaBuffer_SetCurrentLength(buffer, size);
340     IMFMediaBuffer_Unlock(buffer);
341     IMFMediaBuffer_Release(buffer);
342     if (ret < 0) {
343         IMFSample_Release(sample);
344         return NULL;
345     }
346 
347     IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->pkt_duration));
348 
349     return sample;
350 }
351 
mf_avframe_to_sample(AVCodecContext * avctx,const AVFrame * frame)352 static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
353 {
354     MFContext *c = avctx->priv_data;
355     IMFSample *sample;
356 
357     if (c->is_audio) {
358         sample = mf_a_avframe_to_sample(avctx, frame);
359     } else {
360         sample = mf_v_avframe_to_sample(avctx, frame);
361     }
362 
363     if (sample)
364         mf_sample_set_pts(avctx, sample, frame->pts);
365 
366     return sample;
367 }
368 
/**
 * Submit an input sample to the transform, or start draining when
 * sample is NULL.
 *
 * @return 0 on success, AVERROR(EAGAIN) if the transform does not accept
 *         input right now, AVERROR_EOF if draining was already requested,
 *         or another negative AVERROR code on failure.
 */
static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
{
    MFContext *ctx = avctx->priv_data;
    HRESULT hr;
    int ret;

    if (!sample) {
        if (ctx->draining)
            return AVERROR_EOF;

        hr = IMFTransform_ProcessMessage(ctx->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
        if (FAILED(hr))
            av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
        // Some MFTs (AC3) will send a frame after each drain command (???), so
        // this is required to make draining actually terminate.
        ctx->draining = 1;
        ctx->async_need_input = 0;
        return 0;
    }

    if (ctx->async_events) {
        ret = mf_wait_events(avctx);
        if (ret < 0)
            return ret;
        if (!ctx->async_need_input)
            return AVERROR(EAGAIN);
    }

    // The very first sample is flagged as a discontinuity.
    if (!ctx->sample_sent)
        IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
    ctx->sample_sent = 1;

    hr = IMFTransform_ProcessInput(ctx->mft, ctx->in_stream_id, sample, 0);
    if (hr == MF_E_NOTACCEPTING)
        return AVERROR(EAGAIN);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    ctx->async_need_input = 0;
    return 0;
}
406 
/**
 * Try to fetch one output sample from the transform.
 *
 * On a stream format change the output type is renegotiated and the request
 * is retried. On success *out_sample holds a sample the caller must release.
 *
 * @return 0 with *out_sample set, AVERROR(EAGAIN) if no output is available
 *         yet, AVERROR_EOF once draining has completed, or another negative
 *         AVERROR code on failure.
 */
static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
{
    MFContext *ctx = avctx->priv_data;
    int ret = 0;

    for (;;) {
        IMFSample *buf_sample = NULL;
        MFT_OUTPUT_DATA_BUFFER out_buf;
        DWORD status = 0;
        HRESULT hr;

        *out_sample = NULL;

        if (ctx->async_events) {
            ret = mf_wait_events(avctx);
            if (ret < 0)
                return ret;
            if (ctx->draining_done || !ctx->async_have_output) {
                ret = 0;
                break;
            }
        }

        // Supply our own buffer unless the MFT allocates output samples.
        if (!ctx->out_stream_provides_samples) {
            buf_sample = ff_create_memory_sample(&ctx->functions, NULL,
                                                 ctx->out_info.cbSize,
                                                 ctx->out_info.cbAlignment);
            if (!buf_sample)
                return AVERROR(ENOMEM);
        }

        out_buf = (MFT_OUTPUT_DATA_BUFFER) {
            .dwStreamID = ctx->out_stream_id,
            .pSample    = buf_sample,
        };

        hr = IMFTransform_ProcessOutput(ctx->mft, 0, 1, &out_buf, &status);

        if (out_buf.pEvents)
            IMFCollection_Release(out_buf.pEvents);

        if (!FAILED(hr)) {
            *out_sample = out_buf.pSample;
            ret = 0;
            break;
        }

        // No usable output: drop whatever sample is attached to the buffer.
        if (out_buf.pSample)
            IMFSample_Release(out_buf.pSample);

        if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
            if (ctx->draining)
                ctx->draining_done = 1;
            ret = 0;
            break;
        }

        if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
            av_log(avctx, AV_LOG_WARNING, "stream format change\n");
            ret = mf_choose_output_type(avctx);
            if (ret == 0) // we don't expect renegotiating the input type
                ret = AVERROR_EXTERNAL;
            if (ret > 0) {
                ret = mf_setup_context(avctx);
                if (ret >= 0) {
                    ctx->async_have_output = 0;
                    continue;
                }
            }
            break;
        }

        av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
        ret = AVERROR_EXTERNAL;
        break;
    }

    ctx->async_have_output = 0;

    if (ret >= 0 && !*out_sample) {
        if (ctx->draining_done)
            ret = AVERROR_EOF;
        else
            ret = AVERROR(EAGAIN);
    }

    return ret;
}
488 
/**
 * Encode callback: feed the pending (or a newly fetched) frame to the
 * transform, then try to retrieve one encoded packet.
 *
 * The pending frame is kept only when the transform answered EAGAIN to the
 * input, so it can be offered again on the next call.
 *
 * @return 0 with avpkt filled, or a negative AVERROR code (including
 *         AVERROR(EAGAIN) and AVERROR_EOF).
 */
static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    MFContext *ctx = avctx->priv_data;
    IMFSample *mf_sample = NULL;
    int ret;

    if (!ctx->frame->buf[0]) {
        ret = ff_encode_get_frame(avctx, ctx->frame);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    if (ctx->frame->buf[0]) {
        mf_sample = mf_avframe_to_sample(avctx, ctx->frame);
        if (!mf_sample) {
            av_frame_unref(ctx->frame);
            return AVERROR(ENOMEM);
        }
        // Request a keyframe for I pictures and for the very first sample.
        if (ctx->is_video && ctx->codec_api &&
            (ctx->frame->pict_type == AV_PICTURE_TYPE_I || !ctx->sample_sent))
            ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
    }

    ret = mf_send_sample(avctx, mf_sample);
    if (mf_sample)
        IMFSample_Release(mf_sample);
    if (ret != AVERROR(EAGAIN))
        av_frame_unref(ctx->frame);
    // EAGAIN and EOF on the input side still allow output to be fetched.
    if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
        return ret;

    ret = mf_receive_sample(avctx, &mf_sample);
    if (ret < 0)
        return ret;

    ret = mf_sample_to_avpacket(avctx, mf_sample, avpkt);
    IMFSample_Release(mf_sample);

    return ret;
}
530 
531 // Most encoders seem to enumerate supported audio formats on the output types,
532 // at least as far as channel configuration and sample rate is concerned. Pick
533 // the one which seems to match best.
mf_enca_output_score(AVCodecContext * avctx,IMFMediaType * type)534 static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
535 {
536     MFContext *c = avctx->priv_data;
537     HRESULT hr;
538     UINT32 t;
539     GUID tg;
540     int64_t score = 0;
541 
542     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
543     if (!FAILED(hr) && t == avctx->sample_rate)
544         score |= 1LL << 32;
545 
546     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
547     if (!FAILED(hr) && t == avctx->ch_layout.nb_channels)
548         score |= 2LL << 32;
549 
550     hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
551     if (!FAILED(hr)) {
552         if (IsEqualGUID(&c->main_subtype, &tg))
553             score |= 4LL << 32;
554     }
555 
556     // Select the bitrate (lowest priority).
557     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
558     if (!FAILED(hr)) {
559         int diff = (int)t - avctx->bit_rate / 8;
560         if (diff >= 0) {
561             score |= (1LL << 31) - diff; // prefer lower bitrate
562         } else {
563             score |= (1LL << 30) + diff; // prefer higher bitrate
564         }
565     }
566 
567     hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
568     if (!FAILED(hr) && t != 0)
569         return -1;
570 
571     return score;
572 }
573 
/**
 * Hook for tweaking the chosen audio output type before it is set.
 * Currently leaves the type untouched and always returns 0.
 */
static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    // Overriding the bitrate attributes is deliberately disabled: some MFTs
    // accept arbitrary values, but on others it makes setting the output
    // type fail, so it is considered too fragile.
    //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
    //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);

    return 0;
}
583 
/**
 * Rank an audio input type: bit 0 for a matching sample format, bit 1 for a
 * matching sample rate, bit 2 for a matching channel count.
 *
 * @return the score, or -1 if the type maps to no known sample format.
 */
static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
{
    const enum AVSampleFormat fmt = ff_media_type_to_sample_fmt((IMFAttributes *)type);
    int64_t score;
    UINT32 val;
    HRESULT hr;

    if (fmt == AV_SAMPLE_FMT_NONE)
        return -1; // can not use

    score = (fmt == avctx->sample_fmt) ? 1 : 0;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &val);
    if (!FAILED(hr) && val == avctx->sample_rate)
        score |= 2;

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &val);
    if (!FAILED(hr) && val == avctx->ch_layout.nb_channels)
        score |= 4;

    return score;
}
607 
/**
 * Verify that the chosen audio input type matches the codec context's
 * sample format, sample rate and channel count exactly.
 *
 * @return 0 if it matches, AVERROR(EINVAL) otherwise.
 */
static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    const enum AVSampleFormat fmt = ff_media_type_to_sample_fmt((IMFAttributes *)type);
    UINT32 val;
    HRESULT hr;

    if (fmt != avctx->sample_fmt) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
        return AVERROR(EINVAL);
    }

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &val);
    if (FAILED(hr) || val != avctx->sample_rate) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
        return AVERROR(EINVAL);
    }

    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &val);
    if (FAILED(hr) || val != avctx->ch_layout.nb_channels) {
        av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
633 
/**
 * Rank a video output type: 1 if its subtype equals the wanted main
 * subtype, -1 otherwise (including when the subtype cannot be read).
 */
static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *ctx = avctx->priv_data;
    GUID subtype;

    if (FAILED(IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
        return -1;

    return IsEqualGUID(&ctx->main_subtype, &subtype) ? 1 : -1;
}
649 
/**
 * Fill the chosen video output type with frame size, progressive interlace
 * mode, frame rate, H.264 profile and average bitrate, and push the
 * encoder options to ICodecAPI when that interface is available.
 *
 * The frame rate comes from avctx->framerate when valid, otherwise from the
 * inverse time base scaled by ticks_per_frame.
 *
 * @return always 0.
 */
static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *ctx = avctx->priv_data;
    AVRational fps;

    ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
    IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);

    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
        fps = avctx->framerate;
    } else {
        fps = av_inv_q(avctx->time_base);
        fps.den *= avctx->ticks_per_frame;
    }

    ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, fps.num, fps.den);

    // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
    if (avctx->codec_id == AV_CODEC_ID_H264) {
        // Anything other than main/high falls back to the base profile.
        UINT32 mf_profile;
        if (avctx->profile == FF_PROFILE_H264_MAIN)
            mf_profile = ff_eAVEncH264VProfile_Main;
        else if (avctx->profile == FF_PROFILE_H264_HIGH)
            mf_profile = ff_eAVEncH264VProfile_High;
        else
            mf_profile = ff_eAVEncH264VProfile_Base;
        IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, mf_profile);
    }

    IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);

    if (!ctx->codec_api)
        return 0;

    // Note that some of the ICodecAPI options must be set before SetOutputType.
    if (avctx->bit_rate)
        ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));

    if (ctx->opt_enc_rc >= 0)
        ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(ctx->opt_enc_rc));

    if (ctx->opt_enc_quality >= 0)
        ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(ctx->opt_enc_quality));

    // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
    // defaults this to 1, and that setting is buggy with many of the
    // rate control modes. (0 or 2 b-frames works fine with most rate
    // control modes, but 2 seems buggy with the u_vbr mode.) Setting
    // "scenario" to "camera_record" sets it in CFR mode (where the default
    // is VFR), which makes the encoder avoid dropping frames.
    ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
    avctx->has_b_frames = avctx->max_b_frames > 0;

    ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));

    if (ctx->opt_enc_scenario >= 0)
        ICodecAPI_SetValue(ctx->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(ctx->opt_enc_scenario));

    return 0;
}
711 
/**
 * Rank a video input type: 0 when its pixel format equals the codec
 * context's, -1 (unusable) otherwise.
 */
static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
{
    const enum AVPixelFormat fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);

    return fmt == avctx->pix_fmt ? 0 : -1;
}
720 
/**
 * Verify that the chosen video input type has the codec context's pixel
 * format. The type itself is not modified.
 *
 * @return 0 if it matches, AVERROR(EINVAL) otherwise.
 */
static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    const enum AVPixelFormat fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);

    if (fmt == avctx->pix_fmt) {
        // Setting the frame size on the input type is intentionally disabled:
        //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
        return 0;
    }

    av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
    return AVERROR(EINVAL);
}
733 
/**
 * Enumerate the transform's available output types, pick the best scoring
 * one (or create an empty type if none qualifies), adjust it and set it.
 *
 * @return 1 if the output type was set, 0 if the input type has to be set
 *         first, or a negative AVERROR code on failure.
 */
static int mf_choose_output_type(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;
    IMFMediaType *best = NULL;
    int64_t best_score = -1;
    int best_index = -1;
    HRESULT hr;
    int ret;
    int idx = 0;

    av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
    while (1) {
        IMFMediaType *cand;
        int64_t score = -1;

        hr = IMFTransform_GetOutputAvailableType(ctx->mft, ctx->out_stream_id, idx, &cand);
        if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
            break;
        if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
            ret = 0;
            goto done;
        }
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
            goto done;
        }

        av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", idx);
        ff_media_type_dump(avctx, cand);

        if (ctx->is_video)
            score = mf_encv_output_score(avctx, cand);
        else if (ctx->is_audio)
            score = mf_enca_output_score(avctx, cand);

        if (score > best_score) {
            // Replace the current favourite, keeping our own reference to it.
            if (best)
                IMFMediaType_Release(best);
            best       = cand;
            best_score = score;
            best_index = idx;
            IMFMediaType_AddRef(best);
        }

        IMFMediaType_Release(cand);
        idx++;
    }

    if (best) {
        av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", best_index);
    } else {
        // Nothing usable was enumerated: start from an empty media type.
        hr = ctx->functions.MFCreateMediaType(&best);
        if (FAILED(hr)) {
            ret = AVERROR(ENOMEM);
            goto done;
        }
    }

    if (ctx->is_video)
        ret = mf_encv_output_adjust(avctx, best);
    else if (ctx->is_audio)
        ret = mf_enca_output_adjust(avctx, best);
    else
        ret = 0;

    if (ret < 0)
        goto done;

    av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
    ff_media_type_dump(avctx, best);

    hr = IMFTransform_SetOutputType(ctx->mft, ctx->out_stream_id, best, 0);
    if (!FAILED(hr)) {
        ret = 1;
    } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
        av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
        ret = 0;
    } else {
        av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
        ret = AVERROR_EXTERNAL;
    }

done:
    if (best)
        IMFMediaType_Release(best);
    return ret;
}
822 
/**
 * Enumerate the transform's available input types, pick the best scoring
 * one, validate it and set it.
 *
 * @return 1 if the input type was set, 0 if the output type has to be set
 *         first (or no usable type was enumerated), or a negative AVERROR
 *         code on failure.
 */
static int mf_choose_input_type(AVCodecContext *avctx)
{
    MFContext *ctx = avctx->priv_data;
    IMFMediaType *best = NULL;
    int64_t best_score = -1;
    int best_index = -1;
    HRESULT hr;
    int ret;
    int idx = 0;

    av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
    while (1) {
        IMFMediaType *cand = NULL;
        int64_t score = -1;

        hr = IMFTransform_GetInputAvailableType(ctx->mft, ctx->in_stream_id, idx, &cand);
        if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
            break;
        if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
            ret = 0;
            goto done;
        }
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
            goto done;
        }

        av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", idx);
        ff_media_type_dump(avctx, cand);

        if (ctx->is_video)
            score = mf_encv_input_score(avctx, cand);
        else if (ctx->is_audio)
            score = mf_enca_input_score(avctx, cand);

        if (score > best_score) {
            // Replace the current favourite, keeping our own reference to it.
            if (best)
                IMFMediaType_Release(best);
            best       = cand;
            best_score = score;
            best_index = idx;
            IMFMediaType_AddRef(best);
        }

        IMFMediaType_Release(cand);
        idx++;
    }

    if (!best) {
        // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
        av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
        ret = 0;
        goto done;
    }

    av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", best_index);

    if (ctx->is_video)
        ret = mf_encv_input_adjust(avctx, best);
    else if (ctx->is_audio)
        ret = mf_enca_input_adjust(avctx, best);
    else
        ret = 0;

    if (ret < 0)
        goto done;

    av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
    ff_media_type_dump(avctx, best);

    hr = IMFTransform_SetInputType(ctx->mft, ctx->in_stream_id, best, 0);
    if (!FAILED(hr)) {
        ret = 1;
    } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
        av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
        ret = 0;
    } else {
        av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
        ret = AVERROR_EXTERNAL;
    }

done:
    if (best)
        IMFMediaType_Release(best);
    return ret;
}
910 
/**
 * Negotiate input and output media types with the MFT.
 *
 * This follows steps 1-5 on:
 *  https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
 * If every MFT implementer does this correctly, the loop below should at
 * worst be repeated once.
 *
 * @return 0 once both types are set, a negative error code otherwise
 */
static int mf_negotiate_types(AVCodecContext *avctx)
{
    int input_pending = 1, output_pending = 1;
    int pass = 0;

    while (pass < 2 && (input_pending || output_pending)) {
        int ret = mf_choose_input_type(avctx);
        if (ret < 0)
            return ret;
        input_pending = ret == 0;

        ret = mf_choose_output_type(avctx);
        if (ret < 0)
            return ret;
        output_pending = ret == 0;

        pass++;
    }

    if (!input_pending && !output_pending)
        return 0;

    av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
           input_pending, output_pending);
    return AVERROR_EXTERNAL;
}
937 
/**
 * Query the MFT's input/output stream info into the context, record whether
 * the output stream provides its own samples, then call
 * mf_output_type_get().
 *
 * @return 0 on success, a negative error code on failure
 */
static int mf_setup_context(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    const DWORD provider_flags = MFT_OUTPUT_STREAM_PROVIDES_SAMPLES |
                                 MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES;
    int ret;

    if (FAILED(IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info)))
        return AVERROR_EXTERNAL;
    av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
           (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);

    if (FAILED(IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info)))
        return AVERROR_EXTERNAL;

    // Either flag means the MFT can hand out its own output samples.
    c->out_stream_provides_samples = (c->out_info.dwFlags & provider_flags) != 0;
    av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
           (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
           c->out_stream_provides_samples ? " (provides samples)" : "");

    ret = mf_output_type_get(avctx);
    return ret < 0 ? ret : 0;
}
965 
/**
 * Unlock asynchronous processing on a hardware video MFT and obtain its
 * event generator interface (stored in MFContext.async_events).
 *
 * Does nothing and returns 0 unless this is a video encoder with the
 * hw_encoding option enabled.
 *
 * @return 0 on success (or when async mode is not needed),
 *         AVERROR_EXTERNAL on failure
 */
static int mf_unlock_async(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    IMFAttributes *attrs = NULL;
    UINT32 v;
    int res = AVERROR_EXTERNAL;

    // For hw encoding we unfortunately need to use async mode, otherwise
    // play it safe and avoid it.
    if (!(c->is_video && c->opt_enc_hw))
        return 0;

    hr = IMFTransform_GetAttributes(c->mft, &attrs);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
        // No attribute store was obtained, so there is nothing to release;
        // return directly instead of going through the cleanup label.
        return res;
    }

    hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
        goto err;
    }

    if (!v) {
        av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
        goto err;
    }

    hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
        goto err;
    }

    hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
        goto err;
    }

    res = 0;

err:
    if (attrs)
        IMFAttributes_Release(attrs);
    return res;
}
1014 
/**
 * Instantiate an encoder MFT for the given codec.
 *
 * @param log    logging context
 * @param f      MediaFoundation function table passed to ff_instantiate_mf()
 * @param mft    receives the transform; set to NULL before anything else
 * @param codec  codec whose id/type select the MFT subtype and category
 * @param use_hw forwarded to ff_instantiate_mf()
 * @return 0 on success, AVERROR(ENOSYS) if the codec id has no MF subtype,
 *         or the negative error returned by ff_instantiate_mf()
 */
static int mf_create(void *log, MFFunctions *f, IMFTransform **mft,
                     const AVCodec *codec, int use_hw)
{
    const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
    MFT_REGISTER_TYPE_INFO reg = {0};
    GUID category;
    int ret;

    *mft = NULL;

    if (!subtype)
        return AVERROR(ENOSYS);
    reg.guidSubtype = *subtype;

    if (codec->type == AVMEDIA_TYPE_AUDIO) {
        reg.guidMajorType = MFMediaType_Audio;
        category          = MFT_CATEGORY_AUDIO_ENCODER;
    } else {
        reg.guidMajorType = MFMediaType_Video;
        category          = MFT_CATEGORY_VIDEO_ENCODER;
    }

    ret = ff_instantiate_mf(log, f, category, NULL, &reg, use_hw, mft);
    return ret < 0 ? ret : 0;
}
1044 
/**
 * Create and configure the MFT: instantiate it, optionally unlock async
 * mode, negotiate media types, start streaming and, where requested, wait
 * briefly for the encoder to publish extradata.
 *
 * Resources acquired before a failure are left in the context.
 *
 * @return 0 on success, a negative error code on failure
 */
static int mf_init_encoder(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
    HRESULT hr;
    int ret;
    int want_hw;

    c->frame = av_frame_alloc();
    if (!c->frame)
        return AVERROR(ENOMEM);

    c->is_audio      = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
    c->is_video      = !c->is_audio;
    c->reorder_delay = AV_NOPTS_VALUE;

    want_hw = c->is_video && c->opt_enc_hw;

    if (!subtype)
        return AVERROR(ENOSYS);
    c->main_subtype = *subtype;

    ret = mf_create(avctx, &c->functions, &c->mft, avctx->codec, want_hw);
    if (ret < 0)
        return ret;

    ret = mf_unlock_async(avctx);
    if (ret < 0)
        return ret;

    // ICodecAPI is optional; its absence is not an error.
    hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
    if (!FAILED(hr))
        av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");

    hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
    if (hr == E_NOTIMPL) {
        c->in_stream_id  = 0;
        c->out_stream_id = 0;
    } else if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    ret = mf_negotiate_types(avctx);
    if (ret < 0)
        return ret;

    ret = mf_setup_context(avctx);
    if (ret < 0)
        return ret;

    hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) && c->async_events &&
        c->is_video && !avctx->extradata) {
        int delay, elapsed;

        av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
        // The Qualcomm H264 encoder on SD835 doesn't provide extradata
        // immediately, but it becomes available soon after init (without
        // any waitable event). In practice, it's available after less
        // than 10 ms, but wait for up to 70 ms before giving up.
        // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
        // of the QSV H264 encoder at least) don't provide extradata this
        // way at all, not even after encoding a frame - it's only
        // available prepended to frames.
        for (delay = 10000, elapsed = 0; elapsed < 70 * 1000; delay *= 2) {
            av_usleep(delay);
            elapsed += delay;
            // Best effort: only the presence of extradata is checked.
            mf_output_type_get(avctx);
            if (avctx->extradata)
                break;
        }
        av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
               avctx->extradata ? "Got" : "Didn't get", elapsed / 1000);
    }

    return 0;
}
1132 
/*
 * LOAD_MF_FUNCTION(context, func_name): resolve one MediaFoundation entry
 * point into context->functions.func_name.
 *
 * NOTE: on failure the macro logs an error and executes
 * `return AVERROR_UNKNOWN;` in the *calling* function, so it may only be
 * used inside functions returning int.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon behaves as a single statement.
 */
#if !HAVE_UWP
#define LOAD_MF_FUNCTION(context, func_name) \
    do { \
        (context)->functions.func_name = \
            (void *)dlsym((context)->library, #func_name); \
        if (!(context)->functions.func_name) { \
            av_log((context), AV_LOG_ERROR, \
                   "DLL mfplat.dll failed to find function " \
                   #func_name "\n"); \
            return AVERROR_UNKNOWN; \
        } \
    } while (0)
#else
// In UWP (which lacks LoadLibrary), just link directly against
// the functions - this requires building with new/complete enough
// import libraries.
#define LOAD_MF_FUNCTION(context, func_name) \
    do { \
        (context)->functions.func_name = func_name; \
        if (!(context)->functions.func_name) { \
            av_log((context), AV_LOG_ERROR, \
                   "Failed to find function " #func_name "\n"); \
            return AVERROR_UNKNOWN; \
        } \
    } while (0)
#endif
1153 
1154 // Windows N editions does not provide MediaFoundation by default.
1155 // So to avoid DLL loading error, MediaFoundation is dynamically loaded except
1156 // on UWP build since LoadLibrary is not available on it.
mf_load_library(AVCodecContext * avctx)1157 static int mf_load_library(AVCodecContext *avctx)
1158 {
1159     MFContext *c = avctx->priv_data;
1160 
1161 #if !HAVE_UWP
1162     c->library = dlopen("mfplat.dll", 0);
1163 
1164     if (!c->library) {
1165         av_log(c, AV_LOG_ERROR, "DLL mfplat.dll failed to open\n");
1166         return AVERROR_UNKNOWN;
1167     }
1168 #endif
1169 
1170     LOAD_MF_FUNCTION(c, MFStartup);
1171     LOAD_MF_FUNCTION(c, MFShutdown);
1172     LOAD_MF_FUNCTION(c, MFCreateAlignedMemoryBuffer);
1173     LOAD_MF_FUNCTION(c, MFCreateSample);
1174     LOAD_MF_FUNCTION(c, MFCreateMediaType);
1175     // MFTEnumEx is missing in Windows Vista's mfplat.dll.
1176     LOAD_MF_FUNCTION(c, MFTEnumEx);
1177 
1178     return 0;
1179 }
1180 
/**
 * Release everything owned by the encoder's private context.
 *
 * This may run more than once on the same context (for example a failed
 * init followed by the generic cleanup path), so every COM pointer and the
 * library handle are cleared after being released and are only released
 * when non-NULL.
 *
 * @return always 0
 */
static int mf_close(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;

    if (c->codec_api) {
        ICodecAPI_Release(c->codec_api);
        c->codec_api = NULL;
    }

    if (c->async_events) {
        IMFMediaEventGenerator_Release(c->async_events);
        c->async_events = NULL;
    }

#if !HAVE_UWP
    // Without a loaded library there is nothing to tear down, and the
    // handle must not be passed to dlclose().
    if (c->library) {
        ff_free_mf(&c->functions, &c->mft);
        dlclose(c->library);
        c->library = NULL;
    }
#else
    ff_free_mf(&c->functions, &c->mft);
#endif

    av_frame_free(&c->frame);

    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;

    return 0;
}
1208 
/**
 * Encoder init callback: load MediaFoundation, then set up the encoder.
 *
 * No manual cleanup is done on failure: the encoders built from this file
 * set FF_CODEC_CAP_INIT_CLEANUP, so the close callback is invoked by the
 * generic code when init fails. Calling it here as well would run it twice
 * on the same context.
 *
 * @return 0 on success, a negative error code on failure
 */
static int mf_init(AVCodecContext *avctx)
{
    int ret = mf_load_library(avctx);
    if (ret != 0)
        return ret;

    return mf_init_encoder(avctx);
}
1220 
// Byte offset of an AVOption-backed field inside MFContext.
#define OFFSET(x) offsetof(MFContext, x)

/*
 * Define the AVClass and FFCodec for one MediaFoundation encoder.
 *
 *   MEDIATYPE  AUDIO or VIDEO (pasted onto AVMEDIA_TYPE_)
 *   NAME       codec short name; the encoder is exposed as "<NAME>_mf"
 *   ID         codec id suffix (pasted onto AV_CODEC_ID_)
 *   OPTS       AVOption table for the private class, or NULL
 *   FMTS, CAPS designated initializers (each ending in a comma) spliced
 *              into the FFCodec definition
 */
#define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, FMTS, CAPS)                      \
    static const AVClass ff_ ## NAME ## _mf_encoder_class = {                  \
        .class_name = #NAME "_mf",                                             \
        .item_name  = av_default_item_name,                                    \
        .option     = OPTS,                                                    \
        .version    = LIBAVUTIL_VERSION_INT,                                   \
    };                                                                         \
    const FFCodec ff_ ## NAME ## _mf_encoder = {                               \
        .p.priv_class   = &ff_ ## NAME ## _mf_encoder_class,                   \
        .p.name         = #NAME "_mf",                                         \
        .p.long_name    = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"),    \
        .p.type         = AVMEDIA_TYPE_ ## MEDIATYPE,                          \
        .p.id           = AV_CODEC_ID_ ## ID,                                  \
        .priv_data_size = sizeof(MFContext),                                   \
        .init           = mf_init,                                             \
        .close          = mf_close,                                            \
        FF_CODEC_RECEIVE_PACKET_CB(mf_receive_packet),                         \
        FMTS                                                                   \
        CAPS                                                                   \
        .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |                       \
                          FF_CODEC_CAP_INIT_CLEANUP,                           \
    };
1245 
1246 #define AFMTS \
1247         .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,    \
1248                                                          AV_SAMPLE_FMT_NONE },
1249 #define ACAPS \
1250         .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
1251                           AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE,
1252 
1253 MF_ENCODER(AUDIO, aac,         AAC, NULL, AFMTS, ACAPS);
1254 MF_ENCODER(AUDIO, ac3,         AC3, NULL, AFMTS, ACAPS);
1255 MF_ENCODER(AUDIO, mp3,         MP3, NULL, AFMTS, ACAPS);
1256 
1257 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1258 static const AVOption venc_opts[] = {
1259     {"rate_control",  "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
1260     { "default",      "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
1261     { "cbr",          "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
1262     { "pc_vbr",       "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
1263     { "u_vbr",        "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
1264     { "quality",      "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
1265     // The following rate_control modes require Windows 8.
1266     { "ld_vbr",       "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
1267     { "g_vbr",        "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
1268     { "gld_vbr",      "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},
1269 
1270     {"scenario",          "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
1271     { "default",          "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
1272     { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
1273     { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
1274     { "archive",          "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
1275     { "live_streaming",   "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
1276     { "camera_record",    "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
1277     { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},
1278 
1279     {"quality",       "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
1280     {"hw_encoding",   "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
1281     {NULL}
1282 };
1283 
1284 #define VFMTS \
1285         .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,       \
1286                                                         AV_PIX_FMT_YUV420P,    \
1287                                                         AV_PIX_FMT_NONE },
1288 #define VCAPS \
1289         .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
1290                           AV_CODEC_CAP_DR1,
1291 
1292 MF_ENCODER(VIDEO, h264,        H264, venc_opts, VFMTS, VCAPS);
1293 MF_ENCODER(VIDEO, hevc,        HEVC, venc_opts, VFMTS, VCAPS);
1294