1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/audio/win/audio_unified_win.h"
6
7 #include <Functiondiscoverykeys_devpkey.h>
8
9 #include "base/debug/trace_event.h"
10 #ifndef NDEBUG
11 #include "base/file_util.h"
12 #include "base/path_service.h"
13 #endif
14 #include "base/time/time.h"
15 #include "base/win/scoped_com_initializer.h"
16 #include "media/audio/win/audio_manager_win.h"
17 #include "media/audio/win/avrt_wrapper_win.h"
18 #include "media/audio/win/core_audio_util_win.h"
19
20 using base::win::ScopedComPtr;
21 using base::win::ScopedCOMInitializer;
22 using base::win::ScopedCoMem;
23
24 // Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
25 // Larger values of alpha reduce the level of smoothing.
26 // See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
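// Example: with alpha = 0.1, each update moves the running average 10% of
// the way towards the newest sample, so a persistent offset is absorbed
// over roughly ten updates.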
27 static const double kAlpha = 0.1;
28
29 // Compute a rate compensation which always attracts us back to a specified
30 // target level over a period of |kCorrectionTimeSeconds|.
31 static const double kCorrectionTimeSeconds = 0.1;
32
33 #ifndef NDEBUG
34 // Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
35 // See LogElementNames enumerator for details on what each column represents.
36 static const size_t kMaxNumSampleTypes = 4;
37
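// Max number of elements in |input_params_| and |output_params_|; each array
// stores the sample rate and the buffer size for one side of the stream.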
38 static const size_t kMaxNumParams = 2;
39
40 // Max number of rows in the output file |kUnifiedAudioDebugFileName|.
41 // Each row corresponds to one set of sample values for (approximately) the
42 // same time instant (stored in the first column).
43 static const size_t kMaxFileSamples = 10000;
44
45 // Name of output debug file used for off-line analysis of measurements which
46 // can be utilized for performance tuning of this class.
47 static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";
48
49 // Name of output debug file used for off-line analysis of measurements.
50 // This file will contain a list of audio parameters.
51 static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
52 #endif
53
54 // Use the acquired IAudioClock interface to derive a time stamp of the audio
55 // sample which is currently playing through the speakers.
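// Example: |position| = 96000 and a device frequency of 48000 => 2000 [ms].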
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
57 UINT64 device_frequency = 0, position = 0;
58 if (FAILED(clock->GetFrequency(&device_frequency)) ||
59 FAILED(clock->GetPosition(&position, NULL))) {
60 return 0.0;
61 }
62 return base::Time::kMillisecondsPerSecond *
63 (static_cast<double>(position) / device_frequency);
64 }
65
// Get a time stamp in milliseconds given the number of audio frames in |num_frames|
67 // using the current sample rate |fs| as scale factor.
68 // Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
70 return base::Time::kMillisecondsPerSecond *
71 (static_cast<double>(num_frames) / fs);
72 }
73
74 // Convert a timestamp in milliseconds to byte units given the audio format
75 // in |format|.
76 // Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
77 // is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
80 double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
81 return static_cast<int>(seconds * format.Format.nSamplesPerSec *
82 format.Format.nBlockAlign + 0.5);
83 }
84
85 // Convert frame count to milliseconds given the audio format in |format|.
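// Example: |num_frames| = 480 and a 48000 Hz sample rate => 10 [ms].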
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
88 return (base::Time::kMillisecondsPerSecond * num_frames) /
89 static_cast<double>(format.Format.nSamplesPerSec);
90 }
91
92 namespace media {
93
WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
97 : creating_thread_id_(base::PlatformThread::CurrentId()),
98 manager_(manager),
99 params_(params),
100 input_channels_(params.input_channels()),
101 output_channels_(params.channels()),
102 input_device_id_(input_device_id),
103 share_mode_(CoreAudioUtil::GetShareMode()),
104 opened_(false),
105 volume_(1.0),
106 output_buffer_size_frames_(0),
107 input_buffer_size_frames_(0),
108 endpoint_render_buffer_size_frames_(0),
109 endpoint_capture_buffer_size_frames_(0),
110 num_written_frames_(0),
111 total_delay_ms_(0.0),
112 total_delay_bytes_(0),
113 source_(NULL),
114 input_callback_received_(false),
115 io_sample_rate_ratio_(1),
116 target_fifo_frames_(0),
117 average_delta_(0),
118 fifo_rate_compensation_(1),
119 update_output_delay_(false),
120 capture_delay_ms_(0) {
121 TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
122 VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
123 DCHECK(manager_);
124
125 VLOG(1) << "Input channels : " << input_channels_;
126 VLOG(1) << "Output channels: " << output_channels_;
127 VLOG(1) << "Sample rate : " << params_.sample_rate();
128 VLOG(1) << "Buffer size : " << params.frames_per_buffer();
129
130 #ifndef NDEBUG
131 input_time_stamps_.reset(new int64[kMaxFileSamples]);
132 num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
133 resampler_margin_.reset(new int[kMaxFileSamples]);
134 fifo_rate_comps_.reset(new double[kMaxFileSamples]);
135 num_elements_.reset(new int[kMaxNumSampleTypes]);
136 std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
137 input_params_.reset(new int[kMaxNumParams]);
138 output_params_.reset(new int[kMaxNumParams]);
139 #endif
140
141 DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
142 << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";
143
144 // Load the Avrt DLL if not already loaded. Required to support MMCSS.
145 bool avrt_init = avrt::Initialize();
146 DCHECK(avrt_init) << "Failed to load the avrt.dll";
147
148 // All events are auto-reset events and non-signaled initially.
149
150 // Create the event which the audio engine will signal each time a buffer
151 // has been recorded.
152 capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
153
// Create the event which will be set in Stop() when streaming shall stop.
155 stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
156 }
157
WASAPIUnifiedStream::~WASAPIUnifiedStream() {
159 VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
160 #ifndef NDEBUG
161 base::FilePath data_file_name;
162 PathService::Get(base::DIR_EXE, &data_file_name);
163 data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
164 data_file_ = base::OpenFile(data_file_name, "wt");
165 DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";
166
167 size_t n = 0;
168 size_t elements_to_write = *std::min_element(
169 num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
170 while (n < elements_to_write) {
171 fprintf(data_file_, "%I64d %d %d %10.9f\n",
172 input_time_stamps_[n],
173 num_frames_in_fifo_[n],
174 resampler_margin_[n],
175 fifo_rate_comps_[n]);
176 ++n;
177 }
178 base::CloseFile(data_file_);
179
180 base::FilePath param_file_name;
PathService::Get(base::DIR_EXE, &param_file_name);
182 param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
183 param_file_ = base::OpenFile(param_file_name, "wt");
184 DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
185 fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
186 fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
187 base::CloseFile(param_file_);
188 #endif
189 }
190
bool WASAPIUnifiedStream::Open() {
192 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
193 DVLOG(1) << "WASAPIUnifiedStream::Open()";
194 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
195 if (opened_)
196 return true;
197
198 AudioParameters hw_output_params;
199 HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
200 eRender, eConsole, &hw_output_params);
201 if (FAILED(hr)) {
202 LOG(ERROR) << "Failed to get preferred output audio parameters.";
203 return false;
204 }
205
206 AudioParameters hw_input_params;
207 if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
208 // Query native parameters for the default capture device.
209 hr = CoreAudioUtil::GetPreferredAudioParameters(
210 eCapture, eConsole, &hw_input_params);
211 } else {
212 // Query native parameters for the capture device given by
213 // |input_device_id_|.
214 hr = CoreAudioUtil::GetPreferredAudioParameters(
215 input_device_id_, &hw_input_params);
216 }
217 if (FAILED(hr)) {
218 LOG(ERROR) << "Failed to get preferred input audio parameters.";
219 return false;
220 }
221
222 // It is currently only possible to open up the output audio device using
223 // the native number of channels.
224 if (output_channels_ != hw_output_params.channels()) {
225 LOG(ERROR) << "Audio device does not support requested output channels.";
226 return false;
227 }
228
229 // It is currently only possible to open up the input audio device using
230 // the native number of channels. If the client asks for a higher channel
231 // count, we will do channel upmixing in this class. The most typical
232 // example is that the client provides stereo but the hardware can only be
233 // opened in mono mode. We will do mono to stereo conversion in this case.
234 if (input_channels_ < hw_input_params.channels()) {
235 LOG(ERROR) << "Audio device does not support requested input channels.";
236 return false;
237 } else if (input_channels_ > hw_input_params.channels()) {
238 ChannelLayout input_layout =
239 GuessChannelLayout(hw_input_params.channels());
240 ChannelLayout output_layout = GuessChannelLayout(input_channels_);
241 channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
242 DVLOG(1) << "Remixing input channel layout from " << input_layout
243 << " to " << output_layout << "; from "
244 << hw_input_params.channels() << " channels to "
245 << input_channels_;
246 }
247
248 if (hw_output_params.sample_rate() != params_.sample_rate()) {
249 LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
250 << " must match the hardware sample-rate: "
251 << hw_output_params.sample_rate();
252 return false;
253 }
254
255 if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
256 LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
257 << " must match the hardware buffer size: "
258 << hw_output_params.frames_per_buffer();
259 return false;
260 }
261
262 // Set up WAVEFORMATPCMEX structures for input and output given the specified
263 // audio parameters.
264 SetIOFormats(hw_input_params, params_);
265
266 // Create the input and output busses.
267 input_bus_ = AudioBus::Create(
268 hw_input_params.channels(), input_buffer_size_frames_);
269 output_bus_ = AudioBus::Create(params_);
270
271 // One extra bus is needed for the input channel mixing case.
272 if (channel_mixer_) {
273 DCHECK_LT(hw_input_params.channels(), input_channels_);
274 // The size of the |channel_bus_| must be the same as the size of the
275 // output bus to ensure that the channel manager can deal with both
276 // resampled and non-resampled data as input.
277 channel_bus_ = AudioBus::Create(
278 input_channels_, params_.frames_per_buffer());
279 }
280
281 // Check if FIFO and resampling is required to match the input rate to the
282 // output rate. If so, a special thread loop, optimized for this case, will
283 // be used. This mode is also called varispeed mode.
284 // Note that we can also use this mode when input and output rates are the
285 // same but native buffer sizes differ (can happen if two different audio
286 // devices are used). For this case, the resampler uses a target ratio of
287 // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
288 // required to compensate for the difference in buffer sizes.
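// Example: capturing at 44100 Hz while rendering at 48000 Hz would give an
// initial resampler ratio of 44100 / 48000, i.e. roughly 0.92, and SetRatio()
// then only applies small corrections around that value to counter clock
// drift.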
289 // TODO(henrika): we could perhaps improve the performance for the second
// case here by only using the FIFO and avoiding resampling. Not sure how much
// that would give and we risk not compensating for clock drift.
292 if (hw_input_params.sample_rate() != params_.sample_rate() ||
293 hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
294 DoVarispeedInitialization(hw_input_params, params_);
295 }
296
297 // Render side (event driven only in varispeed mode):
298
299 ScopedComPtr<IAudioClient> audio_output_client =
300 CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
301 if (!audio_output_client)
302 return false;
303
304 if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
305 share_mode_,
306 &output_format_)) {
307 return false;
308 }
309
310 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
311 // The |render_event_| will be NULL unless varispeed mode is utilized.
312 hr = CoreAudioUtil::SharedModeInitialize(
313 audio_output_client, &output_format_, render_event_.Get(),
314 &endpoint_render_buffer_size_frames_);
315 } else {
316 // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
317 }
318 if (FAILED(hr))
319 return false;
320
321 ScopedComPtr<IAudioRenderClient> audio_render_client =
322 CoreAudioUtil::CreateRenderClient(audio_output_client);
323 if (!audio_render_client)
324 return false;
325
326 // Capture side (always event driven but format depends on varispeed or not):
327
328 ScopedComPtr<IAudioClient> audio_input_client;
329 if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
330 audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
331 } else {
332 ScopedComPtr<IMMDevice> audio_input_device(
333 CoreAudioUtil::CreateDevice(input_device_id_));
334 audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
335 }
336 if (!audio_input_client)
337 return false;
338
339 if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
340 share_mode_,
341 &input_format_)) {
342 return false;
343 }
344
345 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
346 // Include valid event handle for event-driven initialization.
// The input side is always event driven independent of whether varispeed is
348 // used or not.
349 hr = CoreAudioUtil::SharedModeInitialize(
350 audio_input_client, &input_format_, capture_event_.Get(),
351 &endpoint_capture_buffer_size_frames_);
352 } else {
353 // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
354 }
355 if (FAILED(hr))
356 return false;
357
358 ScopedComPtr<IAudioCaptureClient> audio_capture_client =
359 CoreAudioUtil::CreateCaptureClient(audio_input_client);
360 if (!audio_capture_client)
361 return false;
362
363 // Varispeed mode requires additional preparations.
364 if (VarispeedMode())
365 ResetVarispeed();
366
367 // Store all valid COM interfaces.
368 audio_output_client_ = audio_output_client;
369 audio_render_client_ = audio_render_client;
370 audio_input_client_ = audio_input_client;
371 audio_capture_client_ = audio_capture_client;
372
373 opened_ = true;
374 return SUCCEEDED(hr);
375 }
376
void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
378 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
379 DVLOG(1) << "WASAPIUnifiedStream::Start()";
380 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
381 CHECK(callback);
382 CHECK(opened_);
383
384 if (audio_io_thread_) {
385 CHECK_EQ(callback, source_);
386 return;
387 }
388
389 source_ = callback;
390
391 if (VarispeedMode()) {
392 ResetVarispeed();
393 fifo_rate_compensation_ = 1.0;
394 average_delta_ = 0.0;
395 input_callback_received_ = false;
396 update_output_delay_ = false;
397 }
398
399 // Create and start the thread that will listen for capture events.
400 // We will also listen on render events on the same thread if varispeed
401 // mode is utilized.
402 audio_io_thread_.reset(
403 new base::DelegateSimpleThread(this, "wasapi_io_thread"));
404 audio_io_thread_->Start();
405 if (!audio_io_thread_->HasBeenStarted()) {
406 DLOG(ERROR) << "Failed to start WASAPI IO thread.";
407 return;
408 }
409
410 // Start input streaming data between the endpoint buffer and the audio
411 // engine.
412 HRESULT hr = audio_input_client_->Start();
413 if (FAILED(hr)) {
414 StopAndJoinThread(hr);
415 return;
416 }
417
418 // Ensure that the endpoint buffer is prepared with silence.
419 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
420 if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
421 audio_output_client_, audio_render_client_)) {
422 DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
423 return;
424 }
425 }
426 num_written_frames_ = endpoint_render_buffer_size_frames_;
427
428 // Start output streaming data between the endpoint buffer and the audio
429 // engine.
430 hr = audio_output_client_->Start();
431 if (FAILED(hr)) {
432 StopAndJoinThread(hr);
433 return;
434 }
435 }
436
void WASAPIUnifiedStream::Stop() {
438 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
439 DVLOG(1) << "WASAPIUnifiedStream::Stop()";
440 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
441 if (!audio_io_thread_)
442 return;
443
444 // Stop input audio streaming.
445 HRESULT hr = audio_input_client_->Stop();
446 if (FAILED(hr)) {
447 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
448 << "Failed to stop input streaming: " << std::hex << hr;
449 }
450
451 // Stop output audio streaming.
452 hr = audio_output_client_->Stop();
453 if (FAILED(hr)) {
454 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
455 << "Failed to stop output streaming: " << std::hex << hr;
456 }
457
458 // Wait until the thread completes and perform cleanup.
459 SetEvent(stop_streaming_event_.Get());
460 audio_io_thread_->Join();
461 audio_io_thread_.reset();
462
463 // Ensure that we don't quit the main thread loop immediately next
464 // time Start() is called.
465 ResetEvent(stop_streaming_event_.Get());
466
467 // Clear source callback, it'll be set again on the next Start() call.
468 source_ = NULL;
469
470 // Flush all pending data and reset the audio clock stream position to 0.
471 hr = audio_output_client_->Reset();
472 if (FAILED(hr)) {
473 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
474 << "Failed to reset output streaming: " << std::hex << hr;
475 }
476
hr = audio_input_client_->Reset();
478 if (FAILED(hr)) {
479 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
480 << "Failed to reset input streaming: " << std::hex << hr;
481 }
482
483 // Extra safety check to ensure that the buffers are cleared.
484 // If the buffers are not cleared correctly, the next call to Start()
485 // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
// TODO(henrika): this check is only needed for shared-mode streams.
487 UINT32 num_queued_frames = 0;
488 audio_output_client_->GetCurrentPadding(&num_queued_frames);
489 DCHECK_EQ(0u, num_queued_frames);
490 }
491
void WASAPIUnifiedStream::Close() {
493 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
494 DVLOG(1) << "WASAPIUnifiedStream::Close()";
495 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
496
// It is valid to call Close() before calling Open() or Start().
498 // It is also valid to call Close() after Start() has been called.
499 Stop();
500
501 // Inform the audio manager that we have been closed. This will cause our
502 // destruction.
503 manager_->ReleaseOutputStream(this);
504 }
505
void WASAPIUnifiedStream::SetVolume(double volume) {
507 DVLOG(1) << "SetVolume(volume=" << volume << ")";
508 if (volume < 0 || volume > 1)
509 return;
510 volume_ = volume;
511 }
512
void WASAPIUnifiedStream::GetVolume(double* volume) {
514 DVLOG(1) << "GetVolume()";
515 *volume = static_cast<double>(volume_);
516 }
517
518
void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
520 // TODO(henrika): utilize frame_delay?
// A non-zero frame delay means multiple callbacks were necessary to
522 // fulfill the requested number of frames.
523 if (frame_delay > 0)
524 DVLOG(3) << "frame_delay: " << frame_delay;
525
526 #ifndef NDEBUG
527 resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
528 fifo_->frames() - audio_bus->frames();
529 num_elements_[RESAMPLER_MARGIN]++;
530 #endif
531
532 if (fifo_->frames() < audio_bus->frames()) {
DLOG(ERROR) << "Not enough data in the FIFO ("
534 << fifo_->frames() << " < " << audio_bus->frames() << ")";
535 audio_bus->Zero();
536 return;
537 }
538
539 fifo_->Consume(audio_bus, 0, audio_bus->frames());
540 }
541
void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
544 for (int n = 0; n < 2; ++n) {
545 const AudioParameters& params = (n == 0) ? input_params : output_params;
546 WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
547 WAVEFORMATEX* format = &xformat->Format;
548
549 // Begin with the WAVEFORMATEX structure that specifies the basic format.
550 format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
551 format->nChannels = params.channels();
552 format->nSamplesPerSec = params.sample_rate();
553 format->wBitsPerSample = params.bits_per_sample();
554 format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
555 format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
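// Example: 16-bit stereo at 48000 Hz => nBlockAlign = 2 * 2 = 4 bytes and
// nAvgBytesPerSec = 48000 * 4 = 192000 bytes per second.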
556 format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
557
558 // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
559 // Note that we always open up using the native channel layout.
560 (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
561 (*xformat).dwChannelMask =
562 CoreAudioUtil::GetChannelConfig(
563 std::string(), n == 0 ? eCapture : eRender);
564 (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
565 }
566
567 input_buffer_size_frames_ = input_params.frames_per_buffer();
568 output_buffer_size_frames_ = output_params.frames_per_buffer();
569 VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
570 VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;
571
572 #ifndef NDEBUG
573 input_params_[0] = input_format_.Format.nSamplesPerSec;
574 input_params_[1] = input_buffer_size_frames_;
575 output_params_[0] = output_format_.Format.nSamplesPerSec;
576 output_params_[1] = output_buffer_size_frames_;
577 #endif
578 }
579
void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
582 DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";
583
584 // A FIFO is required in this mode for input to output buffering.
585 // Note that it will add some latency.
586 fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
587 VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
588 << " (#channels=" << input_params.channels() << ")";
589
590 // Create the multi channel resampler using the initial sample rate ratio.
591 // We will call MultiChannelResampler::SetRatio() during runtime to
592 // allow arbitrary combinations of input and output devices running off
593 // different clocks and using different drivers, with potentially
594 // differing sample-rates. Note that the requested block size is given by
595 // the native input buffer size |input_buffer_size_frames_|.
596 io_sample_rate_ratio_ = input_params.sample_rate() /
597 static_cast<double>(output_params.sample_rate());
598 DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
599 resampler_.reset(new MultiChannelResampler(
600 input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
601 base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
602 VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
603 << output_params.sample_rate();
604
605 // The optimal number of frames we'd like to keep in the FIFO at all times.
606 // The actual size will vary but the goal is to ensure that the average size
607 // is given by this value.
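// For illustration, a safety factor of 2 and a native input buffer of
// 441 frames would give a target level of 882 frames.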
608 target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
609 VLOG(1) << "Target FIFO size: " << target_fifo_frames_;
610
611 // Create the event which the audio engine will signal each time it
612 // wants an audio buffer to render.
613 render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
614
615 // Allocate memory for temporary audio bus used to store resampled input
616 // audio.
617 resampled_bus_ = AudioBus::Create(
618 input_params.channels(), output_buffer_size_frames_);
619
620 // Buffer initial silence corresponding to target I/O buffering.
621 ResetVarispeed();
622 }
623
void WASAPIUnifiedStream::ResetVarispeed() {
625 DCHECK(VarispeedMode());
626
627 // Buffer initial silence corresponding to target I/O buffering.
628 fifo_->Clear();
629 scoped_ptr<AudioBus> silence =
630 AudioBus::Create(input_format_.Format.nChannels,
631 target_fifo_frames_);
632 silence->Zero();
633 fifo_->Push(silence.get());
634 resampler_->Flush();
635 }
636
void WASAPIUnifiedStream::Run() {
638 ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
639
640 // Increase the thread priority.
641 audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
642
643 // Enable MMCSS to ensure that this thread receives prioritized access to
644 // CPU resources.
645 // TODO(henrika): investigate if it is possible to include these additional
646 // settings in SetThreadPriority() as well.
647 DWORD task_index = 0;
648 HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
649 &task_index);
650 bool mmcss_is_ok =
651 (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
652 if (!mmcss_is_ok) {
653 // Failed to enable MMCSS on this thread. It is not fatal but can lead
654 // to reduced QoS at high load.
655 DWORD err = GetLastError();
656 LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
657 }
658
659 // The IAudioClock interface enables us to monitor a stream's data
660 // rate and the current position in the stream. Allocate it before we
661 // start spinning.
662 ScopedComPtr<IAudioClock> audio_output_clock;
663 HRESULT hr = audio_output_client_->GetService(
664 __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
665 LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
666 << std::hex << hr;
667
668 bool streaming = true;
669 bool error = false;
670
671 HANDLE wait_array[3];
672 size_t num_handles = 0;
673 wait_array[num_handles++] = stop_streaming_event_;
674 wait_array[num_handles++] = capture_event_;
675 if (render_event_) {
676 // One extra event handle is needed in varispeed mode.
677 wait_array[num_handles++] = render_event_;
678 }
679
680 // Keep streaming audio until stop event is signaled.
681 // Capture events are always used but render events are only active in
682 // varispeed mode.
683 while (streaming && !error) {
684 // Wait for a close-down event, or a new capture event.
685 DWORD wait_result = WaitForMultipleObjects(num_handles,
686 wait_array,
687 FALSE,
688 INFINITE);
689 switch (wait_result) {
690 case WAIT_OBJECT_0 + 0:
691 // |stop_streaming_event_| has been set.
692 streaming = false;
693 break;
694 case WAIT_OBJECT_0 + 1:
695 // |capture_event_| has been set
696 if (VarispeedMode()) {
697 ProcessInputAudio();
698 } else {
699 ProcessInputAudio();
700 ProcessOutputAudio(audio_output_clock);
701 }
702 break;
703 case WAIT_OBJECT_0 + 2:
704 DCHECK(VarispeedMode());
705 // |render_event_| has been set
706 ProcessOutputAudio(audio_output_clock);
707 break;
708 default:
709 error = true;
710 break;
711 }
712 }
713
714 if (streaming && error) {
715 // Stop audio streaming since something has gone wrong in our main thread
716 // loop. Note that, we are still in a "started" state, hence a Stop() call
717 // is required to join the thread properly.
718 audio_input_client_->Stop();
719 audio_output_client_->Stop();
720 PLOG(ERROR) << "WASAPI streaming failed.";
721 }
722
723 // Disable MMCSS.
724 if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
725 PLOG(WARNING) << "Failed to disable MMCSS";
726 }
727 }
728
void WASAPIUnifiedStream::ProcessInputAudio() {
730 TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");
731
732 BYTE* data_ptr = NULL;
733 UINT32 num_captured_frames = 0;
734 DWORD flags = 0;
735 UINT64 device_position = 0;
736 UINT64 capture_time_stamp = 0;
737
738 const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;
739
740 base::TimeTicks now_tick = base::TimeTicks::HighResNow();
741
742 #ifndef NDEBUG
743 if (VarispeedMode()) {
744 input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
745 now_tick.ToInternalValue();
746 num_elements_[INPUT_TIME_STAMP]++;
747 }
748 #endif
749
750 // Retrieve the amount of data in the capture endpoint buffer.
751 // |endpoint_capture_time_stamp| is the value of the performance
752 // counter at the time that the audio endpoint device recorded
753 // the device position of the first audio frame in the data packet.
754 HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
755 &num_captured_frames,
756 &flags,
757 &device_position,
758 &capture_time_stamp);
759 if (FAILED(hr)) {
760 DLOG(ERROR) << "Failed to get data from the capture buffer";
761 return;
762 }
763
764 if (hr == AUDCLNT_S_BUFFER_EMPTY) {
// The return code is a success code but a new packet is *not* available
766 // and none of the output parameters in the GetBuffer() call contains valid
767 // values. Best we can do is to deliver silence and avoid setting
768 // |input_callback_received_| since this only seems to happen for the
769 // initial event(s) on some devices.
770 input_bus_->Zero();
771 } else {
772 // Valid data has been recorded and it is now OK to set the flag which
773 // informs the render side that capturing has started.
774 input_callback_received_ = true;
775 }
776
777 if (num_captured_frames != 0) {
778 if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
779 // Clear out the capture buffer since silence is reported.
780 input_bus_->Zero();
781 } else {
782 // Store captured data in an audio bus after de-interleaving
783 // the data to match the audio bus structure.
784 input_bus_->FromInterleaved(
785 data_ptr, num_captured_frames, bytes_per_sample);
786 }
787 }
788
789 hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
790 DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
791
792 // Buffer input into FIFO if varispeed mode is used. The render event
793 // will drive resampling of this data to match the output side.
794 if (VarispeedMode()) {
795 int available_frames = fifo_->max_frames() - fifo_->frames();
796 if (input_bus_->frames() <= available_frames) {
797 fifo_->Push(input_bus_.get());
798 }
799 #ifndef NDEBUG
800 num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
801 fifo_->frames();
802 num_elements_[NUM_FRAMES_IN_FIFO]++;
803 #endif
804 }
805
// Save resources by not asking for new delay estimates each time.
807 // These estimates are fairly stable and it is perfectly safe to only
808 // sample at a rate of ~1Hz.
809 // TODO(henrika): we might have to increase the update rate in varispeed
810 // mode since the delay variations are higher in this mode.
811 if ((now_tick - last_delay_sample_time_).InMilliseconds() >
812 kTimeDiffInMillisecondsBetweenDelayMeasurements &&
813 input_callback_received_) {
814 // Calculate the estimated capture delay, i.e., the latency between
// the recording time and the time when we are notified about
816 // the recorded data. Note that the capture time stamp is given in
817 // 100-nanosecond (0.1 microseconds) units.
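// Multiplying the stamp by 0.1 therefore converts it to microseconds, the
// internal unit of base::TimeTicks.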
818 base::TimeDelta diff =
819 now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
820 capture_delay_ms_ = diff.InMillisecondsF();
821
822 last_delay_sample_time_ = now_tick;
823 update_output_delay_ = true;
824 }
825 }
826
void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
828 TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");
829
830 if (!input_callback_received_) {
831 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
832 if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
833 audio_output_client_, audio_render_client_))
834 DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
835 }
836 return;
837 }
838
839 // Rate adjusted resampling is required in varispeed mode. It means that
840 // recorded audio samples will be read from the FIFO, resampled to match the
841 // output sample-rate and then stored in |resampled_bus_|.
842 if (VarispeedMode()) {
843 // Calculate a varispeed rate scalar factor to compensate for drift between
844 // input and output. We use the actual number of frames still in the FIFO
845 // compared with the ideal value of |target_fifo_frames_|.
846 int delta = fifo_->frames() - target_fifo_frames_;
847
848 // Average |delta| because it can jitter back/forth quite frequently
849 // by +/- the hardware buffer-size *if* the input and output callbacks are
850 // happening at almost exactly the same time. Also, if the input and output
851 // sample-rates are different then |delta| will jitter quite a bit due to
852 // the rate conversion happening in the varispeed, plus the jittering of
853 // the callbacks. The average value is what's important here.
854 // We use an exponential smoothing filter to reduce the variations.
855 average_delta_ += kAlpha * (delta - average_delta_);
856
857 // Compute a rate compensation which always attracts us back to the
858 // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
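// Example: at 48000 Hz, correction_time_frames = 0.1 * 48000 = 4800, so an
// average surplus of +48 frames gives a compensation factor of
// (4800 + 48) / 4800 = 1.01, i.e. input is consumed about 1% faster until
// the FIFO level has drifted back to the target.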
859 double correction_time_frames =
860 kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
861 fifo_rate_compensation_ =
862 (correction_time_frames + average_delta_) / correction_time_frames;
863
864 #ifndef NDEBUG
865 fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
866 fifo_rate_compensation_;
867 num_elements_[RATE_COMPENSATION]++;
868 #endif
869
870 // Adjust for FIFO drift.
871 const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
872 resampler_->SetRatio(new_ratio);
873 // Get resampled input audio from FIFO where the size is given by the
874 // output side.
875 resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
876 }
877
878 // Derive a new total delay estimate if the capture side has set the
879 // |update_output_delay_| flag.
880 if (update_output_delay_) {
881 // Calculate the estimated render delay, i.e., the time difference
882 // between the time when data is added to the endpoint buffer and
883 // when the data is played out on the actual speaker.
884 const double stream_pos = CurrentStreamPosInMilliseconds(
885 num_written_frames_ + output_buffer_size_frames_,
886 output_format_.Format.nSamplesPerSec);
887 const double speaker_pos =
888 SpeakerStreamPosInMilliseconds(audio_output_clock);
889 const double render_delay_ms = stream_pos - speaker_pos;
890 const double fifo_delay_ms = VarispeedMode() ?
891 FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;
892
893 // Derive the total delay, i.e., the sum of the input and output
894 // delays. Also convert the value into byte units. An extra FIFO delay
895 // is added for varispeed usage cases.
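// Example: a 10 ms capture delay, a 20 ms render delay and a 10 ms FIFO
// delay give a total delay of 40 ms, i.e. 0.040 * 48000 * 4 = 7680 bytes
// for 16-bit stereo rendered at 48000 Hz.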
896 total_delay_ms_ = VarispeedMode() ?
897 capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
898 capture_delay_ms_ + render_delay_ms;
899 DVLOG(2) << "total_delay_ms : " << total_delay_ms_;
900 DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
901 DVLOG(3) << " render_delay_ms : " << render_delay_ms;
902 DVLOG(3) << " fifo_delay_ms : " << fifo_delay_ms;
903 total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
904
905 // Wait for new signal from the capture side.
906 update_output_delay_ = false;
907 }
908
909 // Select source depending on if varispeed is utilized or not.
910 // Also, the source might be the output of a channel mixer if channel mixing
911 // is required to match the native input channels to the number of input
912 // channels used by the client (given by |input_channels_| in this case).
913 AudioBus* input_bus = VarispeedMode() ?
914 resampled_bus_.get() : input_bus_.get();
915 if (channel_mixer_) {
916 DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
917 // Most common case is 1->2 channel upmixing.
918 channel_mixer_->Transform(input_bus, channel_bus_.get());
919 // Use the output from the channel mixer as new input bus.
920 input_bus = channel_bus_.get();
921 }
922
923 // Prepare for rendering by calling OnMoreIOData().
924 int frames_filled = source_->OnMoreIOData(
925 input_bus,
926 output_bus_.get(),
927 AudioBuffersState(0, total_delay_bytes_));
928 DCHECK_EQ(frames_filled, output_bus_->frames());
929
930 // Keep track of number of rendered frames since we need it for
931 // our delay calculations.
932 num_written_frames_ += frames_filled;
933
// Derive the amount of available space in the endpoint buffer.
935 // Avoid render attempt if there is no room for a captured packet.
936 UINT32 num_queued_frames = 0;
937 audio_output_client_->GetCurrentPadding(&num_queued_frames);
938 if (endpoint_render_buffer_size_frames_ - num_queued_frames <
939 output_buffer_size_frames_)
940 return;
941
942 // Grab all available space in the rendering endpoint buffer
943 // into which the client can write a data packet.
944 uint8* audio_data = NULL;
945 HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
946 &audio_data);
947 if (FAILED(hr)) {
948 DLOG(ERROR) << "Failed to access render buffer";
949 return;
950 }
951
952 const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;
953
954 // Convert the audio bus content to interleaved integer data using
955 // |audio_data| as destination.
956 output_bus_->Scale(volume_);
957 output_bus_->ToInterleaved(
958 output_buffer_size_frames_, bytes_per_sample, audio_data);
959
960 // Release the buffer space acquired in the GetBuffer() call.
961 audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
962 DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
963
964 return;
965 }
966
void WASAPIUnifiedStream::HandleError(HRESULT err) {
968 CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
969 (!started() && GetCurrentThreadId() == creating_thread_id_));
970 NOTREACHED() << "Error code: " << std::hex << err;
971 if (source_)
972 source_->OnError(this);
973 }
974
void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
976 CHECK(GetCurrentThreadId() == creating_thread_id_);
977 DCHECK(audio_io_thread_.get());
978 SetEvent(stop_streaming_event_.Get());
979 audio_io_thread_->Join();
980 audio_io_thread_.reset();
981 HandleError(err);
982 }
983
984 } // namespace media
985