// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
#define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_

#include <Audioclient.h>
#include <MMDeviceAPI.h>

#include <string>

#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/threading/platform_thread.h"
#include "base/threading/simple_thread.h"
#include "base/win/scoped_co_mem.h"
#include "base/win/scoped_comptr.h"
#include "base/win/scoped_handle.h"
#include "media/audio/audio_io.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_mixer.h"
#include "media/base/media_export.h"
#include "media/base/multi_channel_resampler.h"

namespace media {

class AudioManagerWin;

// Implementation of AudioOutputStream for Windows using the Core Audio API
// where both capturing and rendering take place on the same thread to enable
// audio I/O. This class allows arbitrary combinations of input and output
// devices running off different clocks and using different drivers, with
// potentially differing sample rates.
//
// It is required to first acquire the native sample rate of the selected
// output device and then use the same rate when creating this object.
// The inner operation depends on the input sample rate, which is determined
// during construction. Three main modes are supported:
//
// 1) input rate == output rate => the input side drives the output side
//    directly.
// 2) input rate != output rate => both sides are driven independently by
//    events, and a FIFO plus a resampling unit are used to compensate for
//    differences in sample rates between the two sides.
// 3) input rate == output rate but native buffer sizes are not identical =>
//    same inner functionality as in (2) to compensate for the differences
//    in buffer sizes and also for any potential clock drift between the
//    two devices.
//
// Mode detection is done at construction, and mode (1) gives the best
// performance (lower delay and no "varispeed distortion"); i.e., it is
// recommended to use the same sample rate for input and output. Mode (2)
// uses a resampler which supports rate adjustments to fine tune for things
// like clock drift and differences in sample rates between devices.
// Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
// is also called the varispeed mode, and it is used for case (3) as well,
// mainly to compensate for the difference in buffer sizes.
// Mode (3) can happen when two different audio devices are used.
// As an example: some devices need a buffer size of 441 @ 44.1kHz and others
// 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
// which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
//
// Implementation notes:
//
// - Open() can fail if the input and output parameters do not fulfill
//   certain conditions. See the source of Open() for more details.
// - Channel mixing will be performed if the client asks for a larger
//   number of channels than the native audio layer provides.
//   Example: the client wants stereo but the audio layer provides mono. In
//   this case upmixing from mono to stereo (1->2) will be done.
//
// TODO(henrika):
//
// - Add support for exclusive mode.
// - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
//   as internal sample-value representation.
// - Perform fine-tuning for non-matching sample rates to reduce latency.
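//
// Example usage (a minimal sketch; |manager|, |params|, |input_device_id|
// and |source| below are hypothetical placeholders supplied by the caller;
// they are not defined in this header):
//
//   WASAPIUnifiedStream* stream =
//       new WASAPIUnifiedStream(manager, params, input_device_id);
//   if (stream->Open()) {
//     stream->Start(source);  // OnMoreIOData() callbacks start firing.
//     ...
//     stream->Stop();         // Stops the audio I/O thread.
//   }
//   stream->Close();          // Triggers destruction via the audio manager.
//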
class MEDIA_EXPORT WASAPIUnifiedStream
    : public AudioOutputStream,
      public base::DelegateSimpleThread::Delegate {
 public:
  // The ctor takes all the usual parameters, plus |manager| which is the
  // audio manager that is creating this object.
  WASAPIUnifiedStream(AudioManagerWin* manager,
                      const AudioParameters& params,
                      const std::string& input_device_id);

  // The dtor is typically called by the AudioManager only and it is usually
  // triggered by calling AudioOutputStream::Close().
  virtual ~WASAPIUnifiedStream();

  // Implementation of AudioOutputStream.
  virtual bool Open() OVERRIDE;
  virtual void Start(AudioSourceCallback* callback) OVERRIDE;
  virtual void Stop() OVERRIDE;
  virtual void Close() OVERRIDE;
  virtual void SetVolume(double volume) OVERRIDE;
  virtual void GetVolume(double* volume) OVERRIDE;

  bool started() const { return audio_io_thread_.get() != NULL; }

  // Returns true if the input sample rate differs from the output sample
  // rate. A FIFO and an adjustable multi-channel resampler are utilized in
  // this mode.
  bool VarispeedMode() const { return (fifo_ && resampler_); }

 private:
  enum {
    // Time in milliseconds between two successive delay measurements.
    // We save resources by not updating the delay estimates for each capture
    // event (typically a 100 Hz rate).
    kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,

    // Max possible FIFO size.
    kFifoSize = 16384,

    // This value was determined empirically for minimum latency while still
    // guarding against FIFO under-runs. The actual target size will be equal
    // to kTargetFifoSafetyFactor * (native input buffer size).
    // TODO(henrika): tune this value for lowest possible latency for all
    // possible sample rate combinations.
    kTargetFifoSafetyFactor = 2
  };

  // Additional initialization required when the input and output sample
  // rates differ. Allocates resources for |fifo_|, |resampler_|,
  // |render_event_| and the |capture_bus_|, and configures the
  // |input_format_| structure given the provided input and output audio
  // parameters.
  void DoVarispeedInitialization(const AudioParameters& input_params,
                                 const AudioParameters& output_params);

  // Clears varispeed related components such as the FIFO and the resampler.
  void ResetVarispeed();

  // Builds WAVEFORMATEX structures for input and output based on input and
  // output audio parameters.
  void SetIOFormats(const AudioParameters& input_params,
                    const AudioParameters& output_params);

  // DelegateSimpleThread::Delegate implementation.
  virtual void Run() OVERRIDE;

  // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
  // Callback for providing more data into the resampler.
  // Only used in varispeed mode, i.e., when input rate != output rate.
  virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);

  // Issues the OnError() callback to the |source_|.
  void HandleError(HRESULT err);

  // Stops and joins the audio thread in case of an error.
  void StopAndJoinThread(HRESULT err);

  // Converts a unique endpoint ID to a user-friendly device name.
  std::string GetDeviceName(LPCWSTR device_id) const;

  // Called on the audio IO thread for each capture event.
  // Buffers captured audio into a FIFO if varispeed is used, or into an
  // audio bus if the input and output sample rates are identical.
  void ProcessInputAudio();

  // Called on the audio IO thread for each render event when varispeed is
  // active, or for each capture event when varispeed is not used.
  // In varispeed mode, it triggers a resampling callback which reads from
  // the FIFO and calls AudioSourceCallback::OnMoreIOData using the resampled
  // input signal, and at the same time asks for data to play out.
  // If the input and output rates are the same, we read directly from the
  // audio bus used to store captured data in ProcessInputAudio instead of
  // reading from the FIFO and resampling.
  void ProcessOutputAudio(IAudioClock* audio_output_clock);

  // Contains the thread ID of the creating thread.
  base::PlatformThreadId creating_thread_id_;

  // Our creator; the audio manager needs to be notified when we close.
  AudioManagerWin* manager_;

  // Contains the audio parameter structure provided at construction.
  AudioParameters params_;

  // For convenience, same as in params_.
  int input_channels_;
  int output_channels_;

  // Unique ID of the input device to be opened.
  const std::string input_device_id_;

  // The sharing mode for the streams.
  // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
  // where AUDCLNT_SHAREMODE_SHARED is the default.
  AUDCLNT_SHAREMODE share_mode_;

  // Rendering and capturing are driven by this thread (no message loop).
  // All OnMoreIOData() callbacks will be called from this thread.
  scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;

  // Contains the desired audio output format which is set up at construction.
  // It is required to first acquire the native sample rate of the selected
  // output device and then use the same rate when creating this object.
  WAVEFORMATPCMEX output_format_;

  // Contains the native audio input format which is set up at construction
  // if varispeed mode is utilized.
  WAVEFORMATPCMEX input_format_;

  // True when successfully opened.
  bool opened_;

  // Volume level from 0 to 1 used for output scaling.
  double volume_;

  // Size in audio frames of each audio packet, where an audio packet
  // is defined as the block of data which the destination is expected to
  // receive in each OnMoreIOData() callback.
  size_t output_buffer_size_frames_;

  // Size in audio frames of each audio packet, where an audio packet
  // is defined as the block of data which the source is expected to
  // deliver in each OnMoreIOData() callback.
  size_t input_buffer_size_frames_;

  // Lengths of the audio endpoint buffers.
  uint32 endpoint_render_buffer_size_frames_;
  uint32 endpoint_capture_buffer_size_frames_;

  // Counts the number of audio frames written to the endpoint buffer.
  uint64 num_written_frames_;

  // Time stamp for the last delay measurement.
  base::TimeTicks last_delay_sample_time_;

  // Contains the total (sum of render and capture) delay in milliseconds.
  double total_delay_ms_;

  // Contains the total (sum of render and capture and possibly FIFO) delay
  // in bytes. The update frequency is set by a constant called
  // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
  int total_delay_bytes_;

  // Pointer to the client that will deliver audio samples to be played out.
  AudioSourceCallback* source_;

  // IMMDevice interfaces which represent audio endpoint devices.
  base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
  base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;

  // IAudioClient interfaces which enable a client to create and initialize
  // an audio stream between an audio application and the audio engine.
  base::win::ScopedComPtr<IAudioClient> audio_output_client_;
  base::win::ScopedComPtr<IAudioClient> audio_input_client_;

  // The IAudioRenderClient interface enables a client to write output
  // data to a rendering endpoint buffer.
  base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;

  // The IAudioCaptureClient interface enables a client to read input
  // data from a capturing endpoint buffer.
  base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;

  // The audio engine will signal this event each time a buffer has been
  // recorded.
  base::win::ScopedHandle capture_event_;

  // The audio engine will signal this event each time it needs a new
  // audio buffer to play out.
  // Only utilized in varispeed mode.
  base::win::ScopedHandle render_event_;

  // This event will be signaled when streaming shall stop.
  base::win::ScopedHandle stop_streaming_event_;

  // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> output_bus_;

  // Container for sending data to AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> input_bus_;

  // Container for storing output from the channel mixer.
  scoped_ptr<AudioBus> channel_bus_;

  // All members below are only allocated, or used, in varispeed mode:

  // Temporary storage of resampled input audio data.
  scoped_ptr<AudioBus> resampled_bus_;

  // Set to true the first time a capture event has been received in
  // varispeed mode.
  bool input_callback_received_;

  // MultiChannelResampler is a multi-channel wrapper for SincResampler,
  // allowing high quality sample rate conversion of multiple channels at
  // once.
  scoped_ptr<MultiChannelResampler> resampler_;

  // Resampler I/O ratio.
  double io_sample_rate_ratio_;

  // Used for input-to-output buffering.
  scoped_ptr<AudioFifo> fifo_;

  // The channel mixer is only created and utilized if the number of input
  // channels is larger than the native number of input channels (e.g., the
  // client wants stereo but the audio device only supports mono).
  scoped_ptr<ChannelMixer> channel_mixer_;

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  int target_fifo_frames_;

  // A running average of the measured delta between the actual number of
  // frames in the FIFO and |target_fifo_frames_|.
  double average_delta_;

  // A varispeed rate scalar which is calculated based on FIFO drift.
  double fifo_rate_compensation_;

  // Set to true when the input side signals the output side that a new delay
  // estimate is needed.
  bool update_output_delay_;

  // The capture side stores its delay estimate so that the sum can be
  // derived on the render side.
  double capture_delay_ms_;

  // TODO(henrika): possibly remove these members once the performance is
  // properly tuned. Only used for off-line debugging.
#ifndef NDEBUG
  enum LogElementNames {
    INPUT_TIME_STAMP,
    NUM_FRAMES_IN_FIFO,
    RESAMPLER_MARGIN,
    RATE_COMPENSATION
  };

  scoped_ptr<int64[]> input_time_stamps_;
  scoped_ptr<int[]> num_frames_in_fifo_;
  scoped_ptr<int[]> resampler_margin_;
  scoped_ptr<double[]> fifo_rate_comps_;
  scoped_ptr<int[]> num_elements_;
  scoped_ptr<int[]> input_params_;
  scoped_ptr<int[]> output_params_;

  FILE* data_file_;
  FILE* param_file_;
#endif

  DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
};

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_