// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_renderer.h"

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "content/renderer/media/audio_device_factory.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "content/renderer/media/webrtc_logging.h"
#include "media/audio/audio_output_device.h"
#include "media/audio/audio_parameters.h"
#include "media/audio/sample_rates.h"

#if defined(OS_WIN)
#include "base/win/windows_version.h"
#include "media/audio/win/core_audio_util_win.h"
#endif

namespace content {

namespace {
// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around audio problems
// on some Android devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif

// TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
enum AudioFramesPerBuffer {
  k160,
  k320,
  k440,
  k480,
  k640,
  k880,
  k960,
  k1440,
  k1920,
  kUnexpectedAudioBufferSize  // Must always be last!
};
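// Note: these values typically correspond to multiples of 10 ms at the
// supported output rates, e.g. 160 frames = 10 ms at 16000 Hz, 441 (logged
// as k440) = 10 ms at 44100 Hz, and 480 = 10 ms at 48000 Hz.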

// Helper method to convert integral values to their respective enum values
// above, or kUnexpectedAudioBufferSize if no match exists.
// We map 441 to k440 to avoid changes in the XML part for histograms.
// It is still possible to map the histogram result to the actual buffer size.
// See http://crbug.com/243450 for details.
AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  switch (frames_per_buffer) {
    case 160: return k160;
    case 320: return k320;
    case 441: return k440;
    case 480: return k480;
    case 640: return k640;
    case 880: return k880;
    case 960: return k960;
    case 1440: return k1440;
    case 1920: return k1920;
  }
  return kUnexpectedAudioBufferSize;
}

void AddHistogramFramesPerBuffer(int param) {
  AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
  if (afpb != kUnexpectedAudioBufferSize) {
    UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
                              afpb, kUnexpectedAudioBufferSize);
  } else {
    // Report unexpected buffer sizes using a unique histogram name.
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
  }
}

// This is a simple wrapper class that's handed out to users of a shared
// WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
// and 'started' states to avoid problems related to incorrect usage which
// might violate the implementation assumptions inside WebRtcAudioRenderer
// (see the play reference count).
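//
// Usage sketch (illustrative only; |renderer| is a hypothetical
// scoped_refptr<WebRtcAudioRenderer>): two consumers hold proxies created by
// CreateSharedAudioRendererProxy() below, and the shared renderer only pauses
// once the last playing proxy pauses:
//
//   scoped_refptr<MediaStreamAudioRenderer> proxy1 =
//       renderer->CreateSharedAudioRendererProxy();
//   scoped_refptr<MediaStreamAudioRenderer> proxy2 =
//       renderer->CreateSharedAudioRendererProxy();
//   proxy1->Start();
//   proxy1->Play();   // The shared renderer starts playing.
//   proxy2->Start();
//   proxy2->Play();
//   proxy1->Pause();  // Still audible; proxy2 is playing.
//   proxy2->Pause();  // Last playing proxy paused; shared renderer pauses.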
class SharedAudioRenderer : public MediaStreamAudioRenderer {
 public:
  explicit SharedAudioRenderer(
      const scoped_refptr<MediaStreamAudioRenderer>& delegate)
      : delegate_(delegate), started_(false), playing_(false) {
  }

 protected:
  virtual ~SharedAudioRenderer() {
    DCHECK(thread_checker_.CalledOnValidThread());
    DVLOG(1) << __FUNCTION__;
    Stop();
  }

  virtual void Start() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (started_)
      return;
    started_ = true;
    delegate_->Start();
  }

  virtual void Play() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (playing_)
      return;
    playing_ = true;
    delegate_->Play();
  }

  virtual void Pause() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (!playing_)
      return;
    playing_ = false;
    delegate_->Pause();
  }

  virtual void Stop() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (!started_)
      return;
    Pause();
    started_ = false;
    delegate_->Stop();
  }

  virtual void SetVolume(float volume) OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    delegate_->SetVolume(volume);
  }

  virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->GetCurrentRenderTime();
  }

  virtual bool IsLocalRenderer() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->IsLocalRenderer();
  }

 private:
  base::ThreadChecker thread_checker_;
  scoped_refptr<MediaStreamAudioRenderer> delegate_;
  bool started_;
  bool playing_;
};

}  // namespace

WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id,
                                         int session_id,
                                         int sample_rate,
                                         int frames_per_buffer)
    : state_(UNINITIALIZED),
      source_render_view_id_(source_render_view_id),
      session_id_(session_id),
      source_(NULL),
      play_ref_count_(0),
      start_ref_count_(0),
      audio_delay_milliseconds_(0),
      fifo_delay_milliseconds_(0),
      sample_rate_(sample_rate),
      frames_per_buffer_(frames_per_buffer) {
  WebRtcLogMessage(base::StringPrintf(
      "WAR::WAR. source_render_view_id=%d"
      ", session_id=%d, sample_rate=%d, frames_per_buffer=%d",
      source_render_view_id,
      session_id,
      sample_rate,
      frames_per_buffer));
}

WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
  buffer_.reset();
}

bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  DCHECK_EQ(state_, UNINITIALIZED);
  DCHECK(source);
  DCHECK(!sink_.get());
  DCHECK(!source_);

  // Use stereo output on all platforms.
  media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;

  // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
  // immutable and change its value instead of using a temporary?
  int sample_rate = sample_rate_;
  DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
  // WebRTC does not yet support rates higher than 96000 on the client side,
  // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
  // we change the rate to 48000 instead. The consequence is that the native
  // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz,
  // which will then be resampled by the audio converter on the browser side
  // to match the native audio layer.
  if (sample_rate == 192000) {
    DVLOG(1) << "Resampling from 48000 to 192000 is required";
    sample_rate = 48000;
  }
  media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
  if (asr != media::kUnexpectedAudioSampleRate) {
    UMA_HISTOGRAM_ENUMERATION(
        "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
  } else {
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
  }

  // Verify that the reported output hardware sample rate is supported
  // on the current platform.
  const int* const rates_end =
      kValidOutputRates + arraysize(kValidOutputRates);
  if (std::find(kValidOutputRates, rates_end, sample_rate) == rates_end) {
    DLOG(ERROR) << sample_rate << " is not a supported output rate.";
    return false;
  }

  // Set up audio parameters for the source, i.e., the WebRTC client.

  // The WebRTC client only supports multiples of 10 ms as the buffer size,
  // where 10 ms is preferred for the lowest possible delay.
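  // For example, at 48000 Hz the preferred 10 ms buffer is
  // 48000 / 100 = 480 frames, and at 44100 Hz it is 441 frames.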
  media::AudioParameters source_params;
  int buffer_size = (sample_rate / 100);
  DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;

  int channels = ChannelLayoutToChannelCount(channel_layout);
  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                      channel_layout, channels, 0,
                      sample_rate, 16, buffer_size);

  // Set up audio parameters for the sink, i.e., the native audio output
  // stream. We strive to open up using native parameters to achieve the best
  // possible performance and to ensure that no FIFO is needed on the browser
  // side to match the client request. Any mismatch between the source and the
  // sink is instead taken care of in this class using a pull FIFO.

  media::AudioParameters sink_params;

  // Use the native output size as the default.
  buffer_size = frames_per_buffer_;
#if defined(OS_ANDROID)
  // TODO(henrika): Keep tuning this scheme, especially for low-latency cases.
  // It might not be possible to come up with the perfect solution using
  // the render side only.
  const int frames_per_10ms = (sample_rate / 100);
  if (buffer_size < 2 * frames_per_10ms) {
    // Examples of low-latency frame sizes and the resulting |buffer_size|:
    // Nexus 7     : 240 audio frames => 2*480 = 960
    // Nexus 10    : 256              => 2*441 = 882
    // Galaxy Nexus: 144              => 2*441 = 882
    buffer_size = 2 * frames_per_10ms;
    DVLOG(1) << "Low-latency output detected on Android";
  }
#endif
  DVLOG(1) << "Using sink output buffer size: " << buffer_size;

  sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                    channel_layout, channels, 0, sample_rate, 16, buffer_size);

  // Create a FIFO if re-buffering is required to match the source input with
  // the sink request. The source acts as provider here and the sink as
  // consumer.
  fifo_delay_milliseconds_ = 0;
  if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
             << " to " << sink_params.frames_per_buffer();
    audio_fifo_.reset(new media::AudioPullFifo(
        source_params.channels(),
        source_params.frames_per_buffer(),
        base::Bind(
            &WebRtcAudioRenderer::SourceCallback,
            base::Unretained(this))));

    if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
      // Compute the per-frame duration in floating point; at typical rates it
      // is well below one millisecond and integer math would truncate to zero.
      const double frame_duration_milliseconds =
          base::Time::kMillisecondsPerSecond /
          static_cast<double>(source_params.sample_rate());
      fifo_delay_milliseconds_ = (sink_params.frames_per_buffer() -
          source_params.frames_per_buffer()) * frame_duration_milliseconds;
    }
  }
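
  // Worked example (sketch): a 44100 Hz source delivers 441 frames per 10 ms.
  // If the sink requests 882-frame buffers, the FIFO adds
  // (882 - 441) * (1000 / 44100.0) ~= 10 ms of extra delay.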

  // Allocate local audio buffers based on the parameters above.
  // It is assumed that each audio sample contains 16 bits and each
  // audio frame contains one or two audio samples depending on the
  // number of channels.
  buffer_.reset(
      new int16[source_params.frames_per_buffer() * source_params.channels()]);
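  // For example, a stereo source at 48000 Hz yields 480 frames * 2 channels =
  // 960 int16 samples (1920 bytes) per 10 ms buffer.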

  source_ = source;
  source->SetRenderFormat(source_params);

  // Configure the audio rendering client and start rendering.
  sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);

  // TODO(tommi): Rename InitializeUnifiedStream to better reflect its
  // association with a session.
  DCHECK_GE(session_id_, 0);
  sink_->InitializeUnifiedStream(sink_params, this, session_id_);

  sink_->Start();

  // The user must call Play() before any audio can be heard.
  state_ = PAUSED;

  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
                            source_params.channel_layout(),
                            media::CHANNEL_LAYOUT_MAX);
  // AddHistogramFramesPerBuffer() maps the raw frame count to the
  // AudioFramesPerBuffer enum before logging it. Passing the raw count
  // straight to UMA_HISTOGRAM_ENUMERATION would exceed the enum's
  // kUnexpectedAudioBufferSize boundary for typical buffer sizes.
  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());

  return true;
}

scoped_refptr<MediaStreamAudioRenderer>
WebRtcAudioRenderer::CreateSharedAudioRendererProxy() {
  return new SharedAudioRenderer(this);
}

bool WebRtcAudioRenderer::IsStarted() const {
  DCHECK(thread_checker_.CalledOnValidThread());
  return start_ref_count_ != 0;
}

void WebRtcAudioRenderer::Start() {
  DVLOG(1) << "WebRtcAudioRenderer::Start()";
  DCHECK(thread_checker_.CalledOnValidThread());
  ++start_ref_count_;
}

void WebRtcAudioRenderer::Play() {
  DVLOG(1) << "WebRtcAudioRenderer::Play()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
  ++play_ref_count_;

  if (state_ != PLAYING) {
    state_ = PLAYING;

    if (audio_fifo_) {
      audio_delay_milliseconds_ = 0;
      audio_fifo_->Clear();
    }
  }
}

void WebRtcAudioRenderer::Pause() {
  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK_EQ(state_, PLAYING);
  DCHECK_GT(play_ref_count_, 0);
  if (!--play_ref_count_)
    state_ = PAUSED;
}

void WebRtcAudioRenderer::Stop() {
  DVLOG(1) << "WebRtcAudioRenderer::Stop()";
  DCHECK(thread_checker_.CalledOnValidThread());
  {
    base::AutoLock auto_lock(lock_);
    if (state_ == UNINITIALIZED)
      return;

    if (--start_ref_count_)
      return;

    DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";

    source_->RemoveAudioRenderer(this);
    source_ = NULL;
    state_ = UNINITIALIZED;
  }

  // Make sure to stop the sink while _not_ holding the lock since the Render()
  // callback may currently be executing and try to grab the lock while we're
  // stopping the thread on which it runs.
  sink_->Stop();
}

void WebRtcAudioRenderer::SetVolume(float volume) {
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  sink_->SetVolume(volume);
}

base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
  return base::TimeDelta();
}

bool WebRtcAudioRenderer::IsLocalRenderer() const {
  return false;
}
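
// Render() runs on the AudioOutputDevice's render thread, not the thread
// checked by |thread_checker_|. Data flows:
//   sink -> Render() -> [optional AudioPullFifo] -> SourceCallback()
//        -> source_->RenderData() -> FromInterleaved() into |audio_bus|.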
int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
                                int audio_delay_milliseconds) {
  base::AutoLock auto_lock(lock_);
  if (!source_)
    return 0;

  DVLOG(2) << "WebRtcAudioRenderer::Render()";
  DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;

  audio_delay_milliseconds_ = audio_delay_milliseconds;

  if (audio_fifo_)
    audio_fifo_->Consume(audio_bus, audio_bus->frames());
  else
    SourceCallback(0, audio_bus);

  return (state_ == PLAYING) ? audio_bus->frames() : 0;
}

void WebRtcAudioRenderer::OnRenderError() {
  NOTIMPLEMENTED();
  LOG(ERROR) << "OnRenderError()";
}

// Called by AudioPullFifo when more data is necessary.
void WebRtcAudioRenderer::SourceCallback(
    int fifo_frame_delay, media::AudioBus* audio_bus) {
  DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
           << fifo_frame_delay << ", "
           << audio_bus->frames() << ")";

  int output_delay_milliseconds = audio_delay_milliseconds_;
  output_delay_milliseconds += fifo_delay_milliseconds_;
  DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;

  // We need to keep rendering data for |source_| regardless of |state_|,
  // otherwise the data will be buffered up inside |source_|.
  source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
                      audio_bus->channels(), audio_bus->frames(),
                      output_delay_milliseconds);

  // Avoid filling up the audio bus if we are not playing; instead
  // return here and ensure that the returned value in Render() is 0.
  if (state_ != PLAYING) {
    audio_bus->Zero();
    return;
  }

  // De-interleave each channel and convert to 32-bit floating-point
  // with nominal range -1.0 -> +1.0 to match the callback format.
  audio_bus->FromInterleaved(buffer_.get(),
                             audio_bus->frames(),
                             sizeof(buffer_[0]));
}

}  // namespace content