/*
 *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#import "voice_processing_audio_unit.h"

#include <cstring>

#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"

#import "base/RTCLogging.h"
#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"

#if !defined(NDEBUG)
// Pretty-prints every field of an AudioStreamBasicDescription via RTCLog.
// Compiled in debug builds only.
static void LogStreamDescription(AudioStreamBasicDescription description) {
  // mFormatID is a FourCC; swap to big-endian so the four bytes read as the
  // human-readable four-character code (e.g. 'lpcm') when copied into a
  // C string. std::memcpy replaces the deprecated POSIX bcopy (note the
  // reversed argument order: memcpy takes destination first).
  char formatIdString[5];
  UInt32 formatId = CFSwapInt32HostToBig(description.mFormatID);
  std::memcpy(formatIdString, &formatId, 4);
  formatIdString[4] = '\0';
  RTCLog(@"AudioStreamBasicDescription: {\n"
          "  mSampleRate: %.2f\n"
          "  formatIDString: %s\n"
          "  mFormatFlags: 0x%X\n"
          "  mBytesPerPacket: %u\n"
          "  mFramesPerPacket: %u\n"
          "  mBytesPerFrame: %u\n"
          "  mChannelsPerFrame: %u\n"
          "  mBitsPerChannel: %u\n"
          "  mReserved: %u\n}",
         description.mSampleRate, formatIdString,
         static_cast<unsigned int>(description.mFormatFlags),
         static_cast<unsigned int>(description.mBytesPerPacket),
         static_cast<unsigned int>(description.mFramesPerPacket),
         static_cast<unsigned int>(description.mBytesPerFrame),
         static_cast<unsigned int>(description.mChannelsPerFrame),
         static_cast<unsigned int>(description.mBitsPerChannel),
         static_cast<unsigned int>(description.mReserved));
}
#endif

namespace webrtc {
namespace ios_adm {

// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with as small delay between each. This factor sets the max number of allowed
// initialization attempts.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;

// Returns the automatic gain control (AGC) state on the processed microphone
// signal. Should be on by default for Voice Processing audio units.
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  UInt32 size = sizeof(*enabled);
  OSStatus result =
      AudioUnitGetProperty(audio_unit,
                           kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                           kAudioUnitScope_Global,
                           kInputBus,
                           enabled,
                           &size);
  // Only log the state when the query succeeded; on failure `*enabled` has
  // not been written by AudioUnitGetProperty and must not be read.
  if (result == noErr) {
    RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  }
  return result;
}

VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(
    bool bypass_voice_processing,
    VoiceProcessingAudioUnitObserver* observer)
    : bypass_voice_processing_(bypass_voice_processing),
      observer_(observer),
      vpio_unit_(nullptr),
      state_(kInitRequired) {
  RTC_DCHECK(observer);
}

VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
  DisposeAudioUnit();
}

// 16-bit signed integer samples (see GetFormat()).
const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;

// Creates the VPIO audio unit instance and configures its busses and
// callbacks. Does not yet set the stream format or initialize the unit; that
// is done in Initialize(). Returns false (and disposes any partially created
// unit) on any failure.
bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);
  if (found_vpio_unit_ref == nullptr) {
    // Should not happen on iOS, but fail cleanly instead of passing a null
    // component to AudioComponentInstanceNew below.
    RTCLogError(@"Could not find the Voice Processing I/O audio component.");
    return false;
  }

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder, we allocate our own.
  // TODO(henrika): not sure that it actually saves resource to make this call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback to be called by the I/O thread to us when input audio
  // is available. The recorded samples can then be obtained by calling the
  // AudioUnitRender() method.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}

VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
  return state_;
}

// Sets the stream format on both busses and initializes the audio unit,
// retrying AudioUnitInitialize() up to kMaxNumberOfAudioUnitInitializeAttempts
// times. Also verifies (and, if needed, enables) the built-in AGC, unless
// voice processing is bypassed. Returns false on failure.
bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  OSStatus result = noErr;
  AudioStreamBasicDescription format = GetFormat(sample_rate);
  UInt32 size = sizeof(format);
#if !defined(NDEBUG)
  LogStreamDescription(format);
#endif

  // Set the format on the output scope of the input element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Output, kInputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Input, kOutputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error message in this case is -66635 which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_initialize_attempts = 0;
  result = AudioUnitInitialize(vpio_unit_);
  while (result != noErr) {
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)result);
    ++failed_initialize_attempts;
    if (failed_initialize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
    result = AudioUnitInitialize(vpio_unit_);
  }
  // The loop above only exits with result == noErr (or returns false), so
  // initialization is guaranteed to have succeeded at this point.
  RTCLog(@"Voice Processing I/O unit is now initialized.");

  if (bypass_voice_processing_) {
    // Attempt to disable builtin voice processing.
    UInt32 toggle = 1;
    result = AudioUnitSetProperty(vpio_unit_,
                                  kAUVoiceIOProperty_BypassVoiceProcessing,
                                  kAudioUnitScope_Global,
                                  kInputBus,
                                  &toggle,
                                  sizeof(toggle));
    if (result == noErr) {
      RTCLog(@"Successfully bypassed voice processing.");
    } else {
      RTCLogError(@"Failed to bypass voice processing. Error=%ld.",
                  (long)result);
    }
    // Bypass failure is non-fatal: the unit is initialized either way.
    state_ = kInitialized;
    return true;
  }

  // AGC should be enabled by default for Voice Processing I/O units but it is
  // checked below and enabled explicitly if needed. This scheme is used
  // to be absolutely sure that the AGC is enabled since we have seen cases
  // where only zeros are recorded and a disabled AGC could be one of the
  // reasons why it happens.
  int agc_was_enabled_by_default = 0;
  UInt32 agc_is_enabled = 0;
  result = GetAGCState(vpio_unit_, &agc_is_enabled);
  if (result != noErr) {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)result);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
  } else if (agc_is_enabled) {
    // Remember that the AGC was enabled by default. Will be used in UMA.
    agc_was_enabled_by_default = 1;
  } else {
    // AGC was initially disabled => try to enable it explicitly.
    UInt32 enable_agc = 1;
    result =
        AudioUnitSetProperty(vpio_unit_,
                             kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                             kAudioUnitScope_Global, kInputBus, &enable_agc,
                             sizeof(enable_agc));
    if (result != noErr) {
      RTCLogError(@"Failed to enable the built-in AGC. "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
    }
    result = GetAGCState(vpio_unit_, &agc_is_enabled);
    if (result != noErr) {
      RTCLogError(@"Failed to get AGC state (2nd attempt). "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
    }
  }

  // Track if the built-in AGC was enabled by default (as it should) or not.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_was_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_was_enabled_by_default);
  // As a final step, add an UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_is_enabled));

  state_ = kInitialized;
  return true;
}

OSStatus VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  OSStatus result = AudioOutputUnitStart(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
    return result;
  } else {
    RTCLog(@"Started audio unit");
  }
  state_ = kStarted;
  return noErr;
}

bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  OSStatus result = AudioOutputUnitStop(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Stopped audio unit");
  }

  state_ = kInitialized;
  return true;
}

bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Uninitializing audio unit.");

  OSStatus result = AudioUnitUninitialize(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Uninitialized audio unit.");
  }

  state_ = kUninitialized;
  return true;
}

// Pulls recorded samples out of the audio unit; called from the input
// callback once samples are available.
OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                    output_bus_number, num_frames, io_data);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
  }
  return result;
}

// Static render callback: trampolines to the instance stored in in_ref_con.
OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyGetPlayoutData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

// Static input callback: trampolines to the instance stored in in_ref_con.
OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
                                               num_frames, io_data);
}

OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                                     io_data);
}

OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  // Zero-initialize so mReserved (and any padding) is not left indeterminate;
  // debug builds log every field via LogStreamDescription().
  AudioStreamBasicDescription format = {};
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
  return format;
}

// Stops/uninitializes as needed for the current state, then disposes the
// underlying audio unit instance. Safe to call when no unit exists.
void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (vpio_unit_) {
    switch (state_) {
      case kStarted:
        Stop();
        [[fallthrough]];
      case kInitialized:
        Uninitialize();
        break;
      case kUninitialized:
      case kInitRequired:
        break;
    }

    RTCLog(@"Disposing audio unit.");
    OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
    if (result != noErr) {
      RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                  (long)result);
    }
    vpio_unit_ = nullptr;
  }
}

}  // namespace ios_adm
}  // namespace webrtc