/*
 *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#import "voice_processing_audio_unit.h"
12
13#include "rtc_base/checks.h"
14#include "system_wrappers/include/metrics.h"
15
16#import "base/RTCLogging.h"
17#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"
18
#if !defined(NDEBUG)
// Dumps all fields of `description` to the WebRTC log. Compiled in debug
// builds only.
static void LogStreamDescription(AudioStreamBasicDescription description) {
  // mFormatID is a four-character code stored big-endian; convert it into a
  // printable, NUL-terminated C string.
  UInt32 bigEndianFormatId = CFSwapInt32HostToBig(description.mFormatID);
  char fourCharCode[5];
  fourCharCode[4] = '\0';
  bcopy(&bigEndianFormatId, fourCharCode, 4);
  RTCLog(@"AudioStreamBasicDescription: {\n"
          "  mSampleRate: %.2f\n"
          "  formatIDString: %s\n"
          "  mFormatFlags: 0x%X\n"
          "  mBytesPerPacket: %u\n"
          "  mFramesPerPacket: %u\n"
          "  mBytesPerFrame: %u\n"
          "  mChannelsPerFrame: %u\n"
          "  mBitsPerChannel: %u\n"
          "  mReserved: %u\n}",
         description.mSampleRate,
         fourCharCode,
         static_cast<unsigned int>(description.mFormatFlags),
         static_cast<unsigned int>(description.mBytesPerPacket),
         static_cast<unsigned int>(description.mFramesPerPacket),
         static_cast<unsigned int>(description.mBytesPerFrame),
         static_cast<unsigned int>(description.mChannelsPerFrame),
         static_cast<unsigned int>(description.mBitsPerChannel),
         static_cast<unsigned int>(description.mReserved));
}
#endif
45
46namespace webrtc {
47namespace ios_adm {
48
// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with a small delay between each. This factor sets the max number of allowed
// initialization attempts.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;
58
// Returns the automatic gain control (AGC) state on the processed microphone
// signal. Should be on by default for Voice Processing audio units.
// On success, writes the state into `*enabled` and returns noErr; on failure
// `*enabled` is left untouched.
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  UInt32 size = sizeof(*enabled);
  OSStatus result = AudioUnitGetProperty(audio_unit,
                                         kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                         kAudioUnitScope_Global,
                                         kInputBus,
                                         enabled,
                                         &size);
  // Only log the value on success; on failure `*enabled` may still be
  // uninitialized and reading it would be undefined behavior.
  if (result == noErr) {
    RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  }
  return result;
}
73
// Constructs the wrapper in the kInitRequired state; Init() must be called
// before the audio unit can be used. `observer` receives the playout and
// record callbacks and must outlive this object (stored as a raw pointer).
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
                                                   VoiceProcessingAudioUnitObserver* observer)
    : bypass_voice_processing_(bypass_voice_processing),
      observer_(observer),
      vpio_unit_(nullptr),
      state_(kInitRequired) {
  RTC_DCHECK(observer);
}
82
// Stops/uninitializes the underlying audio unit as needed and releases it.
VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
  DisposeAudioUnit();
}
86
87const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;
88
// Creates the Voice Processing I/O audio unit and configures its buses and
// callbacks. Moves the state machine from kInitRequired to kUninitialized on
// success. Returns false (and disposes any partially-created unit) on error.
bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);
  if (found_vpio_unit_ref == nullptr) {
    // Fail fast instead of handing a null component to
    // AudioComponentInstanceNew below.
    RTCLogError(@"Could not find a Voice Processing I/O audio component.");
    return false;
  }

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder, we allocate our own.
  // TODO(henrika): not sure that it actually saves resource to make this call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback to be called by the I/O thread to us when input audio
  // is available. The recorded samples can then be obtained by calling the
  // AudioUnitRender() method.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}
191
// Returns the current position in the Init/Initialize/Start state machine.
VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
  return state_;
}
195
// Sets the stream format on both buses, initializes the audio unit (with
// retries), optionally bypasses the built-in voice processing, and verifies
// the AGC state. Moves the state machine to kInitialized on success.
bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  OSStatus result = noErr;
  AudioStreamBasicDescription format = GetFormat(sample_rate);
  UInt32 size = sizeof(format);
#if !defined(NDEBUG)
  LogStreamDescription(format);
#endif

  // Set the format on the output scope of the input element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Output, kInputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Input, kOutputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error message in this case is -66635 which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_initialize_attempts = 0;
  result = AudioUnitInitialize(vpio_unit_);
  while (result != noErr) {
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)result);
    ++failed_initialize_attempts;
    if (failed_initialize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
    result = AudioUnitInitialize(vpio_unit_);
  }
  // The retry loop above either returns false or exits with result == noErr,
  // so initialization is guaranteed to have succeeded here.
  RTCLog(@"Voice Processing I/O unit is now initialized.");

  if (bypass_voice_processing_) {
    // Attempt to disable builtin voice processing.
    UInt32 toggle = 1;
    result = AudioUnitSetProperty(vpio_unit_,
                                  kAUVoiceIOProperty_BypassVoiceProcessing,
                                  kAudioUnitScope_Global,
                                  kInputBus,
                                  &toggle,
                                  sizeof(toggle));
    if (result == noErr) {
      RTCLog(@"Successfully bypassed voice processing.");
    } else {
      RTCLogError(@"Failed to bypass voice processing. Error=%ld.", (long)result);
    }
    state_ = kInitialized;
    return true;
  }

  // AGC should be enabled by default for Voice Processing I/O units but it is
  // checked below and enabled explicitly if needed. This scheme is used
  // to be absolutely sure that the AGC is enabled since we have seen cases
  // where only zeros are recorded and a disabled AGC could be one of the
  // reasons why it happens.
  int agc_was_enabled_by_default = 0;
  UInt32 agc_is_enabled = 0;
  result = GetAGCState(vpio_unit_, &agc_is_enabled);
  if (result != noErr) {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)result);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
  } else if (agc_is_enabled) {
    // Remember that the AGC was enabled by default. Will be used in UMA.
    agc_was_enabled_by_default = 1;
  } else {
    // AGC was initially disabled => try to enable it explicitly.
    UInt32 enable_agc = 1;
    result =
        AudioUnitSetProperty(vpio_unit_,
                             kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                             kAudioUnitScope_Global, kInputBus, &enable_agc,
                             sizeof(enable_agc));
    if (result != noErr) {
      RTCLogError(@"Failed to enable the built-in AGC. "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
    }
    result = GetAGCState(vpio_unit_, &agc_is_enabled);
    if (result != noErr) {
      RTCLogError(@"Failed to get AGC state (2nd attempt). "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
    }
  }

  // Track if the built-in AGC was enabled by default (as it should) or not.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_was_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_was_enabled_by_default);
  // As a final step, add an UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_is_enabled));

  state_ = kInitialized;
  return true;
}
333
// Starts audio I/O on the initialized unit and transitions to kStarted.
// Returns the OSStatus from AudioOutputUnitStart (noErr on success).
OSStatus VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  const OSStatus status = AudioOutputUnitStart(vpio_unit_);
  if (status != noErr) {
    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)status);
    return status;
  }
  RTCLog(@"Started audio unit");
  state_ = kStarted;
  return noErr;
}
348
// Stops audio I/O and transitions back to kInitialized. Returns false if
// AudioOutputUnitStop fails (state is left unchanged in that case).
bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  const OSStatus status = AudioOutputUnitStop(vpio_unit_);
  if (status != noErr) {
    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)status);
    return false;
  }
  RTCLog(@"Stopped audio unit");

  state_ = kInitialized;
  return true;
}
364
// Uninitializes the audio unit and transitions back to kUninitialized.
// Returns false if AudioUnitUninitialize fails (state left unchanged).
bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
  // Fixed log-message typo: "Unintializing" -> "Uninitializing".
  RTCLog(@"Uninitializing audio unit.");

  OSStatus result = AudioUnitUninitialize(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Uninitialized audio unit.");
  }

  state_ = kUninitialized;
  return true;
}
380
// Pulls recorded audio from the unit into `io_data` via AudioUnitRender.
// Called on the real-time I/O thread; forwards the render status to the
// caller unchanged.
OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  const OSStatus status =
      AudioUnitRender(vpio_unit_, flags, time_stamp, output_bus_number,
                      num_frames, io_data);
  if (status != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)status);
  }
  return status;
}
395
// C render callback installed in Init(). `in_ref_con` carries the
// VoiceProcessingAudioUnit instance; forward the playout request to it.
OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return static_cast<VoiceProcessingAudioUnit*>(in_ref_con)
      ->NotifyGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                             io_data);
}
408
// C input callback installed in Init(). `in_ref_con` carries the
// VoiceProcessingAudioUnit instance; forward the recorded-data notification.
OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return static_cast<VoiceProcessingAudioUnit*>(in_ref_con)
      ->NotifyDeliverRecordedData(flags, time_stamp, bus_number, num_frames,
                                  io_data);
}
421
// Relays the playout request to the registered observer, which is expected
// to fill `io_data` with audio to be played out.
OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  const OSStatus status = observer_->OnGetPlayoutData(
      flags, time_stamp, bus_number, num_frames, io_data);
  return status;
}
431
// Relays the recorded-data notification to the registered observer; the
// observer typically calls Render() to fetch the actual samples.
OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  const OSStatus status = observer_->OnDeliverRecordedData(
      flags, time_stamp, bus_number, num_frames, io_data);
  return status;
}
441
AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  // Zero-initialize the struct so that fields not explicitly assigned below
  // (notably `mReserved`) are not left with indeterminate values that would
  // be passed to AudioUnitSetProperty and logged in debug builds.
  AudioStreamBasicDescription format = {};
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
  return format;
}
462
// Tears down the audio unit: walks the state machine back (Stop if started,
// then Uninitialize if initialized) before disposing the component instance.
// Safe to call repeatedly; no-op when no unit exists.
void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (vpio_unit_) {
    switch (state_) {
      case kStarted:
        Stop();
        // A started unit is also initialized, so fall through to
        // uninitialize it as well.
        [[fallthrough]];
      case kInitialized:
        Uninitialize();
        break;
      case kUninitialized:
      case kInitRequired:
        // Nothing to unwind; the unit can be disposed directly.
        break;
    }

    RTCLog(@"Disposing audio unit.");
    OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
    if (result != noErr) {
      RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                  (long)result);
    }
    vpio_unit_ = nullptr;
  }
}
486
487}  // namespace ios_adm
488}  // namespace webrtc
489