1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/micro/examples/micro_speech/audio_provider.h"
17
18 #include <AudioToolbox/AudioToolbox.h>
19
20 #include "tensorflow/lite/micro/examples/micro_speech/simple_features/simple_model_settings.h"
21
22 namespace {
23
24 constexpr int kNumberRecordBuffers = 3;
25 bool g_is_audio_initialized = false;
26 constexpr int kAudioCaptureBufferSize = kAudioSampleFrequency * 0.5;
27 int16_t g_audio_capture_buffer[kAudioCaptureBufferSize];
28 int16_t g_audio_output_buffer[kMaxAudioSampleSize];
29 int32_t g_latest_audio_timestamp = 0;
30
// Checks the result of a MacOS API call. `error` is evaluated exactly once, so
// it is safe to pass the call expression itself. If the result differs from
// noErr, the source location and the status code are reported through
// `error_reporter` and the enclosing function returns kTfLiteError.
//
// The expansion deliberately has no trailing semicolon: write
// `RETURN_IF_OS_ERROR(...);` at the call site so the macro behaves as a single
// statement, including inside an unbraced if/else.
#define RETURN_IF_OS_ERROR(error, error_reporter)                       \
  do {                                                                  \
    const auto return_if_os_error_status_ = (error);                    \
    if (return_if_os_error_status_ != noErr) {                          \
      (error_reporter)                                                  \
          ->Report("Error: %s:%d (%d)\n", __FILE__, __LINE__,           \
                   static_cast<int>(return_if_os_error_status_));       \
      return kTfLiteError;                                              \
    }                                                                   \
  } while (0)
40
41 // Called when an audio input buffer has been filled.
OnAudioBufferFilledCallback(void * user_data,AudioQueueRef queue,AudioQueueBufferRef buffer,const AudioTimeStamp * start_time,UInt32 num_packets,const AudioStreamPacketDescription * packet_description)42 void OnAudioBufferFilledCallback(
43 void* user_data, AudioQueueRef queue, AudioQueueBufferRef buffer,
44 const AudioTimeStamp* start_time, UInt32 num_packets,
45 const AudioStreamPacketDescription* packet_description) {
46 const int sample_size = buffer->mAudioDataByteSize / sizeof(float);
47 const int64_t sample_offset = start_time->mSampleTime;
48 const int32_t time_in_ms =
49 (sample_offset + sample_size) / (kAudioSampleFrequency / 1000);
50 const float* float_samples = static_cast<const float*>(buffer->mAudioData);
51 for (int i = 0; i < sample_size; ++i) {
52 const int capture_index = (sample_offset + i) % kAudioCaptureBufferSize;
53 g_audio_capture_buffer[capture_index] = float_samples[i] * ((1 << 15) - 1);
54 }
55 // This is how we let the outside world know that new audio data has arrived.
56 g_latest_audio_timestamp = time_in_ms;
57 AudioQueueEnqueueBuffer(queue, buffer, 0, nullptr);
58 }
59
60 // Set up everything we need to capture audio samples from the default recording
61 // device on MacOS.
InitAudioRecording(tflite::ErrorReporter * error_reporter)62 TfLiteStatus InitAudioRecording(tflite::ErrorReporter* error_reporter) {
63 // Set up the format of the audio - single channel, 32-bit float at 16KHz.
64 AudioStreamBasicDescription recordFormat = {0};
65 recordFormat.mSampleRate = kAudioSampleFrequency;
66 recordFormat.mFormatID = kAudioFormatLinearPCM;
67 recordFormat.mFormatFlags =
68 kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked;
69 recordFormat.mBitsPerChannel = 8 * sizeof(float);
70 recordFormat.mChannelsPerFrame = 1;
71 recordFormat.mBytesPerFrame = sizeof(float) * recordFormat.mChannelsPerFrame;
72 recordFormat.mFramesPerPacket = 1;
73 recordFormat.mBytesPerPacket =
74 recordFormat.mBytesPerFrame * recordFormat.mFramesPerPacket;
75 recordFormat.mReserved = 0;
76
77 UInt32 propSize = sizeof(recordFormat);
78 RETURN_IF_OS_ERROR(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0,
79 NULL, &propSize, &recordFormat),
80 error_reporter);
81
82 // Create a recording queue.
83 AudioQueueRef queue;
84 RETURN_IF_OS_ERROR(
85 AudioQueueNewInput(&recordFormat, OnAudioBufferFilledCallback,
86 error_reporter, nullptr, nullptr, 0, &queue),
87 error_reporter);
88
89 // Set up the buffers we'll need.
90 int buffer_bytes = 512 * sizeof(float);
91 for (int i = 0; i < kNumberRecordBuffers; ++i) {
92 AudioQueueBufferRef buffer;
93 RETURN_IF_OS_ERROR(AudioQueueAllocateBuffer(queue, buffer_bytes, &buffer),
94 error_reporter);
95 RETURN_IF_OS_ERROR(AudioQueueEnqueueBuffer(queue, buffer, 0, nullptr),
96 error_reporter);
97 }
98
99 // Start capturing audio.
100 RETURN_IF_OS_ERROR(AudioQueueStart(queue, nullptr), error_reporter);
101
102 return kTfLiteOk;
103 }
104
105 } // namespace
106
GetAudioSamples(tflite::ErrorReporter * error_reporter,int start_ms,int duration_ms,int * audio_samples_size,int16_t ** audio_samples)107 TfLiteStatus GetAudioSamples(tflite::ErrorReporter* error_reporter,
108 int start_ms, int duration_ms,
109 int* audio_samples_size, int16_t** audio_samples) {
110 if (!g_is_audio_initialized) {
111 TfLiteStatus init_status = InitAudioRecording(error_reporter);
112 if (init_status != kTfLiteOk) {
113 return init_status;
114 }
115 for (int i = 0; i < kMaxAudioSampleSize; ++i) {
116 g_audio_output_buffer[i] = 0;
117 }
118 g_is_audio_initialized = true;
119 }
120 // This should only be called when the main thread notices that the latest
121 // audio sample data timestamp has changed, so that there's new data in the
122 // capture ring buffer. The ring buffer will eventually wrap around and
123 // overwrite the data, but the assumption is that the main thread is checking
124 // often enough and the buffer is large enough that this call will be made
125 // before that happens.
126 const int start_offset = start_ms * (kAudioSampleFrequency / 1000);
127 const int duration_sample_count =
128 duration_ms * (kAudioSampleFrequency / 1000);
129 for (int i = 0; i < duration_sample_count; ++i) {
130 const int capture_index = (start_offset + i) % kAudioCaptureBufferSize;
131 g_audio_output_buffer[i] = g_audio_capture_buffer[capture_index];
132 }
133
134 *audio_samples_size = kMaxAudioSampleSize;
135 *audio_samples = g_audio_output_buffer;
136 return kTfLiteOk;
137 }
138
LatestAudioTimestamp()139 int32_t LatestAudioTimestamp() { return g_latest_audio_timestamp; }
140