1 /*
2 ** Copyright 2011, The Android Open-Source Project
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "echo_reference"
19
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
24
25 #include <log/log.h>
26 #include <system/audio.h>
27 #include <audio_utils/resampler.h>
28 #include <audio_utils/echo_reference.h>
29
/*
 * Activity flags of an echo reference. Each enumerator is a single bit so that
 * the reading and writing sides can be flagged independently in the same word.
 */
enum state {
    ECHOREF_IDLE    = 0,        /* neither side is active */
    ECHOREF_READING = 1 << 0,   /* the reading side is active */
    ECHOREF_WRITING = 1 << 1    /* the writing side is active */
};
36
37 struct echo_reference {
38 struct echo_reference_itfe itfe;
39 int status; // init status
40 uint32_t state; // active state: reading, writing or both
41 audio_format_t rd_format; // read sample format
42 uint32_t rd_channel_count; // read number of channels
43 uint32_t rd_sampling_rate; // read sampling rate in Hz
44 size_t rd_frame_size; // read frame size (bytes per sample)
45 audio_format_t wr_format; // write sample format
46 uint32_t wr_channel_count; // write number of channels
47 uint32_t wr_sampling_rate; // write sampling rate in Hz
48 size_t wr_frame_size; // write frame size (bytes per sample)
49 void *buffer; // main buffer
50 size_t buf_size; // main buffer size in frames
51 size_t frames_in; // number of frames in main buffer
52 void *wr_buf; // buffer for input conversions
53 size_t wr_buf_size; // size of conversion buffer in frames
54 size_t wr_frames_in; // number of frames in conversion buffer
55 size_t wr_curr_frame_size; // number of frames given to current write() function
56 void *wr_src_buf; // resampler input buf (either wr_buf or buffer used by write())
57 struct timespec wr_render_time; // latest render time indicated by write()
58 // default ALSA gettimeofday() format
59 int32_t playback_delay; // playback buffer delay indicated by last write()
60 int16_t prev_delta_sign; // sign of previous delay difference:
61 // 1: positive, -1: negative, 0: unknown
62 uint16_t delta_count; // number of consecutive delay differences with same sign
63 pthread_mutex_t lock; // mutex protecting read/write concurrency
64 pthread_cond_t cond; // condition signaled when data is ready to read
65 struct resampler_itfe *resampler; // input resampler
66 struct resampler_buffer_provider provider; // resampler buffer provider
67 };
68
69
echo_reference_get_next_buffer(struct resampler_buffer_provider * buffer_provider,struct resampler_buffer * buffer)70 int echo_reference_get_next_buffer(struct resampler_buffer_provider *buffer_provider,
71 struct resampler_buffer* buffer)
72 {
73 struct echo_reference *er;
74
75 if (buffer_provider == NULL) {
76 return -EINVAL;
77 }
78
79 er = (struct echo_reference *)((char *)buffer_provider -
80 offsetof(struct echo_reference, provider));
81
82 if (er->wr_src_buf == NULL || er->wr_frames_in == 0) {
83 buffer->raw = NULL;
84 buffer->frame_count = 0;
85 return -ENODATA;
86 }
87
88 buffer->frame_count = (buffer->frame_count > er->wr_frames_in) ?
89 er->wr_frames_in : buffer->frame_count;
90 // this is er->rd_channel_count here as we resample after stereo to mono conversion if any
91 buffer->i16 = (int16_t *)er->wr_src_buf + (er->wr_curr_frame_size - er->wr_frames_in) *
92 er->rd_channel_count;
93
94 return 0;
95 }
96
echo_reference_release_buffer(struct resampler_buffer_provider * buffer_provider,struct resampler_buffer * buffer)97 void echo_reference_release_buffer(struct resampler_buffer_provider *buffer_provider,
98 struct resampler_buffer* buffer)
99 {
100 struct echo_reference *er;
101
102 if (buffer_provider == NULL) {
103 return;
104 }
105
106 er = (struct echo_reference *)((char *)buffer_provider -
107 offsetof(struct echo_reference, provider));
108
109 er->wr_frames_in -= buffer->frame_count;
110 }
111
echo_reference_reset_l(struct echo_reference * er)112 static void echo_reference_reset_l(struct echo_reference *er)
113 {
114 ALOGV("echo_reference_reset_l()");
115 free(er->buffer);
116 er->buffer = NULL;
117 er->buf_size = 0;
118 er->frames_in = 0;
119 free(er->wr_buf);
120 er->wr_buf = NULL;
121 er->wr_buf_size = 0;
122 er->wr_render_time.tv_sec = 0;
123 er->wr_render_time.tv_nsec = 0;
124 er->delta_count = 0;
125 er->prev_delta_sign = 0;
126 }
127
/*
 * Extra room, in frames, reserved in the resampler output buffer: the speex
 * resampler may return a few more samples than the plain rate conversion
 * predicts when the ratio between the sample rates is not an integer.
 */
#define RESAMPLER_HEADROOM_SAMPLES 10
132
echo_reference_write(struct echo_reference_itfe * echo_reference,struct echo_reference_buffer * buffer)133 static int echo_reference_write(struct echo_reference_itfe *echo_reference,
134 struct echo_reference_buffer *buffer)
135 {
136 struct echo_reference *er = (struct echo_reference *)echo_reference;
137 int status = 0;
138
139 if (er == NULL) {
140 return -EINVAL;
141 }
142
143 pthread_mutex_lock(&er->lock);
144
145 if (buffer == NULL) {
146 ALOGV("echo_reference_write() stop write");
147 er->state &= ~ECHOREF_WRITING;
148 echo_reference_reset_l(er);
149 goto exit;
150 }
151
152 ALOGV("echo_reference_write() START trying to write %zu frames", buffer->frame_count);
153 ALOGV("echo_reference_write() playbackTimestamp:[%d].[%d], er->playback_delay:[%" PRId32 "]",
154 (int)buffer->time_stamp.tv_sec,
155 (int)buffer->time_stamp.tv_nsec, er->playback_delay);
156
157 //ALOGV("echo_reference_write() %d frames", buffer->frame_count);
158 // discard writes until a valid time stamp is provided.
159
160 if ((buffer->time_stamp.tv_sec == 0) && (buffer->time_stamp.tv_nsec == 0) &&
161 (er->wr_render_time.tv_sec == 0) && (er->wr_render_time.tv_nsec == 0)) {
162 goto exit;
163 }
164
165 if ((er->state & ECHOREF_WRITING) == 0) {
166 ALOGV("echo_reference_write() start write");
167 if (er->resampler != NULL) {
168 er->resampler->reset(er->resampler);
169 }
170 er->state |= ECHOREF_WRITING;
171 }
172
173 if ((er->state & ECHOREF_READING) == 0) {
174 goto exit;
175 }
176
177 er->wr_render_time.tv_sec = buffer->time_stamp.tv_sec;
178 er->wr_render_time.tv_nsec = buffer->time_stamp.tv_nsec;
179
180 er->playback_delay = buffer->delay_ns;
181
182 // this will be used in the get_next_buffer, to support variable input buffer sizes
183 er->wr_curr_frame_size = buffer->frame_count;
184
185 void *srcBuf;
186 size_t inFrames;
187 // do stereo to mono and down sampling if necessary
188 if (er->rd_channel_count != er->wr_channel_count ||
189 er->rd_sampling_rate != er->wr_sampling_rate) {
190 size_t wrBufSize = buffer->frame_count;
191
192 inFrames = buffer->frame_count;
193
194 if (er->rd_sampling_rate != er->wr_sampling_rate) {
195 inFrames = (buffer->frame_count * er->rd_sampling_rate) / er->wr_sampling_rate +
196 RESAMPLER_HEADROOM_SAMPLES;
197 // wr_buf is not only used as resampler output but also for stereo to mono conversion
198 // output so buffer size is driven by both write and read sample rates
199 if (inFrames > wrBufSize) {
200 wrBufSize = inFrames;
201 }
202 }
203
204 if (er->wr_buf_size < wrBufSize) {
205 ALOGV("echo_reference_write() increasing write buffer size from %zu to %zu",
206 er->wr_buf_size, wrBufSize);
207 er->wr_buf_size = wrBufSize;
208 er->wr_buf = realloc(er->wr_buf, er->wr_buf_size * er->rd_frame_size);
209 }
210
211 if (er->rd_channel_count != er->wr_channel_count) {
212 // must be stereo to mono
213 int16_t *src16 = (int16_t *)buffer->raw;
214 int16_t *dst16 = (int16_t *)er->wr_buf;
215 size_t frames = buffer->frame_count;
216 while (frames--) {
217 *dst16++ = (int16_t)(((int32_t)*src16 + (int32_t)*(src16 + 1)) >> 1);
218 src16 += 2;
219 }
220 }
221 if (er->wr_sampling_rate != er->rd_sampling_rate) {
222 if (er->resampler == NULL) {
223 int rc;
224 ALOGV("echo_reference_write() new ReSampler(%d, %d)",
225 er->wr_sampling_rate, er->rd_sampling_rate);
226 er->provider.get_next_buffer = echo_reference_get_next_buffer;
227 er->provider.release_buffer = echo_reference_release_buffer;
228 rc = create_resampler(er->wr_sampling_rate,
229 er->rd_sampling_rate,
230 er->rd_channel_count,
231 RESAMPLER_QUALITY_DEFAULT,
232 &er->provider,
233 &er->resampler);
234 if (rc != 0) {
235 er->resampler = NULL;
236 ALOGV("echo_reference_write() failure to create resampler %d", rc);
237 status = -ENODEV;
238 goto exit;
239 }
240 }
241 // er->wr_src_buf and er->wr_frames_in are used by getNexBuffer() called by the
242 // resampler to get new frames
243 if (er->rd_channel_count != er->wr_channel_count) {
244 er->wr_src_buf = er->wr_buf;
245 } else {
246 er->wr_src_buf = buffer->raw;
247 }
248 er->wr_frames_in = buffer->frame_count;
249 // inFrames is always more than we need here to get frames remaining from previous runs
250 // inFrames is updated by resample() with the number of frames produced
251 ALOGV("echo_reference_write() ReSampling(%d, %d)",
252 er->wr_sampling_rate, er->rd_sampling_rate);
253 er->resampler->resample_from_provider(er->resampler,
254 (int16_t *)er->wr_buf, &inFrames);
255 ALOGV_IF(er->wr_frames_in != 0,
256 "echo_reference_write() er->wr_frames_in not 0 (%d) after resampler",
257 er->wr_frames_in);
258 }
259 srcBuf = er->wr_buf;
260 } else {
261 inFrames = buffer->frame_count;
262 srcBuf = buffer->raw;
263 }
264
265 if (er->frames_in + inFrames > er->buf_size) {
266 ALOGV("echo_reference_write() increasing buffer size from %zu to %zu",
267 er->buf_size, er->frames_in + inFrames);
268 er->buf_size = er->frames_in + inFrames;
269 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
270 }
271 memcpy((char *)er->buffer + er->frames_in * er->rd_frame_size,
272 srcBuf,
273 inFrames * er->rd_frame_size);
274 er->frames_in += inFrames;
275
276 ALOGV("echo_reference_write() frames written:[%zu], frames total:[%zu] buffer size:[%zu]\n"
277 " er->wr_render_time:[%d].[%d], er->playback_delay:[%" PRId32 "]",
278 inFrames, er->frames_in, er->buf_size,
279 (int)er->wr_render_time.tv_sec, (int)er->wr_render_time.tv_nsec, er->playback_delay);
280
281 pthread_cond_signal(&er->cond);
282 exit:
283 pthread_mutex_unlock(&er->lock);
284 ALOGV("echo_reference_write() END");
285 return status;
286 }
287
// Smallest deviation between the buffered and the expected delay that counts as a delay jump
// when deciding whether to realign the reference buffer: 6 samples at 8kHz, in nanoseconds.
#define MIN_DELAY_DELTA_NS (2 * 375000)
// Number of consecutive deviations with the same sign that has to be exceeded before the
// reference buffer is adjusted.
#define MIN_DELTA_NUM 4
293
294
echo_reference_read(struct echo_reference_itfe * echo_reference,struct echo_reference_buffer * buffer)295 static int echo_reference_read(struct echo_reference_itfe *echo_reference,
296 struct echo_reference_buffer *buffer)
297 {
298 struct echo_reference *er = (struct echo_reference *)echo_reference;
299
300 if (er == NULL) {
301 return -EINVAL;
302 }
303
304 pthread_mutex_lock(&er->lock);
305
306 if (buffer == NULL) {
307 ALOGV("echo_reference_read() stop read");
308 er->state &= ~ECHOREF_READING;
309 goto exit;
310 }
311
312 ALOGV("echo_reference_read() START, delayCapture:[%" PRId32 "], "
313 "er->frames_in:[%zu],buffer->frame_count:[%zu]",
314 buffer->delay_ns, er->frames_in, buffer->frame_count);
315
316 if ((er->state & ECHOREF_READING) == 0) {
317 ALOGV("echo_reference_read() start read");
318 echo_reference_reset_l(er);
319 er->state |= ECHOREF_READING;
320 }
321
322 if ((er->state & ECHOREF_WRITING) == 0) {
323 memset(buffer->raw, 0, er->rd_frame_size * buffer->frame_count);
324 buffer->delay_ns = 0;
325 goto exit;
326 }
327
328 // ALOGV("echo_reference_read() %d frames", buffer->frame_count);
329
330 // allow some time for new frames to arrive if not enough frames are ready for read
331 if (er->frames_in < buffer->frame_count) {
332 uint32_t timeoutMs = (uint32_t)((1000 * buffer->frame_count) / er->rd_sampling_rate / 2);
333 struct timespec ts = {0, 0};
334
335 #ifndef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
336 clock_gettime(CLOCK_REALTIME, &ts);
337 #endif
338
339 ts.tv_sec += timeoutMs/1000;
340 ts.tv_nsec += (timeoutMs%1000) * 1000000;
341 if (ts.tv_nsec >= 1000000000) {
342 ts.tv_nsec -= 1000000000;
343 ts.tv_sec += 1;
344 }
345
346 #ifdef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
347 pthread_cond_timedwait_relative_np(&er->cond, &er->lock, &ts);
348 #else
349 pthread_cond_timedwait(&er->cond, &er->lock, &ts);
350 #endif
351
352 ALOGV_IF((er->frames_in < buffer->frame_count),
353 "echo_reference_read() waited %d ms but still not enough frames"\
354 " er->frames_in: %d, buffer->frame_count = %d",
355 timeoutMs, er->frames_in, buffer->frame_count);
356 }
357
358 int64_t timeDiff;
359 struct timespec tmp;
360
361 if ((er->wr_render_time.tv_sec == 0 && er->wr_render_time.tv_nsec == 0) ||
362 (buffer->time_stamp.tv_sec == 0 && buffer->time_stamp.tv_nsec == 0)) {
363 ALOGV("echo_reference_read(): NEW:timestamp is zero---------setting timeDiff = 0, "\
364 "not updating delay this time");
365 timeDiff = 0;
366 } else {
367 if (buffer->time_stamp.tv_nsec < er->wr_render_time.tv_nsec) {
368 tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec - 1;
369 tmp.tv_nsec = 1000000000 + buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
370 } else {
371 tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec;
372 tmp.tv_nsec = buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
373 }
374 timeDiff = (((int64_t)tmp.tv_sec * 1000000000 + tmp.tv_nsec));
375
376 int64_t expectedDelayNs = er->playback_delay + buffer->delay_ns - timeDiff;
377
378 if (er->resampler != NULL) {
379 // Resampler already compensates part of the delay
380 int32_t rsmp_delay = er->resampler->delay_ns(er->resampler);
381 expectedDelayNs -= rsmp_delay;
382 }
383
384 ALOGV("echo_reference_read(): expectedDelayNs[%" PRId64 "] = "
385 "er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
386 "] - timeDiff[%" PRId64 "]",
387 expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
388
389 if (expectedDelayNs > 0) {
390 int64_t delayNs = ((int64_t)er->frames_in * 1000000000) / er->rd_sampling_rate;
391
392 int64_t deltaNs = delayNs - expectedDelayNs;
393
394 ALOGV("echo_reference_read(): EchoPathDelayDeviation between reference and DMA [%" PRId64 "]",
395 deltaNs);
396 if (abs(deltaNs) >= MIN_DELAY_DELTA_NS) {
397 // smooth the variation and update the reference buffer only
398 // if a deviation in the same direction is observed for more than MIN_DELTA_NUM
399 // consecutive reads.
400 int16_t delay_sign = (deltaNs >= 0) ? 1 : -1;
401 if (delay_sign == er->prev_delta_sign) {
402 er->delta_count++;
403 } else {
404 er->delta_count = 1;
405 }
406 er->prev_delta_sign = delay_sign;
407
408 if (er->delta_count > MIN_DELTA_NUM) {
409 size_t previousFrameIn = er->frames_in;
410 er->frames_in = (size_t)((expectedDelayNs * er->rd_sampling_rate)/1000000000);
411 int offset = er->frames_in - previousFrameIn;
412
413 ALOGV("echo_reference_read(): deltaNs ENOUGH and %s: "
414 "er->frames_in: %zu, previousFrameIn = %zu",
415 delay_sign ? "positive" : "negative", er->frames_in, previousFrameIn);
416
417 if (deltaNs < 0) {
418 // Less data available in the reference buffer than expected
419 if (er->frames_in > er->buf_size) {
420 er->buf_size = er->frames_in;
421 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
422 ALOGV("echo_reference_read(): increasing buffer size to %zu",
423 er->buf_size);
424 }
425
426 if (offset > 0) {
427 memset((char *)er->buffer + previousFrameIn * er->rd_frame_size,
428 0, offset * er->rd_frame_size);
429 ALOGV("echo_reference_read(): pushing ref buffer by [%d]", offset);
430 }
431 } else {
432 // More data available in the reference buffer than expected
433 offset = -offset;
434 if (offset > 0) {
435 memcpy(er->buffer, (char *)er->buffer + (offset * er->rd_frame_size),
436 er->frames_in * er->rd_frame_size);
437 ALOGV("echo_reference_read(): shifting ref buffer by [%zu]",
438 er->frames_in);
439 }
440 }
441 }
442 } else {
443 er->delta_count = 0;
444 er->prev_delta_sign = 0;
445 ALOGV("echo_reference_read(): Constant EchoPathDelay - difference "
446 "between reference and DMA %" PRId64, deltaNs);
447 }
448 } else {
449 ALOGV("echo_reference_read(): NEGATIVE expectedDelayNs[%" PRId64
450 "] = er->playback_delay[%" PRId32 "] + delayCapture[%" PRId32
451 "] - timeDiff[%" PRId64 "]",
452 expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
453 }
454 }
455
456 if (er->frames_in < buffer->frame_count) {
457 if (buffer->frame_count > er->buf_size) {
458 er->buf_size = buffer->frame_count;
459 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
460 ALOGV("echo_reference_read(): increasing buffer size to %zu", er->buf_size);
461 }
462 // filling up the reference buffer with 0s to match the expected delay.
463 memset((char *)er->buffer + er->frames_in * er->rd_frame_size,
464 0, (buffer->frame_count - er->frames_in) * er->rd_frame_size);
465 er->frames_in = buffer->frame_count;
466 }
467
468 memcpy(buffer->raw,
469 (char *)er->buffer,
470 buffer->frame_count * er->rd_frame_size);
471
472 er->frames_in -= buffer->frame_count;
473 memcpy(er->buffer,
474 (char *)er->buffer + buffer->frame_count * er->rd_frame_size,
475 er->frames_in * er->rd_frame_size);
476
477 // As the reference buffer is now time aligned to the microphone signal there is a zero delay
478 buffer->delay_ns = 0;
479
480 ALOGV("echo_reference_read() END %zu frames, total frames in %zu",
481 buffer->frame_count, er->frames_in);
482
483 pthread_cond_signal(&er->cond);
484
485 exit:
486 pthread_mutex_unlock(&er->lock);
487 return 0;
488 }
489
490
/*
 * Allocates an echo reference and returns its interface in *echo_reference.
 *
 * Only 16 bit PCM is accepted, with identical read and write formats, a stereo
 * write side and a mono or stereo read side. Both sampling rates must be
 * non-zero because read() and write() divide by them.
 *
 * Returns 0 on success, -EINVAL for a NULL echo_reference or an unsupported
 * configuration, -ENOMEM if the allocation fails, or the negated error of
 * pthread_mutex_init()/pthread_cond_init(). *echo_reference is NULL on failure.
 */
int create_echo_reference(audio_format_t rdFormat,
                            uint32_t rdChannelCount,
                            uint32_t rdSamplingRate,
                            audio_format_t wrFormat,
                            uint32_t wrChannelCount,
                            uint32_t wrSamplingRate,
                            struct echo_reference_itfe **echo_reference)
{
    struct echo_reference *er;
    int rc;

    ALOGV("create_echo_reference()");

    if (echo_reference == NULL) {
        return -EINVAL;
    }

    *echo_reference = NULL;

    if (rdFormat != AUDIO_FORMAT_PCM_16_BIT ||
            rdFormat != wrFormat) {
        ALOGW("create_echo_reference bad format rd %d, wr %d", rdFormat, wrFormat);
        return -EINVAL;
    }
    if ((rdChannelCount != 1 && rdChannelCount != 2) ||
            wrChannelCount != 2) {
        ALOGW("create_echo_reference bad channel count rd %" PRIu32 ", wr %" PRIu32,
              rdChannelCount, wrChannelCount);
        return -EINVAL;
    }
    if (rdSamplingRate == 0 || wrSamplingRate == 0) {
        ALOGW("create_echo_reference bad sampling rate rd %" PRIu32 ", wr %" PRIu32,
              rdSamplingRate, wrSamplingRate);
        return -EINVAL;
    }

    // zero filled: buffers start out NULL/empty and the state is idle
    er = calloc(1, sizeof(*er));
    if (er == NULL) {
        return -ENOMEM;
    }

    // do not rely on zero filled memory being a valid mutex or condition variable
    rc = pthread_mutex_init(&er->lock, NULL);
    if (rc != 0) {
        free(er);
        return -rc;
    }
    rc = pthread_cond_init(&er->cond, NULL);
    if (rc != 0) {
        pthread_mutex_destroy(&er->lock);
        free(er);
        return -rc;
    }

    er->itfe.read = echo_reference_read;
    er->itfe.write = echo_reference_write;

    er->state = ECHOREF_IDLE;
    er->rd_format = rdFormat;
    er->rd_channel_count = rdChannelCount;
    er->rd_sampling_rate = rdSamplingRate;
    er->wr_format = wrFormat;
    er->wr_channel_count = wrChannelCount;
    er->wr_sampling_rate = wrSamplingRate;
    er->rd_frame_size = audio_bytes_per_sample(rdFormat) * rdChannelCount;
    er->wr_frame_size = audio_bytes_per_sample(wrFormat) * wrChannelCount;
    *echo_reference = &er->itfe;
    return 0;
}
538
release_echo_reference(struct echo_reference_itfe * echo_reference)539 void release_echo_reference(struct echo_reference_itfe *echo_reference) {
540 struct echo_reference *er = (struct echo_reference *)echo_reference;
541
542 if (er == NULL) {
543 return;
544 }
545
546 ALOGV("EchoReference dstor");
547 echo_reference_reset_l(er);
548 if (er->resampler != NULL) {
549 release_resampler(er->resampler);
550 }
551 free(er);
552 }
553
554