1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /**
18 * Tools for measuring latency and for detecting glitches.
19 * These classes are pure math and can be used with any audio system.
20 */
21
22 #ifndef ANALYZER_LATENCY_ANALYZER_H
23 #define ANALYZER_LATENCY_ANALYZER_H
24
25 #include <algorithm>
26 #include <assert.h>
27 #include <cctype>
28 #include <iomanip>
29 #include <iostream>
30 #include <math.h>
31 #include <memory>
32 #include <sstream>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <unistd.h>
36 #include <vector>
37
38 #include "PeakDetector.h"
39 #include "PseudoRandom.h"
40 #include "RandomPulseGenerator.h"
41
// This is used when the code is in Oboe.
// If the Android-style ALOGx logging macros are not already defined,
// map them onto LOGD/LOGE/LOGW.
// NOTE(review): LOGD/LOGE/LOGW are not defined in this header; presumably the
// including code provides them - confirm.
#ifndef ALOGD
#define ALOGD LOGD
#define ALOGE LOGE
#define ALOGW LOGW
#endif

// Prefix placed in front of the key/value lines of the report text built by analyze().
#define LOOPBACK_RESULT_TAG "RESULT: "
50
// Sample rate, in frames per second, assumed until setSampleRate() is called.
static constexpr int32_t kDefaultSampleRate{48000};
// Unit conversion factor; true by definition.
static constexpr int32_t kMillisPerSecond{1000};
// Longest latency that the recording buffer is sized for; arbitrary and generous.
static constexpr int32_t kMaxLatencyMillis{1000};
// Measurements whose peak correlation is below this are reported as ERROR_CONFIDENCE.
static constexpr double kMinimumConfidence{0.2};
55
/**
 * Result of one latency measurement.
 */
struct LatencyReport {
    // Offset, in frames, of the strongest correlation peak within the recording.
    // Initialized with an integer literal; the field is integral, not floating point.
    int32_t latencyInFrames = 0;
    // Magnitude of the normalized correlation at that peak; 0.0 means nothing was measured.
    double confidence = 0.0;

    /** Return both fields to their "nothing measured" values. */
    void reset() {
        latencyInFrames = 0;
        confidence = 0.0;
    }
};
65
/**
 * Calculate a normalized cross correlation of two windows of equal length.
 *
 * The value is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2), which is 1.0 when
 * the windows are identical and -1.0 when one is the negation of the other.
 *
 * @param a first window, at least windowSize samples
 * @param b second window, at least windowSize samples
 * @param windowSize number of samples to compare; nothing is read if it is <= 0
 * @return the correlation, or 0.0 when the combined energy is below 1.0e-9
 */
static double calculateNormalizedCorrelation(const float *a,
                                             const float *b,
                                             int windowSize) {
    double crossTotal = 0.0;
    double energyTotal = 0.0;

    int index = 0;
    while (index < windowSize) {
        const float left = a[index];
        const float right = b[index];
        // Products are formed in float and then accumulated in double.
        crossTotal += left * right;
        energyTotal += ((left * left) + (right * right));
        ++index;
    }

    // Near-silent windows would divide by (almost) zero, so report no correlation.
    return (energyTotal >= 1.0e-9) ? (2.0 * crossTotal / energyTotal) : 0.0;
}
88
calculateRootMeanSquare(float * data,int32_t numSamples)89 static double calculateRootMeanSquare(float *data, int32_t numSamples) {
90 double sum = 0.0;
91 for (int32_t i = 0; i < numSamples; i++) {
92 float sample = data[i];
93 sum += sample * sample;
94 }
95 return sqrt(sum / numSamples);
96 }
97
98 /**
99 * Monophonic recording with processing.
100 */
101 class AudioRecording
102 {
103 public:
104
allocate(int maxFrames)105 void allocate(int maxFrames) {
106 mData = std::make_unique<float[]>(maxFrames);
107 mMaxFrames = maxFrames;
108 }
109
110 // Write SHORT data from the first channel.
write(int16_t * inputData,int32_t inputChannelCount,int32_t numFrames)111 int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
112 // stop at end of buffer
113 if ((mFrameCounter + numFrames) > mMaxFrames) {
114 numFrames = mMaxFrames - mFrameCounter;
115 }
116 for (int i = 0; i < numFrames; i++) {
117 mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
118 }
119 return numFrames;
120 }
121
122 // Write FLOAT data from the first channel.
write(float * inputData,int32_t inputChannelCount,int32_t numFrames)123 int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
124 // stop at end of buffer
125 if ((mFrameCounter + numFrames) > mMaxFrames) {
126 numFrames = mMaxFrames - mFrameCounter;
127 }
128 for (int i = 0; i < numFrames; i++) {
129 mData[mFrameCounter++] = inputData[i * inputChannelCount];
130 }
131 return numFrames;
132 }
133
134 // Write FLOAT data from the first channel.
write(float sample)135 int32_t write(float sample) {
136 // stop at end of buffer
137 if (mFrameCounter < mMaxFrames) {
138 mData[mFrameCounter++] = sample;
139 return 1;
140 }
141 return 0;
142 }
143
clear()144 void clear() {
145 mFrameCounter = 0;
146 }
size()147 int32_t size() const {
148 return mFrameCounter;
149 }
150
isFull()151 bool isFull() const {
152 return mFrameCounter >= mMaxFrames;
153 }
154
getData()155 float *getData() const {
156 return mData.get();
157 }
158
setSampleRate(int32_t sampleRate)159 void setSampleRate(int32_t sampleRate) {
160 mSampleRate = sampleRate;
161 }
162
getSampleRate()163 int32_t getSampleRate() const {
164 return mSampleRate;
165 }
166
167 /**
168 * Square the samples so they are all positive and so the peaks are emphasized.
169 */
square()170 void square() {
171 float *x = mData.get();
172 for (int i = 0; i < mFrameCounter; i++) {
173 x[i] *= x[i];
174 }
175 }
176
177 /**
178 * Amplify a signal so that the peak matches the specified target.
179 *
180 * @param target final max value
181 * @return gain applied to signal
182 */
normalize(float target)183 float normalize(float target) {
184 float maxValue = 1.0e-9f;
185 for (int i = 0; i < mFrameCounter; i++) {
186 maxValue = std::max(maxValue, abs(mData[i]));
187 }
188 float gain = target / maxValue;
189 for (int i = 0; i < mFrameCounter; i++) {
190 mData[i] *= gain;
191 }
192 return gain;
193 }
194
195 private:
196 std::unique_ptr<float[]> mData;
197 int32_t mFrameCounter = 0;
198 int32_t mMaxFrames = 0;
199 int32_t mSampleRate = kDefaultSampleRate; // common default
200 };
201
measureLatencyFromPulse(AudioRecording & recorded,AudioRecording & pulse,LatencyReport * report)202 static int measureLatencyFromPulse(AudioRecording &recorded,
203 AudioRecording &pulse,
204 LatencyReport *report) {
205
206 report->latencyInFrames = 0;
207 report->confidence = 0.0;
208
209 int numCorrelations = recorded.size() - pulse.size();
210 if (numCorrelations < 10) {
211 ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
212 return -1;
213 }
214 std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
215
216 // Correlate pulse against the recorded data.
217 for (int i = 0; i < numCorrelations; i++) {
218 float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
219 &pulse.getData()[0],
220 pulse.size());
221 correlations[i] = correlation;
222 }
223
224 // Find highest peak in correlation array.
225 float peakCorrelation = 0.0;
226 int peakIndex = -1;
227 for (int i = 0; i < numCorrelations; i++) {
228 float value = abs(correlations[i]);
229 if (value > peakCorrelation) {
230 peakCorrelation = value;
231 peakIndex = i;
232 }
233 }
234 if (peakIndex < 0) {
235 ALOGE("%s() no signal for correlation\n", __func__);
236 return -2;
237 }
238 #if 0
239 // Dump correlation data for charting.
240 else {
241 const int margin = 50;
242 int startIndex = std::max(0, peakIndex - margin);
243 int endIndex = std::min(numCorrelations - 1, peakIndex + margin);
244 for (int index = startIndex; index < endIndex; index++) {
245 ALOGD("Correlation, %d, %f", index, correlations[index]);
246 }
247 }
248 #endif
249
250 report->latencyInFrames = peakIndex;
251 report->confidence = peakCorrelation;
252
253 return 0;
254 }
255
256 // ====================================================================================
257 class LoopbackProcessor {
258 public:
259 virtual ~LoopbackProcessor() = default;
260
261 enum result_code {
262 RESULT_OK = 0,
263 ERROR_NOISY = -99,
264 ERROR_VOLUME_TOO_LOW,
265 ERROR_VOLUME_TOO_HIGH,
266 ERROR_CONFIDENCE,
267 ERROR_INVALID_STATE,
268 ERROR_GLITCHES,
269 ERROR_NO_LOCK
270 };
271
prepareToTest()272 virtual void prepareToTest() {
273 reset();
274 }
275
reset()276 virtual void reset() {
277 mResult = 0;
278 mResetCount++;
279 }
280
281 virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
282 virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
283
process(float * inputData,int inputChannelCount,int numInputFrames,float * outputData,int outputChannelCount,int numOutputFrames)284 void process(float *inputData, int inputChannelCount, int numInputFrames,
285 float *outputData, int outputChannelCount, int numOutputFrames) {
286 int numBoth = std::min(numInputFrames, numOutputFrames);
287 // Process one frame at a time.
288 for (int i = 0; i < numBoth; i++) {
289 processInputFrame(inputData, inputChannelCount);
290 inputData += inputChannelCount;
291 processOutputFrame(outputData, outputChannelCount);
292 outputData += outputChannelCount;
293 }
294 // If there is more input than output.
295 for (int i = numBoth; i < numInputFrames; i++) {
296 processInputFrame(inputData, inputChannelCount);
297 inputData += inputChannelCount;
298 }
299 // If there is more output than input.
300 for (int i = numBoth; i < numOutputFrames; i++) {
301 processOutputFrame(outputData, outputChannelCount);
302 outputData += outputChannelCount;
303 }
304 }
305
306 virtual std::string analyze() = 0;
307
printStatus()308 virtual void printStatus() {};
309
getResult()310 int32_t getResult() {
311 return mResult;
312 }
313
setResult(int32_t result)314 void setResult(int32_t result) {
315 mResult = result;
316 }
317
isDone()318 virtual bool isDone() {
319 return false;
320 }
321
save(const char * fileName)322 virtual int save(const char *fileName) {
323 (void) fileName;
324 return -1;
325 }
326
load(const char * fileName)327 virtual int load(const char *fileName) {
328 (void) fileName;
329 return -1;
330 }
331
setSampleRate(int32_t sampleRate)332 virtual void setSampleRate(int32_t sampleRate) {
333 mSampleRate = sampleRate;
334 }
335
getSampleRate()336 int32_t getSampleRate() const {
337 return mSampleRate;
338 }
339
getResetCount()340 int32_t getResetCount() const {
341 return mResetCount;
342 }
343
344 /** Called when not enough input frames could be read after synchronization.
345 */
onInsufficientRead()346 virtual void onInsufficientRead() {
347 reset();
348 }
349
350 protected:
351 int32_t mResetCount = 0;
352
353 private:
354 int32_t mSampleRate = kDefaultSampleRate;
355 int32_t mResult = 0;
356 };
357
358 class LatencyAnalyzer : public LoopbackProcessor {
359 public:
360
LatencyAnalyzer()361 LatencyAnalyzer() : LoopbackProcessor() {}
362 virtual ~LatencyAnalyzer() = default;
363
364 virtual int32_t getProgress() const = 0;
365
366 virtual int getState() = 0;
367
368 // @return latency in frames
369 virtual int32_t getMeasuredLatency() = 0;
370
371 virtual double getMeasuredConfidence() = 0;
372
373 virtual double getBackgroundRMS() = 0;
374
375 virtual double getSignalRMS() = 0;
376
377 };
378
379 // ====================================================================================
380 /**
381 * Measure latency given a loopback stream data.
382 * Use an encoded bit train as the sound source because it
383 * has an unambiguous correlation value.
384 * Uses a state machine to cycle through various stages.
385 *
386 */
387 class PulseLatencyAnalyzer : public LatencyAnalyzer {
388 public:
389
PulseLatencyAnalyzer()390 PulseLatencyAnalyzer() : LatencyAnalyzer() {
391 int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
392 int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
393 / (kFramesPerEncodedBit * kMillisPerSecond);
394 int32_t pulseLength = numPulseBits * kFramesPerEncodedBit;
395 mFramesToRecord = pulseLength + maxLatencyFrames;
396 mAudioRecording.allocate(mFramesToRecord);
397 mAudioRecording.setSampleRate(getSampleRate());
398 generateRandomPulse(pulseLength);
399 }
400
generateRandomPulse(int32_t pulseLength)401 void generateRandomPulse(int32_t pulseLength) {
402 mPulse.allocate(pulseLength);
403 RandomPulseGenerator pulser(kFramesPerEncodedBit);
404 for (int i = 0; i < pulseLength; i++) {
405 mPulse.write(pulser.nextFloat());
406 }
407 }
408
getState()409 int getState() override {
410 return mState;
411 }
412
setSampleRate(int32_t sampleRate)413 void setSampleRate(int32_t sampleRate) override {
414 LoopbackProcessor::setSampleRate(sampleRate);
415 mAudioRecording.setSampleRate(sampleRate);
416 }
417
reset()418 void reset() override {
419 LoopbackProcessor::reset();
420 mState = STATE_MEASURE_BACKGROUND;
421 mDownCounter = (int32_t) (getSampleRate() * kBackgroundMeasurementLengthSeconds);
422 mLoopCounter = 0;
423
424 mPulseCursor = 0;
425 mBackgroundSumSquare = 0.0f;
426 mBackgroundSumCount = 0;
427 mBackgroundRMS = 0.0f;
428 mSignalRMS = 0.0f;
429
430 mAudioRecording.clear();
431 mLatencyReport.reset();
432 }
433
hasEnoughData()434 bool hasEnoughData() {
435 return mAudioRecording.isFull();
436 }
437
isDone()438 bool isDone() override {
439 return mState == STATE_DONE;
440 }
441
getProgress()442 int32_t getProgress() const override {
443 return mAudioRecording.size();
444 }
445
analyze()446 std::string analyze() override {
447 std::stringstream report;
448 report << "PulseLatencyAnalyzer ---------------\n";
449 report << LOOPBACK_RESULT_TAG "test.state = "
450 << std::setw(8) << mState << "\n";
451 report << LOOPBACK_RESULT_TAG "test.state.name = "
452 << convertStateToText(mState) << "\n";
453 report << LOOPBACK_RESULT_TAG "background.rms = "
454 << std::setw(8) << mBackgroundRMS << "\n";
455
456 int32_t newResult = RESULT_OK;
457 if (mState != STATE_GOT_DATA) {
458 report << "WARNING - Bad state. Check volume on device.\n";
459 // setResult(ERROR_INVALID_STATE);
460 } else {
461 float gain = mAudioRecording.normalize(1.0f);
462 measureLatencyFromPulse(mAudioRecording,
463 mPulse,
464 &mLatencyReport);
465
466 if (mLatencyReport.confidence < kMinimumConfidence) {
467 report << " ERROR - confidence too low!";
468 newResult = ERROR_CONFIDENCE;
469 } else {
470 mSignalRMS = calculateRootMeanSquare(
471 &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
472 / gain;
473 }
474 double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
475 / getSampleRate();
476 report << LOOPBACK_RESULT_TAG "latency.frames = " << std::setw(8)
477 << mLatencyReport.latencyInFrames << "\n";
478 report << LOOPBACK_RESULT_TAG "latency.msec = " << std::setw(8)
479 << latencyMillis << "\n";
480 report << LOOPBACK_RESULT_TAG "latency.confidence = " << std::setw(8)
481 << mLatencyReport.confidence << "\n";
482 }
483 mState = STATE_DONE;
484 if (getResult() == RESULT_OK) {
485 setResult(newResult);
486 }
487
488 return report.str();
489 }
490
getMeasuredLatency()491 int32_t getMeasuredLatency() override {
492 return mLatencyReport.latencyInFrames;
493 }
494
getMeasuredConfidence()495 double getMeasuredConfidence() override {
496 return mLatencyReport.confidence;
497 }
498
getBackgroundRMS()499 double getBackgroundRMS() override {
500 return mBackgroundRMS;
501 }
502
getSignalRMS()503 double getSignalRMS() override {
504 return mSignalRMS;
505 }
506
isRecordingComplete()507 bool isRecordingComplete() {
508 return mState == STATE_GOT_DATA;
509 }
510
printStatus()511 void printStatus() override {
512 ALOGD("latency: st = %d = %s", mState, convertStateToText(mState));
513 }
514
processInputFrame(float * frameData,int channelCount)515 result_code processInputFrame(float *frameData, int channelCount) override {
516 echo_state nextState = mState;
517 mLoopCounter++;
518
519 switch (mState) {
520 case STATE_MEASURE_BACKGROUND:
521 // Measure background RMS on channel 0
522 mBackgroundSumSquare += frameData[0] * frameData[0];
523 mBackgroundSumCount++;
524 mDownCounter--;
525 if (mDownCounter <= 0) {
526 mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
527 nextState = STATE_IN_PULSE;
528 mPulseCursor = 0;
529 }
530 break;
531
532 case STATE_IN_PULSE:
533 // Record input until the mAudioRecording is full.
534 mAudioRecording.write(frameData, channelCount, 1);
535 if (hasEnoughData()) {
536 nextState = STATE_GOT_DATA;
537 }
538 break;
539
540 case STATE_GOT_DATA:
541 case STATE_DONE:
542 default:
543 break;
544 }
545
546 mState = nextState;
547 return RESULT_OK;
548 }
549
processOutputFrame(float * frameData,int channelCount)550 result_code processOutputFrame(float *frameData, int channelCount) override {
551 switch (mState) {
552 case STATE_IN_PULSE:
553 if (mPulseCursor < mPulse.size()) {
554 float pulseSample = mPulse.getData()[mPulseCursor++];
555 for (int i = 0; i < channelCount; i++) {
556 frameData[i] = pulseSample;
557 }
558 } else {
559 for (int i = 0; i < channelCount; i++) {
560 frameData[i] = 0;
561 }
562 }
563 break;
564
565 case STATE_MEASURE_BACKGROUND:
566 case STATE_GOT_DATA:
567 case STATE_DONE:
568 default:
569 for (int i = 0; i < channelCount; i++) {
570 frameData[i] = 0.0f; // silence
571 }
572 break;
573 }
574
575 return RESULT_OK;
576 }
577
578 private:
579
580 enum echo_state {
581 STATE_MEASURE_BACKGROUND,
582 STATE_IN_PULSE,
583 STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
584 STATE_DONE,
585 };
586
convertStateToText(echo_state state)587 const char *convertStateToText(echo_state state) {
588 switch (state) {
589 case STATE_MEASURE_BACKGROUND:
590 return "INIT";
591 case STATE_IN_PULSE:
592 return "PULSE";
593 case STATE_GOT_DATA:
594 return "GOT_DATA";
595 case STATE_DONE:
596 return "DONE";
597 }
598 return "UNKNOWN";
599 }
600
601 int32_t mDownCounter = 500;
602 int32_t mLoopCounter = 0;
603 echo_state mState = STATE_MEASURE_BACKGROUND;
604
605 static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
606 static constexpr int32_t kPulseLengthMillis = 500;
607 static constexpr double kBackgroundMeasurementLengthSeconds = 0.5;
608
609 AudioRecording mPulse;
610 int32_t mPulseCursor = 0;
611
612 double mBackgroundSumSquare = 0.0;
613 int32_t mBackgroundSumCount = 0;
614 double mBackgroundRMS = 0.0;
615 double mSignalRMS = 0.0;
616 int32_t mFramesToRecord = 0;
617
618 AudioRecording mAudioRecording; // contains only the input after starting the pulse
619 LatencyReport mLatencyReport;
620 };
621
622 #endif // ANALYZER_LATENCY_ANALYZER_H
623