1#!/usr/bin/env python3 2# 3# Copyright 2017 - The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16"""This module provides utilities to detect some artifacts and measure the 17 quality of audio.""" 18 19import logging 20import math 21import numpy 22 23import acts_contrib.test_utils.audio_analysis_lib.audio_analysis as audio_analysis 24 25# The input signal should be one sine wave with fixed frequency which 26# can have silence before and/or after sine wave. 27# For example: 28# silence sine wave silence 29# -----------|VVVVVVVVVVVVV|----------- 30# (a) (b) (c) 31# This module detects these artifacts: 32# 1. Detect noise in (a) and (c). 33# 2. Detect delay in (b). 34# 3. Detect burst in (b). 35# Assume the transitions between (a)(b) and (b)(c) are smooth and 36# amplitude increases/decreases linearly. 37# This module will detect artifacts in the sine wave. 38# This module also estimates the equivalent noise level by teager operator. 39# This module also detects volume changes in the sine wave. However, volume 40# changes may be affected by delay or burst. 41# Some artifacts may cause each other. 42 43# In this module, amplitude and frequency are derived from Hilbert transform. 44# Both amplitude and frequency are a function of time. 45 46# To detect each artifact, each point will be compared with 47# average amplitude of its block. The block size will be 1.5 ms. 48# Using average amplitude can mitigate the error caused by 49# Hilbert transform and noise. 50# In some case, for more accuracy, the block size may be modified 51# to other values. 52DEFAULT_BLOCK_SIZE_SECS = 0.0015 53 54# If the difference between average frequency of this block and 55# dominant frequency of full signal is less than 0.5 times of 56# dominant frequency, this block is considered to be within the 57# sine wave. In most cases, if there is no sine wave(only noise), 58# average frequency will be much greater than 5 times of 59# dominant frequency. 60# Also, for delay during playback, the frequency will be about 0 61# in perfect situation or much greater than 5 times of dominant 62# frequency if it's noised. 63DEFAULT_FREQUENCY_ERROR = 0.5 64 65# If the amplitude of some sample is less than 0.6 times of the 66# average amplitude of its left/right block, it will be considered 67# as a delay during playing. 68DEFAULT_DELAY_AMPLITUDE_THRESHOLD = 0.6 69 70# If the average amplitude of the block before or after playing 71# is more than 0.5 times to the average amplitude of the wave, 72# it will be considered as a noise artifact. 73DEFAULT_NOISE_AMPLITUDE_THRESHOLD = 0.5 74 75# In the sine wave, if the amplitude is more than 1.4 times of 76# its left side and its right side, it will be considered as 77# a burst. 78DEFAULT_BURST_AMPLITUDE_THRESHOLD = 1.4 79 80# When detecting burst, if the amplitude is lower than 0.5 times 81# average amplitude, we ignore it. 82DEFAULT_BURST_TOO_SMALL = 0.5 83 84# For a signal which is the combination of sine wave with fixed frequency f and 85# amplitude 1 and standard noise with amplitude k, the average teager value is 86# nearly linear to the noise level k. 87# Given frequency f, we simulate a sine wave with default noise level and 88# calculate its average teager value. Then, we can estimate the equivalent 89# noise level of input signal by the average teager value of input signal. 90DEFAULT_STANDARD_NOISE = 0.005 91 92# For delay, burst, volume increasing/decreasing, if two delay( 93# burst, volume increasing/decreasing) happen within 94# DEFAULT_SAME_EVENT_SECS seconds, we consider they are the 95# same event. 96DEFAULT_SAME_EVENT_SECS = 0.001 97 98# When detecting increasing/decreasing volume of signal, if the amplitude 99# is lower than 0.1 times average amplitude, we ignore it. 100DEFAULT_VOLUME_CHANGE_TOO_SMALL = 0.1 101 102# If average amplitude of right block is less/more than average 103# amplitude of left block times DEFAULT_VOLUME_CHANGE_AMPLITUDE, it will be 104# considered as decreasing/increasing on volume. 105DEFAULT_VOLUME_CHANGE_AMPLITUDE = 0.1 106 107# If the increasing/decreasing volume event is too close to the start or the end 108# of sine wave, we consider its volume change as part of rising/falling phase in 109# the start/end. 110NEAR_START_OR_END_SECS = 0.01 111 112# After applying Hilbert transform, the resulting amplitude and frequency may be 113# extremely large in the start and/or the end part. Thus, we will append zeros 114# before and after the whole wave for 0.1 secs. 115APPEND_ZEROS_SECS = 0.1 116 117# If the noise event is too close to the start or the end of the data, we 118# consider its noise as part of artifacts caused by edge effect of Hilbert 119# transform. 120# For example, originally, the data duration is 10 seconds. 121# We append 0.1 seconds of zeros in the beginning and the end of the data, so 122# the data becomes 10.2 seocnds long. 123# Then, we apply Hilbert transform to 10.2 seconds of data. 124# Near 0.1 seconds and 10.1 seconds, there will be edge effect of Hilbert 125# transform. We do not want these be treated as noise. 126# If NEAR_DATA_START_OR_END_SECS is set to 0.01, then the noise happened 127# at [0, 0.11] and [10.09, 10.1] will be ignored. 128NEAR_DATA_START_OR_END_SECS = 0.01 129 130# If the noise event is too close to the start or the end of the sine wave in 131# the data, we consider its noise as part of artifacts caused by edge effect of 132# Hilbert transform. 133# A |-------------|vvvvvvvvvvvvvvvvvvvvvvv|-------------| 134# B |ooooooooo| d | | d |ooooooooo| 135# 136# A is full signal. It contains a sine wave and silence before and after sine 137# wave. 138# In B, |oooo| shows the parts that we are going to check for noise before/after 139# sine wave. | d | is determined by NEAR_SINE_START_OR_END_SECS. 140NEAR_SINE_START_OR_END_SECS = 0.01 141 142 143class SineWaveNotFound(Exception): 144 """Error when there's no sine wave found in the signal""" 145 pass 146 147 148def hilbert(x): 149 """Hilbert transform copied from scipy. 150 151 More information can be found here: 152 http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.hilbert.html 153 154 Args: 155 x: Real signal data to transform. 156 157 Returns: 158 Analytic signal of x, we can further extract amplitude and 159 frequency from it. 160 161 """ 162 x = numpy.asarray(x) 163 if numpy.iscomplexobj(x): 164 raise ValueError("x must be real.") 165 axis = -1 166 N = x.shape[axis] 167 if N <= 0: 168 raise ValueError("N must be positive.") 169 170 Xf = numpy.fft.fft(x, N, axis=axis) 171 h = numpy.zeros(N) 172 if N % 2 == 0: 173 h[0] = h[N // 2] = 1 174 h[1:N // 2] = 2 175 else: 176 h[0] = 1 177 h[1:(N + 1) // 2] = 2 178 179 if len(x.shape) > 1: 180 ind = [newaxis] * x.ndim 181 ind[axis] = slice(None) 182 h = h[ind] 183 x = numpy.fft.ifft(Xf * h, axis=axis) 184 return x 185 186 187def noised_sine_wave(frequency, rate, noise_level): 188 """Generates a sine wave of 2 second with specified noise level. 189 190 Args: 191 frequency: Frequency of sine wave. 192 rate: Sampling rate in samples per second. Example inputs: 44100, 193 48000 194 noise_level: Required noise level. 195 196 Returns: 197 A sine wave with specified noise level. 198 199 """ 200 wave = [] 201 for index in range(0, rate * 2): 202 sample = 2.0 * math.pi * frequency * float(index) / float(rate) 203 sine_wave = math.sin(sample) 204 noise = noise_level * numpy.random.standard_normal() 205 wave.append(sine_wave + noise) 206 return wave 207 208 209def average_teager_value(wave, amplitude): 210 """Computes the normalized average teager value. 211 212 After averaging the teager value, we will normalize the value by 213 dividing square of amplitude. 214 215 Args: 216 wave: Wave to apply teager operator. 217 amplitude: Average amplitude of given wave. 218 219 Returns: 220 Average teager value. 221 222 """ 223 teager_value, length = 0, len(wave) 224 for i in range(1, length - 1): 225 ith_teager_value = abs(wave[i] * wave[i] - wave[i - 1] * wave[i + 1]) 226 ith_teager_value *= max(1, abs(wave[i])) 227 teager_value += ith_teager_value 228 teager_value = (float(teager_value) / length) / (amplitude**2) 229 return teager_value 230 231 232def noise_level(amplitude, frequency, rate, teager_value_of_input): 233 """Computes the noise level compared with standard_noise. 234 235 For a signal which is the combination of sine wave with fixed frequency f 236 and amplitude 1 and standard noise with amplitude k, the average teager 237 value is nearly linear to the noise level k. 238 Thus, we can compute the average teager value of a sine wave with 239 standard_noise. Then, we can estimate the noise level of given input. 240 241 Args: 242 amplitude: Amplitude of input audio. 243 frequency: Dominant frequency of input audio. 244 rate: Sampling rate in samples per second. Example inputs: 44100, 245 48000 246 teager_value_of_input: Average teager value of input audio. 247 248 Returns: 249 A float value denotes the audio is equivalent to have how many times of 250 noise compared with its amplitude.For example, 0.02 denotes that the 251 wave has a noise which has standard distribution with standard 252 deviation being 0.02 times the amplitude of the wave. 253 254 """ 255 standard_noise = DEFAULT_STANDARD_NOISE 256 257 # Generates the standard sine wave with stdandard_noise level of noise. 258 standard_wave = noised_sine_wave(frequency, rate, standard_noise) 259 260 # Calculates the average teager value. 261 teager_value_of_std_wave = average_teager_value(standard_wave, amplitude) 262 263 return (teager_value_of_input / teager_value_of_std_wave) * standard_noise 264 265 266def error(f1, f2): 267 """Calculates the relative error between f1 and f2. 268 269 Args: 270 f1: Exact value. 271 f2: Test value. 272 273 Returns: 274 Relative error between f1 and f2. 275 276 """ 277 return abs(float(f1) - float(f2)) / float(f1) 278 279 280def hilbert_analysis(signal, rate, block_size): 281 """Finds amplitude and frequency of each time of signal by Hilbert transform. 282 283 Args: 284 signal: The wave to analyze. 285 rate: Sampling rate in samples per second. Example inputs: 44100, 286 48000 287 block_size: The size of block to transform. 288 289 Returns: 290 A tuple of list: (amplitude, frequency) composed of amplitude and 291 frequency of each time. 292 293 """ 294 # To apply Hilbert transform, the wave will be transformed 295 # segment by segment. For each segment, its size will be 296 # block_size and we will only take middle part of it. 297 # Thus, each segment looks like: |-----|=====|=====|-----|. 298 # "=...=" part will be taken while "-...-" part will be ignored. 299 # 300 # The whole size of taken part will be half of block_size 301 # which will be hilbert_block. 302 # The size of each ignored part will be half of hilbert_block 303 # which will be half_hilbert_block. 304 hilbert_block = block_size // 2 305 half_hilbert_block = hilbert_block // 2 306 # As mentioned above, for each block, we will only take middle 307 # part of it. Thus, the whole transformation will be completed as: 308 # |=====|=====|-----| |-----|=====|=====|-----| 309 # |-----|=====|=====|-----| |-----|=====|=====| 310 # |-----|=====|=====|-----| 311 # Specially, beginning and ending part may not have ignored part. 312 length = len(signal) 313 result = [] 314 for left_border in range(0, length, hilbert_block): 315 right_border = min(length, left_border + hilbert_block) 316 temp_left_border = max(0, left_border - half_hilbert_block) 317 temp_right_border = min(length, right_border + half_hilbert_block) 318 temp = hilbert(signal[temp_left_border:temp_right_border]) 319 for index in range(left_border, right_border): 320 result.append(temp[index - temp_left_border]) 321 result = numpy.asarray(result) 322 amplitude = numpy.abs(result) 323 phase = numpy.unwrap(numpy.angle(result)) 324 frequency = numpy.diff(phase) / (2.0 * numpy.pi) * rate 325 #frequency.append(frequency[len(frequency)-1]) 326 frequecny = numpy.append(frequency, frequency[len(frequency) - 1]) 327 return (amplitude, frequency) 328 329 330def find_block_average_value(arr, side_block_size, block_size): 331 """For each index, finds average value of its block, left block, right block. 332 333 It will find average value for each index in the range. 334 335 For each index, the range of its block is 336 [max(0, index - block_size / 2), min(length - 1, index + block_size / 2)] 337 For each index, the range of its left block is 338 [max(0, index - size_block_size), index] 339 For each index, the range of its right block is 340 [index, min(length - 1, index + side_block_size)] 341 342 Args: 343 arr: The array to be computed. 344 side_block_size: the size of the left_block and right_block. 345 block_size: the size of the block. 346 347 Returns: 348 A tuple of lists: (left_block_average_array, 349 right_block_average_array, 350 block_average_array) 351 """ 352 length = len(arr) 353 left_border, right_border = 0, 1 354 left_block_sum = arr[0] 355 right_block_sum = arr[0] 356 left_average_array = numpy.zeros(length) 357 right_average_array = numpy.zeros(length) 358 block_average_array = numpy.zeros(length) 359 for index in range(0, length): 360 while left_border < index - side_block_size: 361 left_block_sum -= arr[left_border] 362 left_border += 1 363 while right_border < min(length, index + side_block_size): 364 right_block_sum += arr[right_border] 365 right_border += 1 366 367 left_average_value = float(left_block_sum) / (index - left_border + 1) 368 right_average_value = float(right_block_sum) / (right_border - index) 369 left_average_array[index] = left_average_value 370 right_average_array[index] = right_average_value 371 372 if index + 1 < length: 373 left_block_sum += arr[index + 1] 374 right_block_sum -= arr[index] 375 left_border, right_border = 0, 1 376 block_sum = 0 377 for index in range(0, length): 378 while left_border < index - block_size / 2: 379 block_sum -= arr[left_border] 380 left_border += 1 381 while right_border < min(length, index + block_size / 2): 382 block_sum += arr[right_border] 383 right_border += 1 384 385 average_value = float(block_sum) / (right_border - left_border) 386 block_average_array[index] = average_value 387 return (left_average_array, right_average_array, block_average_array) 388 389 390def find_start_end_index(dominant_frequency, block_frequency_delta, block_size, 391 frequency_error_threshold): 392 """Finds start and end index of sine wave. 393 394 For each block with size of block_size, we check that whether its frequency 395 is close enough to the dominant_frequency. If yes, we will consider this 396 block to be within the sine wave. 397 Then, it will return the start and end index of sine wave indicating that 398 sine wave is between [start_index, end_index) 399 It's okay if the whole signal only contains sine wave. 400 401 Args: 402 dominant_frequency: Dominant frequency of signal. 403 block_frequency_delta: Average absolute difference between dominant 404 frequency and frequency of each block. For 405 each index, its block is 406 [max(0, index - block_size / 2), 407 min(length - 1, index + block_size / 2)] 408 block_size: Block size in samples. 409 410 Returns: 411 A tuple composed of (start_index, end_index) 412 413 """ 414 length = len(block_frequency_delta) 415 416 # Finds the start/end time index of playing based on dominant frequency 417 start_index, end_index = length - 1, 0 418 for index in range(0, length): 419 left_border = max(0, index - block_size / 2) 420 right_border = min(length - 1, index + block_size / 2) 421 frequency_error = block_frequency_delta[index] / dominant_frequency 422 if frequency_error < frequency_error_threshold: 423 start_index = min(start_index, left_border) 424 end_index = max(end_index, right_border + 1) 425 return (start_index, end_index) 426 427 428def noise_detection(start_index, end_index, block_amplitude, average_amplitude, 429 rate, noise_amplitude_threshold): 430 """Detects noise before/after sine wave. 431 432 If average amplitude of some sample's block before start of wave or after 433 end of wave is more than average_amplitude times noise_amplitude_threshold, 434 it will be considered as a noise. 435 436 Args: 437 start_index: Start index of sine wave. 438 end_index: End index of sine wave. 439 block_amplitude: An array for average amplitude of each block, where 440 amplitude is computed from Hilbert transform. 441 average_amplitude: Average amplitude of sine wave. 442 rate: Sampling rate in samples per second. Example inputs: 44100, 443 48000 444 noise_amplitude_threshold: If the average amplitude of a block is 445 higher than average amplitude of the wave times 446 noise_amplitude_threshold, it will be considered as 447 noise before/after playback. 448 449 Returns: 450 A tuple of lists indicating the time that noise happens: 451 (noise_before_playing, noise_after_playing). 452 453 """ 454 length = len(block_amplitude) 455 amplitude_threshold = average_amplitude * noise_amplitude_threshold 456 same_event_samples = rate * DEFAULT_SAME_EVENT_SECS 457 458 # Detects noise before playing. 459 noise_time_point = [] 460 last_noise_end_time_point = [] 461 previous_noise_index = None 462 times = 0 463 for index in range(0, length): 464 # Ignore noise too close to the beginning or the end of sine wave. 465 # Check the docstring of NEAR_SINE_START_OR_END_SECS. 466 if ((start_index - rate * NEAR_SINE_START_OR_END_SECS) <= index and 467 (index < end_index + rate * NEAR_SINE_START_OR_END_SECS)): 468 continue 469 470 # Ignore noise too close to the beginning or the end of original data. 471 # Check the docstring of NEAR_DATA_START_OR_END_SECS. 472 if (float(index) / rate <= 473 NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS): 474 continue 475 if (float(length - index) / rate <= 476 NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS): 477 continue 478 if block_amplitude[index] > amplitude_threshold: 479 same_event = False 480 if previous_noise_index: 481 same_event = (index - previous_noise_index 482 ) < same_event_samples 483 if not same_event: 484 index_start_sec = float(index) / rate - APPEND_ZEROS_SECS 485 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS 486 noise_time_point.append(index_start_sec) 487 last_noise_end_time_point.append(index_end_sec) 488 times += 1 489 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS 490 last_noise_end_time_point[times - 1] = index_end_sec 491 previous_noise_index = index 492 493 noise_before_playing, noise_after_playing = [], [] 494 for i in range(times): 495 duration = last_noise_end_time_point[i] - noise_time_point[i] 496 if noise_time_point[i] < float(start_index) / rate - APPEND_ZEROS_SECS: 497 noise_before_playing.append((noise_time_point[i], duration)) 498 else: 499 noise_after_playing.append((noise_time_point[i], duration)) 500 501 return (noise_before_playing, noise_after_playing) 502 503 504def delay_detection(start_index, end_index, block_amplitude, average_amplitude, 505 dominant_frequency, rate, left_block_amplitude, 506 right_block_amplitude, block_frequency_delta, 507 delay_amplitude_threshold, frequency_error_threshold): 508 """Detects delay during playing. 509 510 For each sample, we will check whether the average amplitude of its block 511 is less than average amplitude of its left block and its right block times 512 delay_amplitude_threshold. Also, we will check whether the frequency of 513 its block is far from the dominant frequency. 514 If at least one constraint fulfilled, it will be considered as a delay. 515 516 Args: 517 start_index: Start index of sine wave. 518 end_index: End index of sine wave. 519 block_amplitude: An array for average amplitude of each block, where 520 amplitude is computed from Hilbert transform. 521 average_amplitude: Average amplitude of sine wave. 522 dominant_frequency: Dominant frequency of signal. 523 rate: Sampling rate in samples per second. Example inputs: 44100, 524 48000 525 left_block_amplitude: Average amplitude of left block of each index. 526 Ref to find_block_average_value function. 527 right_block_amplitude: Average amplitude of right block of each index. 528 Ref to find_block_average_value function. 529 block_frequency_delta: Average absolute difference frequency to 530 dominant frequency of block of each index. 531 Ref to find_block_average_value function. 532 delay_amplitude_threshold: If the average amplitude of a block is 533 lower than average amplitude of the wave times 534 delay_amplitude_threshold, it will be considered 535 as delay. 536 frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR 537 538 Returns: 539 List of delay occurrence: 540 [(time_1, duration_1), (time_2, duration_2), ...], 541 where time and duration are in seconds. 542 543 """ 544 delay_time_points = [] 545 last_delay_end_time_points = [] 546 previous_delay_index = None 547 times = 0 548 same_event_samples = rate * DEFAULT_SAME_EVENT_SECS 549 start_time = float(start_index) / rate - APPEND_ZEROS_SECS 550 end_time = float(end_index) / rate - APPEND_ZEROS_SECS 551 for index in range(int(start_index), int(end_index)): 552 if block_amplitude[ 553 index] > average_amplitude * delay_amplitude_threshold: 554 continue 555 now_time = float(index) / rate - APPEND_ZEROS_SECS 556 if abs(now_time - start_time) < NEAR_START_OR_END_SECS: 557 continue 558 if abs(now_time - end_time) < NEAR_START_OR_END_SECS: 559 continue 560 # If amplitude less than its left/right side and small enough, 561 # it will be considered as a delay. 562 amp_threshold = average_amplitude * delay_amplitude_threshold 563 left_threshold = delay_amplitude_threshold * left_block_amplitude[ 564 index] 565 amp_threshold = min(amp_threshold, left_threshold) 566 right_threshold = delay_amplitude_threshold * right_block_amplitude[ 567 index] 568 amp_threshold = min(amp_threshold, right_threshold) 569 570 frequency_error = block_frequency_delta[index] / dominant_frequency 571 572 amplitude_too_small = block_amplitude[index] < amp_threshold 573 frequency_not_match = frequency_error > frequency_error_threshold 574 575 if amplitude_too_small or frequency_not_match: 576 same_event = False 577 if previous_delay_index: 578 same_event = (index - previous_delay_index 579 ) < same_event_samples 580 if not same_event: 581 index_start_sec = float(index) / rate - APPEND_ZEROS_SECS 582 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS 583 delay_time_points.append(index_start_sec) 584 last_delay_end_time_points.append(index_end_sec) 585 times += 1 586 previous_delay_index = index 587 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS 588 last_delay_end_time_points[times - 1] = index_end_sec 589 590 delay_list = [] 591 for i in range(len(delay_time_points)): 592 duration = last_delay_end_time_points[i] - delay_time_points[i] 593 delay_list.append((delay_time_points[i], duration)) 594 return delay_list 595 596 597def burst_detection(start_index, end_index, block_amplitude, average_amplitude, 598 dominant_frequency, rate, left_block_amplitude, 599 right_block_amplitude, block_frequency_delta, 600 burst_amplitude_threshold, frequency_error_threshold): 601 """Detects burst during playing. 602 603 For each sample, we will check whether the average amplitude of its block is 604 more than average amplitude of its left block and its right block times 605 burst_amplitude_threshold. Also, we will check whether the frequency of 606 its block is not compatible to the dominant frequency. 607 If at least one constraint fulfilled, it will be considered as a burst. 608 609 Args: 610 start_index: Start index of sine wave. 611 end_index: End index of sine wave. 612 block_amplitude: An array for average amplitude of each block, where 613 amplitude is computed from Hilbert transform. 614 average_amplitude: Average amplitude of sine wave. 615 dominant_frequency: Dominant frequency of signal. 616 rate: Sampling rate in samples per second. Example inputs: 44100, 617 48000 618 left_block_amplitude: Average amplitude of left block of each index. 619 Ref to find_block_average_value function. 620 right_block_amplitude: Average amplitude of right block of each index. 621 Ref to find_block_average_value function. 622 block_frequency_delta: Average absolute difference frequency to 623 dominant frequency of block of each index. 624 burst_amplitude_threshold: If the amplitude is higher than average 625 amplitude of its left block and its right block 626 times burst_amplitude_threshold. It will be 627 considered as a burst. 628 frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR 629 630 Returns: 631 List of burst occurence: [time_1, time_2, ...], 632 where time is in seconds. 633 634 """ 635 burst_time_points = [] 636 previous_burst_index = None 637 same_event_samples = rate * DEFAULT_SAME_EVENT_SECS 638 for index in range(int(start_index), int(end_index)): 639 # If amplitude higher than its left/right side and large enough, 640 # it will be considered as a burst. 641 if block_amplitude[ 642 index] <= average_amplitude * DEFAULT_BURST_TOO_SMALL: 643 continue 644 if abs(index - start_index) < rate * NEAR_START_OR_END_SECS: 645 continue 646 if abs(index - end_index) < rate * NEAR_START_OR_END_SECS: 647 continue 648 amp_threshold = average_amplitude * DEFAULT_BURST_TOO_SMALL 649 left_threshold = burst_amplitude_threshold * left_block_amplitude[ 650 index] 651 amp_threshold = max(amp_threshold, left_threshold) 652 right_threshold = burst_amplitude_threshold * right_block_amplitude[ 653 index] 654 amp_threshold = max(amp_threshold, right_threshold) 655 656 frequency_error = block_frequency_delta[index] / dominant_frequency 657 658 amplitude_too_large = block_amplitude[index] > amp_threshold 659 frequency_not_match = frequency_error > frequency_error_threshold 660 661 if amplitude_too_large or frequency_not_match: 662 same_event = False 663 if previous_burst_index: 664 same_event = index - previous_burst_index < same_event_samples 665 if not same_event: 666 burst_time_points.append( 667 float(index) / rate - APPEND_ZEROS_SECS) 668 previous_burst_index = index 669 670 return burst_time_points 671 672 673def changing_volume_detection(start_index, end_index, average_amplitude, rate, 674 left_block_amplitude, right_block_amplitude, 675 volume_changing_amplitude_threshold): 676 """Finds volume changing during playback. 677 678 For each index, we will compare average amplitude of its left block and its 679 right block. If average amplitude of right block is more than average 680 amplitude of left block times (1 + DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will 681 be considered as an increasing volume. If the one of right block is less 682 than that of left block times (1 - DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will 683 be considered as a decreasing volume. 684 685 Args: 686 start_index: Start index of sine wave. 687 end_index: End index of sine wave. 688 average_amplitude: Average amplitude of sine wave. 689 rate: Sampling rate in samples per second. Example inputs: 44100, 690 48000 691 left_block_amplitude: Average amplitude of left block of each index. 692 Ref to find_block_average_value function. 693 right_block_amplitude: Average amplitude of right block of each index. 694 Ref to find_block_average_value function. 695 volume_changing_amplitude_threshold: If the average amplitude of right 696 block is higher or lower than 697 that of left one times this 698 value, it will be considered as 699 a volume change. 700 Also refer to 701 DEFAULT_VOLUME_CHANGE_AMPLITUDE 702 703 Returns: 704 List of volume changing composed of 1 for increasing and -1 for 705 decreasing. 706 707 """ 708 length = len(left_block_amplitude) 709 710 # Detects rising and/or falling volume. 711 previous_rising_index, previous_falling_index = None, None 712 changing_time = [] 713 changing_events = [] 714 amplitude_threshold = average_amplitude * DEFAULT_VOLUME_CHANGE_TOO_SMALL 715 same_event_samples = rate * DEFAULT_SAME_EVENT_SECS 716 for index in range(int(start_index), int(end_index)): 717 # Skips if amplitude is too small. 718 if left_block_amplitude[index] < amplitude_threshold: 719 continue 720 if right_block_amplitude[index] < amplitude_threshold: 721 continue 722 # Skips if changing is from start or end time 723 if float(abs(start_index - index)) / rate < NEAR_START_OR_END_SECS: 724 continue 725 if float(abs(end_index - index)) / rate < NEAR_START_OR_END_SECS: 726 continue 727 728 delta_margin = volume_changing_amplitude_threshold 729 if left_block_amplitude[index] > 0: 730 delta_margin *= left_block_amplitude[index] 731 732 increasing_threshold = left_block_amplitude[index] + delta_margin 733 decreasing_threshold = left_block_amplitude[index] - delta_margin 734 735 if right_block_amplitude[index] > increasing_threshold: 736 same_event = False 737 if previous_rising_index: 738 same_event = index - previous_rising_index < same_event_samples 739 if not same_event: 740 changing_time.append(float(index) / rate - APPEND_ZEROS_SECS) 741 changing_events.append(+1) 742 previous_rising_index = index 743 if right_block_amplitude[index] < decreasing_threshold: 744 same_event = False 745 if previous_falling_index: 746 same_event = index - previous_falling_index < same_event_samples 747 if not same_event: 748 changing_time.append(float(index) / rate - APPEND_ZEROS_SECS) 749 changing_events.append(-1) 750 previous_falling_index = index 751 752 # Combines consecutive increasing/decreasing event. 753 combined_changing_events, prev = [], 0 754 for i in range(len(changing_events)): 755 if changing_events[i] == prev: 756 continue 757 combined_changing_events.append((changing_time[i], changing_events[i])) 758 prev = changing_events[i] 759 return combined_changing_events 760 761 762def quality_measurement( 763 signal, 764 rate, 765 dominant_frequency=None, 766 block_size_secs=DEFAULT_BLOCK_SIZE_SECS, 767 frequency_error_threshold=DEFAULT_FREQUENCY_ERROR, 768 delay_amplitude_threshold=DEFAULT_DELAY_AMPLITUDE_THRESHOLD, 769 noise_amplitude_threshold=DEFAULT_NOISE_AMPLITUDE_THRESHOLD, 770 burst_amplitude_threshold=DEFAULT_BURST_AMPLITUDE_THRESHOLD, 771 volume_changing_amplitude_threshold=DEFAULT_VOLUME_CHANGE_AMPLITUDE): 772 """Detects several artifacts and estimates the noise level. 773 774 This method detects artifact before playing, after playing, and delay 775 during playing. Also, it estimates the noise level of the signal. 776 To avoid the influence of noise, it calculates amplitude and frequency 777 block by block. 778 779 Args: 780 signal: A list of numbers for one-channel PCM data. The data should 781 be normalized to [-1,1]. 782 rate: Sampling rate in samples per second. Example inputs: 44100, 783 48000 784 dominant_frequency: Dominant frequency of signal. Set None to 785 recalculate the frequency in this function. 786 block_size_secs: Block size in seconds. The measurement will be done 787 block-by-block using average amplitude and frequency 788 in each block to avoid noise. 789 frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR. 790 delay_amplitude_threshold: If the average amplitude of a block is 791 lower than average amplitude of the wave 792 times delay_amplitude_threshold, it will 793 be considered as delay. 794 Also refer to delay_detection and 795 DEFAULT_DELAY_AMPLITUDE_THRESHOLD. 796 noise_amplitude_threshold: If the average amplitude of a block is 797 higher than average amplitude of the wave 798 times noise_amplitude_threshold, it will 799 be considered as noise before/after 800 playback. 801 Also refer to noise_detection and 802 DEFAULT_NOISE_AMPLITUDE_THRESHOLD. 803 burst_amplitude_threshold: If the average amplitude of a block is 804 higher than average amplitude of its left 805 block and its right block times 806 burst_amplitude_threshold. It will be 807 considered as a burst. 808 Also refer to burst_detection and 809 DEFAULT_BURST_AMPLITUDE_THRESHOLD. 810 volume_changing_amplitude_threshold: If the average amplitude of right 811 block is higher or lower than 812 that of left one times this 813 value, it will be considered as 814 a volume change. 815 Also refer to 816 changing_volume_detection and 817 DEFAULT_VOLUME_CHANGE_AMPLITUDE 818 819 Returns: 820 A dictoinary of detection/estimation: 821 {'artifacts': 822 {'noise_before_playback': 823 [(time_1, duration_1), (time_2, duration_2), ...], 824 'noise_after_playback': 825 [(time_1, duration_1), (time_2, duration_2), ...], 826 'delay_during_playback': 827 [(time_1, duration_1), (time_2, duration_2), ...], 828 'burst_during_playback': 829 [time_1, time_2, ...] 830 }, 831 'volume_changes': 832 [(time_1, flag_1), (time_2, flag_2), ...], 833 'equivalent_noise_level': level 834 } 835 where durations and time points are in seconds. And, 836 equivalence_noise_level is the quotient of noise and wave which 837 refers to DEFAULT_STANDARD_NOISE. volume_changes is a list of 838 tuples containing time stamps and decreasing/increasing flags for 839 volume change events. 840 841 """ 842 # Calculates the block size, from seconds to samples. 843 block_size = int(block_size_secs * rate) 844 845 signal = numpy.concatenate( 846 (numpy.zeros(int(rate * APPEND_ZEROS_SECS)), signal, 847 numpy.zeros(int(rate * APPEND_ZEROS_SECS)))) 848 signal = numpy.array(signal, dtype=float) 849 length = len(signal) 850 851 # Calculates the amplitude and frequency. 852 amplitude, frequency = hilbert_analysis(signal, rate, block_size) 853 854 # Finds the dominant frequency. 855 if not dominant_frequency: 856 dominant_frequency = audio_analysis.spectral_analysis(signal, 857 rate)[0][0] 858 859 # Finds the array which contains absolute difference between dominant 860 # frequency and frequency at each time point. 861 frequency_delta = abs(frequency - dominant_frequency) 862 863 # Computes average amplitude of each type of block 864 res = find_block_average_value(amplitude, block_size * 2, block_size) 865 left_block_amplitude, right_block_amplitude, block_amplitude = res 866 867 # Computes average absolute difference of frequency and dominant frequency 868 # of the block of each index 869 _, _, block_frequency_delta = find_block_average_value( 870 frequency_delta, block_size * 2, block_size) 871 872 # Finds start and end index of sine wave. 873 start_index, end_index = find_start_end_index( 874 dominant_frequency, block_frequency_delta, block_size, 875 frequency_error_threshold) 876 877 if start_index > end_index: 878 raise SineWaveNotFound('No sine wave found in signal') 879 880 logging.debug('Found sine wave: start: %s, end: %s', 881 float(start_index) / rate - APPEND_ZEROS_SECS, 882 float(end_index) / rate - APPEND_ZEROS_SECS) 883 884 sum_of_amplitude = float(sum(amplitude[int(start_index):int(end_index)])) 885 # Finds average amplitude of sine wave. 886 average_amplitude = sum_of_amplitude / (end_index - start_index) 887 888 # Finds noise before and/or after playback. 889 noise_before_playing, noise_after_playing = noise_detection( 890 start_index, end_index, block_amplitude, average_amplitude, rate, 891 noise_amplitude_threshold) 892 893 # Finds delay during playback. 894 delays = delay_detection(start_index, end_index, block_amplitude, 895 average_amplitude, dominant_frequency, rate, 896 left_block_amplitude, right_block_amplitude, 897 block_frequency_delta, delay_amplitude_threshold, 898 frequency_error_threshold) 899 900 # Finds burst during playback. 901 burst_time_points = burst_detection( 902 start_index, end_index, block_amplitude, average_amplitude, 903 dominant_frequency, rate, left_block_amplitude, right_block_amplitude, 904 block_frequency_delta, burst_amplitude_threshold, 905 frequency_error_threshold) 906 907 # Finds volume changing during playback. 908 volume_changes = changing_volume_detection( 909 start_index, end_index, average_amplitude, rate, left_block_amplitude, 910 right_block_amplitude, volume_changing_amplitude_threshold) 911 912 # Calculates the average teager value. 913 teager_value = average_teager_value( 914 signal[int(start_index):int(end_index)], average_amplitude) 915 916 # Finds out the noise level. 917 noise = noise_level(average_amplitude, dominant_frequency, rate, 918 teager_value) 919 920 return { 921 'artifacts': { 922 'noise_before_playback': noise_before_playing, 923 'noise_after_playback': noise_after_playing, 924 'delay_during_playback': delays, 925 'burst_during_playback': burst_time_points 926 }, 927 'volume_changes': volume_changes, 928 'equivalent_noise_level': noise 929 } 930