• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3#   Copyright 2017 - The Android Open Source Project
4#
5#   Licensed under the Apache License, Version 2.0 (the "License");
6#   you may not use this file except in compliance with the License.
7#   You may obtain a copy of the License at
8#
9#       http://www.apache.org/licenses/LICENSE-2.0
10#
11#   Unless required by applicable law or agreed to in writing, software
12#   distributed under the License is distributed on an "AS IS" BASIS,
13#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14#   See the License for the specific language governing permissions and
15#   limitations under the License.
16"""This module provides utilities to detect some artifacts and measure the
17    quality of audio."""
18
19import logging
20import math
21import numpy
22
23import acts_contrib.test_utils.audio_analysis_lib.audio_analysis as audio_analysis
24
25# The input signal should be one sine wave with fixed frequency which
26# can have silence before and/or after sine wave.
27# For example:
28#   silence      sine wave      silence
29#  -----------|VVVVVVVVVVVVV|-----------
30#     (a)           (b)           (c)
31# This module detects these artifacts:
32#   1. Detect noise in (a) and (c).
33#   2. Detect delay in (b).
34#   3. Detect burst in (b).
35# Assume the transitions between (a)(b) and (b)(c) are smooth and
36# amplitude increases/decreases linearly.
37# This module will detect artifacts in the sine wave.
38# This module also estimates the equivalent noise level by teager operator.
39# This module also detects volume changes in the sine wave. However, volume
40# changes may be affected by delay or burst.
41# Some artifacts may cause each other.
42
43# In this module, amplitude and frequency are derived from Hilbert transform.
44# Both amplitude and frequency are a function of time.
45
46# To detect each artifact, each point will be compared with
47# average amplitude of its block. The block size will be 1.5 ms.
48# Using average amplitude can mitigate the error caused by
49# Hilbert transform and noise.
50# In some case, for more accuracy, the block size may be modified
51# to other values.
52DEFAULT_BLOCK_SIZE_SECS = 0.0015
53
54# If the difference between average frequency of this block and
55# dominant frequency of full signal is less than 0.5 times of
56# dominant frequency, this block is considered to be within the
57# sine wave. In most cases, if there is no sine wave(only noise),
58# average frequency will be much greater than 5 times of
59# dominant frequency.
60# Also, for delay during playback, the frequency will be about 0
61# in perfect situation or much greater than 5 times of dominant
62# frequency if it's noised.
63DEFAULT_FREQUENCY_ERROR = 0.5
64
65# If the amplitude of some sample is less than 0.6 times of the
66# average amplitude of its left/right block, it will be considered
67# as a delay during playing.
68DEFAULT_DELAY_AMPLITUDE_THRESHOLD = 0.6
69
70# If the average amplitude of the block before or after playing
71# is more than 0.5 times to the average amplitude of the wave,
72# it will be considered as a noise artifact.
73DEFAULT_NOISE_AMPLITUDE_THRESHOLD = 0.5
74
75# In the sine wave, if the amplitude is more than 1.4 times of
76# its left side and its right side, it will be considered as
77# a burst.
78DEFAULT_BURST_AMPLITUDE_THRESHOLD = 1.4
79
80# When detecting burst, if the amplitude is lower than 0.5 times
81# average amplitude, we ignore it.
82DEFAULT_BURST_TOO_SMALL = 0.5
83
84# For a signal which is the combination of sine wave with fixed frequency f and
85# amplitude 1 and standard noise with amplitude k, the average teager value is
86# nearly linear to the noise level k.
87# Given frequency f, we simulate a sine wave with default noise level and
88# calculate its average teager value. Then, we can estimate the equivalent
89# noise level of input signal by the average teager value of input signal.
90DEFAULT_STANDARD_NOISE = 0.005
91
92# For delay, burst, volume increasing/decreasing, if two delay(
93# burst, volume increasing/decreasing) happen within
94# DEFAULT_SAME_EVENT_SECS seconds, we consider they are the
95# same event.
96DEFAULT_SAME_EVENT_SECS = 0.001
97
98# When detecting increasing/decreasing volume of signal, if the amplitude
99# is lower than 0.1 times average amplitude, we ignore it.
100DEFAULT_VOLUME_CHANGE_TOO_SMALL = 0.1
101
102# If average amplitude of right block is less/more than average
103# amplitude of left block times DEFAULT_VOLUME_CHANGE_AMPLITUDE, it will be
104# considered as decreasing/increasing on volume.
105DEFAULT_VOLUME_CHANGE_AMPLITUDE = 0.1
106
107# If the increasing/decreasing volume event is too close to the start or the end
108# of sine wave, we consider its volume change as part of rising/falling phase in
109# the start/end.
110NEAR_START_OR_END_SECS = 0.01
111
112# After applying Hilbert transform, the resulting amplitude and frequency may be
113# extremely large in the start and/or the end part. Thus, we will append zeros
114# before and after the whole wave for 0.1 secs.
115APPEND_ZEROS_SECS = 0.1
116
117# If the noise event is too close to the start or the end of the data, we
118# consider its noise as part of artifacts caused by edge effect of Hilbert
119# transform.
120# For example, originally, the data duration is 10 seconds.
121# We append 0.1 seconds of zeros in the beginning and the end of the data, so
122# the data becomes 10.2 seocnds long.
123# Then, we apply Hilbert transform to 10.2 seconds of data.
124# Near 0.1 seconds and 10.1 seconds, there will be edge effect of Hilbert
125# transform. We do not want these be treated as noise.
126# If NEAR_DATA_START_OR_END_SECS is set to 0.01, then the noise happened
127# at [0, 0.11] and [10.09, 10.1] will be ignored.
128NEAR_DATA_START_OR_END_SECS = 0.01
129
130# If the noise event is too close to the start or the end of the sine wave in
131# the data, we consider its noise as part of artifacts caused by edge effect of
132# Hilbert transform.
133# A |-------------|vvvvvvvvvvvvvvvvvvvvvvv|-------------|
134# B |ooooooooo| d |                       | d |ooooooooo|
135#
136# A is full signal. It contains a sine wave and silence before and after sine
137# wave.
138# In B, |oooo| shows the parts that we are going to check for noise before/after
139# sine wave. | d | is determined by NEAR_SINE_START_OR_END_SECS.
140NEAR_SINE_START_OR_END_SECS = 0.01
141
142
143class SineWaveNotFound(Exception):
144    """Error when there's no sine wave found in the signal"""
145    pass
146
147
148def hilbert(x):
149    """Hilbert transform copied from scipy.
150
151    More information can be found here:
152    http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.hilbert.html
153
154    Args:
155        x: Real signal data to transform.
156
157    Returns:
158        Analytic signal of x, we can further extract amplitude and
159              frequency from it.
160
161    """
162    x = numpy.asarray(x)
163    if numpy.iscomplexobj(x):
164        raise ValueError("x must be real.")
165    axis = -1
166    N = x.shape[axis]
167    if N <= 0:
168        raise ValueError("N must be positive.")
169
170    Xf = numpy.fft.fft(x, N, axis=axis)
171    h = numpy.zeros(N)
172    if N % 2 == 0:
173        h[0] = h[N // 2] = 1
174        h[1:N // 2] = 2
175    else:
176        h[0] = 1
177        h[1:(N + 1) // 2] = 2
178
179    if len(x.shape) > 1:
180        ind = [newaxis] * x.ndim
181        ind[axis] = slice(None)
182        h = h[ind]
183    x = numpy.fft.ifft(Xf * h, axis=axis)
184    return x
185
186
187def noised_sine_wave(frequency, rate, noise_level):
188    """Generates a sine wave of 2 second with specified noise level.
189
190    Args:
191        frequency: Frequency of sine wave.
192        rate: Sampling rate in samples per second. Example inputs: 44100,
193        48000
194        noise_level: Required noise level.
195
196    Returns:
197        A sine wave with specified noise level.
198
199    """
200    wave = []
201    for index in range(0, rate * 2):
202        sample = 2.0 * math.pi * frequency * float(index) / float(rate)
203        sine_wave = math.sin(sample)
204        noise = noise_level * numpy.random.standard_normal()
205        wave.append(sine_wave + noise)
206    return wave
207
208
209def average_teager_value(wave, amplitude):
210    """Computes the normalized average teager value.
211
212    After averaging the teager value, we will normalize the value by
213    dividing square of amplitude.
214
215    Args:
216        wave: Wave to apply teager operator.
217        amplitude: Average amplitude of given wave.
218
219    Returns:
220        Average teager value.
221
222    """
223    teager_value, length = 0, len(wave)
224    for i in range(1, length - 1):
225        ith_teager_value = abs(wave[i] * wave[i] - wave[i - 1] * wave[i + 1])
226        ith_teager_value *= max(1, abs(wave[i]))
227        teager_value += ith_teager_value
228    teager_value = (float(teager_value) / length) / (amplitude**2)
229    return teager_value
230
231
232def noise_level(amplitude, frequency, rate, teager_value_of_input):
233    """Computes the noise level compared with standard_noise.
234
235    For a signal which is the combination of sine wave with fixed frequency f
236    and amplitude 1 and standard noise with amplitude k, the average teager
237    value is nearly linear to the noise level k.
238    Thus, we can compute the average teager value of a sine wave with
239    standard_noise. Then, we can estimate the noise level of given input.
240
241    Args:
242        amplitude: Amplitude of input audio.
243        frequency: Dominant frequency of input audio.
244        rate: Sampling rate in samples per second. Example inputs: 44100,
245        48000
246        teager_value_of_input: Average teager value of input audio.
247
248    Returns:
249        A float value denotes the audio is equivalent to have how many times of
250            noise compared with its amplitude.For example, 0.02 denotes that the
251            wave has a noise which has standard distribution with standard
252            deviation being 0.02 times the amplitude of the wave.
253
254    """
255    standard_noise = DEFAULT_STANDARD_NOISE
256
257    # Generates the standard sine wave with stdandard_noise level of noise.
258    standard_wave = noised_sine_wave(frequency, rate, standard_noise)
259
260    # Calculates the average teager value.
261    teager_value_of_std_wave = average_teager_value(standard_wave, amplitude)
262
263    return (teager_value_of_input / teager_value_of_std_wave) * standard_noise
264
265
266def error(f1, f2):
267    """Calculates the relative error between f1 and f2.
268
269    Args:
270        f1: Exact value.
271        f2: Test value.
272
273    Returns:
274        Relative error between f1 and f2.
275
276    """
277    return abs(float(f1) - float(f2)) / float(f1)
278
279
280def hilbert_analysis(signal, rate, block_size):
281    """Finds amplitude and frequency of each time of signal by Hilbert transform.
282
283    Args:
284        signal: The wave to analyze.
285        rate: Sampling rate in samples per second. Example inputs: 44100,
286        48000
287        block_size: The size of block to transform.
288
289    Returns:
290        A tuple of list: (amplitude, frequency) composed of amplitude and
291            frequency of each time.
292
293    """
294    # To apply Hilbert transform, the wave will be transformed
295    # segment by segment. For each segment, its size will be
296    # block_size and we will only take middle part of it.
297    # Thus, each segment looks like: |-----|=====|=====|-----|.
298    # "=...=" part will be taken while "-...-" part will be ignored.
299    #
300    # The whole size of taken part will be half of block_size
301    # which will be hilbert_block.
302    # The size of each ignored part will be half of hilbert_block
303    # which will be half_hilbert_block.
304    hilbert_block = block_size // 2
305    half_hilbert_block = hilbert_block // 2
306    # As mentioned above, for each block, we will only take middle
307    # part of it. Thus, the whole transformation will be completed as:
308    # |=====|=====|-----|           |-----|=====|=====|-----|
309    #       |-----|=====|=====|-----|           |-----|=====|=====|
310    #                   |-----|=====|=====|-----|
311    # Specially, beginning and ending part may not have ignored part.
312    length = len(signal)
313    result = []
314    for left_border in range(0, length, hilbert_block):
315        right_border = min(length, left_border + hilbert_block)
316        temp_left_border = max(0, left_border - half_hilbert_block)
317        temp_right_border = min(length, right_border + half_hilbert_block)
318        temp = hilbert(signal[temp_left_border:temp_right_border])
319        for index in range(left_border, right_border):
320            result.append(temp[index - temp_left_border])
321    result = numpy.asarray(result)
322    amplitude = numpy.abs(result)
323    phase = numpy.unwrap(numpy.angle(result))
324    frequency = numpy.diff(phase) / (2.0 * numpy.pi) * rate
325    #frequency.append(frequency[len(frequency)-1])
326    frequecny = numpy.append(frequency, frequency[len(frequency) - 1])
327    return (amplitude, frequency)
328
329
330def find_block_average_value(arr, side_block_size, block_size):
331    """For each index, finds average value of its block, left block, right block.
332
333    It will find average value for each index in the range.
334
335    For each index, the range of its block is
336        [max(0, index - block_size / 2), min(length - 1, index + block_size / 2)]
337    For each index, the range of its left block is
338        [max(0, index - size_block_size), index]
339    For each index, the range of its right block is
340        [index, min(length - 1, index + side_block_size)]
341
342    Args:
343        arr: The array to be computed.
344        side_block_size: the size of the left_block and right_block.
345        block_size: the size of the block.
346
347    Returns:
348        A tuple of lists: (left_block_average_array,
349                                 right_block_average_array,
350                                 block_average_array)
351    """
352    length = len(arr)
353    left_border, right_border = 0, 1
354    left_block_sum = arr[0]
355    right_block_sum = arr[0]
356    left_average_array = numpy.zeros(length)
357    right_average_array = numpy.zeros(length)
358    block_average_array = numpy.zeros(length)
359    for index in range(0, length):
360        while left_border < index - side_block_size:
361            left_block_sum -= arr[left_border]
362            left_border += 1
363        while right_border < min(length, index + side_block_size):
364            right_block_sum += arr[right_border]
365            right_border += 1
366
367        left_average_value = float(left_block_sum) / (index - left_border + 1)
368        right_average_value = float(right_block_sum) / (right_border - index)
369        left_average_array[index] = left_average_value
370        right_average_array[index] = right_average_value
371
372        if index + 1 < length:
373            left_block_sum += arr[index + 1]
374        right_block_sum -= arr[index]
375    left_border, right_border = 0, 1
376    block_sum = 0
377    for index in range(0, length):
378        while left_border < index - block_size / 2:
379            block_sum -= arr[left_border]
380            left_border += 1
381        while right_border < min(length, index + block_size / 2):
382            block_sum += arr[right_border]
383            right_border += 1
384
385        average_value = float(block_sum) / (right_border - left_border)
386        block_average_array[index] = average_value
387    return (left_average_array, right_average_array, block_average_array)
388
389
390def find_start_end_index(dominant_frequency, block_frequency_delta, block_size,
391                         frequency_error_threshold):
392    """Finds start and end index of sine wave.
393
394    For each block with size of block_size, we check that whether its frequency
395    is close enough to the dominant_frequency. If yes, we will consider this
396    block to be within the sine wave.
397    Then, it will return the start and end index of sine wave indicating that
398    sine wave is between [start_index, end_index)
399    It's okay if the whole signal only contains sine wave.
400
401    Args:
402        dominant_frequency: Dominant frequency of signal.
403        block_frequency_delta: Average absolute difference between dominant
404                                  frequency and frequency of each block. For
405                                  each index, its block is
406                                  [max(0, index - block_size / 2),
407                                   min(length - 1, index + block_size / 2)]
408        block_size: Block size in samples.
409
410    Returns:
411        A tuple composed of (start_index, end_index)
412
413    """
414    length = len(block_frequency_delta)
415
416    # Finds the start/end time index of playing based on dominant frequency
417    start_index, end_index = length - 1, 0
418    for index in range(0, length):
419        left_border = max(0, index - block_size / 2)
420        right_border = min(length - 1, index + block_size / 2)
421        frequency_error = block_frequency_delta[index] / dominant_frequency
422        if frequency_error < frequency_error_threshold:
423            start_index = min(start_index, left_border)
424            end_index = max(end_index, right_border + 1)
425    return (start_index, end_index)
426
427
428def noise_detection(start_index, end_index, block_amplitude, average_amplitude,
429                    rate, noise_amplitude_threshold):
430    """Detects noise before/after sine wave.
431
432    If average amplitude of some sample's block before start of wave or after
433    end of wave is more than average_amplitude times noise_amplitude_threshold,
434    it will be considered as a noise.
435
436    Args:
437        start_index: Start index of sine wave.
438        end_index: End index of sine wave.
439        block_amplitude: An array for average amplitude of each block, where
440                            amplitude is computed from Hilbert transform.
441        average_amplitude: Average amplitude of sine wave.
442        rate: Sampling rate in samples per second. Example inputs: 44100,
443        48000
444        noise_amplitude_threshold: If the average amplitude of a block is
445                        higher than average amplitude of the wave times
446                        noise_amplitude_threshold, it will be considered as
447                        noise before/after playback.
448
449    Returns:
450        A tuple of lists indicating the time that noise happens:
451            (noise_before_playing, noise_after_playing).
452
453    """
454    length = len(block_amplitude)
455    amplitude_threshold = average_amplitude * noise_amplitude_threshold
456    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
457
458    # Detects noise before playing.
459    noise_time_point = []
460    last_noise_end_time_point = []
461    previous_noise_index = None
462    times = 0
463    for index in range(0, length):
464        # Ignore noise too close to the beginning or the end of sine wave.
465        # Check the docstring of NEAR_SINE_START_OR_END_SECS.
466        if ((start_index - rate * NEAR_SINE_START_OR_END_SECS) <= index and
467            (index < end_index + rate * NEAR_SINE_START_OR_END_SECS)):
468            continue
469
470        # Ignore noise too close to the beginning or the end of original data.
471        # Check the docstring of NEAR_DATA_START_OR_END_SECS.
472        if (float(index) / rate <=
473                NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
474            continue
475        if (float(length - index) / rate <=
476                NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
477            continue
478        if block_amplitude[index] > amplitude_threshold:
479            same_event = False
480            if previous_noise_index:
481                same_event = (index - previous_noise_index
482                              ) < same_event_samples
483            if not same_event:
484                index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
485                index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
486                noise_time_point.append(index_start_sec)
487                last_noise_end_time_point.append(index_end_sec)
488                times += 1
489            index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
490            last_noise_end_time_point[times - 1] = index_end_sec
491            previous_noise_index = index
492
493    noise_before_playing, noise_after_playing = [], []
494    for i in range(times):
495        duration = last_noise_end_time_point[i] - noise_time_point[i]
496        if noise_time_point[i] < float(start_index) / rate - APPEND_ZEROS_SECS:
497            noise_before_playing.append((noise_time_point[i], duration))
498        else:
499            noise_after_playing.append((noise_time_point[i], duration))
500
501    return (noise_before_playing, noise_after_playing)
502
503
504def delay_detection(start_index, end_index, block_amplitude, average_amplitude,
505                    dominant_frequency, rate, left_block_amplitude,
506                    right_block_amplitude, block_frequency_delta,
507                    delay_amplitude_threshold, frequency_error_threshold):
508    """Detects delay during playing.
509
510    For each sample, we will check whether the average amplitude of its block
511    is less than average amplitude of its left block and its right block times
512    delay_amplitude_threshold. Also, we will check whether the frequency of
513    its block is far from the dominant frequency.
514    If at least one constraint fulfilled, it will be considered as a delay.
515
516    Args:
517        start_index: Start index of sine wave.
518        end_index: End index of sine wave.
519        block_amplitude: An array for average amplitude of each block, where
520                            amplitude is computed from Hilbert transform.
521        average_amplitude: Average amplitude of sine wave.
522        dominant_frequency: Dominant frequency of signal.
523        rate: Sampling rate in samples per second. Example inputs: 44100,
524        48000
525        left_block_amplitude: Average amplitude of left block of each index.
526                                Ref to find_block_average_value function.
527        right_block_amplitude: Average amplitude of right block of each index.
528                                Ref to find_block_average_value function.
529        block_frequency_delta: Average absolute difference frequency to
530                                dominant frequency of block of each index.
531                                Ref to find_block_average_value function.
532        delay_amplitude_threshold: If the average amplitude of a block is
533                        lower than average amplitude of the wave times
534                        delay_amplitude_threshold, it will be considered
535                        as delay.
536        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
537
538    Returns:
539        List of delay occurrence:
540                [(time_1, duration_1), (time_2, duration_2), ...],
541              where time and duration are in seconds.
542
543    """
544    delay_time_points = []
545    last_delay_end_time_points = []
546    previous_delay_index = None
547    times = 0
548    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
549    start_time = float(start_index) / rate - APPEND_ZEROS_SECS
550    end_time = float(end_index) / rate - APPEND_ZEROS_SECS
551    for index in range(int(start_index), int(end_index)):
552        if block_amplitude[
553                index] > average_amplitude * delay_amplitude_threshold:
554            continue
555        now_time = float(index) / rate - APPEND_ZEROS_SECS
556        if abs(now_time - start_time) < NEAR_START_OR_END_SECS:
557            continue
558        if abs(now_time - end_time) < NEAR_START_OR_END_SECS:
559            continue
560        # If amplitude less than its left/right side and small enough,
561        # it will be considered as a delay.
562        amp_threshold = average_amplitude * delay_amplitude_threshold
563        left_threshold = delay_amplitude_threshold * left_block_amplitude[
564            index]
565        amp_threshold = min(amp_threshold, left_threshold)
566        right_threshold = delay_amplitude_threshold * right_block_amplitude[
567            index]
568        amp_threshold = min(amp_threshold, right_threshold)
569
570        frequency_error = block_frequency_delta[index] / dominant_frequency
571
572        amplitude_too_small = block_amplitude[index] < amp_threshold
573        frequency_not_match = frequency_error > frequency_error_threshold
574
575        if amplitude_too_small or frequency_not_match:
576            same_event = False
577            if previous_delay_index:
578                same_event = (index - previous_delay_index
579                              ) < same_event_samples
580            if not same_event:
581                index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
582                index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
583                delay_time_points.append(index_start_sec)
584                last_delay_end_time_points.append(index_end_sec)
585                times += 1
586            previous_delay_index = index
587            index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
588            last_delay_end_time_points[times - 1] = index_end_sec
589
590    delay_list = []
591    for i in range(len(delay_time_points)):
592        duration = last_delay_end_time_points[i] - delay_time_points[i]
593        delay_list.append((delay_time_points[i], duration))
594    return delay_list
595
596
597def burst_detection(start_index, end_index, block_amplitude, average_amplitude,
598                    dominant_frequency, rate, left_block_amplitude,
599                    right_block_amplitude, block_frequency_delta,
600                    burst_amplitude_threshold, frequency_error_threshold):
601    """Detects burst during playing.
602
603    For each sample, we will check whether the average amplitude of its block is
604    more than average amplitude of its left block and its right block times
605    burst_amplitude_threshold. Also, we will check whether the frequency of
606    its block is not compatible to the dominant frequency.
607    If at least one constraint fulfilled, it will be considered as a burst.
608
609    Args:
610        start_index: Start index of sine wave.
611        end_index: End index of sine wave.
612        block_amplitude: An array for average amplitude of each block, where
613                            amplitude is computed from Hilbert transform.
614        average_amplitude: Average amplitude of sine wave.
615        dominant_frequency: Dominant frequency of signal.
616        rate: Sampling rate in samples per second. Example inputs: 44100,
617        48000
618        left_block_amplitude: Average amplitude of left block of each index.
619                                Ref to find_block_average_value function.
620        right_block_amplitude: Average amplitude of right block of each index.
621                                Ref to find_block_average_value function.
622        block_frequency_delta: Average absolute difference frequency to
623                                dominant frequency of block of each index.
624        burst_amplitude_threshold: If the amplitude is higher than average
625                            amplitude of its left block and its right block
626                            times burst_amplitude_threshold. It will be
627                            considered as a burst.
628        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
629
630    Returns:
631        List of burst occurence: [time_1, time_2, ...],
632              where time is in seconds.
633
634    """
635    burst_time_points = []
636    previous_burst_index = None
637    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
638    for index in range(int(start_index), int(end_index)):
639        # If amplitude higher than its left/right side and large enough,
640        # it will be considered as a burst.
641        if block_amplitude[
642                index] <= average_amplitude * DEFAULT_BURST_TOO_SMALL:
643            continue
644        if abs(index - start_index) < rate * NEAR_START_OR_END_SECS:
645            continue
646        if abs(index - end_index) < rate * NEAR_START_OR_END_SECS:
647            continue
648        amp_threshold = average_amplitude * DEFAULT_BURST_TOO_SMALL
649        left_threshold = burst_amplitude_threshold * left_block_amplitude[
650            index]
651        amp_threshold = max(amp_threshold, left_threshold)
652        right_threshold = burst_amplitude_threshold * right_block_amplitude[
653            index]
654        amp_threshold = max(amp_threshold, right_threshold)
655
656        frequency_error = block_frequency_delta[index] / dominant_frequency
657
658        amplitude_too_large = block_amplitude[index] > amp_threshold
659        frequency_not_match = frequency_error > frequency_error_threshold
660
661        if amplitude_too_large or frequency_not_match:
662            same_event = False
663            if previous_burst_index:
664                same_event = index - previous_burst_index < same_event_samples
665            if not same_event:
666                burst_time_points.append(
667                    float(index) / rate - APPEND_ZEROS_SECS)
668            previous_burst_index = index
669
670    return burst_time_points
671
672
673def changing_volume_detection(start_index, end_index, average_amplitude, rate,
674                              left_block_amplitude, right_block_amplitude,
675                              volume_changing_amplitude_threshold):
676    """Finds volume changing during playback.
677
678    For each index, we will compare average amplitude of its left block and its
679    right block. If average amplitude of right block is more than average
680    amplitude of left block times (1 + DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
681    be considered as an increasing volume. If the one of right block is less
682    than that of left block times (1 - DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
683    be considered as a decreasing volume.
684
685    Args:
686        start_index: Start index of sine wave.
687        end_index: End index of sine wave.
688        average_amplitude: Average amplitude of sine wave.
689        rate: Sampling rate in samples per second. Example inputs: 44100,
690        48000
691        left_block_amplitude: Average amplitude of left block of each index.
692                                Ref to find_block_average_value function.
693        right_block_amplitude: Average amplitude of right block of each index.
694                                Ref to find_block_average_value function.
695        volume_changing_amplitude_threshold: If the average amplitude of right
696                                                block is higher or lower than
697                                                that of left one times this
698                                                value, it will be considered as
699                                                a volume change.
700                                                Also refer to
701                                                DEFAULT_VOLUME_CHANGE_AMPLITUDE
702
703    Returns:
704        List of volume changing composed of 1 for increasing and -1 for
705            decreasing.
706
707    """
708    length = len(left_block_amplitude)
709
710    # Detects rising and/or falling volume.
711    previous_rising_index, previous_falling_index = None, None
712    changing_time = []
713    changing_events = []
714    amplitude_threshold = average_amplitude * DEFAULT_VOLUME_CHANGE_TOO_SMALL
715    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
716    for index in range(int(start_index), int(end_index)):
717        # Skips if amplitude is too small.
718        if left_block_amplitude[index] < amplitude_threshold:
719            continue
720        if right_block_amplitude[index] < amplitude_threshold:
721            continue
722        # Skips if changing is from start or end time
723        if float(abs(start_index - index)) / rate < NEAR_START_OR_END_SECS:
724            continue
725        if float(abs(end_index - index)) / rate < NEAR_START_OR_END_SECS:
726            continue
727
728        delta_margin = volume_changing_amplitude_threshold
729        if left_block_amplitude[index] > 0:
730            delta_margin *= left_block_amplitude[index]
731
732        increasing_threshold = left_block_amplitude[index] + delta_margin
733        decreasing_threshold = left_block_amplitude[index] - delta_margin
734
735        if right_block_amplitude[index] > increasing_threshold:
736            same_event = False
737            if previous_rising_index:
738                same_event = index - previous_rising_index < same_event_samples
739            if not same_event:
740                changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
741                changing_events.append(+1)
742            previous_rising_index = index
743        if right_block_amplitude[index] < decreasing_threshold:
744            same_event = False
745            if previous_falling_index:
746                same_event = index - previous_falling_index < same_event_samples
747            if not same_event:
748                changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
749                changing_events.append(-1)
750            previous_falling_index = index
751
752    # Combines consecutive increasing/decreasing event.
753    combined_changing_events, prev = [], 0
754    for i in range(len(changing_events)):
755        if changing_events[i] == prev:
756            continue
757        combined_changing_events.append((changing_time[i], changing_events[i]))
758        prev = changing_events[i]
759    return combined_changing_events
760
761
762def quality_measurement(
763        signal,
764        rate,
765        dominant_frequency=None,
766        block_size_secs=DEFAULT_BLOCK_SIZE_SECS,
767        frequency_error_threshold=DEFAULT_FREQUENCY_ERROR,
768        delay_amplitude_threshold=DEFAULT_DELAY_AMPLITUDE_THRESHOLD,
769        noise_amplitude_threshold=DEFAULT_NOISE_AMPLITUDE_THRESHOLD,
770        burst_amplitude_threshold=DEFAULT_BURST_AMPLITUDE_THRESHOLD,
771        volume_changing_amplitude_threshold=DEFAULT_VOLUME_CHANGE_AMPLITUDE):
772    """Detects several artifacts and estimates the noise level.
773
774    This method detects artifact before playing, after playing, and delay
775    during playing. Also, it estimates the noise level of the signal.
776    To avoid the influence of noise, it calculates amplitude and frequency
777    block by block.
778
779    Args:
780        signal: A list of numbers for one-channel PCM data. The data should
781                   be normalized to [-1,1].
782        rate: Sampling rate in samples per second. Example inputs: 44100,
783        48000
784        dominant_frequency: Dominant frequency of signal. Set None to
785                               recalculate the frequency in this function.
786        block_size_secs: Block size in seconds. The measurement will be done
787                            block-by-block using average amplitude and frequency
788                            in each block to avoid noise.
789        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR.
790        delay_amplitude_threshold: If the average amplitude of a block is
791                                      lower than average amplitude of the wave
792                                      times delay_amplitude_threshold, it will
793                                      be considered as delay.
794                                      Also refer to delay_detection and
795                                      DEFAULT_DELAY_AMPLITUDE_THRESHOLD.
796        noise_amplitude_threshold: If the average amplitude of a block is
797                                      higher than average amplitude of the wave
798                                      times noise_amplitude_threshold, it will
799                                      be considered as noise before/after
800                                      playback.
801                                      Also refer to noise_detection and
802                                      DEFAULT_NOISE_AMPLITUDE_THRESHOLD.
803        burst_amplitude_threshold: If the average amplitude of a block is
804                                      higher than average amplitude of its left
805                                      block and its right block times
806                                      burst_amplitude_threshold. It will be
807                                      considered as a burst.
808                                      Also refer to burst_detection and
809                                      DEFAULT_BURST_AMPLITUDE_THRESHOLD.
810        volume_changing_amplitude_threshold: If the average amplitude of right
811                                                block is higher or lower than
812                                                that of left one times this
813                                                value, it will be considered as
814                                                a volume change.
815                                                Also refer to
816                                                changing_volume_detection and
817                                                DEFAULT_VOLUME_CHANGE_AMPLITUDE
818
819    Returns:
820        A dictoinary of detection/estimation:
821              {'artifacts':
822                {'noise_before_playback':
823                    [(time_1, duration_1), (time_2, duration_2), ...],
824                 'noise_after_playback':
825                    [(time_1, duration_1), (time_2, duration_2), ...],
826                 'delay_during_playback':
827                    [(time_1, duration_1), (time_2, duration_2), ...],
828                 'burst_during_playback':
829                    [time_1, time_2, ...]
830                },
831               'volume_changes':
832                 [(time_1, flag_1), (time_2, flag_2), ...],
833               'equivalent_noise_level': level
834              }
835              where durations and time points are in seconds. And,
836              equivalence_noise_level is the quotient of noise and wave which
837              refers to DEFAULT_STANDARD_NOISE. volume_changes is a list of
838              tuples containing time stamps and decreasing/increasing flags for
839              volume change events.
840
841    """
842    # Calculates the block size, from seconds to samples.
843    block_size = int(block_size_secs * rate)
844
845    signal = numpy.concatenate(
846        (numpy.zeros(int(rate * APPEND_ZEROS_SECS)), signal,
847         numpy.zeros(int(rate * APPEND_ZEROS_SECS))))
848    signal = numpy.array(signal, dtype=float)
849    length = len(signal)
850
851    # Calculates the amplitude and frequency.
852    amplitude, frequency = hilbert_analysis(signal, rate, block_size)
853
854    # Finds the dominant frequency.
855    if not dominant_frequency:
856        dominant_frequency = audio_analysis.spectral_analysis(signal,
857                                                              rate)[0][0]
858
859    # Finds the array which contains absolute difference between dominant
860    # frequency and frequency at each time point.
861    frequency_delta = abs(frequency - dominant_frequency)
862
863    # Computes average amplitude of each type of block
864    res = find_block_average_value(amplitude, block_size * 2, block_size)
865    left_block_amplitude, right_block_amplitude, block_amplitude = res
866
867    # Computes average absolute difference of frequency and dominant frequency
868    # of the block of each index
869    _, _, block_frequency_delta = find_block_average_value(
870        frequency_delta, block_size * 2, block_size)
871
872    # Finds start and end index of sine wave.
873    start_index, end_index = find_start_end_index(
874        dominant_frequency, block_frequency_delta, block_size,
875        frequency_error_threshold)
876
877    if start_index > end_index:
878        raise SineWaveNotFound('No sine wave found in signal')
879
880    logging.debug('Found sine wave: start: %s, end: %s',
881                  float(start_index) / rate - APPEND_ZEROS_SECS,
882                  float(end_index) / rate - APPEND_ZEROS_SECS)
883
884    sum_of_amplitude = float(sum(amplitude[int(start_index):int(end_index)]))
885    # Finds average amplitude of sine wave.
886    average_amplitude = sum_of_amplitude / (end_index - start_index)
887
888    # Finds noise before and/or after playback.
889    noise_before_playing, noise_after_playing = noise_detection(
890        start_index, end_index, block_amplitude, average_amplitude, rate,
891        noise_amplitude_threshold)
892
893    # Finds delay during playback.
894    delays = delay_detection(start_index, end_index, block_amplitude,
895                             average_amplitude, dominant_frequency, rate,
896                             left_block_amplitude, right_block_amplitude,
897                             block_frequency_delta, delay_amplitude_threshold,
898                             frequency_error_threshold)
899
900    # Finds burst during playback.
901    burst_time_points = burst_detection(
902        start_index, end_index, block_amplitude, average_amplitude,
903        dominant_frequency, rate, left_block_amplitude, right_block_amplitude,
904        block_frequency_delta, burst_amplitude_threshold,
905        frequency_error_threshold)
906
907    # Finds volume changing during playback.
908    volume_changes = changing_volume_detection(
909        start_index, end_index, average_amplitude, rate, left_block_amplitude,
910        right_block_amplitude, volume_changing_amplitude_threshold)
911
912    # Calculates the average teager value.
913    teager_value = average_teager_value(
914        signal[int(start_index):int(end_index)], average_amplitude)
915
916    # Finds out the noise level.
917    noise = noise_level(average_amplitude, dominant_frequency, rate,
918                        teager_value)
919
920    return {
921        'artifacts': {
922            'noise_before_playback': noise_before_playing,
923            'noise_after_playback': noise_after_playing,
924            'delay_during_playback': delays,
925            'burst_during_playback': burst_time_points
926        },
927        'volume_changes': volume_changes,
928        'equivalent_noise_level': noise
929    }
930