1# Copyright 2013 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""A collection of statistical utility functions to be used by metrics.""" 6 7import bisect 8import math 9 10 11def Clamp(value, low=0.0, high=1.0): 12 """Clamp a value between some low and high value.""" 13 return min(max(value, low), high) 14 15 16def NormalizeSamples(samples): 17 """Sorts the samples, and map them linearly to the range [0,1]. 18 19 They're mapped such that for the N samples, the first sample is 0.5/N and the 20 last sample is (N-0.5)/N. 21 22 Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N, 23 twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case 24 we don't want to distinguish between these two cases, as our original domain 25 is not bounded (it is for Monte Carlo integration, where discrepancy was 26 first used). 27 """ 28 if not samples: 29 return samples, 1.0 30 samples = sorted(samples) 31 low = min(samples) 32 high = max(samples) 33 new_low = 0.5 / len(samples) 34 new_high = (len(samples)-0.5) / len(samples) 35 if high-low == 0.0: 36 return samples, 1.0 37 scale = (new_high - new_low) / (high - low) 38 for i in xrange(0, len(samples)): 39 samples[i] = float(samples[i] - low) * scale + new_low 40 return samples, scale 41 42 43def Discrepancy(samples, interval_multiplier=10000): 44 """Computes the discrepancy of a set of 1D samples from the interval [0,1]. 45 46 The samples must be sorted. 47 48 http://en.wikipedia.org/wiki/Low-discrepancy_sequence 49 http://mathworld.wolfram.com/Discrepancy.html 50 """ 51 if not samples: 52 return 1.0 53 54 max_local_discrepancy = 0 55 locations = [] 56 # For each location, stores the number of samples less than that location. 57 left = [] 58 # For each location, stores the number of samples less than or equal to that 59 # location. 60 right = [] 61 62 interval_count = len(samples) * interval_multiplier 63 # Compute number of locations the will roughly result in the requested number 64 # of intervals. 65 location_count = int(math.ceil(math.sqrt(interval_count*2))) 66 inv_sample_count = 1.0 / len(samples) 67 68 # Generate list of equally spaced locations. 69 for i in xrange(0, location_count): 70 location = float(i) / (location_count-1) 71 locations.append(location) 72 left.append(bisect.bisect_left(samples, location)) 73 right.append(bisect.bisect_right(samples, location)) 74 75 # Iterate over the intervals defined by any pair of locations. 76 for i in xrange(0, len(locations)): 77 for j in xrange(i, len(locations)): 78 # Compute length of interval and number of samples in the interval. 79 length = locations[j] - locations[i] 80 count = right[j] - left[i] 81 82 # Compute local discrepancy and update max_local_discrepancy. 83 local_discrepancy = abs(float(count)*inv_sample_count - length) 84 max_local_discrepancy = max(local_discrepancy, max_local_discrepancy) 85 86 return max_local_discrepancy 87 88 89def FrameDiscrepancy(frame_timestamps, absolute=True, 90 interval_multiplier=10000): 91 """A discrepancy based metric for measuring jank. 92 93 FrameDiscrepancy quantifies the largest area of jank observed in a series 94 of timestamps. Note that this is different form metrics based on the 95 max_frame_time. For example, the time stamp series A = [0,1,2,3,5,6] and 96 B = [0,1,2,3,5,7] have the same max_frame_time = 2, but 97 Discrepancy(B) > Discrepancy(A). 98 99 Two variants of discrepancy can be computed: 100 101 Relative discrepancy is following the original definition of 102 discrepancy. It characterized the largest area of jank, relative to the 103 duration of the entire time stamp series. We normalize the raw results, 104 because the best case discrepancy for a set of N samples is 1/N (for 105 equally spaced samples), and we want our metric to report 0.0 in that 106 case. 107 108 Absolute discrepancy also characterizes the largest area of jank, but its 109 value wouldn't change (except for imprecisions due to a low 110 interval_multiplier) if additional 'good' frames were added to an 111 exisiting list of time stamps. Its range is [0,inf] and the unit is 112 milliseconds. 113 114 The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same 115 absolute discrepancy, but D has lower relative discrepancy than C. 116 """ 117 if not frame_timestamps: 118 return 1.0 119 samples, sample_scale = NormalizeSamples(frame_timestamps) 120 discrepancy = Discrepancy(samples, interval_multiplier) 121 inv_sample_count = 1.0 / len(samples) 122 if absolute: 123 # Compute absolute discrepancy 124 discrepancy /= sample_scale 125 else: 126 # Compute relative discrepancy 127 discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count)) 128 return discrepancy 129 130 131def ArithmeticMean(numerator, denominator): 132 """Calculates arithmetic mean. 133 134 Both numerator and denominator can be given as either individual 135 values or lists of values which will be summed. 136 137 Args: 138 numerator: A quantity that represents a sum total value. 139 denominator: A quantity that represents a count of the number of things. 140 141 Returns: 142 The arithmetic mean value, or 0 if the denominator value was 0. 143 """ 144 numerator_total = Total(numerator) 145 denominator_total = Total(denominator) 146 return DivideIfPossibleOrZero(numerator_total, denominator_total) 147 148 149def Total(data): 150 """Returns the float value of a number or the sum of a list.""" 151 if type(data) == float: 152 total = data 153 elif type(data) == int: 154 total = float(data) 155 elif type(data) == list: 156 total = float(sum(data)) 157 else: 158 raise TypeError 159 return total 160 161 162def DivideIfPossibleOrZero(numerator, denominator): 163 """Returns the quotient, or zero if the denominator is zero.""" 164 if not denominator: 165 return 0.0 166 else: 167 return numerator / denominator 168 169 170def GeneralizedMean(values, exponent): 171 """See http://en.wikipedia.org/wiki/Generalized_mean""" 172 if not values: 173 return 0.0 174 sum_of_powers = 0.0 175 for v in values: 176 sum_of_powers += v ** exponent 177 return (sum_of_powers / len(values)) ** (1.0/exponent) 178 179 180def Median(values): 181 """Gets the median of a list of values.""" 182 return Percentile(values, 50) 183 184 185def Percentile(values, percentile): 186 """Calculates the value below which a given percentage of values fall. 187 188 For example, if 17% of the values are less than 5.0, then 5.0 is the 17th 189 percentile for this set of values. When the percentage doesn't exactly 190 match a rank in the list of values, the percentile is computed using linear 191 interpolation between closest ranks. 192 193 Args: 194 values: A list of numerical values. 195 percentile: A number between 0 and 100. 196 197 Returns: 198 The Nth percentile for the list of values, where N is the given percentage. 199 """ 200 if not values: 201 return 0.0 202 sorted_values = sorted(values) 203 n = len(values) 204 percentile /= 100.0 205 if percentile <= 0.5 / n: 206 return sorted_values[0] 207 elif percentile >= (n - 0.5) / n: 208 return sorted_values[-1] 209 else: 210 floor_index = int(math.floor(n * percentile - 0.5)) 211 floor_value = sorted_values[floor_index] 212 ceil_value = sorted_values[floor_index+1] 213 alpha = n * percentile - 0.5 - floor_index 214 return floor_value + alpha * (ceil_value - floor_value) 215 216 217def GeometricMean(values): 218 """Compute a rounded geometric mean from an array of values.""" 219 if not values: 220 return None 221 # To avoid infinite value errors, make sure no value is less than 0.001. 222 new_values = [] 223 for value in values: 224 if value > 0.001: 225 new_values.append(value) 226 else: 227 new_values.append(0.001) 228 # Compute the sum of the log of the values. 229 log_sum = sum(map(math.log, new_values)) 230 # Raise e to that sum over the number of values. 231 mean = math.pow(math.e, (log_sum / len(new_values))) 232 # Return the rounded mean. 233 return int(round(mean)) 234 235