• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""A collection of statistical utility functions to be used by metrics."""
6
7import bisect
8import math
9
10
11def Clamp(value, low=0.0, high=1.0):
12  """Clamp a value between some low and high value."""
13  return min(max(value, low), high)
14
15
16def NormalizeSamples(samples):
17  """Sorts the samples, and map them linearly to the range [0,1].
18
19  They're mapped such that for the N samples, the first sample is 0.5/N and the
20  last sample is (N-0.5)/N.
21
22  Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,
23  twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case
24  we don't want to distinguish between these two cases, as our original domain
25  is not bounded (it is for Monte Carlo integration, where discrepancy was
26  first used).
27  """
28  if not samples:
29    return samples, 1.0
30  samples = sorted(samples)
31  low = min(samples)
32  high = max(samples)
33  new_low = 0.5 / len(samples)
34  new_high = (len(samples)-0.5) / len(samples)
35  if high-low == 0.0:
36    return samples, 1.0
37  scale = (new_high - new_low) / (high - low)
38  for i in xrange(0, len(samples)):
39    samples[i] = float(samples[i] - low) * scale + new_low
40  return samples, scale
41
42
43def Discrepancy(samples, interval_multiplier=10000):
44  """Computes the discrepancy of a set of 1D samples from the interval [0,1].
45
46  The samples must be sorted.
47
48  http://en.wikipedia.org/wiki/Low-discrepancy_sequence
49  http://mathworld.wolfram.com/Discrepancy.html
50  """
51  if not samples:
52    return 1.0
53
54  max_local_discrepancy = 0
55  locations = []
56  # For each location, stores the number of samples less than that location.
57  left = []
58  # For each location, stores the number of samples less than or equal to that
59  # location.
60  right = []
61
62  interval_count = len(samples) * interval_multiplier
63  # Compute number of locations the will roughly result in the requested number
64  # of intervals.
65  location_count = int(math.ceil(math.sqrt(interval_count*2)))
66  inv_sample_count = 1.0 / len(samples)
67
68  # Generate list of equally spaced locations.
69  for i in xrange(0, location_count):
70    location = float(i) / (location_count-1)
71    locations.append(location)
72    left.append(bisect.bisect_left(samples, location))
73    right.append(bisect.bisect_right(samples, location))
74
75  # Iterate over the intervals defined by any pair of locations.
76  for i in xrange(0, len(locations)):
77    for j in xrange(i, len(locations)):
78      # Compute length of interval and number of samples in the interval.
79      length = locations[j] - locations[i]
80      count = right[j] - left[i]
81
82      # Compute local discrepancy and update max_local_discrepancy.
83      local_discrepancy = abs(float(count)*inv_sample_count - length)
84      max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)
85
86  return max_local_discrepancy
87
88
89def FrameDiscrepancy(frame_timestamps, absolute=True,
90                     interval_multiplier=10000):
91  """A discrepancy based metric for measuring jank.
92
93  FrameDiscrepancy quantifies the largest area of jank observed in a series
94  of timestamps.  Note that this is different form metrics based on the
95  max_frame_time. For example, the time stamp series A = [0,1,2,3,5,6] and
96  B = [0,1,2,3,5,7] have the same max_frame_time = 2, but
97  Discrepancy(B) > Discrepancy(A).
98
99  Two variants of discrepancy can be computed:
100
101  Relative discrepancy is following the original definition of
102  discrepancy. It characterized the largest area of jank, relative to the
103  duration of the entire time stamp series.  We normalize the raw results,
104  because the best case discrepancy for a set of N samples is 1/N (for
105  equally spaced samples), and we want our metric to report 0.0 in that
106  case.
107
108  Absolute discrepancy also characterizes the largest area of jank, but its
109  value wouldn't change (except for imprecisions due to a low
110  interval_multiplier) if additional 'good' frames were added to an
111  exisiting list of time stamps.  Its range is [0,inf] and the unit is
112  milliseconds.
113
114  The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
115  absolute discrepancy, but D has lower relative discrepancy than C.
116  """
117  if not frame_timestamps:
118    return 1.0
119  samples, sample_scale = NormalizeSamples(frame_timestamps)
120  discrepancy = Discrepancy(samples, interval_multiplier)
121  inv_sample_count = 1.0 / len(samples)
122  if absolute:
123    # Compute absolute discrepancy
124    discrepancy /= sample_scale
125  else:
126    # Compute relative discrepancy
127    discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))
128  return discrepancy
129
130
131def ArithmeticMean(numerator, denominator):
132  """Calculates arithmetic mean.
133
134  Both numerator and denominator can be given as either individual
135  values or lists of values which will be summed.
136
137  Args:
138    numerator: A quantity that represents a sum total value.
139    denominator: A quantity that represents a count of the number of things.
140
141  Returns:
142    The arithmetic mean value, or 0 if the denominator value was 0.
143  """
144  numerator_total = Total(numerator)
145  denominator_total = Total(denominator)
146  return DivideIfPossibleOrZero(numerator_total, denominator_total)
147
148
149def Total(data):
150  """Returns the float value of a number or the sum of a list."""
151  if type(data) == float:
152    total = data
153  elif type(data) == int:
154    total = float(data)
155  elif type(data) == list:
156    total = float(sum(data))
157  else:
158    raise TypeError
159  return total
160
161
162def DivideIfPossibleOrZero(numerator, denominator):
163  """Returns the quotient, or zero if the denominator is zero."""
164  if not denominator:
165    return 0.0
166  else:
167    return numerator / denominator
168
169
170def GeneralizedMean(values, exponent):
171  """See http://en.wikipedia.org/wiki/Generalized_mean"""
172  if not values:
173    return 0.0
174  sum_of_powers = 0.0
175  for v in values:
176    sum_of_powers += v ** exponent
177  return (sum_of_powers / len(values)) ** (1.0/exponent)
178
179
180def Median(values):
181  """Gets the median of a list of values."""
182  return Percentile(values, 50)
183
184
185def Percentile(values, percentile):
186  """Calculates the value below which a given percentage of values fall.
187
188  For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
189  percentile for this set of values. When the percentage doesn't exactly
190  match a rank in the list of values, the percentile is computed using linear
191  interpolation between closest ranks.
192
193  Args:
194    values: A list of numerical values.
195    percentile: A number between 0 and 100.
196
197  Returns:
198    The Nth percentile for the list of values, where N is the given percentage.
199  """
200  if not values:
201    return 0.0
202  sorted_values = sorted(values)
203  n = len(values)
204  percentile /= 100.0
205  if percentile <= 0.5 / n:
206    return sorted_values[0]
207  elif percentile >= (n - 0.5) / n:
208    return sorted_values[-1]
209  else:
210    floor_index = int(math.floor(n * percentile -  0.5))
211    floor_value = sorted_values[floor_index]
212    ceil_value = sorted_values[floor_index+1]
213    alpha = n * percentile - 0.5 - floor_index
214    return floor_value + alpha * (ceil_value - floor_value)
215
216
217def GeometricMean(values):
218  """Compute a rounded geometric mean from an array of values."""
219  if not values:
220    return None
221  # To avoid infinite value errors, make sure no value is less than 0.001.
222  new_values = []
223  for value in values:
224    if value > 0.001:
225      new_values.append(value)
226    else:
227      new_values.append(0.001)
228  # Compute the sum of the log of the values.
229  log_sum = sum(map(math.log, new_values))
230  # Raise e to that sum over the number of values.
231  mean = math.pow(math.e, (log_sum / len(new_values)))
232  # Return the rounded mean.
233  return int(round(mean))
234
235