• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""General statistical or mathematical functions."""
6
7import math
8
9
def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded. When the number of
  values to discard from each end is fractional, the outermost surviving
  values only contribute the fraction of their weight that was not discarded.

  Data sets with two or fewer values are never truncated; their plain
  arithmetic mean is returned.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: How much of the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1]. The proportion is removed
        from each end, so a value of 0.5 or more leaves nothing to average
        when the data set has more than two values.

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty after discarding values.
  """
  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    kept_weight = len(data_set) - discard_num_float * 2

    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]

    # Fraction of each outermost surviving value that was not discarded.
    weight_left = 1.0 - (discard_num_float - discard_num_int)

    if kept_weight > 0 and weight_left < 1:
      if len(data_set) == 1:
        # Both partial discards fall on the same surviving value. It is the
        # only contributor, so the mean is that value itself; weighting it
        # as both "first" and "last" would count it twice.
        kept_weight = 1
      else:
        # If the % to discard leaves a fractional portion, need to weight the
        # outermost surviving values.
        weighted_vals = [data_set[0] * weight_left,
                         data_set[-1] * weight_left]
        data_set = weighted_vals + data_set[1:-1]
  else:
    kept_weight = len(data_set)

  # Report "nothing left to average" uniformly with the documented exception
  # instead of an IndexError, ZeroDivisionError or a meaningless result.
  if len(data_set) == 0 or kept_weight <= 0:
    raise TypeError('The data set was empty after discarding values.')

  # sum() over explicit floats works on both Python 2 and 3 (no builtin
  # reduce needed) and always yields a float, even for a single value.
  return sum(float(value) for value in data_set) / kept_weight
53
54
def Mean(values):
  """Returns the arithmetic mean of the given values.

  This is the truncated mean with nothing discarded from either end.
  """
  no_truncation = 0.0
  return TruncatedMean(values, no_truncation)
58
59
def Variance(values):
  """Returns the sample variance of the given values.

  The squared deviations from the mean are summed and divided by one less
  than the number of values. A single value has a variance of 0.0.
  """
  if len(values) == 1:
    return 0.0
  center = Mean(values)
  deviations = [float(value) - center for value in values]
  sum_of_squares = sum(float(d * d) for d in deviations)
  return sum_of_squares / (len(values) - 1)
69
70
def StandardDeviation(values):
  """Returns the sample standard deviation: the square root of Variance()."""
  sample_variance = Variance(values)
  return math.sqrt(sample_variance)
74
75
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    If the two values are equal the result is 0.0. Otherwise, if before is
    zero the relative change is undefined and NaN is returned.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert to float before dividing: with two integers, "/" performs floor
  # division under Python 2 and would, e.g., report a change of 2 -> 3 as 0.
  difference = float(after - before)
  return math.fabs(difference / before)
97
98
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Each sample's squared standard deviation is weighted by its size minus one;
  the weighted total is divided by the sum of those weights, and the square
  root of that quotient is scaled by the square root of the summed reciprocal
  sample sizes.

  Args:
    work_sets: A collection of collections of numbers.

  Returns:
    Pooled sample standard error, or 0.0 when the sizes-minus-one of all
    samples sum to zero (for example, when every sample has one value).
  """
  weighted_variance_sum = 0.0
  degrees_of_freedom = 0.0
  inverse_size_sum = 0.0

  for sample in work_sets:
    deviation = StandardDeviation(sample)
    size = len(sample)
    weighted_variance_sum += (size - 1) * deviation ** 2
    degrees_of_freedom += size - 1
    if size > 0:
      inverse_size_sum += 1.0 / size

  if degrees_of_freedom == 0:
    return 0.0

  pooled_deviation = math.sqrt(weighted_variance_sum / degrees_of_freedom)
  return pooled_deviation * math.sqrt(inverse_size_sum)
123
124
125# Redefining built-in 'StandardError'
126# pylint: disable=W0622
def StandardError(values):
  """Returns the standard error of a list of values.

  This is the sample standard deviation divided by the square root of the
  number of values. Lists with fewer than two values yield 0.0.
  """
  sample_count = len(values)
  if sample_count > 1:
    return StandardDeviation(values) / math.sqrt(sample_count)
  return 0.0
133