# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""General statistical or mathematical functions."""

import math


def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: How much of the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1]. The same fraction is
        removed from each end.

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty after discarding values.
    ZeroDivisionError: A single value survived but the truncation removed
        all of its weight (e.g. an odd-length set with truncate_percent 0.5).
  """
  # With two or fewer values both ends are treated symmetrically, so the
  # order does not matter and sorting can be skipped.
  if len(data_set) > 2:
    data_set = sorted(data_set)

  discard_num_float = len(data_set) * truncate_percent
  discard_num_int = int(math.floor(discard_num_float))
  kept_weight = len(data_set) - discard_num_float * 2

  # Drop the values that are discarded entirely from each end.
  data_set = data_set[discard_num_int:len(data_set) - discard_num_int]
  if not data_set:
    raise TypeError('TruncatedMean: data set is empty after discarding values')

  # Weight still carried by each of the two outermost surviving values.
  weight_left = 1.0 - (discard_num_float - discard_num_int)

  if weight_left < 1:
    # If the % to discard leaves a fractional portion, need to weight those
    # values.
    if len(data_set) == 1:
      # The lone survivor is both the lowest and the highest value; it holds
      # all of the remaining weight and must only be counted once.
      data_set = [data_set[0] * kept_weight]
    else:
      weighted_vals = [data_set[0] * weight_left, data_set[-1] * weight_left]
      # list() keeps the concatenation valid when data_set is a tuple.
      data_set = weighted_vals + list(data_set[1:-1])
  else:
    kept_weight = len(data_set)

  # math.fsum is used rather than the Python 2-only builtin reduce(), so the
  # function runs on both Python 2 and Python 3 and always yields a float.
  return math.fsum(float(value) for value in data_set) / kept_weight


def Mean(values):
  """Calculates the arithmetic mean of a list of values.

  Raises:
    TypeError: The list of values was empty.
  """
  return TruncatedMean(values, 0.0)


def Variance(values):
  """Calculates the sample variance.

  Uses the (n - 1) denominator. A single value has a variance of 0.0.

  Raises:
    TypeError: The list of values was empty.
  """
  if len(values) == 1:
    return 0.0
  mean = Mean(values)
  squared_differences = [(float(x) - mean) ** 2 for x in values]
  return sum(squared_differences) / (len(values) - 1)


def StandardDeviation(values):
  """Calculates the sample standard deviation of the given list of values."""
  return math.sqrt(Variance(values))


def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    NaN is returned when before is zero and after is not, because the
    relative change is undefined in that case.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Force float division so that integer arguments are not floor-divided
  # under Python 2.
  return math.fabs(float(after - before) / before)


def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Empty samples carry no information and are ignored.

  Args:
    work_sets: A collection of collections of numbers.

  Returns:
    Pooled sample standard error, or 0.0 when no sample has more than one
    value.
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    if set_size == 0:
      # Skip before calling Variance, which cannot handle an empty sample.
      continue
    numerator += (set_size - 1) * Variance(current_set)
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1 == 0:
    return 0.0

  return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)


# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
  """Calculates the standard error of a list of values.

  Returns 0.0 for lists with fewer than two values.
  """
  if len(values) <= 1:
    return 0.0
  std_dev = StandardDeviation(values)
  return std_dev / math.sqrt(len(values))