• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Functions for doing independent two-sample t-tests and looking up p-values.
6
7> A t-test is any statistical hypothesis test in which the test statistic
8> follows a Student's t distribution if the null hypothesis is supported.
9> It can be used to determine if two sets of data are significantly different
10> from each other.
11
12There are several conditions that the data under test should meet in order
13for a t-test to be completely applicable:
14 - The data should be roughly normal in distribution.
15 - The two samples that are compared should be roughly similar in size.
16
17If these conditions cannot be met, then a non-parametric test may be more
appropriate (e.g. Mann-Whitney U test, K-S test or Anderson-Darling test).
19
20References:
21  http://en.wikipedia.org/wiki/Student%27s_t-test
22  http://en.wikipedia.org/wiki/Welch%27s_t-test
23  https://github.com/scipy/scipy/blob/master/scipy/stats/stats.py#L3244
24"""
25
26import bisect
27import collections
28import math
29
30from dashboard import math_utils
31
32
# The outcome of a t-test: the t-statistic, the degrees of freedom and the
# p-value, in that order.
TTestResult = collections.namedtuple('TTestResult', ['t', 'df', 'p'])
35
36
def WelchsTTest(sample1, sample2):
  """Runs Welch's t-test to compare two independent samples.

  Welch's t-test is a variant of Student's t-test that is suitable when the
  two samples may not have equal variances.

  Args:
    sample1: A non-empty collection of numbers.
    sample2: Another non-empty collection of numbers.

  Returns:
    A TTestResult named tuple (t, df, p) holding the t-statistic, the degrees
    of freedom, and the p-value looked up for those two values.

  Raises:
    RuntimeError: A sample is empty, or the degrees of freedom cannot be
        calculated because a sample has fewer than two values.
  """
  # Reject empty samples up front, checking sample1 before sample2.
  emptiness_checks = (
      (sample1, 'Empty sample 1: %s'),
      (sample2, 'Empty sample 2: %s'),
  )
  for sample, message in emptiness_checks:
    if not sample:
      raise RuntimeError(message % list(sample))

  first_stats = _MakeSampleStats(sample1)
  second_stats = _MakeSampleStats(sample2)
  t_statistic = _TValue(first_stats, second_stats)
  freedom = _DegreesOfFreedom(first_stats, second_stats)
  p_value = _LookupPValue(t_statistic, freedom)
  return TTestResult(t=t_statistic, df=freedom, p=p_value)
65
66
# Pre-calculated summary statistics of one sample: its mean, its variance
# and the number of values in it.
SampleStats = collections.namedtuple('SampleStats', ['mean', 'var', 'size'])
69
70
def _MakeSampleStats(sample):
  """Computes the mean, variance and size of a sample as a SampleStats."""
  mean = math_utils.Mean(sample)
  variance = math_utils.Variance(sample)
  return SampleStats(mean=mean, var=variance, size=len(sample))
75
76
def _TValue(stats1, stats2):
  """Computes the t-statistic using the formula for Welch's t-test.

  The statistic is the difference between the two sample means divided by
  the square root of the sum of each sample's variance over its size. It
  works like a signal-to-noise ratio: the larger the t value, the more
  different the two groups are.

  Args:
    stats1: A SampleStats named tuple for the first sample.
    stats2: A SampleStats named tuple for the second sample.

  Returns:
    A t value, which may be negative or positive. When neither sample has any
    variance, positive infinity is returned whatever the two means are.
  """
  # With no variance at all, any difference between the samples counts as a
  # very clear one, so report a very high t-value. This also matches the
  # formula, whose quotient approaches infinity as the variance approaches
  # zero.
  any_variance = stats1.var != 0 or stats2.var != 0
  if not any_variance:
    return float('inf')

  mean_difference = stats1.mean - stats2.mean
  noise = math.sqrt(stats1.var / stats1.size + stats2.var / stats2.size)
  return math_utils.Divide(mean_difference, noise)
100
101
def _DegreesOfFreedom(stats1, stats2):
  """Estimates degrees of freedom with the Welch-Satterthwaite equation.

  Degrees of freedom is a measure of sample size. Some other tests simply use
  N - 1, where N is the sample size, but Welch's t-test approximates it from
  the variances and sizes of both samples instead.

  The result is never below 1.0, because the first row of the t-table is the
  one for 1.0 degrees of freedom.

  Args:
    stats1: A SampleStats named tuple for the first sample.
    stats2: A SampleStats named tuple for the second sample.

  Returns:
    An estimate of degrees of freedom. Guaranteed to be at least 1.0.

  Raises:
    RuntimeError: At least one sample has some variance and one of the
        samples has fewer than two values.
  """
  # Two samples without any variance get the minimum value. This is checked
  # before the sizes, so such samples are accepted whatever their size.
  if not (stats1.var != 0 or stats2.var != 0):
    return 1.0

  size_checks = (
      (stats1, 'Sample 1 size < 2. Actual size: %s'),
      (stats2, 'Sample 2 size < 2. Actual size: %s'),
  )
  for stats, message in size_checks:
    if stats.size < 2:
      raise RuntimeError(message % stats.size)

  numerator = (stats1.var / stats1.size + stats2.var / stats2.size) ** 2
  first_term = math_utils.Divide(
      stats1.var ** 2, (stats1.size ** 2) * (stats1.size - 1))
  second_term = math_utils.Divide(
      stats2.var ** 2, (stats2.size ** 2) * (stats2.size - 1))
  estimate = math_utils.Divide(numerator, first_term + second_term)

  # Clamp so that the result always maps onto a row of the t-table.
  return max(1.0, estimate)
137
138
139# Below is a hard-coded table for looking up p-values.
140#
141# Normally, p-values are calculated based on the t-distribution formula.
142# Looking up pre-calculated values is a less accurate but less complicated
143# alternative.
144#
145# Reference: http://www.medcalc.org/manual/t-distribution.php
146
# P-values for a two-tailed test, in decreasing order. The entry at index i
# is the p-value for the t-value at index i of every row in _TABLE below.
_TWO_TAIL = [1, 0.20, 0.10, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001]

# A list of (degrees of freedom, t-values) pairs, sorted by ascending degrees
# of freedom so that a row can be found with bisect. The index of a t-value
# within its list is the index of the corresponding p-value in _TWO_TAIL.
# Each list starts at 0 and is ascending. Above 40 degrees of freedom, only
# selected values have a row.
_TABLE = [
    (1, [0, 3.078, 6.314, 12.706, 31.820, 63.657, 127.321, 318.309, 636.619]),
    (2, [0, 1.886, 2.920, 4.303, 6.965, 9.925, 14.089, 22.327, 31.599]),
    (3, [0, 1.638, 2.353, 3.182, 4.541, 5.841, 7.453, 10.215, 12.924]),
    (4, [0, 1.533, 2.132, 2.776, 3.747, 4.604, 5.598, 7.173, 8.610]),
    (5, [0, 1.476, 2.015, 2.571, 3.365, 4.032, 4.773, 5.893, 6.869]),
    (6, [0, 1.440, 1.943, 2.447, 3.143, 3.707, 4.317, 5.208, 5.959]),
    (7, [0, 1.415, 1.895, 2.365, 2.998, 3.499, 4.029, 4.785, 5.408]),
    (8, [0, 1.397, 1.860, 2.306, 2.897, 3.355, 3.833, 4.501, 5.041]),
    (9, [0, 1.383, 1.833, 2.262, 2.821, 3.250, 3.690, 4.297, 4.781]),
    (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169, 3.581, 4.144, 4.587]),
    (11, [0, 1.363, 1.796, 2.201, 2.718, 3.106, 3.497, 4.025, 4.437]),
    (12, [0, 1.356, 1.782, 2.179, 2.681, 3.055, 3.428, 3.930, 4.318]),
    (13, [0, 1.350, 1.771, 2.160, 2.650, 3.012, 3.372, 3.852, 4.221]),
    (14, [0, 1.345, 1.761, 2.145, 2.625, 2.977, 3.326, 3.787, 4.140]),
    (15, [0, 1.341, 1.753, 2.131, 2.602, 2.947, 3.286, 3.733, 4.073]),
    (16, [0, 1.337, 1.746, 2.120, 2.584, 2.921, 3.252, 3.686, 4.015]),
    (17, [0, 1.333, 1.740, 2.110, 2.567, 2.898, 3.222, 3.646, 3.965]),
    (18, [0, 1.330, 1.734, 2.101, 2.552, 2.878, 3.197, 3.610, 3.922]),
    (19, [0, 1.328, 1.729, 2.093, 2.539, 2.861, 3.174, 3.579, 3.883]),
    (20, [0, 1.325, 1.725, 2.086, 2.528, 2.845, 3.153, 3.552, 3.850]),
    (21, [0, 1.323, 1.721, 2.080, 2.518, 2.831, 3.135, 3.527, 3.819]),
    (22, [0, 1.321, 1.717, 2.074, 2.508, 2.819, 3.119, 3.505, 3.792]),
    (23, [0, 1.319, 1.714, 2.069, 2.500, 2.807, 3.104, 3.485, 3.768]),
    (24, [0, 1.318, 1.711, 2.064, 2.492, 2.797, 3.090, 3.467, 3.745]),
    (25, [0, 1.316, 1.708, 2.060, 2.485, 2.787, 3.078, 3.450, 3.725]),
    (26, [0, 1.315, 1.706, 2.056, 2.479, 2.779, 3.067, 3.435, 3.707]),
    (27, [0, 1.314, 1.703, 2.052, 2.473, 2.771, 3.057, 3.421, 3.690]),
    (28, [0, 1.313, 1.701, 2.048, 2.467, 2.763, 3.047, 3.408, 3.674]),
    (29, [0, 1.311, 1.699, 2.045, 2.462, 2.756, 3.038, 3.396, 3.659]),
    (30, [0, 1.310, 1.697, 2.042, 2.457, 2.750, 3.030, 3.385, 3.646]),
    (31, [0, 1.309, 1.695, 2.040, 2.453, 2.744, 3.022, 3.375, 3.633]),
    (32, [0, 1.309, 1.694, 2.037, 2.449, 2.738, 3.015, 3.365, 3.622]),
    (33, [0, 1.308, 1.692, 2.035, 2.445, 2.733, 3.008, 3.356, 3.611]),
    (34, [0, 1.307, 1.691, 2.032, 2.441, 2.728, 3.002, 3.348, 3.601]),
    (35, [0, 1.306, 1.690, 2.030, 2.438, 2.724, 2.996, 3.340, 3.591]),
    (36, [0, 1.306, 1.688, 2.028, 2.434, 2.719, 2.991, 3.333, 3.582]),
    (37, [0, 1.305, 1.687, 2.026, 2.431, 2.715, 2.985, 3.326, 3.574]),
    (38, [0, 1.304, 1.686, 2.024, 2.429, 2.712, 2.980, 3.319, 3.566]),
    (39, [0, 1.304, 1.685, 2.023, 2.426, 2.708, 2.976, 3.313, 3.558]),
    (40, [0, 1.303, 1.684, 2.021, 2.423, 2.704, 2.971, 3.307, 3.551]),
    (42, [0, 1.302, 1.682, 2.018, 2.418, 2.698, 2.963, 3.296, 3.538]),
    (44, [0, 1.301, 1.680, 2.015, 2.414, 2.692, 2.956, 3.286, 3.526]),
    (46, [0, 1.300, 1.679, 2.013, 2.410, 2.687, 2.949, 3.277, 3.515]),
    (48, [0, 1.299, 1.677, 2.011, 2.407, 2.682, 2.943, 3.269, 3.505]),
    (50, [0, 1.299, 1.676, 2.009, 2.403, 2.678, 2.937, 3.261, 3.496]),
    (60, [0, 1.296, 1.671, 2.000, 2.390, 2.660, 2.915, 3.232, 3.460]),
    (70, [0, 1.294, 1.667, 1.994, 2.381, 2.648, 2.899, 3.211, 3.435]),
    (80, [0, 1.292, 1.664, 1.990, 2.374, 2.639, 2.887, 3.195, 3.416]),
    (90, [0, 1.291, 1.662, 1.987, 2.369, 2.632, 2.878, 3.183, 3.402]),
    (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626, 2.871, 3.174, 3.391]),
    (120, [0, 1.289, 1.658, 1.980, 2.358, 2.617, 2.860, 3.160, 3.373]),
    (150, [0, 1.287, 1.655, 1.976, 2.351, 2.609, 2.849, 3.145, 3.357]),
    (200, [0, 1.286, 1.652, 1.972, 2.345, 2.601, 2.839, 3.131, 3.340]),
    (300, [0, 1.284, 1.650, 1.968, 2.339, 2.592, 2.828, 3.118, 3.323]),
    (500, [0, 1.283, 1.648, 1.965, 2.334, 2.586, 2.820, 3.107, 3.310]),
]
210
211
def _LookupPValue(t, df):
  """Finds the two-tailed p-value for a t-statistic in the t-table.

  Args:
    t: A t statistic value; the result of a t-test. Only its magnitude is
        used because this is a two-tail test.
    df: Number of degrees of freedom. Must be at least 1.0.

  Returns:
    A p-value, which represents the likelihood of obtaining a result at least
    as extreme as the one observed just by chance (the null hypothesis). It
    is the _TWO_TAIL entry for the largest t-value in the selected table row
    that does not exceed the magnitude of t.
  """
  assert df >= 1.0, 'Degrees of freedom must at least 1.0.'

  # A 1-tuple sorts before any 2-tuple that starts with the same item, so the
  # insertion point of (df + 1,) is the index of the first row whose degrees
  # of freedom are at least df + 1. The row wanted is the one just before it.
  row_index = bisect.bisect_right(_TABLE, (df + 1,)) - 1
  t_values = _TABLE[row_index][1]

  # The insertion point of |t| lies just past every entry that is less than
  # or equal to it. Stepping back by one selects the next-lowest t-value
  # when |t| falls between two entries.
  p_index = bisect.bisect_right(t_values, abs(t)) - 1
  return _TWO_TAIL[p_index]
235