1 /* 2 * Copyright (C) 2012 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.math; 16 17 import static com.google.common.base.Preconditions.checkState; 18 import static com.google.common.math.DoubleUtils.ensureNonNegative; 19 import static com.google.common.primitives.Doubles.isFinite; 20 import static java.lang.Double.NaN; 21 import static java.lang.Double.isNaN; 22 23 import com.google.common.annotations.Beta; 24 import com.google.common.annotations.GwtIncompatible; 25 import java.util.Iterator; 26 27 /** 28 * A mutable object which accumulates double values and tracks some basic statistics over all the 29 * values added so far. The values may be added singly or in groups. This class is not thread safe. 30 * 31 * @author Pete Gillin 32 * @author Kevin Bourrillion 33 * @since 20.0 34 */ 35 @Beta 36 @GwtIncompatible 37 @ElementTypesAreNonnullByDefault 38 public final class StatsAccumulator { 39 40 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 41 // methods of this class. 42 private long count = 0; 43 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 44 private double sumOfSquaresOfDeltas = 0.0; 45 private double min = NaN; // any value will do 46 private double max = NaN; // any value will do 47 48 /** Adds the given value to the dataset. */ add(double value)49 public void add(double value) { 50 if (count == 0) { 51 count = 1; 52 mean = value; 53 min = value; 54 max = value; 55 if (!isFinite(value)) { 56 sumOfSquaresOfDeltas = NaN; 57 } 58 } else { 59 count++; 60 if (isFinite(value) && isFinite(mean)) { 61 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 62 double delta = value - mean; 63 mean += delta / count; 64 sumOfSquaresOfDeltas += delta * (value - mean); 65 } else { 66 mean = calculateNewMeanNonFinite(mean, value); 67 sumOfSquaresOfDeltas = NaN; 68 } 69 min = Math.min(min, value); 70 max = Math.max(max, value); 71 } 72 } 73 74 /** 75 * Adds the given values to the dataset. 76 * 77 * @param values a series of values, which will be converted to {@code double} values (this may 78 * cause loss of precision) 79 */ addAll(Iterable<? extends Number> values)80 public void addAll(Iterable<? extends Number> values) { 81 for (Number value : values) { 82 add(value.doubleValue()); 83 } 84 } 85 86 /** 87 * Adds the given values to the dataset. 88 * 89 * @param values a series of values, which will be converted to {@code double} values (this may 90 * cause loss of precision) 91 */ addAll(Iterator<? extends Number> values)92 public void addAll(Iterator<? extends Number> values) { 93 while (values.hasNext()) { 94 add(values.next().doubleValue()); 95 } 96 } 97 98 /** 99 * Adds the given values to the dataset. 100 * 101 * @param values a series of values 102 */ addAll(double... values)103 public void addAll(double... values) { 104 for (double value : values) { 105 add(value); 106 } 107 } 108 109 /** 110 * Adds the given values to the dataset. 111 * 112 * @param values a series of values 113 */ addAll(int... values)114 public void addAll(int... values) { 115 for (int value : values) { 116 add(value); 117 } 118 } 119 120 /** 121 * Adds the given values to the dataset. 122 * 123 * @param values a series of values, which will be converted to {@code double} values (this may 124 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 125 */ addAll(long... values)126 public void addAll(long... values) { 127 for (long value : values) { 128 add(value); 129 } 130 } 131 132 /** 133 * Adds the given statistics to the dataset, as if the individual values used to compute the 134 * statistics had been added directly. 135 */ addAll(Stats values)136 public void addAll(Stats values) { 137 if (values.count() == 0) { 138 return; 139 } 140 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 141 } 142 143 /** 144 * Adds the given statistics to the dataset, as if the individual values used to compute the 145 * statistics had been added directly. 146 * 147 * @since 28.2 148 */ addAll(StatsAccumulator values)149 public void addAll(StatsAccumulator values) { 150 if (values.count() == 0) { 151 return; 152 } 153 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 154 } 155 merge( long otherCount, double otherMean, double otherSumOfSquaresOfDeltas, double otherMin, double otherMax)156 private void merge( 157 long otherCount, 158 double otherMean, 159 double otherSumOfSquaresOfDeltas, 160 double otherMin, 161 double otherMax) { 162 if (count == 0) { 163 count = otherCount; 164 mean = otherMean; 165 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 166 min = otherMin; 167 max = otherMax; 168 } else { 169 count += otherCount; 170 if (isFinite(mean) && isFinite(otherMean)) { 171 // This is a generalized version of the calculation in add(double) above. 172 double delta = otherMean - mean; 173 mean += delta * otherCount / count; 174 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 175 } else { 176 mean = calculateNewMeanNonFinite(mean, otherMean); 177 sumOfSquaresOfDeltas = NaN; 178 } 179 min = Math.min(min, otherMin); 180 max = Math.max(max, otherMax); 181 } 182 } 183 184 /** Returns an immutable snapshot of the current statistics. */ snapshot()185 public Stats snapshot() { 186 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 187 } 188 189 /** Returns the number of values. */ count()190 public long count() { 191 return count; 192 } 193 194 /** 195 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 196 * values. The count must be non-zero. 197 * 198 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 199 * the arithmetic mean of the population. 200 * 201 * <h3>Non-finite values</h3> 202 * 203 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 204 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 205 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 206 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 207 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 208 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 209 * 210 * @throws IllegalStateException if the dataset is empty 211 */ mean()212 public double mean() { 213 checkState(count != 0); 214 return mean; 215 } 216 217 /** 218 * Returns the sum of the values. 219 * 220 * <h3>Non-finite values</h3> 221 * 222 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 223 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 224 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 225 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 226 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 227 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 228 */ sum()229 public final double sum() { 230 return mean * count; 231 } 232 233 /** 234 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 235 * variance</a> of the values. The count must be non-zero. 236 * 237 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 238 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 239 * due to numerical errors. However, it is guaranteed never to return a negative result. 240 * 241 * <h3>Non-finite values</h3> 242 * 243 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 244 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 245 * 246 * @throws IllegalStateException if the dataset is empty 247 */ populationVariance()248 public final double populationVariance() { 249 checkState(count != 0); 250 if (isNaN(sumOfSquaresOfDeltas)) { 251 return NaN; 252 } 253 if (count == 1) { 254 return 0.0; 255 } 256 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 257 } 258 259 /** 260 * Returns the <a 261 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 262 * population standard deviation</a> of the values. The count must be non-zero. 263 * 264 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 265 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 266 * due to numerical errors. However, it is guaranteed never to return a negative result. 267 * 268 * <h3>Non-finite values</h3> 269 * 270 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 271 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 272 * 273 * @throws IllegalStateException if the dataset is empty 274 */ populationStandardDeviation()275 public final double populationStandardDeviation() { 276 return Math.sqrt(populationVariance()); 277 } 278 279 /** 280 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 281 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 282 * unbiased estimator of the population variance of the population. The count must be greater than 283 * one. 284 * 285 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 286 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 287 * 288 * <h3>Non-finite values</h3> 289 * 290 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 291 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 292 * 293 * @throws IllegalStateException if the dataset is empty or contains a single value 294 */ sampleVariance()295 public final double sampleVariance() { 296 checkState(count > 1); 297 if (isNaN(sumOfSquaresOfDeltas)) { 298 return NaN; 299 } 300 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 301 } 302 303 /** 304 * Returns the <a 305 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 306 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 307 * population, this is an estimator of the population standard deviation of the population which 308 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 309 * the distribution). The count must be greater than one. 310 * 311 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 312 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 313 * 314 * <h3>Non-finite values</h3> 315 * 316 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 317 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 318 * 319 * @throws IllegalStateException if the dataset is empty or contains a single value 320 */ sampleStandardDeviation()321 public final double sampleStandardDeviation() { 322 return Math.sqrt(sampleVariance()); 323 } 324 325 /** 326 * Returns the lowest value in the dataset. The count must be non-zero. 327 * 328 * <h3>Non-finite values</h3> 329 * 330 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 331 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 332 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 333 * only then the result is the lowest finite value. If it contains {@link 334 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 335 * 336 * @throws IllegalStateException if the dataset is empty 337 */ min()338 public double min() { 339 checkState(count != 0); 340 return min; 341 } 342 343 /** 344 * Returns the highest value in the dataset. The count must be non-zero. 345 * 346 * <h3>Non-finite values</h3> 347 * 348 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 349 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 350 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 351 * only then the result is the highest finite value. If it contains {@link 352 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 353 * 354 * @throws IllegalStateException if the dataset is empty 355 */ max()356 public double max() { 357 checkState(count != 0); 358 return max; 359 } 360 sumOfSquaresOfDeltas()361 double sumOfSquaresOfDeltas() { 362 return sumOfSquaresOfDeltas; 363 } 364 365 /** 366 * Calculates the new value for the accumulated mean when a value is added, in the case where at 367 * least one of the previous mean and the value is non-finite. 368 */ calculateNewMeanNonFinite(double previousMean, double value)369 static double calculateNewMeanNonFinite(double previousMean, double value) { 370 /* 371 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 372 * the update formula can subtract infinities in cases where the naive formula would add them. 373 * 374 * Consequently: 375 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 376 * value (whether it is NaN or infinity). 377 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 378 * (whether it is NaN or infinity). 379 * 3. If both the previous mean and the new value are non-finite and... 380 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 381 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 382 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 383 */ 384 if (isFinite(previousMean)) { 385 // This is case 1. 386 return value; 387 } else if (isFinite(value) || previousMean == value) { 388 // This is case 2. or 3b. 389 return previousMean; 390 } else { 391 // This is case 3a. or 3c. 392 return NaN; 393 } 394 } 395 } 396