• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.math;
16 
17 import static com.google.common.base.Preconditions.checkState;
18 import static com.google.common.math.DoubleUtils.ensureNonNegative;
19 import static com.google.common.primitives.Doubles.isFinite;
20 import static java.lang.Double.NaN;
21 import static java.lang.Double.isNaN;
22 
23 import com.google.common.annotations.Beta;
24 import com.google.common.annotations.GwtIncompatible;
25 import java.util.Iterator;
26 
27 /**
28  * A mutable object which accumulates double values and tracks some basic statistics over all the
29  * values added so far. The values may be added singly or in groups. This class is not thread safe.
30  *
31  * @author Pete Gillin
32  * @author Kevin Bourrillion
33  * @since 20.0
34  */
35 @Beta
36 @GwtIncompatible
37 @ElementTypesAreNonnullByDefault
38 public final class StatsAccumulator {
39 
40   // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
41   // methods of this class.
42   private long count = 0;
43   private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
44   private double sumOfSquaresOfDeltas = 0.0;
45   private double min = NaN; // any value will do
46   private double max = NaN; // any value will do
47 
48   /** Adds the given value to the dataset. */
add(double value)49   public void add(double value) {
50     if (count == 0) {
51       count = 1;
52       mean = value;
53       min = value;
54       max = value;
55       if (!isFinite(value)) {
56         sumOfSquaresOfDeltas = NaN;
57       }
58     } else {
59       count++;
60       if (isFinite(value) && isFinite(mean)) {
61         // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
62         double delta = value - mean;
63         mean += delta / count;
64         sumOfSquaresOfDeltas += delta * (value - mean);
65       } else {
66         mean = calculateNewMeanNonFinite(mean, value);
67         sumOfSquaresOfDeltas = NaN;
68       }
69       min = Math.min(min, value);
70       max = Math.max(max, value);
71     }
72   }
73 
74   /**
75    * Adds the given values to the dataset.
76    *
77    * @param values a series of values, which will be converted to {@code double} values (this may
78    *     cause loss of precision)
79    */
addAll(Iterable<? extends Number> values)80   public void addAll(Iterable<? extends Number> values) {
81     for (Number value : values) {
82       add(value.doubleValue());
83     }
84   }
85 
86   /**
87    * Adds the given values to the dataset.
88    *
89    * @param values a series of values, which will be converted to {@code double} values (this may
90    *     cause loss of precision)
91    */
addAll(Iterator<? extends Number> values)92   public void addAll(Iterator<? extends Number> values) {
93     while (values.hasNext()) {
94       add(values.next().doubleValue());
95     }
96   }
97 
98   /**
99    * Adds the given values to the dataset.
100    *
101    * @param values a series of values
102    */
addAll(double... values)103   public void addAll(double... values) {
104     for (double value : values) {
105       add(value);
106     }
107   }
108 
109   /**
110    * Adds the given values to the dataset.
111    *
112    * @param values a series of values
113    */
addAll(int... values)114   public void addAll(int... values) {
115     for (int value : values) {
116       add(value);
117     }
118   }
119 
120   /**
121    * Adds the given values to the dataset.
122    *
123    * @param values a series of values, which will be converted to {@code double} values (this may
124    *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
125    */
addAll(long... values)126   public void addAll(long... values) {
127     for (long value : values) {
128       add(value);
129     }
130   }
131 
132   /**
133    * Adds the given statistics to the dataset, as if the individual values used to compute the
134    * statistics had been added directly.
135    */
addAll(Stats values)136   public void addAll(Stats values) {
137     if (values.count() == 0) {
138       return;
139     }
140     merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
141   }
142 
143   /**
144    * Adds the given statistics to the dataset, as if the individual values used to compute the
145    * statistics had been added directly.
146    *
147    * @since 28.2
148    */
addAll(StatsAccumulator values)149   public void addAll(StatsAccumulator values) {
150     if (values.count() == 0) {
151       return;
152     }
153     merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
154   }
155 
merge( long otherCount, double otherMean, double otherSumOfSquaresOfDeltas, double otherMin, double otherMax)156   private void merge(
157       long otherCount,
158       double otherMean,
159       double otherSumOfSquaresOfDeltas,
160       double otherMin,
161       double otherMax) {
162     if (count == 0) {
163       count = otherCount;
164       mean = otherMean;
165       sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
166       min = otherMin;
167       max = otherMax;
168     } else {
169       count += otherCount;
170       if (isFinite(mean) && isFinite(otherMean)) {
171         // This is a generalized version of the calculation in add(double) above.
172         double delta = otherMean - mean;
173         mean += delta * otherCount / count;
174         sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
175       } else {
176         mean = calculateNewMeanNonFinite(mean, otherMean);
177         sumOfSquaresOfDeltas = NaN;
178       }
179       min = Math.min(min, otherMin);
180       max = Math.max(max, otherMax);
181     }
182   }
183 
184   /** Returns an immutable snapshot of the current statistics. */
snapshot()185   public Stats snapshot() {
186     return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
187   }
188 
189   /** Returns the number of values. */
count()190   public long count() {
191     return count;
192   }
193 
194   /**
195    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
196    * values. The count must be non-zero.
197    *
198    * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
199    * the arithmetic mean of the population.
200    *
201    * <h3>Non-finite values</h3>
202    *
203    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
204    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
205    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
206    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
207    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
208    * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
209    *
210    * @throws IllegalStateException if the dataset is empty
211    */
mean()212   public double mean() {
213     checkState(count != 0);
214     return mean;
215   }
216 
217   /**
218    * Returns the sum of the values.
219    *
220    * <h3>Non-finite values</h3>
221    *
222    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
223    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
224    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
225    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
226    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
227    * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
228    */
sum()229   public final double sum() {
230     return mean * count;
231   }
232 
233   /**
234    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
235    * variance</a> of the values. The count must be non-zero.
236    *
237    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
238    * is not guaranteed to return zero when the dataset consists of the same value multiple times,
239    * due to numerical errors. However, it is guaranteed never to return a negative result.
240    *
241    * <h3>Non-finite values</h3>
242    *
243    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
244    * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
245    *
246    * @throws IllegalStateException if the dataset is empty
247    */
populationVariance()248   public final double populationVariance() {
249     checkState(count != 0);
250     if (isNaN(sumOfSquaresOfDeltas)) {
251       return NaN;
252     }
253     if (count == 1) {
254       return 0.0;
255     }
256     return ensureNonNegative(sumOfSquaresOfDeltas) / count;
257   }
258 
259   /**
260    * Returns the <a
261    * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
262    * population standard deviation</a> of the values. The count must be non-zero.
263    *
264    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
265    * is not guaranteed to return zero when the dataset consists of the same value multiple times,
266    * due to numerical errors. However, it is guaranteed never to return a negative result.
267    *
268    * <h3>Non-finite values</h3>
269    *
270    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
271    * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
272    *
273    * @throws IllegalStateException if the dataset is empty
274    */
populationStandardDeviation()275   public final double populationStandardDeviation() {
276     return Math.sqrt(populationVariance());
277   }
278 
279   /**
280    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
281    * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
282    * unbiased estimator of the population variance of the population. The count must be greater than
283    * one.
284    *
285    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
286    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
287    *
288    * <h3>Non-finite values</h3>
289    *
290    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
291    * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
292    *
293    * @throws IllegalStateException if the dataset is empty or contains a single value
294    */
sampleVariance()295   public final double sampleVariance() {
296     checkState(count > 1);
297     if (isNaN(sumOfSquaresOfDeltas)) {
298       return NaN;
299     }
300     return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
301   }
302 
303   /**
304    * Returns the <a
305    * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
306    * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
307    * population, this is an estimator of the population standard deviation of the population which
308    * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
309    * the distribution). The count must be greater than one.
310    *
311    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
312    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
313    *
314    * <h3>Non-finite values</h3>
315    *
316    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
317    * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
318    *
319    * @throws IllegalStateException if the dataset is empty or contains a single value
320    */
sampleStandardDeviation()321   public final double sampleStandardDeviation() {
322     return Math.sqrt(sampleVariance());
323   }
324 
325   /**
326    * Returns the lowest value in the dataset. The count must be non-zero.
327    *
328    * <h3>Non-finite values</h3>
329    *
330    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
331    * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
332    * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
333    * only then the result is the lowest finite value. If it contains {@link
334    * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
335    *
336    * @throws IllegalStateException if the dataset is empty
337    */
min()338   public double min() {
339     checkState(count != 0);
340     return min;
341   }
342 
343   /**
344    * Returns the highest value in the dataset. The count must be non-zero.
345    *
346    * <h3>Non-finite values</h3>
347    *
348    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
349    * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
350    * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
351    * only then the result is the highest finite value. If it contains {@link
352    * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
353    *
354    * @throws IllegalStateException if the dataset is empty
355    */
max()356   public double max() {
357     checkState(count != 0);
358     return max;
359   }
360 
sumOfSquaresOfDeltas()361   double sumOfSquaresOfDeltas() {
362     return sumOfSquaresOfDeltas;
363   }
364 
365   /**
366    * Calculates the new value for the accumulated mean when a value is added, in the case where at
367    * least one of the previous mean and the value is non-finite.
368    */
calculateNewMeanNonFinite(double previousMean, double value)369   static double calculateNewMeanNonFinite(double previousMean, double value) {
370     /*
371      * Desired behaviour is to match the results of applying the naive mean formula. In particular,
372      * the update formula can subtract infinities in cases where the naive formula would add them.
373      *
374      * Consequently:
375      * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
376      *    value (whether it is NaN or infinity).
377      * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
378      *    (whether it is NaN or infinity).
379      * 3. If both the previous mean and the new value are non-finite and...
380      * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
381      * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
382      * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
383      */
384     if (isFinite(previousMean)) {
385       // This is case 1.
386       return value;
387     } else if (isFinite(value) || previousMean == value) {
388       // This is case 2. or 3b.
389       return previousMean;
390     } else {
391       // This is case 3a. or 3c.
392       return NaN;
393     }
394   }
395 }
396