• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.math;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 import static java.lang.Double.NEGATIVE_INFINITY;
19 import static java.lang.Double.NaN;
20 import static java.lang.Double.POSITIVE_INFINITY;
21 import static java.util.Arrays.sort;
22 import static java.util.Collections.unmodifiableMap;
23 
24 import com.google.common.annotations.Beta;
25 import com.google.common.annotations.GwtIncompatible;
26 import com.google.common.primitives.Doubles;
27 import com.google.common.primitives.Ints;
28 import java.math.RoundingMode;
29 import java.util.Collection;
30 import java.util.LinkedHashMap;
31 import java.util.Map;
32 
33 /**
34  * Provides a fluent API for calculating <a
35  * href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>.
36  *
37  * <h3>Examples</h3>
38  *
39  * <p>To compute the median:
40  *
41  * <pre>{@code
42  * double myMedian = median().compute(myDataset);
43  * }</pre>
44  *
45  * where {@link #median()} has been statically imported.
46  *
47  * <p>To compute the 99th percentile:
48  *
49  * <pre>{@code
50  * double myPercentile99 = percentiles().index(99).compute(myDataset);
51  * }</pre>
52  *
53  * where {@link #percentiles()} has been statically imported.
54  *
55  * <p>To compute median and the 90th and 99th percentiles:
56  *
57  * <pre>{@code
58  * Map<Integer, Double> myPercentiles =
59  *     percentiles().indexes(50, 90, 99).compute(myDataset);
60  * }</pre>
61  *
62  * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys
63  * 50, 90, and 99, to their corresponding quantile values.
64  *
65  * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute
66  * arbitrary q-quantiles, use {@link #scale scale(q)}.
67  *
68  * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with
69  * it being arbitrarily reordered, and you want to avoid that copy, you can use {@code
70  * computeInPlace} instead of {@code compute}.
71  *
72  * <h3>Definition and notes on interpolation</h3>
73  *
74  * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if
75  * x is an integer, the result is the value which would appear at index x in the sorted dataset
76  * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average
77  * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and
78  * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7
79  * definition in <a
80  * href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is
81  * described by <a
82  * href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population">
83  * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the
84  * uniform distribution on [0,1]."
85  *
86  * <h3>Handling of non-finite values</h3>
87  *
88  * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are {@link
89  * Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get from
90  * sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or {@link
91  * java.util.Collections#sort(java.util.List) Collections.sort(List&lt;Double&gt;)} and selecting
92  * the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is greater than
93  * any other value and place them at the end of the dataset, even after {@link
94  * Double#POSITIVE_INFINITY POSITIVE_INFINITY}.)
95  *
96  * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link
97  * Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the dataset, as
98  * you would expect.
99  *
100  * <p>If required to do a weighted average between an infinity and a finite value, or between an
101  * infinite value and itself, the infinite value is returned. If required to do a weighted average
102  * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY
103  * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the
104  * dataset contains no finite values).
105  *
106  * <h3>Performance</h3>
107  *
108  * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a
109  * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on
110  * randomly ordered data (the probability decreases faster than exponentially in N), but if you are
111  * passing in unsanitized user data then a malicious user could force it. A light shuffle of the
112  * data using an unpredictable seed should normally be enough to thwart this attack.
113  *
114  * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes
115  * indexes} is generally less than the total time taken to compute each of them separately, and
116  * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th
117  * percentiles together takes about 55% as long as computing them separately.
118  *
119  * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either
120  * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is
121  * independent of N (but depends on the quantiles being computed). When calling {@link
122  * ScaleAndIndex#computeInPlace computeInPlace} (in {@linkplain ScaleAndIndexes#computeInPlace
123  * either form}), only the overhead is required. The number of object allocations is independent of
124  * N in both cases.
125  *
126  * @author Pete Gillin
127  * @since 20.0
128  */
129 @Beta
130 @GwtIncompatible
131 @ElementTypesAreNonnullByDefault
132 public final class Quantiles {
133 
134   /** Specifies the computation of a median (i.e. the 1st 2-quantile). */
median()135   public static ScaleAndIndex median() {
136     return scale(2).index(1);
137   }
138 
139   /** Specifies the computation of quartiles (i.e. 4-quantiles). */
quartiles()140   public static Scale quartiles() {
141     return scale(4);
142   }
143 
144   /** Specifies the computation of percentiles (i.e. 100-quantiles). */
percentiles()145   public static Scale percentiles() {
146     return scale(100);
147   }
148 
149   /**
150    * Specifies the computation of q-quantiles.
151    *
152    * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which
153    *     must be positive
154    */
scale(int scale)155   public static Scale scale(int scale) {
156     return new Scale(scale);
157   }
158 
159   /**
160    * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has
161    * been specified.
162    *
163    * @since 20.0
164    */
165   public static final class Scale {
166 
167     private final int scale;
168 
Scale(int scale)169     private Scale(int scale) {
170       checkArgument(scale > 0, "Quantile scale must be positive");
171       this.scale = scale;
172     }
173 
174     /**
175      * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile.
176      *
177      * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles
178      */
index(int index)179     public ScaleAndIndex index(int index) {
180       return new ScaleAndIndex(scale, index);
181     }
182 
183     /**
184      * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
185      * q-quantile.
186      *
187      * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
188      *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
189      *     set will be snapshotted when this method is called
190      * @throws IllegalArgumentException if {@code indexes} is empty
191      */
indexes(int... indexes)192     public ScaleAndIndexes indexes(int... indexes) {
193       return new ScaleAndIndexes(scale, indexes.clone());
194     }
195 
196     /**
197      * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
198      * q-quantile.
199      *
200      * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
201      *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
202      *     set will be snapshotted when this method is called
203      * @throws IllegalArgumentException if {@code indexes} is empty
204      */
indexes(Collection<Integer> indexes)205     public ScaleAndIndexes indexes(Collection<Integer> indexes) {
206       return new ScaleAndIndexes(scale, Ints.toArray(indexes));
207     }
208   }
209 
210   /**
211    * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the
212    * q and the k in the kth q-quantile) have been specified.
213    *
214    * @since 20.0
215    */
216   public static final class ScaleAndIndex {
217 
218     private final int scale;
219     private final int index;
220 
ScaleAndIndex(int scale, int index)221     private ScaleAndIndex(int scale, int index) {
222       checkIndex(index, scale);
223       this.scale = scale;
224       this.index = index;
225     }
226 
227     /**
228      * Computes the quantile value of the given dataset.
229      *
230      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
231      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
232      *     this call (it is copied instead)
233      * @return the quantile value
234      */
compute(Collection<? extends Number> dataset)235     public double compute(Collection<? extends Number> dataset) {
236       return computeInPlace(Doubles.toArray(dataset));
237     }
238 
239     /**
240      * Computes the quantile value of the given dataset.
241      *
242      * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
243      *     be mutated by this call (it is copied instead)
244      * @return the quantile value
245      */
compute(double... dataset)246     public double compute(double... dataset) {
247       return computeInPlace(dataset.clone());
248     }
249 
250     /**
251      * Computes the quantile value of the given dataset.
252      *
253      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
254      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
255      *     this call (it is copied instead)
256      * @return the quantile value
257      */
compute(long... dataset)258     public double compute(long... dataset) {
259       return computeInPlace(longsToDoubles(dataset));
260     }
261 
262     /**
263      * Computes the quantile value of the given dataset.
264      *
265      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
266      *     cast to doubles, and which will not be mutated by this call (it is copied instead)
267      * @return the quantile value
268      */
compute(int... dataset)269     public double compute(int... dataset) {
270       return computeInPlace(intsToDoubles(dataset));
271     }
272 
273     /**
274      * Computes the quantile value of the given dataset, performing the computation in-place.
275      *
276      * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
277      *     be arbitrarily reordered by this method call
278      * @return the quantile value
279      */
computeInPlace(double... dataset)280     public double computeInPlace(double... dataset) {
281       checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
282       if (containsNaN(dataset)) {
283         return NaN;
284       }
285 
286       // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e.
287       // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value
288       // whose index in the sorted dataset equals the quotient; if there is a remainder, we
289       // interpolate between that and the next value.
290 
291       // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed
292       // as a long, without risk of overflow:
293       long numerator = (long) index * (dataset.length - 1);
294       // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
295       // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get
296       // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow:
297       int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
298       int remainder = (int) (numerator - (long) quotient * scale);
299       selectInPlace(quotient, dataset, 0, dataset.length - 1);
300       if (remainder == 0) {
301         return dataset[quotient];
302       } else {
303         selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1);
304         return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale);
305       }
306     }
307   }
308 
309   /**
310    * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e.
311    * the q and a set of values for the k in the kth q-quantile) have been specified.
312    *
313    * @since 20.0
314    */
315   public static final class ScaleAndIndexes {
316 
317     private final int scale;
318     private final int[] indexes;
319 
ScaleAndIndexes(int scale, int[] indexes)320     private ScaleAndIndexes(int scale, int[] indexes) {
321       for (int index : indexes) {
322         checkIndex(index, scale);
323       }
324       checkArgument(indexes.length > 0, "Indexes must be a non empty array");
325       this.scale = scale;
326       this.indexes = indexes;
327     }
328 
329     /**
330      * Computes the quantile values of the given dataset.
331      *
332      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
333      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
334      *     this call (it is copied instead)
335      * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
336      *     indexes, and the values the corresponding quantile values. When iterating, entries in the
337      *     map are ordered by quantile index in the same order they were passed to the {@code
338      *     indexes} method.
339      */
compute(Collection<? extends Number> dataset)340     public Map<Integer, Double> compute(Collection<? extends Number> dataset) {
341       return computeInPlace(Doubles.toArray(dataset));
342     }
343 
344     /**
345      * Computes the quantile values of the given dataset.
346      *
347      * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
348      *     be mutated by this call (it is copied instead)
349      * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
350      *     indexes, and the values the corresponding quantile values. When iterating, entries in the
351      *     map are ordered by quantile index in the same order they were passed to the {@code
352      *     indexes} method.
353      */
compute(double... dataset)354     public Map<Integer, Double> compute(double... dataset) {
355       return computeInPlace(dataset.clone());
356     }
357 
358     /**
359      * Computes the quantile values of the given dataset.
360      *
361      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
362      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
363      *     this call (it is copied instead)
364      * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
365      *     indexes, and the values the corresponding quantile values. When iterating, entries in the
366      *     map are ordered by quantile index in the same order they were passed to the {@code
367      *     indexes} method.
368      */
compute(long... dataset)369     public Map<Integer, Double> compute(long... dataset) {
370       return computeInPlace(longsToDoubles(dataset));
371     }
372 
373     /**
374      * Computes the quantile values of the given dataset.
375      *
376      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
377      *     cast to doubles, and which will not be mutated by this call (it is copied instead)
378      * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
379      *     indexes, and the values the corresponding quantile values. When iterating, entries in the
380      *     map are ordered by quantile index in the same order they were passed to the {@code
381      *     indexes} method.
382      */
compute(int... dataset)383     public Map<Integer, Double> compute(int... dataset) {
384       return computeInPlace(intsToDoubles(dataset));
385     }
386 
387     /**
388      * Computes the quantile values of the given dataset, performing the computation in-place.
389      *
390      * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
391      *     be arbitrarily reordered by this method call
392      * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
393      *     indexes, and the values the corresponding quantile values. When iterating, entries in the
394      *     map are ordered by quantile index in the same order that the indexes were passed to the
395      *     {@code indexes} method.
396      */
computeInPlace(double... dataset)397     public Map<Integer, Double> computeInPlace(double... dataset) {
398       checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
399       if (containsNaN(dataset)) {
400         Map<Integer, Double> nanMap = new LinkedHashMap<>();
401         for (int index : indexes) {
402           nanMap.put(index, NaN);
403         }
404         return unmodifiableMap(nanMap);
405       }
406 
407       // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e.
408       // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no
409       // remainder, we can just select the value whose index in the sorted dataset equals the
410       // quotient; if there is a remainder, we interpolate between that and the next value.
411 
412       int[] quotients = new int[indexes.length];
413       int[] remainders = new int[indexes.length];
414       // The indexes to select. In the worst case, we'll need one each side of each quantile.
415       int[] requiredSelections = new int[indexes.length * 2];
416       int requiredSelectionsCount = 0;
417       for (int i = 0; i < indexes.length; i++) {
418         // Since index and (dataset.length - 1) are non-negative ints, their product can be
419         // expressed as a long, without risk of overflow:
420         long numerator = (long) indexes[i] * (dataset.length - 1);
421         // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
422         // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to
423         // get a rounded ratio and a remainder which can be expressed as ints, without risk of
424         // overflow:
425         int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
426         int remainder = (int) (numerator - (long) quotient * scale);
427         quotients[i] = quotient;
428         remainders[i] = remainder;
429         requiredSelections[requiredSelectionsCount] = quotient;
430         requiredSelectionsCount++;
431         if (remainder != 0) {
432           requiredSelections[requiredSelectionsCount] = quotient + 1;
433           requiredSelectionsCount++;
434         }
435       }
436       sort(requiredSelections, 0, requiredSelectionsCount);
437       selectAllInPlace(
438           requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1);
439       Map<Integer, Double> ret = new LinkedHashMap<>();
440       for (int i = 0; i < indexes.length; i++) {
441         int quotient = quotients[i];
442         int remainder = remainders[i];
443         if (remainder == 0) {
444           ret.put(indexes[i], dataset[quotient]);
445         } else {
446           ret.put(
447               indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale));
448         }
449       }
450       return unmodifiableMap(ret);
451     }
452   }
453 
454   /** Returns whether any of the values in {@code dataset} are {@code NaN}. */
containsNaN(double... dataset)455   private static boolean containsNaN(double... dataset) {
456     for (double value : dataset) {
457       if (Double.isNaN(value)) {
458         return true;
459       }
460     }
461     return false;
462   }
463 
464   /**
465    * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and
466    * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not
467    * {@code NaN}).
468    */
interpolate(double lower, double upper, double remainder, double scale)469   private static double interpolate(double lower, double upper, double remainder, double scale) {
470     if (lower == NEGATIVE_INFINITY) {
471       if (upper == POSITIVE_INFINITY) {
472         // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY:
473         return NaN;
474       }
475       // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY:
476       return NEGATIVE_INFINITY;
477     }
478     if (upper == POSITIVE_INFINITY) {
479       // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY:
480       return POSITIVE_INFINITY;
481     }
482     return lower + (upper - lower) * remainder / scale;
483   }
484 
checkIndex(int index, int scale)485   private static void checkIndex(int index, int scale) {
486     if (index < 0 || index > scale) {
487       throw new IllegalArgumentException(
488           "Quantile indexes must be between 0 and the scale, which is " + scale);
489     }
490   }
491 
longsToDoubles(long[] longs)492   private static double[] longsToDoubles(long[] longs) {
493     int len = longs.length;
494     double[] doubles = new double[len];
495     for (int i = 0; i < len; i++) {
496       doubles[i] = longs[i];
497     }
498     return doubles;
499   }
500 
intsToDoubles(int[] ints)501   private static double[] intsToDoubles(int[] ints) {
502     int len = ints.length;
503     double[] doubles = new double[len];
504     for (int i = 0; i < len; i++) {
505       doubles[i] = ints[i];
506     }
507     return doubles;
508   }
509 
510   /**
511    * Performs an in-place selection to find the element which would appear at a given index in a
512    * dataset if it were sorted. The following preconditions should hold:
513    *
514    * <ul>
515    *   <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array};
516    *   <li>{@code required} should be in the range [{@code from}, {@code to}];
517    *   <li>all the values with indexes in the range [0, {@code from}) should be less than or equal
518    *       to all the values with indexes in the range [{@code from}, {@code to}];
519    *   <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be
520    *       greater than or equal to all the values with indexes in the range [{@code from}, {@code
521    *       to}].
522    * </ul>
523    *
524    * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such
525    * that all the values with indexes in the range [{@code from}, {@code required}) are less than or
526    * equal to the value with index {@code required}, and all the values with indexes in the range
527    * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at
528    * {@code required} is the value which would appear at that index in the sorted dataset.
529    */
selectInPlace(int required, double[] array, int from, int to)530   private static void selectInPlace(int required, double[] array, int from, int to) {
531     // If we are looking for the least element in the range, we can just do a linear search for it.
532     // (We will hit this whenever we are doing quantile interpolation: our first selection finds
533     // the lower value, our second one finds the upper value by looking for the next least element.)
534     if (required == from) {
535       int min = from;
536       for (int index = from + 1; index <= to; index++) {
537         if (array[min] > array[index]) {
538           min = index;
539         }
540       }
541       if (min != from) {
542         swap(array, min, from);
543       }
544       return;
545     }
546 
547     // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the
548     // required element, as long as it has more than one element.
549     while (to > from) {
550       int partitionPoint = partition(array, from, to);
551       if (partitionPoint >= required) {
552         to = partitionPoint - 1;
553       }
554       if (partitionPoint <= required) {
555         from = partitionPoint + 1;
556       }
557     }
558   }
559 
560   /**
561    * Performs a partition operation on the slice of {@code array} with elements in the range [{@code
562    * from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint between them
563    * as a pivot. Returns the index which the slice is partitioned around, i.e. if it returns {@code
564    * ret} then we know that the values with indexes in [{@code from}, {@code ret}) are less than or
565    * equal to the value at {@code ret} and the values with indexes in ({@code ret}, {@code to}] are
566    * greater than or equal to that.
567    */
partition(double[] array, int from, int to)568   private static int partition(double[] array, int from, int to) {
569     // Select a pivot, and move it to the start of the slice i.e. to index from.
570     movePivotToStartOfSlice(array, from, to);
571     double pivot = array[from];
572 
573     // Move all elements with indexes in (from, to] which are greater than the pivot to the end of
574     // the array. Keep track of where those elements begin.
575     int partitionPoint = to;
576     for (int i = to; i > from; i--) {
577       if (array[i] > pivot) {
578         swap(array, partitionPoint, i);
579         partitionPoint--;
580       }
581     }
582 
583     // We now know that all elements with indexes in (from, partitionPoint] are less than or equal
584     // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than
585     // it. We swap the pivot into partitionPoint and we know the array is partitioned around that.
586     swap(array, from, partitionPoint);
587     return partitionPoint;
588   }
589 
590   /**
591    * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and
592    * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if
593    * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}.
594    * Expects that {@code from} is strictly less than {@code to}.
595    */
movePivotToStartOfSlice(double[] array, int from, int to)596   private static void movePivotToStartOfSlice(double[] array, int from, int to) {
597     int mid = (from + to) >>> 1;
598     // We want to make a swap such that either array[to] <= array[from] <= array[mid], or
599     // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to
600     // (although it's possible that mid == from, if to == from + 1). Note that the postcondition
601     // would be impossible to fulfil if mid == to unless we also have array[from] == array[to].
602     boolean toLessThanMid = (array[to] < array[mid]);
603     boolean midLessThanFrom = (array[mid] < array[from]);
604     boolean toLessThanFrom = (array[to] < array[from]);
605     if (toLessThanMid == midLessThanFrom) {
606       // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to].
607       swap(array, mid, from);
608     } else if (toLessThanMid != toLessThanFrom) {
609       // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from].
610       swap(array, from, to);
611     }
612     // The postcondition now holds. So the median, our chosen pivot, is at from.
613   }
614 
615   /**
616    * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes {@code
617    * allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. These
618    * indexes must be sorted in the array and must all be in the range [{@code from}, {@code to}].
619    */
selectAllInPlace( int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to)620   private static void selectAllInPlace(
621       int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) {
622     // Choose the first selection to do...
623     int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to);
624     int required = allRequired[requiredChosen];
625 
626     // ...do the first selection...
627     selectInPlace(required, array, from, to);
628 
629     // ...then recursively perform the selections in the range below...
630     int requiredBelow = requiredChosen - 1;
631     while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) {
632       requiredBelow--; // skip duplicates of required in the range below
633     }
634     if (requiredBelow >= requiredFrom) {
635       selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1);
636     }
637 
638     // ...and then recursively perform the selections in the range above.
639     int requiredAbove = requiredChosen + 1;
640     while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) {
641       requiredAbove++; // skip duplicates of required in the range above
642     }
643     if (requiredAbove <= requiredTo) {
644       selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to);
645     }
646   }
647 
648   /**
649    * Chooses the next selection to do from the required selections. It is required that the array
650    * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from},
651    * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The
652    * value returned by this method is the {@code i} in that range such that {@code allRequired[i]}
653    * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the
654    * value closest to the center of the range first is the most efficient strategy because it
655    * minimizes the size of the subranges from which the remaining selections must be done.
656    */
chooseNextSelection( int[] allRequired, int requiredFrom, int requiredTo, int from, int to)657   private static int chooseNextSelection(
658       int[] allRequired, int requiredFrom, int requiredTo, int from, int to) {
659     if (requiredFrom == requiredTo) {
660       return requiredFrom; // only one thing to choose, so choose it
661     }
662 
663     // Find the center and round down. The true center is either centerFloor or halfway between
664     // centerFloor and centerFloor + 1.
665     int centerFloor = (from + to) >>> 1;
666 
667     // Do a binary search until we're down to the range of two which encloses centerFloor (unless
668     // all values are lower or higher than centerFloor, in which case we find the two highest or
669     // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not,
670     // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded)
671     // center will be at either low or high.
672     int low = requiredFrom;
673     int high = requiredTo;
674     while (high > low + 1) {
675       int mid = (low + high) >>> 1;
676       if (allRequired[mid] > centerFloor) {
677         high = mid;
678       } else if (allRequired[mid] < centerFloor) {
679         low = mid;
680       } else {
681         return mid; // allRequired[mid] = centerFloor, so we can't get closer than that
682       }
683     }
684 
685     // Now pick the closest of the two candidates. Note that there is no rounding here.
686     if (from + to - allRequired[low] - allRequired[high] > 0) {
687       return high;
688     } else {
689       return low;
690     }
691   }
692 
693   /** Swaps the values at {@code i} and {@code j} in {@code array}. */
swap(double[] array, int i, int j)694   private static void swap(double[] array, int i, int j) {
695     double temp = array[i];
696     array[i] = array[j];
697     array[j] = temp;
698   }
699 }
700