1 /* 2 * Created by Joachim on 16/04/2019. 3 * Adapted from donated nonius code. 4 * 5 * Distributed under the Boost Software License, Version 1.0. (See accompanying 6 * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 */ 8 9 // Statistical analysis tools 10 11 #ifndef TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED 12 #define TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED 13 14 #include "../catch_clock.hpp" 15 #include "../catch_estimate.hpp" 16 #include "../catch_outlier_classification.hpp" 17 18 #include <algorithm> 19 #include <functional> 20 #include <vector> 21 #include <iterator> 22 #include <numeric> 23 #include <tuple> 24 #include <cmath> 25 #include <utility> 26 #include <cstddef> 27 #include <random> 28 29 namespace Catch { 30 namespace Benchmark { 31 namespace Detail { 32 using sample = std::vector<double>; 33 34 double weighted_average_quantile(int k, int q, std::vector<double>::iterator first, std::vector<double>::iterator last); 35 36 template <typename Iterator> classify_outliers(Iterator first,Iterator last)37 OutlierClassification classify_outliers(Iterator first, Iterator last) { 38 std::vector<double> copy(first, last); 39 40 auto q1 = weighted_average_quantile(1, 4, copy.begin(), copy.end()); 41 auto q3 = weighted_average_quantile(3, 4, copy.begin(), copy.end()); 42 auto iqr = q3 - q1; 43 auto los = q1 - (iqr * 3.); 44 auto lom = q1 - (iqr * 1.5); 45 auto him = q3 + (iqr * 1.5); 46 auto his = q3 + (iqr * 3.); 47 48 OutlierClassification o; 49 for (; first != last; ++first) { 50 auto&& t = *first; 51 if (t < los) ++o.low_severe; 52 else if (t < lom) ++o.low_mild; 53 else if (t > his) ++o.high_severe; 54 else if (t > him) ++o.high_mild; 55 ++o.samples_seen; 56 } 57 return o; 58 } 59 60 template <typename Iterator> mean(Iterator first,Iterator last)61 double mean(Iterator first, Iterator last) { 62 auto count = last - first; 63 double sum = std::accumulate(first, last, 0.); 64 return sum / count; 65 } 66 67 template <typename URng, typename Iterator, typename Estimator> resample(URng & rng,int resamples,Iterator first,Iterator last,Estimator & estimator)68 sample resample(URng& rng, int resamples, Iterator first, Iterator last, Estimator& estimator) { 69 auto n = last - first; 70 std::uniform_int_distribution<decltype(n)> dist(0, n - 1); 71 72 sample out; 73 out.reserve(resamples); 74 std::generate_n(std::back_inserter(out), resamples, [n, first, &estimator, &dist, &rng] { 75 std::vector<double> resampled; 76 resampled.reserve(n); 77 std::generate_n(std::back_inserter(resampled), n, [first, &dist, &rng] { return first[dist(rng)]; }); 78 return estimator(resampled.begin(), resampled.end()); 79 }); 80 std::sort(out.begin(), out.end()); 81 return out; 82 } 83 84 template <typename Estimator, typename Iterator> jackknife(Estimator && estimator,Iterator first,Iterator last)85 sample jackknife(Estimator&& estimator, Iterator first, Iterator last) { 86 auto n = last - first; 87 auto second = std::next(first); 88 sample results; 89 results.reserve(n); 90 91 for (auto it = first; it != last; ++it) { 92 std::iter_swap(it, first); 93 results.push_back(estimator(second, last)); 94 } 95 96 return results; 97 } 98 normal_cdf(double x)99 inline double normal_cdf(double x) { 100 return std::erfc(-x / std::sqrt(2.0)) / 2.0; 101 } 102 103 double erfc_inv(double x); 104 105 double normal_quantile(double p); 106 107 template <typename Iterator, typename Estimator> bootstrap(double confidence_level,Iterator first,Iterator last,sample const & resample,Estimator && estimator)108 Estimate<double> bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) { 109 auto n_samples = last - first; 110 111 double point = estimator(first, last); 112 // Degenerate case with a single sample 113 if (n_samples == 1) return { point, point, point, confidence_level }; 114 115 sample jack = jackknife(estimator, first, last); 116 double jack_mean = mean(jack.begin(), jack.end()); 117 double sum_squares, sum_cubes; 118 std::tie(sum_squares, sum_cubes) = std::accumulate(jack.begin(), jack.end(), std::make_pair(0., 0.), [jack_mean](std::pair<double, double> sqcb, double x) -> std::pair<double, double> { 119 auto d = jack_mean - x; 120 auto d2 = d * d; 121 auto d3 = d2 * d; 122 return { sqcb.first + d2, sqcb.second + d3 }; 123 }); 124 125 double accel = sum_cubes / (6 * std::pow(sum_squares, 1.5)); 126 int n = static_cast<int>(resample.size()); 127 double prob_n = std::count_if(resample.begin(), resample.end(), [point](double x) { return x < point; }) / (double)n; 128 // degenerate case with uniform samples 129 if (prob_n == 0) return { point, point, point, confidence_level }; 130 131 double bias = normal_quantile(prob_n); 132 double z1 = normal_quantile((1. - confidence_level) / 2.); 133 134 auto cumn = [n](double x) -> int { 135 return std::lround(normal_cdf(x) * n); }; 136 auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); }; 137 double b1 = bias + z1; 138 double b2 = bias - z1; 139 double a1 = a(b1); 140 double a2 = a(b2); 141 auto lo = std::max(cumn(a1), 0); 142 auto hi = std::min(cumn(a2), n - 1); 143 144 return { point, resample[lo], resample[hi], confidence_level }; 145 } 146 147 double outlier_variance(Estimate<double> mean, Estimate<double> stddev, int n); 148 149 struct bootstrap_analysis { 150 Estimate<double> mean; 151 Estimate<double> standard_deviation; 152 double outlier_variance; 153 }; 154 155 bootstrap_analysis analyse_samples(double confidence_level, int n_resamples, std::vector<double>::iterator first, std::vector<double>::iterator last); 156 } // namespace Detail 157 } // namespace Benchmark 158 } // namespace Catch 159 160 #endif // TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED 161