1 /*
2 * Copyright Nick Thompson, 2019
3 * Use, modification and distribution are subject to the
4 * Boost Software License, Version 1.0. (See accompanying file
5 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP
9 #define BOOST_MATH_STATISTICS_RUNS_TEST_HPP
10
11 #include <cmath>
12 #include <algorithm>
13 #include <utility>
14 #include <boost/math/statistics/univariate_statistics.hpp>
15 #include <boost/math/distributions/normal.hpp>
16
17 namespace boost::math::statistics {
18
19 template<class RandomAccessContainer>
runs_above_and_below_threshold(RandomAccessContainer const & v,typename RandomAccessContainer::value_type threshold)20 auto runs_above_and_below_threshold(RandomAccessContainer const & v,
21 typename RandomAccessContainer::value_type threshold)
22 {
23 using Real = typename RandomAccessContainer::value_type;
24 using std::sqrt;
25 using std::abs;
26 if (v.size() <= 1)
27 {
28 throw std::domain_error("At least 2 samples are required to get number of runs.");
29 }
30 typedef boost::math::policies::policy<
31 boost::math::policies::promote_float<false>,
32 boost::math::policies::promote_double<false> >
33 no_promote_policy;
34
35 decltype(v.size()) nabove = 0;
36 decltype(v.size()) nbelow = 0;
37
38 decltype(v.size()) imin = 0;
39
40 // Take care of the case that v[0] == threshold:
41 while (imin < v.size() && v[imin] == threshold) {
42 ++imin;
43 }
44
45 // Take care of the constant vector case:
46 if (imin == v.size()) {
47 return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
48 }
49
50 bool run_up = (v[imin] > threshold);
51 if (run_up) {
52 ++nabove;
53 } else {
54 ++nbelow;
55 }
56 decltype(v.size()) runs = 1;
57 for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) {
58 if (v[i] == threshold) {
59 // skip values precisely equal to threshold (following R's randtests package)
60 continue;
61 }
62 bool above = (v[i] > threshold);
63 if (above) {
64 ++nabove;
65 } else {
66 ++nbelow;
67 }
68 if (run_up == above) {
69 continue;
70 }
71 else {
72 run_up = above;
73 runs++;
74 }
75 }
76
77 // If you make n an int, the subtraction is gonna be bad in the variance:
78 Real n = nabove + nbelow;
79
80 Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n);
81 Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1));
82
83 // Bizarre, pathological limits:
84 if (variance == 0)
85 {
86 if (runs == expected_runs)
87 {
88 Real statistic = 0;
89 Real pvalue = 1;
90 return std::make_pair(statistic, pvalue);
91 }
92 else
93 {
94 return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
95 }
96 }
97
98 Real sd = sqrt(variance);
99 Real statistic = (runs - expected_runs)/sd;
100
101 auto normal = boost::math::normal_distribution<Real, no_promote_policy>(0,1);
102 Real pvalue = 2*boost::math::cdf(normal, -abs(statistic));
103 return std::make_pair(statistic, pvalue);
104 }
105
106 template<class RandomAccessContainer>
runs_above_and_below_median(RandomAccessContainer const & v)107 auto runs_above_and_below_median(RandomAccessContainer const & v)
108 {
109 using Real = typename RandomAccessContainer::value_type;
110 using std::log;
111 using std::sqrt;
112
113 // We have to memcpy v because the median does a partial sort,
114 // and that would be catastrophic for the runs test.
115 auto w = v;
116 Real median = boost::math::statistics::median(w);
117 return runs_above_and_below_threshold(v, median);
118 }
119
120 }
121 #endif
122