1 // (C) Copyright Nick Thompson 2018.
2 // Use, modification and distribution are subject to the
3 // Boost Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 #ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
7 #define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
8
9 #include <algorithm>
10 #include <iterator>
11 #include <tuple>
12 #include <boost/assert.hpp>
13 #include <boost/config/header_deprecated.hpp>
14
15 BOOST_HEADER_DEPRECATED("<boost/math/statistics/univariate_statistics.hpp>");
16
17 namespace boost::math::tools {
18
19 template<class ForwardIterator>
mean(ForwardIterator first,ForwardIterator last)20 auto mean(ForwardIterator first, ForwardIterator last)
21 {
22 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
23 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
24 if constexpr (std::is_integral<Real>::value)
25 {
26 double mu = 0;
27 double i = 1;
28 for(auto it = first; it != last; ++it) {
29 mu = mu + (*it - mu)/i;
30 i += 1;
31 }
32 return mu;
33 }
34 else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
35 {
36 size_t elements = std::distance(first, last);
37 Real mu0 = 0;
38 Real mu1 = 0;
39 Real mu2 = 0;
40 Real mu3 = 0;
41 Real i = 1;
42 auto end = last - (elements % 4);
43 for(auto it = first; it != end; it += 4) {
44 Real inv = Real(1)/i;
45 Real tmp0 = (*it - mu0);
46 Real tmp1 = (*(it+1) - mu1);
47 Real tmp2 = (*(it+2) - mu2);
48 Real tmp3 = (*(it+3) - mu3);
49 // please generate a vectorized fma here
50 mu0 += tmp0*inv;
51 mu1 += tmp1*inv;
52 mu2 += tmp2*inv;
53 mu3 += tmp3*inv;
54 i += 1;
55 }
56 Real num1 = Real(elements - (elements %4))/Real(4);
57 Real num2 = num1 + Real(elements % 4);
58
59 for (auto it = end; it != last; ++it)
60 {
61 mu3 += (*it-mu3)/i;
62 i += 1;
63 }
64
65 return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
66 }
67 else
68 {
69 auto it = first;
70 Real mu = *it;
71 Real i = 2;
72 while(++it != last)
73 {
74 mu += (*it - mu)/i;
75 i += 1;
76 }
77 return mu;
78 }
79 }
80
81 template<class Container>
mean(Container const & v)82 inline auto mean(Container const & v)
83 {
84 return mean(v.cbegin(), v.cend());
85 }
86
87 template<class ForwardIterator>
variance(ForwardIterator first,ForwardIterator last)88 auto variance(ForwardIterator first, ForwardIterator last)
89 {
90 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
91 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
92 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
93 if constexpr (std::is_integral<Real>::value)
94 {
95 double M = *first;
96 double Q = 0;
97 double k = 2;
98 for (auto it = std::next(first); it != last; ++it)
99 {
100 double tmp = *it - M;
101 Q = Q + ((k-1)*tmp*tmp)/k;
102 M = M + tmp/k;
103 k += 1;
104 }
105 return Q/(k-1);
106 }
107 else
108 {
109 Real M = *first;
110 Real Q = 0;
111 Real k = 2;
112 for (auto it = std::next(first); it != last; ++it)
113 {
114 Real tmp = (*it - M)/k;
115 Q += k*(k-1)*tmp*tmp;
116 M += tmp;
117 k += 1;
118 }
119 return Q/(k-1);
120 }
121 }
122
123 template<class Container>
variance(Container const & v)124 inline auto variance(Container const & v)
125 {
126 return variance(v.cbegin(), v.cend());
127 }
128
129 template<class ForwardIterator>
sample_variance(ForwardIterator first,ForwardIterator last)130 auto sample_variance(ForwardIterator first, ForwardIterator last)
131 {
132 size_t n = std::distance(first, last);
133 BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
134 return n*variance(first, last)/(n-1);
135 }
136
137 template<class Container>
sample_variance(Container const & v)138 inline auto sample_variance(Container const & v)
139 {
140 return sample_variance(v.cbegin(), v.cend());
141 }
142
143
144 // Follows equation 1.5 of:
145 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
146 template<class ForwardIterator>
skewness(ForwardIterator first,ForwardIterator last)147 auto skewness(ForwardIterator first, ForwardIterator last)
148 {
149 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
150 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
151 if constexpr (std::is_integral<Real>::value)
152 {
153 double M1 = *first;
154 double M2 = 0;
155 double M3 = 0;
156 double n = 2;
157 for (auto it = std::next(first); it != last; ++it)
158 {
159 double delta21 = *it - M1;
160 double tmp = delta21/n;
161 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
162 M2 = M2 + tmp*(n-1)*delta21;
163 M1 = M1 + tmp;
164 n += 1;
165 }
166
167 double var = M2/(n-1);
168 if (var == 0)
169 {
170 // The limit is technically undefined, but the interpretation here is clear:
171 // A constant dataset has no skewness.
172 return double(0);
173 }
174 double skew = M3/(M2*sqrt(var));
175 return skew;
176 }
177 else
178 {
179 Real M1 = *first;
180 Real M2 = 0;
181 Real M3 = 0;
182 Real n = 2;
183 for (auto it = std::next(first); it != last; ++it)
184 {
185 Real delta21 = *it - M1;
186 Real tmp = delta21/n;
187 M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
188 M2 += tmp*(n-1)*delta21;
189 M1 += tmp;
190 n += 1;
191 }
192
193 Real var = M2/(n-1);
194 if (var == 0)
195 {
196 // The limit is technically undefined, but the interpretation here is clear:
197 // A constant dataset has no skewness.
198 return Real(0);
199 }
200 Real skew = M3/(M2*sqrt(var));
201 return skew;
202 }
203 }
204
205 template<class Container>
skewness(Container const & v)206 inline auto skewness(Container const & v)
207 {
208 return skewness(v.cbegin(), v.cend());
209 }
210
211 // Follows equation 1.5/1.6 of:
212 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
213 template<class ForwardIterator>
first_four_moments(ForwardIterator first,ForwardIterator last)214 auto first_four_moments(ForwardIterator first, ForwardIterator last)
215 {
216 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
217 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
218 if constexpr (std::is_integral<Real>::value)
219 {
220 double M1 = *first;
221 double M2 = 0;
222 double M3 = 0;
223 double M4 = 0;
224 double n = 2;
225 for (auto it = std::next(first); it != last; ++it)
226 {
227 double delta21 = *it - M1;
228 double tmp = delta21/n;
229 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
230 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
231 M2 = M2 + tmp*(n-1)*delta21;
232 M1 = M1 + tmp;
233 n += 1;
234 }
235
236 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
237 }
238 else
239 {
240 Real M1 = *first;
241 Real M2 = 0;
242 Real M3 = 0;
243 Real M4 = 0;
244 Real n = 2;
245 for (auto it = std::next(first); it != last; ++it)
246 {
247 Real delta21 = *it - M1;
248 Real tmp = delta21/n;
249 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
250 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
251 M2 = M2 + tmp*(n-1)*delta21;
252 M1 = M1 + tmp;
253 n += 1;
254 }
255
256 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
257 }
258 }
259
260 template<class Container>
first_four_moments(Container const & v)261 inline auto first_four_moments(Container const & v)
262 {
263 return first_four_moments(v.cbegin(), v.cend());
264 }
265
266
267 // Follows equation 1.6 of:
268 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
269 template<class ForwardIterator>
kurtosis(ForwardIterator first,ForwardIterator last)270 auto kurtosis(ForwardIterator first, ForwardIterator last)
271 {
272 auto [M1, M2, M3, M4] = first_four_moments(first, last);
273 if (M2 == 0)
274 {
275 return M2;
276 }
277 return M4/(M2*M2);
278 }
279
280 template<class Container>
kurtosis(Container const & v)281 inline auto kurtosis(Container const & v)
282 {
283 return kurtosis(v.cbegin(), v.cend());
284 }
285
286 template<class ForwardIterator>
excess_kurtosis(ForwardIterator first,ForwardIterator last)287 auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
288 {
289 return kurtosis(first, last) - 3;
290 }
291
292 template<class Container>
excess_kurtosis(Container const & v)293 inline auto excess_kurtosis(Container const & v)
294 {
295 return excess_kurtosis(v.cbegin(), v.cend());
296 }
297
298
299 template<class RandomAccessIterator>
median(RandomAccessIterator first,RandomAccessIterator last)300 auto median(RandomAccessIterator first, RandomAccessIterator last)
301 {
302 size_t num_elems = std::distance(first, last);
303 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
304 if (num_elems & 1)
305 {
306 auto middle = first + (num_elems - 1)/2;
307 std::nth_element(first, middle, last);
308 return *middle;
309 }
310 else
311 {
312 auto middle = first + num_elems/2 - 1;
313 std::nth_element(first, middle, last);
314 std::nth_element(middle, middle+1, last);
315 return (*middle + *(middle+1))/2;
316 }
317 }
318
319
320 template<class RandomAccessContainer>
median(RandomAccessContainer & v)321 inline auto median(RandomAccessContainer & v)
322 {
323 return median(v.begin(), v.end());
324 }
325
326 template<class RandomAccessIterator>
gini_coefficient(RandomAccessIterator first,RandomAccessIterator last)327 auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
328 {
329 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
330 BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
331
332 std::sort(first, last);
333 if constexpr (std::is_integral<Real>::value)
334 {
335 double i = 1;
336 double num = 0;
337 double denom = 0;
338 for (auto it = first; it != last; ++it)
339 {
340 num += *it*i;
341 denom += *it;
342 ++i;
343 }
344
345 // If the l1 norm is zero, all elements are zero, so every element is the same.
346 if (denom == 0)
347 {
348 return double(0);
349 }
350
351 return ((2*num)/denom - i)/(i-1);
352 }
353 else
354 {
355 Real i = 1;
356 Real num = 0;
357 Real denom = 0;
358 for (auto it = first; it != last; ++it)
359 {
360 num += *it*i;
361 denom += *it;
362 ++i;
363 }
364
365 // If the l1 norm is zero, all elements are zero, so every element is the same.
366 if (denom == 0)
367 {
368 return Real(0);
369 }
370
371 return ((2*num)/denom - i)/(i-1);
372 }
373 }
374
375 template<class RandomAccessContainer>
gini_coefficient(RandomAccessContainer & v)376 inline auto gini_coefficient(RandomAccessContainer & v)
377 {
378 return gini_coefficient(v.begin(), v.end());
379 }
380
381 template<class RandomAccessIterator>
sample_gini_coefficient(RandomAccessIterator first,RandomAccessIterator last)382 inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
383 {
384 size_t n = std::distance(first, last);
385 return n*gini_coefficient(first, last)/(n-1);
386 }
387
388 template<class RandomAccessContainer>
sample_gini_coefficient(RandomAccessContainer & v)389 inline auto sample_gini_coefficient(RandomAccessContainer & v)
390 {
391 return sample_gini_coefficient(v.begin(), v.end());
392 }
393
394 template<class RandomAccessIterator>
median_absolute_deviation(RandomAccessIterator first,RandomAccessIterator last,typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN ())395 auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
396 {
397 using std::abs;
398 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
399 using std::isnan;
400 if (isnan(center))
401 {
402 center = boost::math::tools::median(first, last);
403 }
404 size_t num_elems = std::distance(first, last);
405 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
406 auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);};
407 if (num_elems & 1)
408 {
409 auto middle = first + (num_elems - 1)/2;
410 std::nth_element(first, middle, last, comparator);
411 return abs(*middle);
412 }
413 else
414 {
415 auto middle = first + num_elems/2 - 1;
416 std::nth_element(first, middle, last, comparator);
417 std::nth_element(middle, middle+1, last, comparator);
418 return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
419 }
420 }
421
422 template<class RandomAccessContainer>
median_absolute_deviation(RandomAccessContainer & v,typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN ())423 inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
424 {
425 return median_absolute_deviation(v.begin(), v.end(), center);
426 }
427
428 }
429 #endif
430