• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  (C) Copyright Nick Thompson 2018.
2 //  Use, modification and distribution are subject to the
3 //  Boost Software License, Version 1.0. (See accompanying file
4 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
7 #define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
8 
9 #include <iterator>
10 #include <tuple>
11 #include <boost/assert.hpp>
12 #include <boost/config/header_deprecated.hpp>
13 
14 BOOST_HEADER_DEPRECATED("<boost/math/statistics/bivariate_statistics.hpp>");
15 
16 namespace boost{ namespace math{ namespace tools {
17 
18 template<class Container>
means_and_covariance(Container const & u,Container const & v)19 auto means_and_covariance(Container const & u, Container const & v)
20 {
21     using Real = typename Container::value_type;
22     using std::size;
23     BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
24     BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
25 
26     // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
27     Real cov = 0;
28     Real mu_u = u[0];
29     Real mu_v = v[0];
30 
31     for(size_t i = 1; i < size(u); ++i)
32     {
33         Real u_tmp = (u[i] - mu_u)/(i+1);
34         Real v_tmp = v[i] - mu_v;
35         cov += i*u_tmp*v_tmp;
36         mu_u = mu_u + u_tmp;
37         mu_v = mu_v + v_tmp/(i+1);
38     }
39 
40     return std::make_tuple(mu_u, mu_v, cov/size(u));
41 }
42 
43 template<class Container>
covariance(Container const & u,Container const & v)44 auto covariance(Container const & u, Container const & v)
45 {
46     auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
47     return cov;
48 }
49 
50 template<class Container>
correlation_coefficient(Container const & u,Container const & v)51 auto correlation_coefficient(Container const & u, Container const & v)
52 {
53     using Real = typename Container::value_type;
54     using std::size;
55     BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
56     BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
57 
58     Real cov = 0;
59     Real mu_u = u[0];
60     Real mu_v = v[0];
61     Real Qu = 0;
62     Real Qv = 0;
63 
64     for(size_t i = 1; i < size(u); ++i)
65     {
66         Real u_tmp = u[i] - mu_u;
67         Real v_tmp = v[i] - mu_v;
68         Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
69         Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
70         cov += i*u_tmp*v_tmp/(i+1);
71         mu_u = mu_u + u_tmp/(i+1);
72         mu_v = mu_v + v_tmp/(i+1);
73     }
74 
75     // If both datasets are constant, then they are perfectly correlated.
76     if (Qu == 0 && Qv == 0)
77     {
78         return Real(1);
79     }
80     // If one dataset is constant and the other isn't, then they have no correlation:
81     if (Qu == 0 || Qv == 0)
82     {
83         return Real(0);
84     }
85 
86     // Make sure rho in [-1, 1], even in the presence of numerical noise.
87     Real rho = cov/sqrt(Qu*Qv);
88     if (rho > 1) {
89         rho = 1;
90     }
91     if (rho < -1) {
92         rho = -1;
93     }
94     return rho;
95 }
96 
97 }}}
98 #endif
99