• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright Nick Thompson, 2019
3  * Use, modification and distribution are subject to the
4  * Boost Software License, Version 1.0. (See accompanying file
5  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  */
7 
8 #ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
9 #define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
10 
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>
#include <boost/math/statistics/univariate_statistics.hpp>
#include <boost/math/statistics/bivariate_statistics.hpp>
16 
17 namespace boost::math::statistics {
18 
19 
20 template<class RandomAccessContainer>
simple_ordinary_least_squares(RandomAccessContainer const & x,RandomAccessContainer const & y)21 auto simple_ordinary_least_squares(RandomAccessContainer const & x,
22                                    RandomAccessContainer const & y)
23 {
24     using Real = typename RandomAccessContainer::value_type;
25     if (x.size() <= 1)
26     {
27         throw std::domain_error("At least 2 samples are required to perform a linear regression.");
28     }
29 
30     if (x.size() != y.size())
31     {
32         throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
33     }
34     auto [mu_x, mu_y, cov_xy] = boost::math::statistics::means_and_covariance(x, y);
35 
36     auto var_x = boost::math::statistics::variance(x);
37 
38     if (var_x <= 0) {
39         throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
40     }
41 
42 
43     Real c1 = cov_xy/var_x;
44     Real c0 = mu_y - c1*mu_x;
45 
46     return std::make_pair(c0, c1);
47 }
48 
49 template<class RandomAccessContainer>
simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x,RandomAccessContainer const & y)50 auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x,
51                                    RandomAccessContainer const & y)
52 {
53     using Real = typename RandomAccessContainer::value_type;
54     if (x.size() <= 1)
55     {
56         throw std::domain_error("At least 2 samples are required to perform a linear regression.");
57     }
58 
59     if (x.size() != y.size())
60     {
61         throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
62     }
63     auto [mu_x, mu_y, cov_xy] = boost::math::statistics::means_and_covariance(x, y);
64 
65     auto var_x = boost::math::statistics::variance(x);
66 
67     if (var_x <= 0) {
68         throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
69     }
70 
71 
72     Real c1 = cov_xy/var_x;
73     Real c0 = mu_y - c1*mu_x;
74 
75     Real squared_residuals = 0;
76     Real squared_mean_deviation = 0;
77     for(decltype(y.size()) i = 0; i < y.size(); ++i) {
78         squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
79         Real ei = (c0 + c1*x[i]) - y[i];
80         squared_residuals += ei*ei;
81     }
82 
83     Real Rsquared;
84     if (squared_mean_deviation == 0) {
85         // Then y = constant, so the linear regression is perfect.
86         Rsquared = 1;
87     } else {
88         Rsquared = 1 - squared_residuals/squared_mean_deviation;
89     }
90 
91     return std::make_tuple(c0, c1, Rsquared);
92 }
93 
94 }
95 #endif
96