1 /*
2 * Copyright Nick Thompson, 2019
3 * Use, modification and distribution are subject to the
4 * Boost Software License, Version 1.0. (See accompanying file
5 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
9 #define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
10
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <boost/math/statistics/univariate_statistics.hpp>
#include <boost/math/statistics/bivariate_statistics.hpp>
16
17 namespace boost::math::statistics {
18
19
20 template<class RandomAccessContainer>
simple_ordinary_least_squares(RandomAccessContainer const & x,RandomAccessContainer const & y)21 auto simple_ordinary_least_squares(RandomAccessContainer const & x,
22 RandomAccessContainer const & y)
23 {
24 using Real = typename RandomAccessContainer::value_type;
25 if (x.size() <= 1)
26 {
27 throw std::domain_error("At least 2 samples are required to perform a linear regression.");
28 }
29
30 if (x.size() != y.size())
31 {
32 throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
33 }
34 auto [mu_x, mu_y, cov_xy] = boost::math::statistics::means_and_covariance(x, y);
35
36 auto var_x = boost::math::statistics::variance(x);
37
38 if (var_x <= 0) {
39 throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
40 }
41
42
43 Real c1 = cov_xy/var_x;
44 Real c0 = mu_y - c1*mu_x;
45
46 return std::make_pair(c0, c1);
47 }
48
49 template<class RandomAccessContainer>
simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x,RandomAccessContainer const & y)50 auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x,
51 RandomAccessContainer const & y)
52 {
53 using Real = typename RandomAccessContainer::value_type;
54 if (x.size() <= 1)
55 {
56 throw std::domain_error("At least 2 samples are required to perform a linear regression.");
57 }
58
59 if (x.size() != y.size())
60 {
61 throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
62 }
63 auto [mu_x, mu_y, cov_xy] = boost::math::statistics::means_and_covariance(x, y);
64
65 auto var_x = boost::math::statistics::variance(x);
66
67 if (var_x <= 0) {
68 throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
69 }
70
71
72 Real c1 = cov_xy/var_x;
73 Real c0 = mu_y - c1*mu_x;
74
75 Real squared_residuals = 0;
76 Real squared_mean_deviation = 0;
77 for(decltype(y.size()) i = 0; i < y.size(); ++i) {
78 squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
79 Real ei = (c0 + c1*x[i]) - y[i];
80 squared_residuals += ei*ei;
81 }
82
83 Real Rsquared;
84 if (squared_mean_deviation == 0) {
85 // Then y = constant, so the linear regression is perfect.
86 Rsquared = 1;
87 } else {
88 Rsquared = 1 - squared_residuals/squared_mean_deviation;
89 }
90
91 return std::make_tuple(c0, c1, Rsquared);
92 }
93
94 }
95 #endif
96