1 /////////////////////////////////////////////////////////////////////////////// 2 // variance.hpp 3 // 4 // Copyright 2005 Daniel Egloff, Eric Niebler. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_VARIANCE_HPP_EAN_28_10_2005 9 #define BOOST_ACCUMULATORS_STATISTICS_VARIANCE_HPP_EAN_28_10_2005 10 11 #include <boost/mpl/placeholders.hpp> 12 #include <boost/accumulators/framework/accumulator_base.hpp> 13 #include <boost/accumulators/framework/extractor.hpp> 14 #include <boost/accumulators/numeric/functional.hpp> 15 #include <boost/accumulators/framework/parameters/sample.hpp> 16 #include <boost/accumulators/framework/depends_on.hpp> 17 #include <boost/accumulators/statistics_fwd.hpp> 18 #include <boost/accumulators/statistics/count.hpp> 19 #include <boost/accumulators/statistics/sum.hpp> 20 #include <boost/accumulators/statistics/mean.hpp> 21 #include <boost/accumulators/statistics/moment.hpp> 22 23 namespace boost { namespace accumulators 24 { 25 26 namespace impl 27 { 28 //! Lazy calculation of variance. 29 /*! 30 Default sample variance implementation based on the second moment \f$ M_n^{(2)} \f$ moment<2>, mean and count. 31 \f[ 32 \sigma_n^2 = M_n^{(2)} - \mu_n^2. 33 \f] 34 where 35 \f[ 36 \mu_n = \frac{1}{n} \sum_{i = 1}^n x_i. 37 \f] 38 is the estimate of the sample mean and \f$n\f$ is the number of samples. 39 */ 40 template<typename Sample, typename MeanFeature> 41 struct lazy_variance_impl 42 : accumulator_base 43 { 44 // for boost::result_of 45 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type; 46 lazy_variance_implboost::accumulators::impl::lazy_variance_impl47 lazy_variance_impl(dont_care) {} 48 49 template<typename Args> resultboost::accumulators::impl::lazy_variance_impl50 result_type result(Args const &args) const 51 { 52 extractor<MeanFeature> mean; 53 result_type tmp = mean(args); 54 return accumulators::moment<2>(args) - tmp * tmp; 55 } 56 57 // serialization is done by accumulators it depends on 58 template<class Archive> serializeboost::accumulators::impl::lazy_variance_impl59 void serialize(Archive & ar, const unsigned int file_version) {} 60 }; 61 62 //! Iterative calculation of variance. 63 /*! 64 Iterative calculation of sample variance \f$\sigma_n^2\f$ according to the formula 65 \f[ 66 \sigma_n^2 = \frac{1}{n} \sum_{i = 1}^n (x_i - \mu_n)^2 = \frac{n-1}{n} \sigma_{n-1}^2 + \frac{1}{n-1}(x_n - \mu_n)^2. 67 \f] 68 where 69 \f[ 70 \mu_n = \frac{1}{n} \sum_{i = 1}^n x_i. 71 \f] 72 is the estimate of the sample mean and \f$n\f$ is the number of samples. 73 74 Note that the sample variance is not defined for \f$n <= 1\f$. 75 76 A simplification can be obtained by the approximate recursion 77 \f[ 78 \sigma_n^2 \approx \frac{n-1}{n} \sigma_{n-1}^2 + \frac{1}{n}(x_n - \mu_n)^2. 79 \f] 80 because the difference 81 \f[ 82 \left(\frac{1}{n-1} - \frac{1}{n}\right)(x_n - \mu_n)^2 = \frac{1}{n(n-1)}(x_n - \mu_n)^2. 83 \f] 84 converges to zero as \f$n \rightarrow \infty\f$. However, for small \f$ n \f$ the difference 85 can be non-negligible. 86 */ 87 template<typename Sample, typename MeanFeature, typename Tag> 88 struct variance_impl 89 : accumulator_base 90 { 91 // for boost::result_of 92 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type; 93 94 template<typename Args> variance_implboost::accumulators::impl::variance_impl95 variance_impl(Args const &args) 96 : variance(numeric::fdiv(args[sample | Sample()], numeric::one<std::size_t>::value)) 97 { 98 } 99 100 template<typename Args> operator ()boost::accumulators::impl::variance_impl101 void operator ()(Args const &args) 102 { 103 std::size_t cnt = count(args); 104 105 if(cnt > 1) 106 { 107 extractor<MeanFeature> mean; 108 result_type tmp = args[parameter::keyword<Tag>::get()] - mean(args); 109 this->variance = 110 numeric::fdiv(this->variance * (cnt - 1), cnt) 111 + numeric::fdiv(tmp * tmp, cnt - 1); 112 } 113 } 114 resultboost::accumulators::impl::variance_impl115 result_type result(dont_care) const 116 { 117 return this->variance; 118 } 119 120 // make this accumulator serializeable 121 template<class Archive> serializeboost::accumulators::impl::variance_impl122 void serialize(Archive & ar, const unsigned int file_version) 123 { 124 ar & variance; 125 } 126 127 private: 128 result_type variance; 129 }; 130 131 } // namespace impl 132 133 /////////////////////////////////////////////////////////////////////////////// 134 // tag::variance 135 // tag::immediate_variance 136 // 137 namespace tag 138 { 139 struct lazy_variance 140 : depends_on<moment<2>, mean> 141 { 142 /// INTERNAL ONLY 143 /// 144 typedef accumulators::impl::lazy_variance_impl<mpl::_1, mean> impl; 145 }; 146 147 struct variance 148 : depends_on<count, immediate_mean> 149 { 150 /// INTERNAL ONLY 151 /// 152 typedef accumulators::impl::variance_impl<mpl::_1, mean, sample> impl; 153 }; 154 } 155 156 /////////////////////////////////////////////////////////////////////////////// 157 // extract::lazy_variance 158 // extract::variance 159 // 160 namespace extract 161 { 162 extractor<tag::lazy_variance> const lazy_variance = {}; 163 extractor<tag::variance> const variance = {}; 164 165 BOOST_ACCUMULATORS_IGNORE_GLOBAL(lazy_variance) 166 BOOST_ACCUMULATORS_IGNORE_GLOBAL(variance) 167 } 168 169 using extract::lazy_variance; 170 using extract::variance; 171 172 // variance(lazy) -> lazy_variance 173 template<> 174 struct as_feature<tag::variance(lazy)> 175 { 176 typedef tag::lazy_variance type; 177 }; 178 179 // variance(immediate) -> variance 180 template<> 181 struct as_feature<tag::variance(immediate)> 182 { 183 typedef tag::variance type; 184 }; 185 186 // for the purposes of feature-based dependency resolution, 187 // immediate_variance provides the same feature as variance 188 template<> 189 struct feature_of<tag::lazy_variance> 190 : feature_of<tag::variance> 191 { 192 }; 193 194 // So that variance can be automatically substituted with 195 // weighted_variance when the weight parameter is non-void. 196 template<> 197 struct as_weighted_feature<tag::variance> 198 { 199 typedef tag::weighted_variance type; 200 }; 201 202 // for the purposes of feature-based dependency resolution, 203 // weighted_variance provides the same feature as variance 204 template<> 205 struct feature_of<tag::weighted_variance> 206 : feature_of<tag::variance> 207 { 208 }; 209 210 // So that immediate_variance can be automatically substituted with 211 // immediate_weighted_variance when the weight parameter is non-void. 212 template<> 213 struct as_weighted_feature<tag::lazy_variance> 214 { 215 typedef tag::lazy_weighted_variance type; 216 }; 217 218 // for the purposes of feature-based dependency resolution, 219 // immediate_weighted_variance provides the same feature as immediate_variance 220 template<> 221 struct feature_of<tag::lazy_weighted_variance> 222 : feature_of<tag::lazy_variance> 223 { 224 }; 225 226 //////////////////////////////////////////////////////////////////////////// 227 //// droppable_accumulator<variance_impl> 228 //// need to specialize droppable lazy variance to cache the result at the 229 //// point the accumulator is dropped. 230 ///// INTERNAL ONLY 231 ///// 232 //template<typename Sample, typename MeanFeature> 233 //struct droppable_accumulator<impl::variance_impl<Sample, MeanFeature> > 234 // : droppable_accumulator_base< 235 // with_cached_result<impl::variance_impl<Sample, MeanFeature> > 236 // > 237 //{ 238 // template<typename Args> 239 // droppable_accumulator(Args const &args) 240 // : droppable_accumulator::base(args) 241 // { 242 // } 243 //}; 244 245 }} // namespace boost::accumulators 246 247 #endif 248