1 /////////////////////////////////////////////////////////////////////////////// 2 // p_square_cumulative_distribution.hpp 3 // 4 // Copyright 2005 Daniel Egloff, Olivier Gygi. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006 9 #define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006 10 11 #include <vector> 12 #include <functional> 13 #include <boost/parameter/keyword.hpp> 14 #include <boost/range.hpp> 15 #include <boost/mpl/placeholders.hpp> 16 #include <boost/accumulators/accumulators_fwd.hpp> 17 #include <boost/accumulators/framework/accumulator_base.hpp> 18 #include <boost/accumulators/framework/extractor.hpp> 19 #include <boost/accumulators/numeric/functional.hpp> 20 #include <boost/accumulators/framework/parameters/sample.hpp> 21 #include <boost/accumulators/statistics_fwd.hpp> 22 #include <boost/accumulators/statistics/count.hpp> 23 #include <boost/serialization/vector.hpp> 24 #include <boost/serialization/utility.hpp> 25 26 namespace boost { namespace accumulators 27 { 28 /////////////////////////////////////////////////////////////////////////////// 29 // num_cells named parameter 30 // 31 BOOST_PARAMETER_NESTED_KEYWORD(tag, p_square_cumulative_distribution_num_cells, num_cells) 32 33 BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_cumulative_distribution_num_cells) 34 35 namespace impl 36 { 37 /////////////////////////////////////////////////////////////////////////////// 38 // p_square_cumulative_distribution_impl 39 // cumulative_distribution calculation (as histogram) 40 /** 41 @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm 42 43 A histogram of the sample cumulative distribution is computed dynamically without storing samples 44 based on the \f$ P^2 \f$ algorithm. The returned histogram has a specifiable amount (num_cells) 45 equiprobable (and not equal-sized) cells. 46 47 For further details, see 48 49 R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and 50 histograms without storing observations, Communications of the ACM, 51 Volume 28 (October), Number 10, 1985, p. 1076-1085. 52 53 @param p_square_cumulative_distribution_num_cells. 54 */ 55 template<typename Sample> 56 struct p_square_cumulative_distribution_impl 57 : accumulator_base 58 { 59 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 60 typedef std::vector<float_type> array_type; 61 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 62 // for boost::result_of 63 typedef iterator_range<typename histogram_type::iterator> result_type; 64 65 template<typename Args> p_square_cumulative_distribution_implboost::accumulators::impl::p_square_cumulative_distribution_impl66 p_square_cumulative_distribution_impl(Args const &args) 67 : num_cells(args[p_square_cumulative_distribution_num_cells]) 68 , heights(num_cells + 1) 69 , actual_positions(num_cells + 1) 70 , desired_positions(num_cells + 1) 71 , positions_increments(num_cells + 1) 72 , histogram(num_cells + 1) 73 , is_dirty(true) 74 { 75 std::size_t b = this->num_cells; 76 77 for (std::size_t i = 0; i < b + 1; ++i) 78 { 79 this->actual_positions[i] = i + 1.; 80 this->desired_positions[i] = i + 1.; 81 this->positions_increments[i] = numeric::fdiv(i, b); 82 } 83 } 84 85 template<typename Args> operator ()boost::accumulators::impl::p_square_cumulative_distribution_impl86 void operator ()(Args const &args) 87 { 88 this->is_dirty = true; 89 90 std::size_t cnt = count(args); 91 std::size_t sample_cell = 1; // k 92 std::size_t b = this->num_cells; 93 94 // accumulate num_cells + 1 first samples 95 if (cnt <= b + 1) 96 { 97 this->heights[cnt - 1] = args[sample]; 98 99 // complete the initialization of heights by sorting 100 if (cnt == b + 1) 101 { 102 std::sort(this->heights.begin(), this->heights.end()); 103 } 104 } 105 else 106 { 107 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values 108 if (args[sample] < this->heights[0]) 109 { 110 this->heights[0] = args[sample]; 111 sample_cell = 1; 112 } 113 else if (this->heights[b] <= args[sample]) 114 { 115 this->heights[b] = args[sample]; 116 sample_cell = b; 117 } 118 else 119 { 120 typename array_type::iterator it; 121 it = std::upper_bound( 122 this->heights.begin() 123 , this->heights.end() 124 , args[sample] 125 ); 126 127 sample_cell = std::distance(this->heights.begin(), it); 128 } 129 130 // increment positions of markers above sample_cell 131 for (std::size_t i = sample_cell; i < b + 1; ++i) 132 { 133 ++this->actual_positions[i]; 134 } 135 136 // update desired position of markers 2 to num_cells + 1 137 // (desired position of first marker is always 1) 138 for (std::size_t i = 1; i < b + 1; ++i) 139 { 140 this->desired_positions[i] += this->positions_increments[i]; 141 } 142 143 // adjust heights of markers 2 to num_cells if necessary 144 for (std::size_t i = 1; i < b; ++i) 145 { 146 // offset to desire position 147 float_type d = this->desired_positions[i] - this->actual_positions[i]; 148 149 // offset to next position 150 float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; 151 152 // offset to previous position 153 float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; 154 155 // height ds 156 float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; 157 float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; 158 159 if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) ) 160 { 161 short sign_d = static_cast<short>(d / std::abs(d)); 162 163 // try adjusting heights[i] using p-squared formula 164 float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm ); 165 166 if ( this->heights[i - 1] < h && h < this->heights[i + 1] ) 167 { 168 this->heights[i] = h; 169 } 170 else 171 { 172 // use linear formula 173 if (d>0) 174 { 175 this->heights[i] += hp; 176 } 177 if (d<0) 178 { 179 this->heights[i] -= hm; 180 } 181 } 182 this->actual_positions[i] += sign_d; 183 } 184 } 185 } 186 } 187 188 template<typename Args> resultboost::accumulators::impl::p_square_cumulative_distribution_impl189 result_type result(Args const &args) const 190 { 191 if (this->is_dirty) 192 { 193 this->is_dirty = false; 194 195 // creates a vector of std::pair where each pair i holds 196 // the values heights[i] (x-axis of histogram) and 197 // actual_positions[i] / cnt (y-axis of histogram) 198 199 std::size_t cnt = count(args); 200 201 for (std::size_t i = 0; i < this->histogram.size(); ++i) 202 { 203 this->histogram[i] = std::make_pair(this->heights[i], numeric::fdiv(this->actual_positions[i], cnt)); 204 } 205 } 206 //return histogram; 207 return make_iterator_range(this->histogram); 208 } 209 210 // make this accumulator serializeable 211 // TODO split to save/load and check on parameters provided in ctor 212 template<class Archive> serializeboost::accumulators::impl::p_square_cumulative_distribution_impl213 void serialize(Archive & ar, const unsigned int file_version) 214 { 215 ar & num_cells; 216 ar & heights; 217 ar & actual_positions; 218 ar & desired_positions; 219 ar & positions_increments; 220 ar & histogram; 221 ar & is_dirty; 222 } 223 224 private: 225 std::size_t num_cells; // number of cells b 226 array_type heights; // q_i 227 array_type actual_positions; // n_i 228 array_type desired_positions; // n'_i 229 array_type positions_increments; // dn'_i 230 mutable histogram_type histogram; // histogram 231 mutable bool is_dirty; 232 }; 233 234 } // namespace detail 235 236 /////////////////////////////////////////////////////////////////////////////// 237 // tag::p_square_cumulative_distribution 238 // 239 namespace tag 240 { 241 struct p_square_cumulative_distribution 242 : depends_on<count> 243 , p_square_cumulative_distribution_num_cells 244 { 245 /// INTERNAL ONLY 246 /// 247 typedef accumulators::impl::p_square_cumulative_distribution_impl<mpl::_1> impl; 248 }; 249 } 250 251 /////////////////////////////////////////////////////////////////////////////// 252 // extract::p_square_cumulative_distribution 253 // 254 namespace extract 255 { 256 extractor<tag::p_square_cumulative_distribution> const p_square_cumulative_distribution = {}; 257 258 BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_cumulative_distribution) 259 } 260 261 using extract::p_square_cumulative_distribution; 262 263 // So that p_square_cumulative_distribution can be automatically substituted with 264 // weighted_p_square_cumulative_distribution when the weight parameter is non-void 265 template<> 266 struct as_weighted_feature<tag::p_square_cumulative_distribution> 267 { 268 typedef tag::weighted_p_square_cumulative_distribution type; 269 }; 270 271 template<> 272 struct feature_of<tag::weighted_p_square_cumulative_distribution> 273 : feature_of<tag::p_square_cumulative_distribution> 274 { 275 }; 276 277 }} // namespace boost::accumulators 278 279 #endif 280