1 /////////////////////////////////////////////////////////////////////////////// 2 // extended_p_square.hpp 3 // 4 // Copyright 2005 Daniel Egloff. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006 9 #define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006 10 11 #include <vector> 12 #include <functional> 13 #include <boost/range/begin.hpp> 14 #include <boost/range/end.hpp> 15 #include <boost/range/iterator_range.hpp> 16 #include <boost/iterator/transform_iterator.hpp> 17 #include <boost/iterator/counting_iterator.hpp> 18 #include <boost/iterator/permutation_iterator.hpp> 19 #include <boost/parameter/keyword.hpp> 20 #include <boost/mpl/placeholders.hpp> 21 #include <boost/accumulators/accumulators_fwd.hpp> 22 #include <boost/accumulators/framework/extractor.hpp> 23 #include <boost/accumulators/numeric/functional.hpp> 24 #include <boost/accumulators/framework/parameters/sample.hpp> 25 #include <boost/accumulators/framework/depends_on.hpp> 26 #include <boost/accumulators/statistics_fwd.hpp> 27 #include <boost/accumulators/statistics/count.hpp> 28 #include <boost/accumulators/statistics/times2_iterator.hpp> 29 #include <boost/serialization/vector.hpp> 30 31 namespace boost { namespace accumulators 32 { 33 /////////////////////////////////////////////////////////////////////////////// 34 // probabilities named parameter 35 // 36 BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities) 37 38 BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_probabilities) 39 40 namespace impl 41 { 42 /////////////////////////////////////////////////////////////////////////////// 43 // extended_p_square_impl 44 // multiple quantile estimation 45 /** 46 @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm 47 48 Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples. 49 Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. 50 Instead of storing the whole sample cumulative distribution, the algorithm maintains only 51 \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated 52 with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic 53 formula. The heights of these central markers are the current estimates of the quantiles 54 and returned as an iterator range. 55 56 For further details, see 57 58 K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49, 59 Number 4 (October), 1986, p. 159-164. 60 61 The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of 62 63 R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and 64 histograms without storing observations, Communications of the ACM, 65 Volume 28 (October), Number 10, 1985, p. 1076-1085. 66 67 @param extended_p_square_probabilities A vector of quantile probabilities. 68 */ 69 template<typename Sample> 70 struct extended_p_square_impl 71 : accumulator_base 72 { 73 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 74 typedef std::vector<float_type> array_type; 75 // for boost::result_of 76 typedef iterator_range< 77 detail::lvalue_index_iterator< 78 permutation_iterator< 79 typename array_type::const_iterator 80 , detail::times2_iterator 81 > 82 > 83 > result_type; 84 85 template<typename Args> extended_p_square_implboost::accumulators::impl::extended_p_square_impl86 extended_p_square_impl(Args const &args) 87 : probabilities( 88 boost::begin(args[extended_p_square_probabilities]) 89 , boost::end(args[extended_p_square_probabilities]) 90 ) 91 , heights(2 * probabilities.size() + 3) 92 , actual_positions(heights.size()) 93 , desired_positions(heights.size()) 94 , positions_increments(heights.size()) 95 { 96 std::size_t num_quantiles = this->probabilities.size(); 97 std::size_t num_markers = this->heights.size(); 98 99 for(std::size_t i = 0; i < num_markers; ++i) 100 { 101 this->actual_positions[i] = i + 1; 102 } 103 104 this->positions_increments[0] = 0.; 105 this->positions_increments[num_markers - 1] = 1.; 106 107 for(std::size_t i = 0; i < num_quantiles; ++i) 108 { 109 this->positions_increments[2 * i + 2] = probabilities[i]; 110 } 111 112 for(std::size_t i = 0; i <= num_quantiles; ++i) 113 { 114 this->positions_increments[2 * i + 1] = 115 0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]); 116 } 117 118 for(std::size_t i = 0; i < num_markers; ++i) 119 { 120 this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i]; 121 } 122 } 123 124 template<typename Args> operator ()boost::accumulators::impl::extended_p_square_impl125 void operator ()(Args const &args) 126 { 127 std::size_t cnt = count(args); 128 129 // m+2 principal markers and m+1 middle markers 130 std::size_t num_markers = 2 * this->probabilities.size() + 3; 131 132 // first accumulate num_markers samples 133 if(cnt <= num_markers) 134 { 135 this->heights[cnt - 1] = args[sample]; 136 137 // complete the initialization of heights by sorting 138 if(cnt == num_markers) 139 { 140 std::sort(this->heights.begin(), this->heights.end()); 141 } 142 } 143 else 144 { 145 std::size_t sample_cell = 1; 146 147 // find cell k = sample_cell such that heights[k-1] <= sample < heights[k] 148 if(args[sample] < this->heights[0]) 149 { 150 this->heights[0] = args[sample]; 151 sample_cell = 1; 152 } 153 else if(args[sample] >= this->heights[num_markers - 1]) 154 { 155 this->heights[num_markers - 1] = args[sample]; 156 sample_cell = num_markers - 1; 157 } 158 else 159 { 160 typedef typename array_type::iterator iterator; 161 iterator it = std::upper_bound( 162 this->heights.begin() 163 , this->heights.end() 164 , args[sample] 165 ); 166 167 sample_cell = std::distance(this->heights.begin(), it); 168 } 169 170 // update actual positions of all markers above sample_cell index 171 for(std::size_t i = sample_cell; i < num_markers; ++i) 172 { 173 ++this->actual_positions[i]; 174 } 175 176 // update desired positions of all markers 177 for(std::size_t i = 0; i < num_markers; ++i) 178 { 179 this->desired_positions[i] += this->positions_increments[i]; 180 } 181 182 // adjust heights and actual positions of markers 1 to num_markers-2 if necessary 183 for(std::size_t i = 1; i <= num_markers - 2; ++i) 184 { 185 // offset to desired position 186 float_type d = this->desired_positions[i] - this->actual_positions[i]; 187 188 // offset to next position 189 float_type dp = this->actual_positions[i+1] - this->actual_positions[i]; 190 191 // offset to previous position 192 float_type dm = this->actual_positions[i-1] - this->actual_positions[i]; 193 194 // height ds 195 float_type hp = (this->heights[i+1] - this->heights[i]) / dp; 196 float_type hm = (this->heights[i-1] - this->heights[i]) / dm; 197 198 if((d >= 1 && dp > 1) || (d <= -1 && dm < -1)) 199 { 200 short sign_d = static_cast<short>(d / std::abs(d)); 201 202 float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp 203 + (dp - sign_d) * hm); 204 205 // try adjusting heights[i] using p-squared formula 206 if(this->heights[i - 1] < h && h < this->heights[i + 1]) 207 { 208 this->heights[i] = h; 209 } 210 else 211 { 212 // use linear formula 213 if(d > 0) 214 { 215 this->heights[i] += hp; 216 } 217 if(d < 0) 218 { 219 this->heights[i] -= hm; 220 } 221 } 222 this->actual_positions[i] += sign_d; 223 } 224 } 225 } 226 } 227 resultboost::accumulators::impl::extended_p_square_impl228 result_type result(dont_care) const 229 { 230 // for i in [1,probabilities.size()], return heights[i * 2] 231 detail::times2_iterator idx_begin = detail::make_times2_iterator(1); 232 detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1); 233 234 return result_type( 235 make_permutation_iterator(this->heights.begin(), idx_begin) 236 , make_permutation_iterator(this->heights.begin(), idx_end) 237 ); 238 } 239 240 public: 241 // make this accumulator serializeable 242 // TODO: do we need to split to load/save and verify that the parameters did not change? 243 template<class Archive> serializeboost::accumulators::impl::extended_p_square_impl244 void serialize(Archive & ar, const unsigned int file_version) 245 { 246 ar & probabilities; 247 ar & heights; 248 ar & actual_positions; 249 ar & desired_positions; 250 ar & positions_increments; 251 } 252 253 private: 254 array_type probabilities; // the quantile probabilities 255 array_type heights; // q_i 256 array_type actual_positions; // n_i 257 array_type desired_positions; // d_i 258 array_type positions_increments; // f_i 259 }; 260 261 } // namespace impl 262 263 /////////////////////////////////////////////////////////////////////////////// 264 // tag::extended_p_square 265 // 266 namespace tag 267 { 268 struct extended_p_square 269 : depends_on<count> 270 , extended_p_square_probabilities 271 { 272 typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl; 273 274 #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED 275 /// tag::extended_p_square::probabilities named parameter 276 static boost::parameter::keyword<tag::probabilities> const probabilities; 277 #endif 278 }; 279 } 280 281 /////////////////////////////////////////////////////////////////////////////// 282 // extract::extended_p_square 283 // 284 namespace extract 285 { 286 extractor<tag::extended_p_square> const extended_p_square = {}; 287 288 BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square) 289 } 290 291 using extract::extended_p_square; 292 293 // So that extended_p_square can be automatically substituted with 294 // weighted_extended_p_square when the weight parameter is non-void 295 template<> 296 struct as_weighted_feature<tag::extended_p_square> 297 { 298 typedef tag::weighted_extended_p_square type; 299 }; 300 301 template<> 302 struct feature_of<tag::weighted_extended_p_square> 303 : feature_of<tag::extended_p_square> 304 { 305 }; 306 307 }} // namespace boost::accumulators 308 309 #endif 310