• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////////////
2 // extended_p_square.hpp
3 //
4 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
10 
11 #include <vector>
12 #include <functional>
13 #include <boost/range/begin.hpp>
14 #include <boost/range/end.hpp>
15 #include <boost/range/iterator_range.hpp>
16 #include <boost/iterator/transform_iterator.hpp>
17 #include <boost/iterator/counting_iterator.hpp>
18 #include <boost/iterator/permutation_iterator.hpp>
19 #include <boost/parameter/keyword.hpp>
20 #include <boost/mpl/placeholders.hpp>
21 #include <boost/accumulators/accumulators_fwd.hpp>
22 #include <boost/accumulators/framework/extractor.hpp>
23 #include <boost/accumulators/numeric/functional.hpp>
24 #include <boost/accumulators/framework/parameters/sample.hpp>
25 #include <boost/accumulators/framework/depends_on.hpp>
26 #include <boost/accumulators/statistics_fwd.hpp>
27 #include <boost/accumulators/statistics/count.hpp>
28 #include <boost/accumulators/statistics/times2_iterator.hpp>
29 #include <boost/serialization/vector.hpp>
30 
31 namespace boost { namespace accumulators
32 {
33 ///////////////////////////////////////////////////////////////////////////////
34 // probabilities named parameter
35 //
36 BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities)
37 
38 BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_probabilities)
39 
40 namespace impl
41 {
42     ///////////////////////////////////////////////////////////////////////////////
43     // extended_p_square_impl
44     //  multiple quantile estimation
45     /**
46         @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm
47 
48         Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples.
49         Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated.
50         Instead of storing the whole sample cumulative distribution, the algorithm maintains only
51         \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated
52         with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic
53         formula. The heights of these central markers are the current estimates of the quantiles
54         and returned as an iterator range.
55 
56         For further details, see
57 
58         K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
59         Number 4 (October), 1986, p. 159-164.
60 
61         The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
62 
63         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
64         histograms without storing observations, Communications of the ACM,
65         Volume 28 (October), Number 10, 1985, p. 1076-1085.
66 
67         @param extended_p_square_probabilities A vector of quantile probabilities.
68     */
69     template<typename Sample>
70     struct extended_p_square_impl
71       : accumulator_base
72     {
73         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
74         typedef std::vector<float_type> array_type;
75         // for boost::result_of
76         typedef iterator_range<
77             detail::lvalue_index_iterator<
78                 permutation_iterator<
79                     typename array_type::const_iterator
80                   , detail::times2_iterator
81                 >
82             >
83         > result_type;
84 
85         template<typename Args>
extended_p_square_implboost::accumulators::impl::extended_p_square_impl86         extended_p_square_impl(Args const &args)
87           : probabilities(
88                 boost::begin(args[extended_p_square_probabilities])
89               , boost::end(args[extended_p_square_probabilities])
90             )
91           , heights(2 * probabilities.size() + 3)
92           , actual_positions(heights.size())
93           , desired_positions(heights.size())
94           , positions_increments(heights.size())
95         {
96             std::size_t num_quantiles = this->probabilities.size();
97             std::size_t num_markers = this->heights.size();
98 
99             for(std::size_t i = 0; i < num_markers; ++i)
100             {
101                 this->actual_positions[i] = i + 1;
102             }
103 
104             this->positions_increments[0] = 0.;
105             this->positions_increments[num_markers - 1] = 1.;
106 
107             for(std::size_t i = 0; i < num_quantiles; ++i)
108             {
109                 this->positions_increments[2 * i + 2] = probabilities[i];
110             }
111 
112             for(std::size_t i = 0; i <= num_quantiles; ++i)
113             {
114                 this->positions_increments[2 * i + 1] =
115                     0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]);
116             }
117 
118             for(std::size_t i = 0; i < num_markers; ++i)
119             {
120                 this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i];
121             }
122         }
123 
124         template<typename Args>
operator ()boost::accumulators::impl::extended_p_square_impl125         void operator ()(Args const &args)
126         {
127             std::size_t cnt = count(args);
128 
129             // m+2 principal markers and m+1 middle markers
130             std::size_t num_markers = 2 * this->probabilities.size() + 3;
131 
132             // first accumulate num_markers samples
133             if(cnt <= num_markers)
134             {
135                 this->heights[cnt - 1] = args[sample];
136 
137                 // complete the initialization of heights by sorting
138                 if(cnt == num_markers)
139                 {
140                     std::sort(this->heights.begin(), this->heights.end());
141                 }
142             }
143             else
144             {
145                 std::size_t sample_cell = 1;
146 
147                 // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
148                 if(args[sample] < this->heights[0])
149                 {
150                     this->heights[0] = args[sample];
151                     sample_cell = 1;
152                 }
153                 else if(args[sample] >= this->heights[num_markers - 1])
154                 {
155                     this->heights[num_markers - 1] = args[sample];
156                     sample_cell = num_markers - 1;
157                 }
158                 else
159                 {
160                     typedef typename array_type::iterator iterator;
161                     iterator it = std::upper_bound(
162                         this->heights.begin()
163                       , this->heights.end()
164                       , args[sample]
165                     );
166 
167                     sample_cell = std::distance(this->heights.begin(), it);
168                 }
169 
170                 // update actual positions of all markers above sample_cell index
171                 for(std::size_t i = sample_cell; i < num_markers; ++i)
172                 {
173                     ++this->actual_positions[i];
174                 }
175 
176                 // update desired positions of all markers
177                 for(std::size_t i = 0; i < num_markers; ++i)
178                 {
179                     this->desired_positions[i] += this->positions_increments[i];
180                 }
181 
182                 // adjust heights and actual positions of markers 1 to num_markers-2 if necessary
183                 for(std::size_t i = 1; i <= num_markers - 2; ++i)
184                 {
185                     // offset to desired position
186                     float_type d = this->desired_positions[i] - this->actual_positions[i];
187 
188                     // offset to next position
189                     float_type dp = this->actual_positions[i+1] - this->actual_positions[i];
190 
191                     // offset to previous position
192                     float_type dm = this->actual_positions[i-1] - this->actual_positions[i];
193 
194                     // height ds
195                     float_type hp = (this->heights[i+1] - this->heights[i]) / dp;
196                     float_type hm = (this->heights[i-1] - this->heights[i]) / dm;
197 
198                     if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
199                     {
200                         short sign_d = static_cast<short>(d / std::abs(d));
201 
202                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp
203                                      + (dp - sign_d) * hm);
204 
205                         // try adjusting heights[i] using p-squared formula
206                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
207                         {
208                             this->heights[i] = h;
209                         }
210                         else
211                         {
212                             // use linear formula
213                             if(d > 0)
214                             {
215                                 this->heights[i] += hp;
216                             }
217                             if(d < 0)
218                             {
219                                 this->heights[i] -= hm;
220                             }
221                         }
222                         this->actual_positions[i] += sign_d;
223                     }
224                 }
225             }
226         }
227 
resultboost::accumulators::impl::extended_p_square_impl228         result_type result(dont_care) const
229         {
230             // for i in [1,probabilities.size()], return heights[i * 2]
231             detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
232             detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
233 
234             return result_type(
235                 make_permutation_iterator(this->heights.begin(), idx_begin)
236               , make_permutation_iterator(this->heights.begin(), idx_end)
237             );
238         }
239 
240     public:
241         // make this accumulator serializeable
242         // TODO: do we need to split to load/save and verify that the parameters did not change?
243         template<class Archive>
serializeboost::accumulators::impl::extended_p_square_impl244         void serialize(Archive & ar, const unsigned int file_version)
245         {
246             ar & probabilities;
247             ar & heights;
248             ar & actual_positions;
249             ar & desired_positions;
250             ar & positions_increments;
251         }
252 
253     private:
254         array_type probabilities;         // the quantile probabilities
255         array_type heights;               // q_i
256         array_type actual_positions;      // n_i
257         array_type desired_positions;     // d_i
258         array_type positions_increments;  // f_i
259     };
260 
261 } // namespace impl
262 
263 ///////////////////////////////////////////////////////////////////////////////
264 // tag::extended_p_square
265 //
266 namespace tag
267 {
268     struct extended_p_square
269       : depends_on<count>
270       , extended_p_square_probabilities
271     {
272         typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl;
273 
274         #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
275         /// tag::extended_p_square::probabilities named parameter
276         static boost::parameter::keyword<tag::probabilities> const probabilities;
277         #endif
278     };
279 }
280 
281 ///////////////////////////////////////////////////////////////////////////////
282 // extract::extended_p_square
283 //
284 namespace extract
285 {
286     extractor<tag::extended_p_square> const extended_p_square = {};
287 
288     BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square)
289 }
290 
291 using extract::extended_p_square;
292 
293 // So that extended_p_square can be automatically substituted with
294 // weighted_extended_p_square when the weight parameter is non-void
295 template<>
296 struct as_weighted_feature<tag::extended_p_square>
297 {
298     typedef tag::weighted_extended_p_square type;
299 };
300 
301 template<>
302 struct feature_of<tag::weighted_extended_p_square>
303   : feature_of<tag::extended_p_square>
304 {
305 };
306 
307 }} // namespace boost::accumulators
308 
309 #endif
310