• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////////////
2 // p_square_quantile.hpp
3 //
4 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
10 
11 #include <cmath>
12 #include <functional>
13 #include <boost/array.hpp>
14 #include <boost/mpl/placeholders.hpp>
15 #include <boost/type_traits/is_same.hpp>
16 #include <boost/parameter/keyword.hpp>
17 #include <boost/accumulators/framework/accumulator_base.hpp>
18 #include <boost/accumulators/framework/extractor.hpp>
19 #include <boost/accumulators/numeric/functional.hpp>
20 #include <boost/accumulators/framework/parameters/sample.hpp>
21 #include <boost/accumulators/framework/depends_on.hpp>
22 #include <boost/accumulators/statistics_fwd.hpp>
23 #include <boost/accumulators/statistics/count.hpp>
24 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
25 #include <boost/serialization/boost_array.hpp>
26 
27 namespace boost { namespace accumulators
28 {
29 
30 namespace impl
31 {
32     ///////////////////////////////////////////////////////////////////////////////
33     // p_square_quantile_impl
34     //  single quantile estimation
35     /**
36         @brief Single quantile estimation with the \f$P^2\f$ algorithm
37 
38         The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of
39         storing the whole sample cumulative distribution, only five points (markers) are stored. The heights
40         of these markers are the minimum and the maximum of the samples and the current estimates of the
41         \f$(p/2)\f$-, \f$p\f$- and \f$(1+p)/2\f$-quantiles. Their positions are equal to the number
42         of samples that are smaller or equal to the markers. Each time a new samples is recorded, the
43         positions of the markers are updated and if necessary their heights are adjusted using a piecewise-
44         parabolic formula.
45 
46         For further details, see
47 
48         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
49         histograms without storing observations, Communications of the ACM,
50         Volume 28 (October), Number 10, 1985, p. 1076-1085.
51 
52         @param quantile_probability
53     */
54     template<typename Sample, typename Impl>
55     struct p_square_quantile_impl
56       : accumulator_base
57     {
58         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
59         typedef array<float_type, 5> array_type;
60         // for boost::result_of
61         typedef float_type result_type;
62 
63         template<typename Args>
p_square_quantile_implboost::accumulators::impl::p_square_quantile_impl64         p_square_quantile_impl(Args const &args)
65           : p(is_same<Impl, for_median>::value ? float_type(0.5) : args[quantile_probability | float_type(0.5)])
66           , heights()
67           , actual_positions()
68           , desired_positions()
69           , positions_increments()
70         {
71             for(std::size_t i = 0; i < 5; ++i)
72             {
73                 this->actual_positions[i] = i + float_type(1.);
74             }
75 
76             this->desired_positions[0] = float_type(1.);
77             this->desired_positions[1] = float_type(1.) + float_type(2.) * this->p;
78             this->desired_positions[2] = float_type(1.) + float_type(4.) * this->p;
79             this->desired_positions[3] = float_type(3.) + float_type(2.) * this->p;
80             this->desired_positions[4] = float_type(5.);
81 
82 
83             this->positions_increments[0] = float_type(0.);
84             this->positions_increments[1] = this->p / float_type(2.);
85             this->positions_increments[2] = this->p;
86             this->positions_increments[3] = (float_type(1.) + this->p) / float_type(2.);
87             this->positions_increments[4] = float_type(1.);
88         }
89 
90         template<typename Args>
operator ()boost::accumulators::impl::p_square_quantile_impl91         void operator ()(Args const &args)
92         {
93             std::size_t cnt = count(args);
94 
95             // accumulate 5 first samples
96             if(cnt <= 5)
97             {
98                 this->heights[cnt - 1] = args[sample];
99 
100                 // complete the initialization of heights by sorting
101                 if(cnt == 5)
102                 {
103                     std::sort(this->heights.begin(), this->heights.end());
104                 }
105             }
106             else
107             {
108                 std::size_t sample_cell = 1; // k
109 
110                 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
111                 if (args[sample] < this->heights[0])
112                 {
113                     this->heights[0] = args[sample];
114                     sample_cell = 1;
115                 }
116                 else if (this->heights[4] <= args[sample])
117                 {
118                     this->heights[4] = args[sample];
119                     sample_cell = 4;
120                 }
121                 else
122                 {
123                     typedef typename array_type::iterator iterator;
124                     iterator it = std::upper_bound(
125                         this->heights.begin()
126                       , this->heights.end()
127                       , args[sample]
128                     );
129 
130                     sample_cell = std::distance(this->heights.begin(), it);
131                 }
132 
133                 // update positions of markers above sample_cell
134                 for(std::size_t i = sample_cell; i < 5; ++i)
135                 {
136                     ++this->actual_positions[i];
137                 }
138 
139                 // update desired positions of all markers
140                 for(std::size_t i = 0; i < 5; ++i)
141                 {
142                     this->desired_positions[i] += this->positions_increments[i];
143                 }
144 
145                 // adjust heights and actual positions of markers 1 to 3 if necessary
146                 for(std::size_t i = 1; i <= 3; ++i)
147                 {
148                     // offset to desired positions
149                     float_type d = this->desired_positions[i] - this->actual_positions[i];
150 
151                     // offset to next position
152                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
153 
154                     // offset to previous position
155                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
156 
157                     // height ds
158                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
159                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
160 
161                     if((d >= float_type(1.) && dp > float_type(1.)) || (d <= float_type(-1.) && dm < float_type(-1.)))
162                     {
163                         short sign_d = static_cast<short>(d / std::abs(d));
164 
165                         // try adjusting heights[i] using p-squared formula
166                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm) * hp
167                                      + (dp - sign_d) * hm);
168 
169                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
170                         {
171                             this->heights[i] = h;
172                         }
173                         else
174                         {
175                             // use linear formula
176                             if(d > float_type(0))
177                             {
178                                 this->heights[i] += hp;
179                             }
180                             if(d < float_type(0))
181                             {
182                                 this->heights[i] -= hm;
183                             }
184                         }
185                         this->actual_positions[i] += sign_d;
186                     }
187                 }
188             }
189         }
190 
resultboost::accumulators::impl::p_square_quantile_impl191         result_type result(dont_care) const
192         {
193             return this->heights[2];
194         }
195 
196         // make this accumulator serializeable
197         // TODO: do we need to split to load/save and verify that P did not change?
198         template<class Archive>
serializeboost::accumulators::impl::p_square_quantile_impl199         void serialize(Archive & ar, const unsigned int file_version)
200         {
201             ar & p;
202             ar & heights;
203             ar & actual_positions;
204             ar & desired_positions;
205             ar & positions_increments;
206         }
207 
208     private:
209         float_type p;                    // the quantile probability p
210         array_type heights;              // q_i
211         array_type actual_positions;     // n_i
212         array_type desired_positions;    // n'_i
213         array_type positions_increments; // dn'_i
214     };
215 
216 } // namespace detail
217 
218 ///////////////////////////////////////////////////////////////////////////////
219 // tag::p_square_quantile
220 //
221 namespace tag
222 {
223     struct p_square_quantile
224       : depends_on<count>
225     {
226         /// INTERNAL ONLY
227         ///
228         typedef accumulators::impl::p_square_quantile_impl<mpl::_1, regular> impl;
229     };
230     struct p_square_quantile_for_median
231       : depends_on<count>
232     {
233         /// INTERNAL ONLY
234         ///
235         typedef accumulators::impl::p_square_quantile_impl<mpl::_1, for_median> impl;
236     };
237 }
238 
239 ///////////////////////////////////////////////////////////////////////////////
240 // extract::p_square_quantile
241 // extract::p_square_quantile_for_median
242 //
243 namespace extract
244 {
245     extractor<tag::p_square_quantile> const p_square_quantile = {};
246     extractor<tag::p_square_quantile_for_median> const p_square_quantile_for_median = {};
247 
248     BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile)
249     BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_median)
250 }
251 
252 using extract::p_square_quantile;
253 using extract::p_square_quantile_for_median;
254 
255 // So that p_square_quantile can be automatically substituted with
256 // weighted_p_square_quantile when the weight parameter is non-void
257 template<>
258 struct as_weighted_feature<tag::p_square_quantile>
259 {
260     typedef tag::weighted_p_square_quantile type;
261 };
262 
263 template<>
264 struct feature_of<tag::weighted_p_square_quantile>
265   : feature_of<tag::p_square_quantile>
266 {
267 };
268 
269 }} // namespace boost::accumulators
270 
271 #endif
272