• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<html>
2<head>
3<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
4<title>Univariate Statistics</title>
5<link rel="stylesheet" href="../math.css" type="text/css">
6<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
7<link rel="home" href="../index.html" title="Math Toolkit 2.12.0">
8<link rel="up" href="../statistics.html" title="Chapter 6. Statistics">
9<link rel="prev" href="../statistics.html" title="Chapter 6. Statistics">
10<link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics">
11</head>
12<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
13<table cellpadding="2" width="100%"><tr>
14<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td>
15<td align="center"><a href="../../../../../index.html">Home</a></td>
16<td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td>
17<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
18<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
19<td align="center"><a href="../../../../../more/index.htm">More</a></td>
20</tr></table>
21<hr>
22<div class="spirit-nav">
23<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
24</div>
25<div class="section">
26<div class="titlepage"><div><div><h2 class="title" style="clear: both">
27<a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a>
28</h2></div></div></div>
29<h4>
30<a name="math_toolkit.univariate_statistics.h0"></a>
31      <span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a>
32    </h4>
33<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
34
35<span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span>
36
37    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
38    <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
39
40    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
41    <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
42
43    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
44    <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
45
46    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
47    <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
48
49    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
50    <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
51
52    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
53    <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
54
55    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
56    <span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
57
58    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
59    <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
60
61    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
62    <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
63
64    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
65    <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
66
67    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
68    <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
69
70    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
71    <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
72
73    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
74    <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
75
76    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
77    <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
78
79    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
80    <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
81
82    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
83    <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
84
85    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
86    <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
87
88    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">&gt;</span>
89    <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special">&lt;</span><span class="identifier">RandomAccessIterator</span><span class="special">&gt;::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
90
91    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">&gt;</span>
92    <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
93
94    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">&gt;</span>
95    <span class="keyword">auto</span> <span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">);</span>
96
97    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">&gt;</span>
98    <span class="keyword">auto</span> <span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">);</span>
99
100    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
101    <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
102
103    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
104    <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
105
106    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
107    <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
108
109    <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
110    <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
111
112<span class="special">}}}</span>
113</pre>
114<h4>
115<a name="math_toolkit.univariate_statistics.h1"></a>
116      <span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a>
117    </h4>
118<p>
119      The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a
120      set of facilities for computing scalar values from vectors.
121    </p>
122<p>
123      Many of these functionals have trivial naive implementations, but experienced
124      programmers will recognize that even trivial algorithms are easy to screw up,
125      and that numerical instabilities often lurk in corner cases. We have attempted
126      to do our "due diligence" to root out these problems—scouring the
127      literature for numerically stable algorithms for even the simplest of functionals.
128    </p>
129<p>
130      <span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in
131      <a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost
132      Accumulators Framework</a>. These accumulators should be used in real-time
133      applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should
134      be used when CPU vectorization is needed. Remember that to actually
135      <span class="emphasis"><em>get</em></span> vectorization, compile with <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span>
136      <span class="special">-</span><span class="identifier">O3</span></code>
137      flags.
138    </p>
139<p>
140      We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span></code>
141      to hold the data, but this is not required. In general, you can store your data
142      in an Eigen array, an Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>,
143      and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>.
144      These routines are usable in float, double, long double, and Boost.Multiprecision
145      precision, as well as their complex extensions whenever the computation is
146      well-defined. For certain operations (total variation, for example) integer
147      inputs are supported.
148    </p>
149<h4>
150<a name="math_toolkit.univariate_statistics.h2"></a>
151      <span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a>
152    </h4>
153<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
154<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
155<span class="comment">// Alternative syntax if you want to use entire container:</span>
156<span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
157</pre>
158<p>
159      The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
160      1.6a</a>. The data is not modified and must be forward iterable. Works
161      with real and integer data. If the input is an integer type, the output is
162      a double precision float.
163    </p>
164<h4>
165<a name="math_toolkit.univariate_statistics.h3"></a>
166      <span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a>
167    </h4>
168<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
169<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
170</pre>
171<p>
172      If you don't need to calculate on a subset of the input, then the range call
173      is more terse:
174    </p>
175<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
176<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
177</pre>
178<p>
179      The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
180      1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span>
181      <span class="identifier">last</span><span class="special">)</span></code>
182      must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general
183      sensible to pass complex numbers to this routine. If integers are passed as
184      input, then the output is a double precision float.
185    </p>
186<p>
187      <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code>
188      returns the population variance. If you want a sample variance, use
189    </p>
190<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
191<span class="keyword">double</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
192</pre>
193<h4>
194<a name="math_toolkit.univariate_statistics.h4"></a>
195      <span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a>
196    </h4>
197<p>
198      Computes the skewness of a dataset:
199    </p>
200<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
201<span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
202<span class="comment">// skewness = 0.</span>
203</pre>
204<p>
205      The input vector is not modified, and the routine works with integral and real data. If the
206      input data is integral, the output is a double precision float.
207    </p>
208<p>
209      For a dataset consisting of a single constant value, we take the skewness to
210      be zero by definition.
211    </p>
212<p>
213      The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
214    </p>
215<h4>
216<a name="math_toolkit.univariate_statistics.h5"></a>
217      <span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a>
218    </h4>
219<p>
220      Computes the kurtosis of a dataset:
221    </p>
222<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
223<span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
224<span class="comment">// kurtosis = 17/10</span>
225</pre>
226<p>
227      The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
228      The input data must be forward iterable and must consist of real or integral
229      values. If the input data is integral, the output is a double precision float.
230      Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require
231      the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts
232      3 from the kurtosis, but it makes eminently clear our definition of kurtosis.
233    </p>
234<h4>
235<a name="math_toolkit.univariate_statistics.h6"></a>
236      <span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First
237      four moments</a>
238    </h4>
239<p>
240      Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central
241      moments</a> in a single pass through the data:
242    </p>
243<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
244<span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
245</pre>
246<h4>
247<a name="math_toolkit.univariate_statistics.h7"></a>
248      <span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a>
249    </h4>
250<p>
251      Computes the median of a dataset:
252    </p>
253<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
254<span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
255</pre>
256<p>
257      <span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation
258      of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements
259      of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation.
260      In particular, the container must allow random access.
261    </p>
262<h4>
263<a name="math_toolkit.univariate_statistics.h8"></a>
264      <span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median
265      Absolute Deviation</a>
266    </h4>
267<p>
268      Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median
269      absolute deviation</a> of a dataset:
270    </p>
271<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
272<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
273</pre>
274<p>
275      By default, the deviation from the median is used. If you have some prior that
276      the median is zero, or wish to compute the median absolute deviation from the
277      mean, use the following:
278    </p>
279<pre class="programlisting"><span class="comment">// prior is that center is zero:</span>
280<span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span>
281<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span>
282
283<span class="comment">// compute median absolute deviation from the mean:</span>
284<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
285<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span>
286</pre>
287<p>
288      <span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again, the vector
289      is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>.
290    </p>
291<h4>
292<a name="math_toolkit.univariate_statistics.h9"></a>
293      <span class="phrase"><a name="math_toolkit.univariate_statistics.interquartile_range"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.interquartile_range">Interquartile
294      Range</a>
295    </h4>
296<p>
297      Computes the <a href="https://en.wikipedia.org/wiki/Interquartile_range" target="_top">interquartile
298      range</a> of a dataset:
299    </p>
300<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
301<span class="keyword">double</span> <span class="identifier">iqr</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">interquartile_range</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
302<span class="comment">// Q1 = 1.5, Q3 = 4.5 =&gt; iqr = 3</span>
303</pre>
304<p>
305      For a vector of length <span class="emphasis"><em>2n+1</em></span> or <span class="emphasis"><em>2n</em></span>,
306      the first quartile <span class="emphasis"><em>Q</em></span><sub>1</sub> is the median of the <span class="emphasis"><em>n</em></span>
307      smallest values, and the third quartile <span class="emphasis"><em>Q</em></span><sub>3</sub> is the median
308      of the <span class="emphasis"><em>n</em></span> largest values. The interquartile range is then
309      <span class="emphasis"><em>Q</em></span><sub>3</sub> - <span class="emphasis"><em>Q</em></span><sub>1</sub>. The function <code class="computeroutput"><span class="identifier">interquartile_range</span></code>, like the <code class="computeroutput"><span class="identifier">median</span></code>, calls into <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code>,
310      and hence partially sorts the data.
311    </p>
312<h4>
313<a name="math_toolkit.univariate_statistics.h10"></a>
314      <span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini
315      Coefficient</a>
316    </h4>
317<p>
318      Computes the Gini coefficient of a dataset:
319    </p>
320<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span>
321<span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
322<span class="comment">// gini = 3/4</span>
323<span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
324<span class="comment">// s_gini = 1.</span>
325<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span>
326<span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
327<span class="comment">// gini = 0, as all elements are now equal.</span>
328</pre>
329<p>
330      <span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it
331      is sorted. The function makes a call to <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and
332      as such requires random access iterators.
333    </p>
334<p>
335      The sample Gini coefficient lies in the range [0, 1], whereas the population
336      Gini coefficient is in the range [0, 1 - 1/<span class="emphasis"><em>n</em></span>], where <span class="emphasis"><em>n</em></span> is the number of elements in the dataset.
337    </p>
338<p>
339      <span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative
340      values to the Gini coefficient function. However, a use case (measuring wealth
341      inequality when some people have negative wealth) exists, so we do not throw
342      an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span>
343      good cause to pass negative values to the Gini coefficient calculator. Another
344      use case is found in signal processing, but the sorting is by magnitude and
345      hence has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code>
346      for details.
347    </p>
348<h4>
349<a name="math_toolkit.univariate_statistics.h11"></a>
350      <span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a>
351    </h4>
352<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">
353<li class="listitem">
354          Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span>
355          Vol. 80. SIAM, 2002.
356        </li>
357<li class="listitem">
358          Philippe P. Pébay: <span class="quote">“<span class="quote">Formulas for Robust, One-Pass Parallel Computation
359          of Covariances and Arbitrary-Order Statistical Moments.</span>”</span> Technical
360          Report SAND2008-6212, Sandia National Laboratories, September 2008.
361        </li>
362</ul></div>
363</div>
364<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
365<td align="left"></td>
366<td align="right"><div class="copyright-footer">Copyright © 2006-2019 Nikhar
367      Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos,
368      Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan
369      Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg,
370      Daryle Walker and Xiaogang Zhang<p>
371        Distributed under the Boost Software License, Version 1.0. (See accompanying
372        file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
373      </p>
374</div></td>
375</tr></table>
376<hr>
377<div class="spirit-nav">
378<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
379</div>
380</body>
381</html>
382